selftune 0.2.31 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +83 -56
  2. package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
  3. package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/command-surface.ts +613 -2
  7. package/cli/selftune/create/baseline.ts +429 -0
  8. package/cli/selftune/create/check.ts +35 -0
  9. package/cli/selftune/create/init.ts +115 -0
  10. package/cli/selftune/create/package-candidate-state.ts +771 -0
  11. package/cli/selftune/create/package-evaluator.ts +710 -0
  12. package/cli/selftune/create/package-fingerprint.ts +142 -0
  13. package/cli/selftune/create/package-search.ts +377 -0
  14. package/cli/selftune/create/publish.ts +431 -0
  15. package/cli/selftune/create/readiness.ts +495 -0
  16. package/cli/selftune/create/replay.ts +330 -0
  17. package/cli/selftune/create/report.ts +74 -0
  18. package/cli/selftune/create/scaffold.ts +121 -0
  19. package/cli/selftune/create/skills-ref-adapter.ts +177 -0
  20. package/cli/selftune/create/status.ts +33 -0
  21. package/cli/selftune/create/templates.ts +249 -0
  22. package/cli/selftune/cron/setup.ts +1 -1
  23. package/cli/selftune/dashboard-action-events.ts +4 -1
  24. package/cli/selftune/dashboard-action-result.ts +789 -24
  25. package/cli/selftune/dashboard-action-stream.ts +80 -0
  26. package/cli/selftune/dashboard-contract.ts +146 -3
  27. package/cli/selftune/dashboard-server.ts +5 -4
  28. package/cli/selftune/eval/hooks-to-evals.ts +58 -35
  29. package/cli/selftune/eval/synthetic-evals.ts +145 -17
  30. package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
  31. package/cli/selftune/evolution/evolve-body.ts +9 -36
  32. package/cli/selftune/evolution/evolve.ts +8 -72
  33. package/cli/selftune/evolution/stopping-criteria.ts +5 -13
  34. package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
  35. package/cli/selftune/evolution/validate-host-replay.ts +115 -15
  36. package/cli/selftune/improve.ts +206 -0
  37. package/cli/selftune/index.ts +123 -6
  38. package/cli/selftune/init.ts +1 -1
  39. package/cli/selftune/localdb/queries/dashboard.ts +30 -0
  40. package/cli/selftune/localdb/schema.ts +52 -0
  41. package/cli/selftune/monitoring/watch.ts +257 -23
  42. package/cli/selftune/orchestrate/execute.ts +300 -1
  43. package/cli/selftune/orchestrate/finalize.ts +14 -0
  44. package/cli/selftune/orchestrate/plan.ts +22 -5
  45. package/cli/selftune/orchestrate/prepare.ts +59 -4
  46. package/cli/selftune/orchestrate/report.ts +1 -1
  47. package/cli/selftune/orchestrate.ts +34 -1
  48. package/cli/selftune/publish.ts +35 -0
  49. package/cli/selftune/routes/actions.ts +81 -15
  50. package/cli/selftune/routes/overview.ts +1 -1
  51. package/cli/selftune/routes/skill-report.ts +147 -2
  52. package/cli/selftune/run.ts +18 -0
  53. package/cli/selftune/schedule.ts +3 -3
  54. package/cli/selftune/search-run.ts +703 -0
  55. package/cli/selftune/status.ts +35 -11
  56. package/cli/selftune/testing-readiness.ts +431 -40
  57. package/cli/selftune/types.ts +316 -0
  58. package/cli/selftune/utils/eval-readiness.ts +1 -0
  59. package/cli/selftune/utils/json-output.ts +11 -0
  60. package/cli/selftune/utils/lifecycle-surface.ts +48 -0
  61. package/cli/selftune/utils/query-filter.ts +82 -1
  62. package/cli/selftune/utils/tui.ts +85 -2
  63. package/cli/selftune/verify.ts +205 -0
  64. package/cli/selftune/workflows/proposals.ts +1 -1
  65. package/cli/selftune/workflows/skill-scaffold.ts +141 -63
  66. package/cli/selftune/workflows/workflows.ts +4 -4
  67. package/package.json +1 -1
  68. package/skill/SKILL.md +148 -85
  69. package/skill/references/cli-quick-reference.md +16 -1
  70. package/skill/references/creator-playbook.md +31 -10
  71. package/skill/workflows/Baseline.md +8 -9
  72. package/skill/workflows/Contributions.md +4 -4
  73. package/skill/workflows/Create.md +173 -0
  74. package/skill/workflows/CreateTestDeploy.md +34 -30
  75. package/skill/workflows/Cron.md +2 -2
  76. package/skill/workflows/Dashboard.md +3 -3
  77. package/skill/workflows/Evals.md +13 -7
  78. package/skill/workflows/Evolve.md +75 -32
  79. package/skill/workflows/EvolveBody.md +22 -15
  80. package/skill/workflows/Hook.md +1 -1
  81. package/skill/workflows/Improve.md +168 -0
  82. package/skill/workflows/Initialize.md +3 -3
  83. package/skill/workflows/Orchestrate.md +49 -12
  84. package/skill/workflows/Publish.md +100 -0
  85. package/skill/workflows/Run.md +72 -0
  86. package/skill/workflows/Schedule.md +2 -2
  87. package/skill/workflows/SearchRun.md +89 -0
  88. package/skill/workflows/SignalsDashboard.md +2 -2
  89. package/skill/workflows/UnitTest.md +13 -4
  90. package/skill/workflows/Verify.md +136 -0
  91. package/skill/workflows/Watch.md +114 -47
  92. package/skill/workflows/Workflows.md +13 -8
  93. package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +0 -15
  94. package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +0 -1
  95. package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
@@ -745,6 +745,7 @@ export interface BodyEvolutionProposal {
745
745
  export type ValidationGate = "structural" | "trigger_accuracy" | "quality";
746
746
 
747
747
  export type ValidationMode = "structural_guard" | "host_replay" | "llm_judge";
748
+ export type ReplayStagingMode = "routing" | "package";
748
749
 
749
750
  export interface RoutingReplayFixture {
750
751
  fixture_id: string;
@@ -753,6 +754,7 @@ export interface RoutingReplayFixture {
753
754
  target_skill_path: string;
754
755
  competing_skill_paths: string[];
755
756
  workspace_root?: string;
757
+ skill_staging_mode?: ReplayStagingMode;
756
758
  }
757
759
 
758
760
  export interface RoutingReplayEntryResult {
@@ -761,6 +763,30 @@ export interface RoutingReplayEntryResult {
761
763
  triggered: boolean;
762
764
  passed: boolean;
763
765
  evidence?: string;
766
+ runtime_metrics?: RuntimeReplayEntryMetrics;
767
+ }
768
+
769
+ export interface RuntimeReplayEntryMetrics {
770
+ input_tokens: number | null;
771
+ output_tokens: number | null;
772
+ cache_creation_input_tokens: number | null;
773
+ cache_read_input_tokens: number | null;
774
+ total_cost_usd: number | null;
775
+ duration_ms: number | null;
776
+ num_turns: number | null;
777
+ }
778
+
779
+ export interface RuntimeReplayAggregateMetrics {
780
+ eval_runs: number;
781
+ usage_observations: number;
782
+ total_duration_ms: number;
783
+ avg_duration_ms: number;
784
+ total_input_tokens: number | null;
785
+ total_output_tokens: number | null;
786
+ total_cache_creation_input_tokens: number | null;
787
+ total_cache_read_input_tokens: number | null;
788
+ total_cost_usd: number | null;
789
+ total_turns: number | null;
764
790
  }
765
791
 
766
792
  /** Result of validating a body evolution proposal. */
@@ -808,11 +834,193 @@ export interface BaselineResult {
808
834
  with_skill: boolean;
809
835
  triggered: boolean;
810
836
  pass: boolean;
837
+ evidence?: string;
811
838
  latency_ms?: number;
812
839
  tokens?: TokenUsageMetrics;
813
840
  measured_at: string;
814
841
  }
815
842
 
843
+ export type CreatePackageEvaluationStatus = "passed" | "replay_failed" | "baseline_failed";
844
+
845
+ export interface CreatePackageReplaySummary {
846
+ mode: ReplayStagingMode;
847
+ validation_mode: "host_replay";
848
+ agent: string;
849
+ proposal_id: string;
850
+ fixture_id: string;
851
+ total: number;
852
+ passed: number;
853
+ failed: number;
854
+ pass_rate: number;
855
+ runtime_metrics?: RuntimeReplayAggregateMetrics;
856
+ }
857
+
858
+ export interface CreatePackageBaselineSummary {
859
+ mode: ReplayStagingMode;
860
+ baseline_pass_rate: number;
861
+ with_skill_pass_rate: number;
862
+ lift: number;
863
+ adds_value: boolean;
864
+ measured_at: string;
865
+ sample_size?: number;
866
+ runtime_metrics?: {
867
+ with_skill: RuntimeReplayAggregateMetrics;
868
+ without_skill: RuntimeReplayAggregateMetrics;
869
+ };
870
+ }
871
+
872
+ export interface CreatePackageEvaluationEvidenceSample {
873
+ query: string;
874
+ evidence: string | null;
875
+ }
876
+
877
+ export interface CreatePackageEvaluationEvidenceSummary {
878
+ replay_failures: number;
879
+ baseline_wins: number;
880
+ baseline_regressions: number;
881
+ replay_failure_samples: CreatePackageEvaluationEvidenceSample[];
882
+ baseline_win_samples: CreatePackageEvaluationEvidenceSample[];
883
+ baseline_regression_samples: CreatePackageEvaluationEvidenceSample[];
884
+ }
885
+
886
+ export interface CreatePackageEvaluationEfficiencySummary {
887
+ with_skill: RuntimeReplayAggregateMetrics;
888
+ without_skill: RuntimeReplayAggregateMetrics;
889
+ }
890
+
891
+ export interface CreatePackageEvaluationWatchEfficiencyRegressionSummary {
892
+ sample_size: number;
893
+ baseline_avg_duration_ms: number | null;
894
+ observed_avg_duration_ms: number | null;
895
+ duration_delta_ratio: number | null;
896
+ baseline_avg_input_tokens: number | null;
897
+ observed_avg_input_tokens: number | null;
898
+ input_tokens_delta_ratio: number | null;
899
+ baseline_avg_output_tokens: number | null;
900
+ observed_avg_output_tokens: number | null;
901
+ output_tokens_delta_ratio: number | null;
902
+ baseline_avg_turns: number | null;
903
+ observed_avg_turns: number | null;
904
+ turns_delta_ratio: number | null;
905
+ }
906
+
907
+ export interface CreatePackageEvaluationWatchSummary {
908
+ snapshot: MonitoringSnapshot;
909
+ alert: string | null;
910
+ rolled_back: boolean;
911
+ recommendation: string;
912
+ recommended_command: string | null;
913
+ grade_alert: string | null;
914
+ grade_regression: { before: number; after: number; delta: number } | null;
915
+ efficiency_alert?: string | null;
916
+ efficiency_regression?: CreatePackageEvaluationWatchEfficiencyRegressionSummary | null;
917
+ }
918
+
919
+ export interface CreatePackageEvaluationGradingBaselineSummary {
920
+ proposal_id: string | null;
921
+ measured_at: string;
922
+ pass_rate: number;
923
+ mean_score: number | null;
924
+ sample_size: number;
925
+ }
926
+
927
+ export interface CreatePackageEvaluationGradingRecentSummary {
928
+ sample_size: number;
929
+ average_pass_rate: number | null;
930
+ average_mean_score: number | null;
931
+ newest_graded_at: string | null;
932
+ oldest_graded_at: string | null;
933
+ }
934
+
935
+ export interface CreatePackageEvaluationGradingSummary {
936
+ baseline: CreatePackageEvaluationGradingBaselineSummary | null;
937
+ recent: CreatePackageEvaluationGradingRecentSummary | null;
938
+ pass_rate_delta: number | null;
939
+ mean_score_delta: number | null;
940
+ regressed: boolean | null;
941
+ }
942
+
943
+ export interface CreatePackageEvaluationUnitTestFailureSummary {
944
+ test_id: string;
945
+ error: string | null;
946
+ failed_assertions: string[];
947
+ }
948
+
949
+ export interface CreatePackageEvaluationUnitTestSummary {
950
+ total: number;
951
+ passed: number;
952
+ failed: number;
953
+ pass_rate: number;
954
+ run_at: string;
955
+ failing_tests: CreatePackageEvaluationUnitTestFailureSummary[];
956
+ }
957
+
958
+ export interface CreatePackageBodySummary {
959
+ structural_valid: boolean;
960
+ structural_reason: string;
961
+ quality_score: number | null;
962
+ quality_reason: string | null;
963
+ quality_threshold: number;
964
+ quality_passed: boolean | null;
965
+ valid: boolean;
966
+ }
967
+
968
+ export type CreatePackageEvaluationSource = "fresh" | "artifact_cache" | "candidate_cache";
969
+ export type CreatePackageCandidateAcceptanceDecision = "root" | "accepted" | "rejected";
970
+
971
+ export interface CreatePackageCandidateAcceptanceSummary {
972
+ decision: CreatePackageCandidateAcceptanceDecision;
973
+ compared_to_candidate_id: string | null;
974
+ decided_at: string;
975
+ rationale: string;
976
+ replay_pass_rate_delta: number | null;
977
+ routing_pass_rate_delta: number | null;
978
+ baseline_lift_delta: number | null;
979
+ body_quality_delta: number | null;
980
+ unit_test_pass_rate_delta: number | null;
981
+ }
982
+
983
+ export interface CreatePackageEvaluationSummary {
984
+ skill_name: string;
985
+ skill_path: string;
986
+ mode: ReplayStagingMode;
987
+ package_fingerprint?: string;
988
+ candidate_id?: string;
989
+ parent_candidate_id?: string | null;
990
+ candidate_generation?: number | null;
991
+ evaluation_source?: CreatePackageEvaluationSource;
992
+ status: CreatePackageEvaluationStatus;
993
+ evaluation_passed: boolean;
994
+ next_command: string | null;
995
+ replay: CreatePackageReplaySummary;
996
+ routing?: CreatePackageReplaySummary;
997
+ baseline: CreatePackageBaselineSummary;
998
+ evidence?: CreatePackageEvaluationEvidenceSummary;
999
+ efficiency?: CreatePackageEvaluationEfficiencySummary;
1000
+ grading?: CreatePackageEvaluationGradingSummary;
1001
+ body?: CreatePackageBodySummary;
1002
+ unit_tests?: CreatePackageEvaluationUnitTestSummary;
1003
+ watch?: CreatePackageEvaluationWatchSummary;
1004
+ candidate_acceptance?: CreatePackageCandidateAcceptanceSummary;
1005
+ }
1006
+
1007
+ export interface CreatePackageCandidateRecord {
1008
+ candidate_id: string;
1009
+ skill_name: string;
1010
+ skill_path: string;
1011
+ package_fingerprint: string;
1012
+ parent_candidate_id: string | null;
1013
+ candidate_generation: number;
1014
+ evaluation_count: number;
1015
+ first_evaluated_at: string;
1016
+ last_evaluated_at: string;
1017
+ latest_status: CreatePackageEvaluationStatus;
1018
+ latest_evaluation_source: CreatePackageEvaluationSource | null;
1019
+ latest_acceptance_decision: CreatePackageCandidateAcceptanceDecision | null;
1020
+ artifact_path: string | null;
1021
+ summary: CreatePackageEvaluationSummary;
1022
+ }
1023
+
816
1024
  // ---------------------------------------------------------------------------
817
1025
  // Skill unit test types
818
1026
  // ---------------------------------------------------------------------------
@@ -863,6 +1071,78 @@ export interface UnitTestSuiteResult {
863
1071
  run_at: string;
864
1072
  }
865
1073
 
1074
+ export interface AgentSkillValidationIssue {
1075
+ level: "error" | "warning";
1076
+ code: string;
1077
+ message: string;
1078
+ path?: string;
1079
+ }
1080
+
1081
+ export interface AgentSkillValidationResult {
1082
+ ok: boolean;
1083
+ issues: AgentSkillValidationIssue[];
1084
+ raw_stdout: string;
1085
+ raw_stderr: string;
1086
+ exit_code: number | null;
1087
+ validator: "skills-ref";
1088
+ command: string | null;
1089
+ }
1090
+
1091
+ export type CreateCheckState =
1092
+ | "blocked_spec_validation"
1093
+ | "needs_spec_validation"
1094
+ | "needs_package_resources"
1095
+ | "needs_evals"
1096
+ | "needs_unit_tests"
1097
+ | "needs_routing_replay"
1098
+ | "needs_baseline"
1099
+ | "ready_to_publish";
1100
+
1101
+ export interface CreateCheckChecks {
1102
+ skill_md: boolean;
1103
+ frontmatter_present: boolean;
1104
+ skill_name_matches_dir: boolean;
1105
+ description_present: boolean;
1106
+ description_within_budget: boolean;
1107
+ skill_md_within_line_budget: boolean;
1108
+ manifest_present: boolean;
1109
+ workflow_entry: boolean;
1110
+ references_present: boolean;
1111
+ scripts_present: boolean;
1112
+ assets_present: boolean;
1113
+ evals_present: boolean;
1114
+ unit_tests_present: boolean;
1115
+ routing_replay_ready: boolean;
1116
+ routing_replay_recorded: boolean;
1117
+ package_replay_ready: boolean;
1118
+ baseline_present: boolean;
1119
+ }
1120
+
1121
+ export interface CreateCheckReadiness {
1122
+ ok: boolean;
1123
+ state: CreateCheckState;
1124
+ summary: string;
1125
+ next_command: string | null;
1126
+ checks: CreateCheckChecks;
1127
+ skill_name: string;
1128
+ skill_dir: string;
1129
+ skill_path: string;
1130
+ entry_workflow: string;
1131
+ manifest_present: boolean;
1132
+ description_quality: DescriptionQualityScore;
1133
+ }
1134
+
1135
+ export interface CreateCheckResult {
1136
+ skill: string;
1137
+ skill_dir: string;
1138
+ skill_path: string;
1139
+ ok: boolean;
1140
+ state: CreateCheckState;
1141
+ next_command: string | null;
1142
+ spec_validation: AgentSkillValidationResult;
1143
+ readiness: CreateCheckReadiness;
1144
+ }
1145
+
866
1146
  // ---------------------------------------------------------------------------
867
1147
  // Composability types
868
1148
  // ---------------------------------------------------------------------------
@@ -1034,3 +1314,39 @@ export interface WorkflowDiscoveryReport {
1034
1314
  total_sessions_analyzed: number;
1035
1315
  generated_at: string;
1036
1316
  }
1317
+
1318
+ // ---------------------------------------------------------------------------
1319
+ // Package search types (bounded package evolution)
1320
+ // ---------------------------------------------------------------------------
1321
+
1322
+ /** Provenance trail for a package search run. */
1323
+ export interface PackageSearchProvenance {
1324
+ frontier_size: number;
1325
+ parent_selection_method: string;
1326
+ candidate_fingerprints: string[];
1327
+ surface_plan?: {
1328
+ routing_count: number;
1329
+ body_count: number;
1330
+ weakness_source: string;
1331
+ routing_weakness: number | null;
1332
+ body_weakness: number | null;
1333
+ };
1334
+ evaluation_summaries: Array<{
1335
+ candidate_id: string;
1336
+ decision: string;
1337
+ rationale: string;
1338
+ }>;
1339
+ }
1340
+
1341
+ /** Result of a bounded package search run. */
1342
+ export interface PackageSearchRunResult {
1343
+ search_id: string;
1344
+ skill_name: string;
1345
+ parent_candidate_id: string | null;
1346
+ candidates_evaluated: number;
1347
+ winner_candidate_id: string | null;
1348
+ winner_rationale: string | null;
1349
+ started_at: string;
1350
+ completed_at: string;
1351
+ provenance: PackageSearchProvenance;
1352
+ }
@@ -0,0 +1 @@
1
+ export const MIN_LOG_READY_POSITIVES = 3;
@@ -0,0 +1,11 @@
1
/**
 * Parses `text` as a JSON object.
 *
 * The input is trimmed first and must both start with `{` and end with `}`;
 * anything else (arrays, scalars, JSON surrounded by other text) yields `null`.
 *
 * @param text - Raw text expected to consist solely of one JSON object.
 * @returns The parsed object, or `null` when the text is not brace-delimited
 *   or fails to parse.
 */
export function extractJsonObject(text: string): Record<string, unknown> | null {
  const candidate = text.trim();
  const isBraceDelimited = candidate.startsWith("{") && candidate.endsWith("}");
  if (!isBraceDelimited) return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    // Malformed JSON is reported as "no object" rather than thrown.
    return null;
  }

  if (decoded && typeof decoded === "object") {
    return decoded as Record<string, unknown>;
  }
  return null;
}
@@ -0,0 +1,48 @@
1
/**
 * Rewrites a `selftune` command line onto the verify / publish / improve / run
 * command names.
 *
 * Rewrites are applied in order; the `evolve-body` / `evolve body` rule must
 * run before the bare `evolve` rule so the body scope is preserved. A
 * standalone `--watch` flag is dropped, runs of whitespace are collapsed, and
 * the result is trimmed.
 *
 * @param command - Command line to normalize; may be null/undefined/empty.
 * @returns The normalized command, or `null` when `command` is falsy.
 */
export function normalizeLifecycleCommand(command: string | null | undefined): string | null {
  if (!command) return null;

  const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
    [/\bselftune create replay\b/g, "selftune verify"],
    [/\bselftune create baseline\b/g, "selftune verify"],
    [/\bselftune create check\b/g, "selftune verify"],
    [/\bselftune create publish\b/g, "selftune publish"],
    [/\bselftune evolve[- ]body\b/g, "selftune improve --scope body"],
    [/\bselftune evolve\b/g, "selftune improve"],
    [/\bselftune search-run\b/g, "selftune improve --scope package"],
    [/\bselftune orchestrate\b/g, "selftune run"],
    // Strip the flag together with the whitespace in front of it.
    [/\s+--watch(?=\s|$)/g, ""],
    [/\s{2,}/g, " "],
  ];

  let result = command;
  for (const [pattern, replacement] of rewrites) {
    result = result.replace(pattern, replacement);
  }
  return result.trim();
}
18
+
19
/**
 * Rewrites command names mentioned in free-form text onto the verify /
 * publish / improve / run vocabulary.
 *
 * Mappings (each in lower-case and capitalized form):
 * - `create replay` / `create baseline` / `create check` -> `verify`
 * - `create publish` -> `publish`
 * - `evolve body` / `evolve-body` -> `improve --scope body`
 * - `evolve` -> `improve`
 * - `search-run` -> `improve --scope package`
 * - `orchestrate` -> `run`
 *
 * The hyphenated `evolve-body` spelling is matched explicitly so it agrees
 * with `normalizeLifecycleCommand`; without that, the bare `evolve` rule
 * would turn it into `improve-body`.
 *
 * @param text - Text to normalize; may be null/undefined/empty.
 * @returns The rewritten text, or `""` when `text` is falsy.
 */
export function normalizeLifecycleText(text: string | null | undefined): string {
  if (!text) return "";

  // Order matters: the body-scoped evolve rules must precede the bare
  // `evolve` rules. Prefixed phrases such as "Run create replay" or
  // "selftune orchestrate" need no rule of their own because the shorter
  // word-bounded patterns below already match inside them.
  const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
    [/\bcreate (?:replay|baseline|check)\b/g, "verify"],
    [/\bCreate (?:replay|baseline|check)\b/g, "Verify"],
    [/\bcreate publish\b/g, "publish"],
    [/\bCreate publish\b/g, "Publish"],
    [/\bevolve[- ]body\b/g, "improve --scope body"],
    [/\bEvolve[- ]body\b/g, "Improve --scope body"],
    [/\bevolve\b/g, "improve"],
    [/\bEvolve\b/g, "Improve"],
    [/\bsearch-run\b/g, "improve --scope package"],
    [/\bSearch-run\b/g, "Improve --scope package"],
    [/\bOrchestrate\b/g, "Run"],
    [/\borchestrate\b/g, "run"],
  ];

  return rewrites.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text,
  );
}
@@ -53,6 +53,42 @@ const LEADING_WRAPPED_QUERY_TAGS = [
53
53
  "local-command-stdout",
54
54
  "local-command-stderr",
55
55
  "command-name",
56
+ "command-message",
57
+ "command-args",
58
+ ] as const;
59
+
60
/**
 * Lower-case phrases that `isLikelySkillMaintenanceQuery` searches for as
 * substrings of the normalized query to detect a maintenance-style verb.
 */
const SKILL_MAINTENANCE_VERBS = [
  "grade", "review", "audit", "inspect",
  "analyze", "analyse", "understand", "explain",
  "find", "locate",
  "update", "fix", "repair", "improve", "debug",
  "document", "publish api",
] as const;
79
+
80
/**
 * Lower-case words that `isLikelySkillMaintenanceQuery` searches for as
 * substrings of the normalized query to detect a maintenance-style object.
 */
const SKILL_MAINTENANCE_NOUNS = [
  "skill", "skills",
  "readme", "docs", "documentation",
  "workflow", "workflows",
  "reference", "references",
  "files", "format",
] as const;
57
93
 
58
94
  function stripLeadingWrappedQueryText(query: string): string {
@@ -81,7 +117,7 @@ export function extractActionableQueryText(query: string): string | null {
81
117
  const trimmed = query.trim();
82
118
  if (!trimmed || trimmed === "-" || trimmed === "(query not found)") return null;
83
119
 
84
- const candidate = stripLeadingWrappedQueryText(trimmed) || trimmed;
120
+ const candidate = stripLeadingWrappedQueryText(trimmed);
85
121
  if (!candidate || candidate === "-" || candidate === "(query not found)") return null;
86
122
 
87
123
  const isBlocked =
@@ -92,6 +128,51 @@ export function extractActionableQueryText(query: string): string | null {
92
128
  return isBlocked ? null : candidate;
93
129
  }
94
130
 
131
/**
 * Builds the lower-case spellings of a skill name used for substring matching.
 *
 * Variants, in insertion order and de-duplicated:
 * 1. the lower-cased name,
 * 2. `-`/`_` runs replaced by a space,
 * 3. `-`/`_`/whitespace removed entirely,
 * 4. camelCase boundaries split with a space, `-`/`_` runs replaced by a space.
 *
 * Every variant has its whitespace collapsed and trimmed, and empty results
 * are dropped. This keeps separator-only names (e.g. `"-"`) from producing a
 * bare `" "` variant, which would match any multi-word query, and keeps
 * separator-edged names (e.g. `"my_skill_"`) from producing padded variants.
 *
 * @param skillName - Skill name as written (any casing / separators).
 * @returns Distinct non-empty variants; `[]` when the name is blank.
 */
function normalizeSkillNameVariants(skillName: string): string[] {
  const trimmed = skillName.trim();
  if (!trimmed) return [];

  const variants = new Set<string>();
  const addVariant = (raw: string): void => {
    const cleaned = raw.replace(/\s+/g, " ").trim();
    if (cleaned) variants.add(cleaned);
  };

  const lower = trimmed.toLowerCase();
  addVariant(lower);
  addVariant(lower.replace(/[-_]+/g, " "));
  addVariant(lower.replace(/[-_\s]+/g, ""));
  // Split camelCase on the original casing before lower-casing.
  addVariant(
    trimmed
      .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
      .replace(/[-_]+/g, " ")
      .toLowerCase(),
  );

  return [...variants];
}
149
+
150
/**
 * Heuristically decides whether a query is about maintaining a skill rather
 * than using it.
 *
 * The actionable text is lower-cased and whitespace-collapsed, then checked
 * with plain substring matching. The query is treated as maintenance when:
 * - it contains a maintenance verb and a maintenance noun, or
 * - it contains a skill-name variant together with either a maintenance verb
 *   or a "how ... work(s)" phrasing (up to 80 characters apart).
 *
 * NOTE(review): matching is by substring, so e.g. "fix" also matches inside
 * "prefix" — confirm this breadth is intended.
 *
 * @param query - Raw query text.
 * @param skillName - Optional skill name whose variants are looked for.
 * @returns `true` when the query looks like skill maintenance; `false`
 *   otherwise, including when no actionable text can be extracted.
 */
export function isLikelySkillMaintenanceQuery(query: string, skillName?: string): boolean {
  const actionable = extractActionableQueryText(query);
  if (!actionable) return false;

  const haystack = actionable.toLowerCase().replace(/\s+/g, " ").trim();

  const hasVerb = SKILL_MAINTENANCE_VERBS.some((verb) => haystack.includes(verb));
  const hasNoun = SKILL_MAINTENANCE_NOUNS.some((noun) => haystack.includes(noun));
  if (hasVerb && hasNoun) return true;

  // Every remaining positive case requires the skill to be named.
  let namesSkill = false;
  if (skillName) {
    for (const variant of normalizeSkillNameVariants(skillName)) {
      if (variant.length > 0 && haystack.includes(variant)) {
        namesSkill = true;
        break;
      }
    }
  }
  if (!namesSkill) return false;

  return hasVerb || /\bhow\b[\s\S]{0,80}\bworks?\b/.test(haystack);
}
169
+
170
/**
 * Returns the actionable text of `query` when it is usable as a positive eval
 * example, i.e. it is actionable and does not look like skill maintenance.
 *
 * @param query - Raw query text.
 * @param skillName - Optional skill name forwarded to the maintenance check.
 * @returns The actionable text, or `null` when there is none or it is
 *   classified as a maintenance query.
 */
export function extractPositiveEvalQueryText(query: string, skillName?: string): string | null {
  const actionable = extractActionableQueryText(query);
  if (!actionable) return null;
  if (isLikelySkillMaintenanceQuery(actionable, skillName)) return null;
  return actionable;
}
175
+
95
176
  export function isActionableQueryText(query: string): boolean {
96
177
  return extractActionableQueryText(query) !== null;
97
178
  }
@@ -26,13 +26,96 @@ function createNoopTUI(): EvolveTUI {
26
26
  return { step() {}, done() {}, fail() {}, finish() {}, destroy() {} };
27
27
  }
28
28
 
29
/**
 * Creates an `EvolveTUI` that reports progress as plain lines on stderr.
 *
 * A banner naming the skill and model is written immediately. Each `step`
 * writes a "started" line; the step is later closed with a check or cross
 * mark and its elapsed time in seconds. When `NO_COLOR` is set the marks are
 * the ASCII characters `+` and `x`. After `finish` or `destroy`, every method
 * is a no-op.
 *
 * @param opts - Skill name and model shown in the banner.
 * @returns The plain-text progress reporter.
 */
function createPlainTextTUI(opts: { skillName: string; model: string }): EvolveTUI {
  const emit = (chunk: string): void => {
    process.stderr.write(chunk);
  };

  const okMark = process.env.NO_COLOR ? "+" : "\u2713";
  const failMark = process.env.NO_COLOR ? "x" : "\u2717";

  let startedAt = Date.now();
  let activeLabel = "";
  let stepOpen = false;
  let closed = false;

  emit(`\n selftune evolve \u2500\u2500 ${opts.skillName} \u2500\u2500 ${opts.model}\n\n`);

  // Writes one completion line; the label is padded towards a 48-column
  // field, always leaving at least one space before the elapsed time.
  const writeResult = (mark: string, label: string, elapsedMs: number): void => {
    const gap = " ".repeat(Math.max(1, 48 - label.length));
    emit(` ${mark} ${label}${gap}${(elapsedMs / 1000).toFixed(1)}s\n`);
  };

  // Closes the open step with `mark`; with no open step the line reports 0ms.
  const settle = (mark: string, label: string): void => {
    const elapsedMs = stepOpen ? Date.now() - startedAt : 0;
    stepOpen = false;
    writeResult(mark, label, elapsedMs);
  };

  return {
    step(label: string): void {
      if (closed) return;
      // A still-open previous step is implicitly reported as successful.
      if (stepOpen) settle(okMark, activeLabel);
      activeLabel = label;
      startedAt = Date.now();
      stepOpen = true;
      emit(` -> ${label}\n`);
    },

    done(label: string): void {
      if (closed) return;
      settle(okMark, label);
      activeLabel = "";
    },

    fail(label: string): void {
      if (closed) return;
      settle(failMark, label);
      activeLabel = "";
    },

    finish(summary: string): void {
      if (closed) return;
      if (stepOpen) settle(okMark, activeLabel);
      emit(`\n ${summary}\n`);
      closed = true;
    },

    destroy(): void {
      closed = true;
      stepOpen = false;
      activeLabel = "";
    },
  };
}
109
+
29
110
  export function createEvolveTUI(opts: { skillName: string; model: string }): EvolveTUI {
30
111
  const noColor = !!process.env.NO_COLOR;
31
112
  const isTTY = !!process.stderr.isTTY;
113
+ const isTestEnvironment = process.env.BUN_ENV?.includes("test");
32
114
 
33
- // If not a TTY, return no-op to avoid ANSI noise in pipes/tests
115
+ // Non-interactive agent runs still need durable progress lines. Keep tests
116
+ // silent by default unless explicitly forced.
34
117
  if (!isTTY && !process.env.SELFTUNE_TUI_FORCE) {
35
- return createNoopTUI();
118
+ return isTestEnvironment ? createNoopTUI() : createPlainTextTUI(opts);
36
119
  }
37
120
 
38
121
  const write = (s: string) => process.stderr.write(s);