selftune 0.2.14 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
  2. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
  3. package/apps/local-dashboard/dist/index.html +2 -2
  4. package/cli/selftune/analytics.ts +13 -11
  5. package/cli/selftune/badge/badge.ts +13 -9
  6. package/cli/selftune/canonical-export.ts +6 -6
  7. package/cli/selftune/contribute/contribute.ts +2 -1
  8. package/cli/selftune/cron/setup.ts +3 -1
  9. package/cli/selftune/dashboard-contract.ts +10 -0
  10. package/cli/selftune/dashboard.ts +10 -5
  11. package/cli/selftune/eval/baseline.ts +20 -30
  12. package/cli/selftune/eval/hooks-to-evals.ts +22 -12
  13. package/cli/selftune/eval/import-skillsbench.ts +21 -8
  14. package/cli/selftune/eval/unit-test-cli.ts +22 -11
  15. package/cli/selftune/evolution/description-quality.ts +224 -0
  16. package/cli/selftune/evolution/evolve-body.ts +17 -10
  17. package/cli/selftune/evolution/evolve.ts +70 -57
  18. package/cli/selftune/evolution/rollback.ts +7 -6
  19. package/cli/selftune/grading/auto-grade.ts +24 -22
  20. package/cli/selftune/grading/grade-session.ts +21 -17
  21. package/cli/selftune/hooks/auto-activate.ts +12 -3
  22. package/cli/selftune/hooks/prompt-log.ts +7 -1
  23. package/cli/selftune/index.ts +66 -69
  24. package/cli/selftune/ingestors/claude-replay.ts +29 -14
  25. package/cli/selftune/ingestors/codex-rollout.ts +6 -1
  26. package/cli/selftune/init.ts +14 -9
  27. package/cli/selftune/monitoring/watch.ts +32 -16
  28. package/cli/selftune/orchestrate.ts +18 -17
  29. package/cli/selftune/routes/skill-report.ts +17 -0
  30. package/cli/selftune/schedule.ts +23 -9
  31. package/cli/selftune/sync.ts +7 -3
  32. package/cli/selftune/types.ts +44 -10
  33. package/cli/selftune/utils/cli-error.ts +102 -0
  34. package/cli/selftune/workflows/workflows.ts +23 -17
  35. package/package.json +1 -1
  36. package/skill/SKILL.md +1 -1
  37. package/skill/Workflows/Evolve.md +4 -0
  38. package/skill/Workflows/Initialize.md +8 -8
  39. package/skill/settings_snippet.json +29 -6
  40. package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +0 -16
  41. package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
@@ -0,0 +1,224 @@
1
+ /**
2
+ * description-quality.ts
3
+ *
4
+ * Pure, deterministic scoring function that evaluates the quality of a skill
5
+ * description for routing accuracy. No LLM calls — heuristic-only.
6
+ *
7
+ * Inspired by OpenAI's finding that "writing better skill descriptions improved
8
+ * routing accuracy more than any change to the underlying skill logic itself."
9
+ */
10
+
11
+ import type { DescriptionQualityScore } from "../types.js";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Constants
15
+ // ---------------------------------------------------------------------------
16
+
17
// Length thresholds, in characters. [IDEAL_MIN, IDEAL_MAX] is the range that
// earns a full length score; MIN_LENGTH and MAX_LENGTH are the outer bounds
// where the score starts degrading more steeply.
const MIN_LENGTH = 40;
const IDEAL_MIN = 80;
const IDEAL_MAX = 300;
const MAX_LENGTH = 500;

/** Words/phrases signalling that the description states *when* the skill fires. */
const TRIGGER_CONTEXT_WORDS: string[] = [
  "when", "if", "after", "before", "during", "while",
  "upon", "whenever", "use when", "trigger", "activate",
];

/** Imprecise words/phrases; each distinct one found lowers the vagueness score. */
const VAGUE_WORDS: string[] = [
  "various", "general", "misc", "miscellaneous",
  "stuff", "things", "etc", "and more",
  "and so on", "other", "multiple", "several",
  "many", "some", "certain", "related",
];

/** Boilerplate phrases that carry no routing signal (matched as lowercase substrings). */
const FILLER_PHRASES: string[] = [
  "this skill", "a tool for", "a tool that", "helps with",
  "is used for", "can be used", "is designed to",
];

/** Verbs treated as evidence that the description names a concrete behavior. */
const ACTION_VERBS: string[] = [
  "run", "execute", "analyze", "generate",
  "create", "deploy", "validate", "check",
  "build", "test", "scan", "extract",
  "transform", "monitor", "grade", "evolve",
  "sync", "watch", "review", "audit",
  "parse", "format", "search", "fetch",
  "publish", "install", "configure", "diagnose",
  "debug", "fix", "optimize", "measure",
];
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Pre-compiled word-boundary patterns
107
+ // ---------------------------------------------------------------------------
108
+
109
/**
 * Compile a word list into case-insensitive, word-boundary RegExp patterns.
 *
 * Each entry is matched literally: regex metacharacters are escaped first, so
 * an entry such as "a.b" cannot match "axb" and an entry such as "c++" cannot
 * produce an invalid pattern. Any run of whitespace inside an entry matches one
 * or more whitespace characters in the text, so "and more" also matches
 * "and   more".
 *
 * NOTE(review): `\b` only anchors next to a word character, so entries that
 * begin or end with punctuation may never match — keep list entries
 * alphanumeric at both ends.
 *
 * @param words - Literal words or phrases to look for.
 * @returns One pattern per entry, in the same order as `words`.
 */
function compileWordPatterns(words: readonly string[]): RegExp[] {
  return words.map((word) => {
    // Escape before injecting the whitespace wildcard so the wildcard itself is not escaped.
    const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const flexible = literal.replace(/\s+/g, "\\s+");
    return new RegExp(`\\b${flexible}\\b`, "i");
  });
}
113
+
114
// Built once when the module is evaluated; the scorers below reuse these
// instead of constructing regexes on every call.
const TRIGGER_PATTERNS: RegExp[] = compileWordPatterns(TRIGGER_CONTEXT_WORDS);
const VAGUE_PATTERNS: RegExp[] = compileWordPatterns(VAGUE_WORDS);
const ACTION_PATTERNS: RegExp[] = compileWordPatterns(ACTION_VERBS);
117
+
118
/**
 * Count how many of the given patterns match `text` at least once.
 *
 * Each pattern contributes at most 1, regardless of how many times it occurs.
 *
 * @param text - The string to test.
 * @param patterns - Patterns to test against `text`, each tried once in order.
 * @returns Number of patterns whose `test` succeeded.
 */
function countWordMatches(text: string, patterns: RegExp[]): number {
  return patterns.reduce((hits, pattern) => (pattern.test(text) ? hits + 1 : hits), 0);
}
126
+
127
+ // ---------------------------------------------------------------------------
128
+ // Criterion scorers
129
+ // ---------------------------------------------------------------------------
130
+
131
/**
 * Score description length on a continuous piecewise-linear curve.
 *
 * - `IDEAL_MIN..IDEAL_MAX` characters: 1.0
 * - `MIN_LENGTH..IDEAL_MIN`: rises linearly from 0.7 to 1.0
 * - below `MIN_LENGTH`: rises linearly from 0.0 (empty) to 0.7
 * - `IDEAL_MAX..MAX_LENGTH`: falls linearly from 1.0 to 0.7
 * - above `MAX_LENGTH`: keeps falling from 0.7, floored at 0.3
 *
 * The below-minimum branch is scaled to top out at 0.7 so the curve is
 * monotonic: a description just under `MIN_LENGTH` can never outscore one that
 * is just over it.
 *
 * @param description - Raw description text; its length is measured as-is.
 * @returns A score between 0.0 and 1.0.
 */
export function scoreLengthCriterion(description: string): number {
  const len = description.length;
  if (len >= IDEAL_MIN && len <= IDEAL_MAX) return 1.0;
  // Too short: meet the next segment at 0.7 instead of peaking near 1.0.
  if (len < MIN_LENGTH) return 0.7 * (len / MIN_LENGTH);
  if (len < IDEAL_MIN) return 0.7 + 0.3 * ((len - MIN_LENGTH) / (IDEAL_MIN - MIN_LENGTH));
  if (len <= MAX_LENGTH) return 0.7 + 0.3 * ((MAX_LENGTH - len) / (MAX_LENGTH - IDEAL_MAX));
  return Math.max(0.3, 0.7 - 0.4 * ((len - MAX_LENGTH) / MAX_LENGTH));
}
140
+
141
/**
 * Score how clearly the description says *when* the skill should fire.
 *
 * Counts the distinct trigger-context patterns present in the lowercased text:
 * none scores 0.0, one scores 0.7, and each further one adds 0.15, capped at 1.0.
 *
 * @param description - Skill description text.
 * @returns A score between 0.0 and 1.0.
 */
export function scoreTriggerContextCriterion(description: string): number {
  const hits = countWordMatches(description.toLowerCase(), TRIGGER_PATTERNS);
  return hits === 0 ? 0.0 : Math.min(1.0, 0.7 + 0.15 * (hits - 1));
}
148
+
149
/**
 * Score the absence of vague wording (higher is better).
 *
 * Counts the distinct vague patterns present in the lowercased text: none
 * scores 1.0, one scores 0.6, and each further one subtracts 0.15, floored at 0.1.
 *
 * @param description - Skill description text.
 * @returns A score between 0.1 and 1.0.
 */
export function scoreVaguenessCriterion(description: string): number {
  const hits = countWordMatches(description.toLowerCase(), VAGUE_PATTERNS);
  return hits === 0 ? 1.0 : Math.max(0.1, 0.6 - 0.15 * (hits - 1));
}
156
+
157
/**
 * Score whether the description names at least one concrete action.
 *
 * Without any action verb the score is a flat 0.2. With one, the score starts
 * at 1.0 and is reduced by filler phrases in proportion to their density
 * (filler phrases per ten words), floored at 0.3.
 *
 * Words are counted on the trimmed text so leading/trailing whitespace does not
 * add phantom empty tokens that would dilute the filler ratio.
 *
 * @param description - Skill description text.
 * @returns A score between 0.2 and 1.0.
 */
export function scoreSpecificityCriterion(description: string): number {
  const lower = description.toLowerCase();

  // No concrete verb at all: filler density is irrelevant.
  if (!ACTION_PATTERNS.some((pattern) => pattern.test(lower))) return 0.2;

  const fillerCount = FILLER_PHRASES.filter((phrase) => lower.includes(phrase)).length;
  if (fillerCount === 0) return 1.0;

  // An action verb matched, so the trimmed text is guaranteed non-empty here.
  const wordCount = description.trim().split(/\s+/).length;
  const fillerRatio = fillerCount / Math.max(1, wordCount / 10);
  return Math.max(0.3, 1.0 - fillerRatio * 0.3);
}
169
+
170
/**
 * Score whether the description adds anything beyond restating the skill name.
 *
 * Both strings are normalized the same way: lowercased, everything except
 * `a-z`, `0-9` and whitespace removed, whitespace runs collapsed, then trimmed.
 * Trimming happens last so punctuation at the edges (e.g. markdown emphasis)
 * cannot leave stray spaces that defeat the equality check. The skill name is
 * compared in two forms: separators removed ("my-skill" -> "myskill") and
 * separators turned into spaces ("my-skill" -> "my skill").
 *
 * @param description - Skill description text.
 * @param skillName - Skill identifier; when omitted or empty the check is skipped.
 * @returns 0.0 if the description is just the name, 0.3 if it is fewer than
 *   10 characters longer than the name, otherwise 1.0 (also 1.0 when no name
 *   is given).
 */
export function scoreNotJustNameCriterion(description: string, skillName?: string): number {
  if (!skillName) return 1.0;

  const normalize = (text: string): string =>
    text
      .toLowerCase()
      .replace(/[^a-z0-9\s]/g, "")
      .replace(/\s+/g, " ")
      .trim();

  const descNorm = normalize(description);
  const nameNorm = normalize(skillName);
  // Kebab/snake separators become spaces *before* stripping, so the words stay apart.
  const nameSpaced = normalize(skillName.replace(/[-_]/g, " "));

  if (descNorm === nameNorm || descNorm === nameSpaced) return 0.0;
  if (descNorm.length < nameNorm.length + 10) return 0.3;
  return 1.0;
}
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // Main scoring function
190
+ // ---------------------------------------------------------------------------
191
+
192
/**
 * Relative weight of each criterion in the composite score (the values sum to 1.0).
 * Trigger context is weighted highest per OpenAI's finding.
 */
const WEIGHTS = {
  length: 0.15,
  trigger_context: 0.3,
  vagueness: 0.2,
  specificity: 0.2,
  not_just_name: 0.15,
} as const;

/**
 * Score a skill description against the heuristic quality criteria.
 *
 * Runs every criterion scorer, then combines the results as a weighted sum
 * using `WEIGHTS`, rounded to three decimal places.
 *
 * @param description - Skill description text to evaluate.
 * @param skillName - Optional skill name, used only by the "not just the name" criterion.
 * @returns The composite score plus the per-criterion breakdown.
 */
export function scoreDescription(description: string, skillName?: string): DescriptionQualityScore {
  const length = scoreLengthCriterion(description);
  const trigger_context = scoreTriggerContextCriterion(description);
  const vagueness = scoreVaguenessCriterion(description);
  const specificity = scoreSpecificityCriterion(description);
  const not_just_name = scoreNotJustNameCriterion(description, skillName);
  const criteria = { length, trigger_context, vagueness, specificity, not_just_name };

  // Accumulate in WEIGHTS key order so the floating-point sum is reproducible.
  let weighted = 0;
  for (const name of Object.keys(WEIGHTS) as (keyof typeof WEIGHTS)[]) {
    weighted = weighted + criteria[name] * WEIGHTS[name];
  }

  return { composite: Number(weighted.toFixed(3)), criteria };
}
@@ -25,6 +25,7 @@ import type {
25
25
  QueryLogRecord,
26
26
  SkillUsageRecord,
27
27
  } from "../types.js";
28
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
28
29
  import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
29
30
  import { callViaSubagent } from "../utils/llm-call.js";
30
31
  import { appendAuditEntry } from "./audit.js";
@@ -710,8 +711,11 @@ Options:
710
711
  }
711
712
 
712
713
  if (!values.skill || !values["skill-path"]) {
713
- console.error("[ERROR] --skill and --skill-path are required");
714
- process.exit(1);
714
+ throw new CLIError(
715
+ "--skill and --skill-path are required",
716
+ "MISSING_FLAG",
717
+ "selftune evolve body --skill <name> --skill-path <path>",
718
+ );
715
719
  }
716
720
 
717
721
  const { detectAgent } = await import("../utils/llm-call.js");
@@ -719,15 +723,21 @@ Options:
719
723
  const studentAgent = values["student-agent"] ?? teacherAgent;
720
724
 
721
725
  if (!teacherAgent) {
722
- console.error("[ERROR] No agent CLI found. Install Claude Code, Codex, or OpenCode.");
723
- process.exit(1);
726
+ throw new CLIError(
727
+ "No agent CLI found. Install Claude Code, Codex, or OpenCode.",
728
+ "AGENT_NOT_FOUND",
729
+ "Install Claude Code, Codex, or OpenCode.",
730
+ );
724
731
  }
725
732
 
726
733
  // Parse target
727
734
  const targetStr = values.target ?? "body";
728
735
  if (targetStr !== "body" && targetStr !== "routing") {
729
- console.error("[ERROR] --target must be 'body' or 'routing'");
730
- process.exit(1);
736
+ throw new CLIError(
737
+ "--target must be 'body' or 'routing'",
738
+ "INVALID_FLAG",
739
+ "Use --target body or --target routing",
740
+ );
731
741
  }
732
742
 
733
743
  // Parse few-shot examples
@@ -763,8 +773,5 @@ Options:
763
773
  }
764
774
 
765
775
  if (import.meta.main) {
766
- cliMain().catch((err) => {
767
- console.error(`[FATAL] ${err}`);
768
- process.exit(1);
769
- });
776
+ cliMain().catch(handleCLIError);
770
777
  }
@@ -36,10 +36,12 @@ import type {
36
36
  SessionTelemetryRecord,
37
37
  SkillUsageRecord,
38
38
  } from "../types.js";
39
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
39
40
  import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
40
41
  import { createEvolveTUI } from "../utils/tui.js";
41
42
  import { appendAuditEntry } from "./audit.js";
42
43
  import { checkConstitution } from "./constitutional.js";
44
+ import { scoreDescription } from "./description-quality.js";
43
45
  import { appendEvidenceEntry } from "./evidence.js";
44
46
  import { extractFailurePatterns } from "./extract-patterns.js";
45
47
  import {
@@ -94,6 +96,8 @@ export interface EvolveResult {
94
96
  baselineResult?: BaselineMeasurement;
95
97
  gateValidation?: ValidationResult;
96
98
  sync_result?: SyncResult;
99
+ descriptionQualityBefore?: number;
100
+ descriptionQualityAfter?: number;
97
101
  }
98
102
 
99
103
  /**
@@ -247,16 +251,26 @@ export async function evolve(
247
251
  );
248
252
 
249
253
  /** Stamp every return with pipeline stats so callers always get them. */
250
- const withStats = (r: Omit<EvolveResult, "llmCallCount" | "elapsedMs">): EvolveResult => ({
251
- ...r,
252
- llmCallCount,
253
- elapsedMs: Date.now() - pipelineStart,
254
- ...(syncResult ? { sync_result: syncResult } : {}),
255
- });
254
+ const withStats = (r: Omit<EvolveResult, "llmCallCount" | "elapsedMs">): EvolveResult => {
255
+ const descQualityAfterScore = r.proposal
256
+ ? scoreDescription(r.proposal.proposed_description, options.skillName).composite
257
+ : undefined;
258
+ return {
259
+ ...r,
260
+ llmCallCount,
261
+ elapsedMs: Date.now() - pipelineStart,
262
+ ...(syncResult ? { sync_result: syncResult } : {}),
263
+ ...(descQualityBeforeScore != null
264
+ ? { descriptionQualityBefore: descQualityBeforeScore }
265
+ : {}),
266
+ ...(descQualityAfterScore != null ? { descriptionQualityAfter: descQualityAfterScore } : {}),
267
+ };
268
+ };
256
269
 
257
- // Hoisted so catch block can preserve partial results on error
270
+ // Hoisted so catch block and withStats can preserve partial results on error
258
271
  let lastProposal: EvolutionProposal | null = null;
259
272
  let lastValidation: ValidationResult | null = null;
273
+ let descQualityBeforeScore: number | undefined;
260
274
 
261
275
  try {
262
276
  // -----------------------------------------------------------------------
@@ -281,7 +295,11 @@ export async function evolve(
281
295
  const versionTag = skillVersion ? `, v${skillVersion}` : "";
282
296
  const createdAuditDetails = (message: string) =>
283
297
  `original_description:${rawContent}\n${message}`;
284
- tui.done(`Loaded SKILL.md (desc: ${currentDescription.length} chars${versionTag})`);
298
+ const descQualityBefore = scoreDescription(currentDescription, skillName);
299
+ descQualityBeforeScore = descQualityBefore.composite;
300
+ tui.done(
301
+ `Loaded SKILL.md (desc: ${currentDescription.length} chars${versionTag}, quality: ${descQualityBefore.composite})`,
302
+ );
285
303
 
286
304
  if (options.syncFirst) {
287
305
  tui.step(`Syncing source-truth telemetry${options.syncForce ? " (force)" : ""}...`);
@@ -1111,38 +1129,36 @@ Options:
1111
1129
  }
1112
1130
 
1113
1131
  if (!values.skill || !values["skill-path"]) {
1114
- console.error("[ERROR] --skill and --skill-path are required");
1115
- process.exit(1);
1132
+ throw new CLIError(
1133
+ "--skill and --skill-path are required",
1134
+ "MISSING_FLAG",
1135
+ "selftune evolve --skill <name> --skill-path <path>",
1136
+ );
1116
1137
  }
1117
1138
  if ((values["sync-force"] ?? false) && !(values["sync-first"] ?? false)) {
1118
- console.error("[ERROR] --sync-force requires --sync-first");
1119
- process.exit(1);
1139
+ throw new CLIError(
1140
+ "--sync-force requires --sync-first",
1141
+ "INVALID_FLAG",
1142
+ "Add --sync-first when using --sync-force",
1143
+ );
1120
1144
  }
1121
1145
 
1122
1146
  const { detectAgent } = await import("../utils/llm-call.js");
1123
1147
  const requestedAgent = values.agent;
1124
1148
  if (requestedAgent && !Bun.which(requestedAgent)) {
1125
- console.error(
1126
- JSON.stringify({
1127
- level: "error",
1128
- code: "agent_not_in_path",
1129
- message: `Agent CLI '${requestedAgent}' not found in PATH.`,
1130
- action: "Install it or omit --agent to use auto-detection.",
1131
- }),
1149
+ throw new CLIError(
1150
+ `Agent CLI '${requestedAgent}' not found in PATH.`,
1151
+ "AGENT_NOT_FOUND",
1152
+ "Install it or omit --agent to use auto-detection.",
1132
1153
  );
1133
- process.exit(1);
1134
1154
  }
1135
1155
  const agent = requestedAgent ?? detectAgent();
1136
1156
  if (!agent) {
1137
- console.error(
1138
- JSON.stringify({
1139
- level: "error",
1140
- code: "agent_not_found",
1141
- message: "No agent CLI (claude/codex/opencode) found in PATH.",
1142
- action: "Install Claude Code, Codex, or OpenCode.",
1143
- }),
1157
+ throw new CLIError(
1158
+ "No agent CLI (claude/codex/opencode) found in PATH.",
1159
+ "AGENT_NOT_FOUND",
1160
+ "Install Claude Code, Codex, or OpenCode.",
1144
1161
  );
1145
- process.exit(1);
1146
1162
  }
1147
1163
 
1148
1164
  // -------------------------------------------------------------------------
@@ -1150,20 +1166,27 @@ Options:
1150
1166
  // -------------------------------------------------------------------------
1151
1167
  const skillPath = values["skill-path"];
1152
1168
  if (!skillPath) {
1153
- console.error("[ERROR] --skill-path is required.");
1154
- process.exit(1);
1169
+ throw new CLIError(
1170
+ "--skill-path is required.",
1171
+ "MISSING_FLAG",
1172
+ "selftune evolve --skill <name> --skill-path <path>",
1173
+ );
1155
1174
  }
1156
1175
  if (!existsSync(skillPath)) {
1157
- console.error(`[ERROR] SKILL.md not found at: ${skillPath}`);
1158
- console.error(" Verify the --skill-path argument points to an existing SKILL.md file.");
1159
- process.exit(1);
1176
+ throw new CLIError(
1177
+ `SKILL.md not found at: ${skillPath}`,
1178
+ "FILE_NOT_FOUND",
1179
+ "Verify the --skill-path argument points to an existing SKILL.md file.",
1180
+ );
1160
1181
  }
1161
1182
 
1162
1183
  const evalSetPath = values["eval-set"];
1163
1184
  if (evalSetPath && !existsSync(evalSetPath)) {
1164
- console.error(`[ERROR] Eval set file not found at: ${evalSetPath}`);
1165
- console.error(" Verify the --eval-set argument points to an existing JSON file.");
1166
- process.exit(1);
1185
+ throw new CLIError(
1186
+ `Eval set file not found at: ${evalSetPath}`,
1187
+ "FILE_NOT_FOUND",
1188
+ "Verify the --eval-set argument points to an existing JSON file.",
1189
+ );
1167
1190
  }
1168
1191
 
1169
1192
  // If no eval-set provided, check that log files exist for auto-generation
@@ -1172,12 +1195,11 @@ Options:
1172
1195
  const hasSkillLog = querySkillUsageRecords(dbCheck).length > 0;
1173
1196
  const hasQueryLog = existsSync(QUERY_LOG);
1174
1197
  if (!hasSkillLog && !hasQueryLog) {
1175
- console.error("[ERROR] No eval set provided and no telemetry logs found.");
1176
- console.error(
1177
- " Either pass --eval-set <path> or generate logs first by using selftune-enabled skills.",
1198
+ throw new CLIError(
1199
+ `No eval set provided and no telemetry logs found. Expected logs at: ${SKILL_LOG} and ${QUERY_LOG}`,
1200
+ "MISSING_DATA",
1201
+ "Either pass --eval-set <path> or generate logs first by using selftune-enabled skills.",
1178
1202
  );
1179
- console.error(` Expected logs at: ${SKILL_LOG} and ${QUERY_LOG}`);
1180
- process.exit(1);
1181
1203
  }
1182
1204
  }
1183
1205
 
@@ -1244,6 +1266,12 @@ Options:
1244
1266
  rationale: result.proposal?.rationale ?? "",
1245
1267
  ...(result.skillVersion ? { version: result.skillVersion } : {}),
1246
1268
  dashboard_url: `http://localhost:3141/report/${encodeURIComponent(values.skill)}`,
1269
+ ...(result.descriptionQualityBefore != null
1270
+ ? { description_quality_before: result.descriptionQualityBefore }
1271
+ : {}),
1272
+ ...(result.descriptionQualityAfter != null
1273
+ ? { description_quality_after: result.descriptionQualityAfter }
1274
+ : {}),
1247
1275
  };
1248
1276
  console.log(JSON.stringify(summary, null, 2));
1249
1277
  }
@@ -1276,20 +1304,5 @@ Options:
1276
1304
  }
1277
1305
 
1278
1306
  if (import.meta.main) {
1279
- cliMain().catch((err) => {
1280
- const message = err instanceof Error ? err.message : String(err);
1281
- const stack = err instanceof Error ? err.stack : undefined;
1282
- console.error(`[FATAL] ${message}`);
1283
- if (stack && process.env.SELFTUNE_VERBOSE === "1") {
1284
- console.error(stack);
1285
- }
1286
- console.error(
1287
- "\nTroubleshooting:\n" +
1288
- " - Verify --skill-path points to a valid SKILL.md file\n" +
1289
- " - Ensure eval data exists (run `selftune eval generate` first) or pass --eval-set\n" +
1290
- " - Check that ANTHROPIC_API_KEY is set if using Claude\n" +
1291
- " - Re-run with --verbose for full diagnostic output",
1292
- );
1293
- process.exit(1);
1294
- });
1307
+ cliMain().catch(handleCLIError);
1295
1308
  }
@@ -13,6 +13,7 @@ import { parseArgs } from "node:util";
13
13
 
14
14
  import { updateContextAfterRollback } from "../memory/writer.js";
15
15
  import type { EvolutionAuditEntry } from "../types.js";
16
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
16
17
  import { replaceDescription } from "../utils/frontmatter.js";
17
18
  import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
18
19
 
@@ -233,8 +234,11 @@ Options:
233
234
  }
234
235
 
235
236
  if (!values.skill || !values["skill-path"]) {
236
- console.error("[ERROR] --skill and --skill-path are required");
237
- process.exit(1);
237
+ throw new CLIError(
238
+ "--skill and --skill-path are required",
239
+ "MISSING_FLAG",
240
+ "selftune evolve rollback --skill <name> --skill-path <path>",
241
+ );
238
242
  }
239
243
 
240
244
  const result = await rollback({
@@ -248,8 +252,5 @@ Options:
248
252
  }
249
253
 
250
254
  if (import.meta.main) {
251
- cliMain().catch((err) => {
252
- console.error(`[FATAL] ${err}`);
253
- process.exit(1);
254
- });
255
+ cliMain().catch(handleCLIError);
255
256
  }
@@ -17,6 +17,7 @@ import { AGENT_CANDIDATES, TELEMETRY_LOG } from "../constants.js";
17
17
  import { getDb } from "../localdb/db.js";
18
18
  import { querySessionTelemetry, querySkillUsageRecords } from "../localdb/queries.js";
19
19
  import type { GradingResult, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
20
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
20
21
  import { detectAgent as _detectAgent } from "../utils/llm-call.js";
21
22
  import { readExcerpt } from "../utils/transcript.js";
22
23
  import {
@@ -62,8 +63,7 @@ Options:
62
63
 
63
64
  const skill = values.skill;
64
65
  if (!skill) {
65
- console.error("[ERROR] --skill is required");
66
- process.exit(1);
66
+ throw new CLIError("--skill is required", "MISSING_FLAG", "selftune auto-grade --skill <name>");
67
67
  }
68
68
 
69
69
  // --- Determine agent ---
@@ -71,10 +71,11 @@ Options:
71
71
  const validAgents = [...AGENT_CANDIDATES];
72
72
  if (values.agent) {
73
73
  if (!validAgents.includes(values.agent)) {
74
- console.error(
75
- `[ERROR] Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
74
+ throw new CLIError(
75
+ `Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
76
+ "INVALID_FLAG",
77
+ `selftune auto-grade --skill <name> --agent ${validAgents[0]}`,
76
78
  );
77
- process.exit(1);
78
79
  }
79
80
  agent = values.agent;
80
81
  } else {
@@ -82,11 +83,11 @@ Options:
82
83
  }
83
84
 
84
85
  if (!agent) {
85
- console.error(
86
- `[ERROR] No supported agent CLI (${AGENT_CANDIDATES.join("/")}) found in PATH.\n` +
87
- "Install one of the supported agent CLIs.",
86
+ throw new CLIError(
87
+ `No supported agent CLI (${AGENT_CANDIDATES.join("/")}) found in PATH`,
88
+ "AGENT_NOT_FOUND",
89
+ "Install one of the supported agent CLIs",
88
90
  );
89
- process.exit(1);
90
91
  }
91
92
 
92
93
  console.error(`[INFO] Auto-grade via agent: ${agent}`);
@@ -104,21 +105,22 @@ Options:
104
105
  sessionId = values["session-id"];
105
106
  const resolved = resolveSessionById(telRecords, sessionId);
106
107
  if (!resolved) {
107
- console.error(
108
- `[ERROR] Session '${sessionId}' not found in telemetry or recoverable transcript data. ` +
109
- "Check the session ID or omit --session-id to auto-select the latest matching session.",
108
+ throw new CLIError(
109
+ `Session '${sessionId}' not found in telemetry or recoverable transcript data`,
110
+ "MISSING_DATA",
111
+ "Check the session ID or omit --session-id to auto-select the latest matching session",
110
112
  );
111
- process.exit(1);
112
113
  }
113
114
  telemetry = resolved.telemetry;
114
115
  transcriptPath = resolved.transcriptPath;
115
116
  } else {
116
117
  const resolved = resolveLatestSessionForSkill(telRecords, skillUsageRecords, skill);
117
118
  if (!resolved) {
118
- console.error(
119
- `[ERROR] No session found for skill '${skill}'. Run the skill first, or pass --session-id.`,
119
+ throw new CLIError(
120
+ `No session found for skill '${skill}'`,
121
+ "MISSING_DATA",
122
+ "Run the skill first, or pass --session-id",
120
123
  );
121
- process.exit(1);
122
124
  }
123
125
  telemetry = resolved.telemetry;
124
126
  sessionId = resolved.sessionId ?? "unknown";
@@ -159,8 +161,11 @@ Options:
159
161
  agent,
160
162
  });
161
163
  } catch (err) {
162
- console.error(`[ERROR] ${err instanceof Error ? err.message : String(err)}`);
163
- process.exit(1);
164
+ throw new CLIError(
165
+ `Grading failed: ${err instanceof Error ? err.message : String(err)}`,
166
+ "OPERATION_FAILED",
167
+ "Check agent availability and try again",
168
+ );
164
169
  }
165
170
 
166
171
  const outputPath = values.output ?? buildDefaultGradingOutputPath(sessionId);
@@ -193,8 +198,5 @@ Options:
193
198
 
194
199
  // Guard: only run when invoked directly
195
200
  if (import.meta.main) {
196
- cliMain().catch((err) => {
197
- console.error(`[FATAL] ${err}`);
198
- process.exit(1);
199
- });
201
+ cliMain().catch(handleCLIError);
200
202
  }