selftune 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
  2. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
  3. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
  4. package/apps/local-dashboard/dist/index.html +3 -3
  5. package/cli/selftune/activation-rules.ts +24 -48
  6. package/cli/selftune/analytics.ts +13 -11
  7. package/cli/selftune/badge/badge.ts +13 -9
  8. package/cli/selftune/canonical-export.ts +6 -6
  9. package/cli/selftune/constants.ts +7 -0
  10. package/cli/selftune/contribute/bundle.ts +9 -44
  11. package/cli/selftune/contribute/contribute.ts +2 -1
  12. package/cli/selftune/cron/setup.ts +3 -1
  13. package/cli/selftune/dashboard-contract.ts +22 -0
  14. package/cli/selftune/dashboard.ts +10 -5
  15. package/cli/selftune/eval/baseline.ts +20 -30
  16. package/cli/selftune/eval/hooks-to-evals.ts +27 -34
  17. package/cli/selftune/eval/import-skillsbench.ts +21 -8
  18. package/cli/selftune/eval/unit-test-cli.ts +22 -11
  19. package/cli/selftune/evolution/description-quality.ts +224 -0
  20. package/cli/selftune/evolution/evolve-body.ts +17 -10
  21. package/cli/selftune/evolution/evolve.ts +70 -57
  22. package/cli/selftune/evolution/rollback.ts +7 -6
  23. package/cli/selftune/grading/auto-grade.ts +27 -35
  24. package/cli/selftune/grading/grade-session.ts +24 -30
  25. package/cli/selftune/hooks/auto-activate.ts +12 -3
  26. package/cli/selftune/hooks/evolution-guard.ts +14 -24
  27. package/cli/selftune/hooks/prompt-log.ts +7 -9
  28. package/cli/selftune/hooks/session-stop.ts +0 -8
  29. package/cli/selftune/index.ts +66 -69
  30. package/cli/selftune/ingestors/claude-replay.ts +29 -14
  31. package/cli/selftune/ingestors/codex-rollout.ts +15 -5
  32. package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
  33. package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
  34. package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
  35. package/cli/selftune/init.ts +14 -9
  36. package/cli/selftune/localdb/queries.ts +57 -0
  37. package/cli/selftune/monitoring/watch.ts +39 -38
  38. package/cli/selftune/normalization.ts +2 -23
  39. package/cli/selftune/orchestrate.ts +224 -24
  40. package/cli/selftune/routes/skill-report.ts +17 -0
  41. package/cli/selftune/schedule.ts +74 -14
  42. package/cli/selftune/sync.ts +7 -3
  43. package/cli/selftune/types.ts +44 -10
  44. package/cli/selftune/utils/cli-error.ts +102 -0
  45. package/cli/selftune/utils/jsonl.ts +2 -0
  46. package/cli/selftune/workflows/workflows.ts +23 -17
  47. package/package.json +3 -1
  48. package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
  49. package/packages/ui/src/components/index.ts +1 -0
  50. package/packages/ui/src/components/section-cards.tsx +13 -0
  51. package/skill/SKILL.md +1 -1
  52. package/skill/Workflows/Evolve.md +4 -0
  53. package/skill/Workflows/Initialize.md +8 -8
  54. package/skill/Workflows/Orchestrate.md +11 -7
  55. package/skill/Workflows/Schedule.md +11 -0
  56. package/skill/references/logs.md +22 -21
  57. package/skill/settings_snippet.json +29 -6
  58. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
  59. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
  60. package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
@@ -36,7 +36,7 @@ import type {
36
36
  SessionTelemetryRecord,
37
37
  SkillUsageRecord,
38
38
  } from "../types.js";
39
- import { readJsonl } from "../utils/jsonl.js";
39
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
40
40
  import { detectAgent } from "../utils/llm-call.js";
41
41
  import {
42
42
  filterActionableQueryRecords,
@@ -410,18 +410,27 @@ export async function cliMain(): Promise<void> {
410
410
  // --- Synthetic mode: generate evals from SKILL.md via LLM ---
411
411
  if (values.synthetic) {
412
412
  if (!values.skill) {
413
- console.error("[ERROR] --skill required with --synthetic");
414
- process.exit(1);
413
+ throw new CLIError(
414
+ "--skill required with --synthetic",
415
+ "MISSING_FLAG",
416
+ "selftune evals --synthetic --skill <name> --skill-path <path>",
417
+ );
415
418
  }
416
419
  if (!values["skill-path"]) {
417
- console.error("[ERROR] --skill-path required with --synthetic");
418
- process.exit(1);
420
+ throw new CLIError(
421
+ "--skill-path required with --synthetic",
422
+ "MISSING_FLAG",
423
+ "selftune evals --synthetic --skill <name> --skill-path <path>",
424
+ );
419
425
  }
420
426
 
421
427
  const agent = detectAgent();
422
428
  if (!agent) {
423
- console.error("[ERROR] No agent CLI found (claude/codex/opencode). Install one first.");
424
- process.exit(1);
429
+ throw new CLIError(
430
+ "No agent CLI found (claude/codex/opencode)",
431
+ "AGENT_NOT_FOUND",
432
+ "Install one of the supported agent CLIs",
433
+ );
425
434
  }
426
435
 
427
436
  const maxPerSide = Number.parseInt(values.max ?? "50", 10);
@@ -464,31 +473,15 @@ export async function cliMain(): Promise<void> {
464
473
  return;
465
474
  }
466
475
 
467
- // --- Log-based mode (original behavior) ---
468
- const skillLogPath = values["skill-log"] ?? SKILL_LOG;
469
- const queryLogPath = values["query-log"] ?? QUERY_LOG;
470
- const telemetryLogPath = values["telemetry-log"] ?? TELEMETRY_LOG;
471
-
476
+ // --- SQLite-based mode ---
472
477
  let skillRecords: SkillUsageRecord[];
473
478
  let queryRecords: QueryLogRecord[];
474
479
  let telemetryRecords: SessionTelemetryRecord[];
475
480
 
476
- // SQLite is the default path; JSONL fallback only for custom --*-log overrides
477
- if (
478
- skillLogPath === SKILL_LOG &&
479
- queryLogPath === QUERY_LOG &&
480
- telemetryLogPath === TELEMETRY_LOG
481
- ) {
482
- const db = getDb();
483
- skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
484
- queryRecords = queryQueryLog(db) as QueryLogRecord[];
485
- telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
486
- } else {
487
- // test/custom-path fallback
488
- skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
489
- queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
490
- telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
491
- }
481
+ const db = getDb();
482
+ skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
483
+ queryRecords = queryQueryLog(db) as QueryLogRecord[];
484
+ telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
492
485
 
493
486
  if (values["list-skills"]) {
494
487
  listSkills(skillRecords, queryRecords, telemetryRecords);
@@ -496,8 +489,11 @@ export async function cliMain(): Promise<void> {
496
489
  }
497
490
 
498
491
  if (!values.skill) {
499
- console.error("[ERROR] --skill required (or use --list-skills)");
500
- process.exit(1);
492
+ throw new CLIError(
493
+ "--skill required (or use --list-skills)",
494
+ "MISSING_FLAG",
495
+ "selftune evals --skill <name> or selftune evals --list-skills",
496
+ );
501
497
  }
502
498
 
503
499
  if (values.stats) {
@@ -525,8 +521,5 @@ export async function cliMain(): Promise<void> {
525
521
  }
526
522
 
527
523
  if (import.meta.main) {
528
- cliMain().catch((err) => {
529
- console.error(err);
530
- process.exit(1);
531
- });
524
+ cliMain().catch(handleCLIError);
532
525
  }
@@ -15,6 +15,7 @@ import { join } from "node:path";
15
15
  import { parseArgs } from "node:util";
16
16
 
17
17
  import type { EvalEntry, SkillsBenchTask } from "../types.js";
18
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
18
19
 
19
20
  // ---------------------------------------------------------------------------
20
21
  // Minimal TOML parser (handles the subset used by SkillsBench task.toml files)
@@ -175,13 +176,19 @@ export function cliMain(): void {
175
176
  });
176
177
 
177
178
  if (!values.dir) {
178
- console.error("[ERROR] --dir required (path to SkillsBench corpus directory)");
179
- process.exit(1);
179
+ throw new CLIError(
180
+ "--dir required (path to SkillsBench corpus directory)",
181
+ "MISSING_FLAG",
182
+ "selftune import-skillsbench --dir <path> --skill <name>",
183
+ );
180
184
  }
181
185
 
182
186
  if (!values.skill) {
183
- console.error("[ERROR] --skill required (target skill name)");
184
- process.exit(1);
187
+ throw new CLIError(
188
+ "--skill required (target skill name)",
189
+ "MISSING_FLAG",
190
+ "selftune import-skillsbench --dir <path> --skill <name>",
191
+ );
185
192
  }
186
193
 
187
194
  const matchStrategy = values["match-strategy"] === "fuzzy" ? "fuzzy" : "exact";
@@ -189,9 +196,11 @@ export function cliMain(): void {
189
196
  const tasks = parseSkillsBenchDir(values.dir);
190
197
 
191
198
  if (tasks.length === 0) {
192
- console.error(`[WARN] No tasks found in ${values.dir}/tasks/`);
193
- console.error("Expected structure: <dir>/tasks/<task-id>/instruction.md");
194
- process.exit(1);
199
+ throw new CLIError(
200
+ `No tasks found in ${values.dir}/tasks/`,
201
+ "MISSING_DATA",
202
+ "Expected structure: <dir>/tasks/<task-id>/instruction.md",
203
+ );
195
204
  }
196
205
 
197
206
  console.log(`Parsed ${tasks.length} tasks from ${values.dir}`);
@@ -218,5 +227,9 @@ export function cliMain(): void {
218
227
  }
219
228
 
220
229
  if (import.meta.main) {
221
- cliMain();
230
+ try {
231
+ cliMain();
232
+ } catch (err) {
233
+ handleCLIError(err);
234
+ }
222
235
  }
@@ -19,6 +19,7 @@ import { parseArgs } from "node:util";
19
19
 
20
20
  import { SELFTUNE_CONFIG_DIR } from "../constants.js";
21
21
  import type { EvalEntry } from "../types.js";
22
+ import { CLIError } from "../utils/cli-error.js";
22
23
  import { callLlm, detectAgent } from "../utils/llm-call.js";
23
24
  import { generateUnitTests } from "./generate-unit-tests.js";
24
25
  import type { AgentRunner } from "./unit-test.js";
@@ -43,8 +44,11 @@ export async function cliMain(): Promise<void> {
43
44
  });
44
45
 
45
46
  if (!values.skill) {
46
- console.error("[ERROR] --skill <name> is required.");
47
- process.exit(1);
47
+ throw new CLIError(
48
+ "--skill <name> is required",
49
+ "MISSING_FLAG",
50
+ "selftune eval unit-test --skill <name>",
51
+ );
48
52
  }
49
53
 
50
54
  const skillName = values.skill;
@@ -56,8 +60,11 @@ export async function cliMain(): Promise<void> {
56
60
  if (values.generate) {
57
61
  const agent = detectAgent();
58
62
  if (!agent) {
59
- console.error("[ERROR] No agent CLI found (claude/codex/opencode). Cannot generate tests.");
60
- process.exit(1);
63
+ throw new CLIError(
64
+ "No agent CLI found (claude/codex/opencode). Cannot generate tests",
65
+ "AGENT_NOT_FOUND",
66
+ "Install one of the supported agent CLIs",
67
+ );
61
68
  }
62
69
 
63
70
  let skillContent = `Skill: ${skillName}`;
@@ -86,8 +93,7 @@ export async function cliMain(): Promise<void> {
86
93
  const tests = await generateUnitTests(skillName, skillContent, evalFailures, llmCaller);
87
94
 
88
95
  if (tests.length === 0) {
89
- console.error("[ERROR] No tests generated. Check agent/LLM availability.");
90
- process.exit(1);
96
+ throw new CLIError("No tests generated", "OPERATION_FAILED", "Check agent/LLM availability");
91
97
  }
92
98
 
93
99
  // Ensure output directory exists
@@ -100,9 +106,11 @@ export async function cliMain(): Promise<void> {
100
106
  // Load and run tests
101
107
  const tests = loadUnitTests(testsPath);
102
108
  if (tests.length === 0) {
103
- console.error(`[ERROR] No tests found at ${testsPath}`);
104
- console.error(" Use --generate to create tests, or provide --tests <path>.");
105
- process.exit(1);
109
+ throw new CLIError(
110
+ `No tests found at ${testsPath}`,
111
+ "FILE_NOT_FOUND",
112
+ "Use --generate to create tests, or provide --tests <path>",
113
+ );
106
114
  }
107
115
 
108
116
  console.log(`Loaded ${tests.length} unit tests for skill '${skillName}'`);
@@ -112,8 +120,11 @@ export async function cliMain(): Promise<void> {
112
120
  if (values["run-agent"]) {
113
121
  const agent = detectAgent();
114
122
  if (!agent) {
115
- console.error("[ERROR] No agent CLI found. Cannot run agent-based tests.");
116
- process.exit(1);
123
+ throw new CLIError(
124
+ "No agent CLI found. Cannot run agent-based tests",
125
+ "AGENT_NOT_FOUND",
126
+ "Install one of the supported agent CLIs",
127
+ );
117
128
  }
118
129
  const modelFlag = values.model;
119
130
  agentRunner = async (query: string): Promise<string> => {
@@ -0,0 +1,224 @@
1
+ /**
2
+ * description-quality.ts
3
+ *
4
+ * Pure, deterministic scoring function that evaluates the quality of a skill
5
+ * description for routing accuracy. No LLM calls — heuristic-only.
6
+ *
7
+ * Inspired by OpenAI's finding that "writing better skill descriptions improved
8
+ * routing accuracy more than any change to the underlying skill logic itself."
9
+ */
10
+
11
+ import type { DescriptionQualityScore } from "../types.js";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Constants
15
+ // ---------------------------------------------------------------------------
16
+
17
+ /** Description length thresholds in characters, all consumed by scoreLengthCriterion: IDEAL_MIN..IDEAL_MAX is the band that scores 1.0; MIN_LENGTH and MAX_LENGTH are the outer thresholds where the falloff changes slope. */
18
+ const MIN_LENGTH = 40;
19
+ const MAX_LENGTH = 500;
20
+ const IDEAL_MIN = 80;
21
+ const IDEAL_MAX = 300;
22
+
23
+ /** Words that indicate trigger context — the description says *when* the skill fires. Each entry is compiled into a case-insensitive whole-word pattern (TRIGGER_PATTERNS); note that the phrase "use when" is matched by both the "use when" and the "when" entries, so it counts as two matches. */
24
+ const TRIGGER_CONTEXT_WORDS = [
25
+ "when",
26
+ "if",
27
+ "after",
28
+ "before",
29
+ "during",
30
+ "while",
31
+ "upon",
32
+ "whenever",
33
+ "use when",
34
+ "trigger",
35
+ "activate",
36
+ ];
37
+
38
+ /** Vague words that weaken routing precision. Each entry is compiled into a case-insensitive whole-word pattern (VAGUE_PATTERNS); scoreVaguenessCriterion lowers its score for every distinct entry found. */
39
+ const VAGUE_WORDS = [
40
+ "various",
41
+ "general",
42
+ "misc",
43
+ "miscellaneous",
44
+ "stuff",
45
+ "things",
46
+ "etc",
47
+ "and more",
48
+ "and so on",
49
+ "other",
50
+ "multiple",
51
+ "several",
52
+ "many",
53
+ "some",
54
+ "certain",
55
+ "related",
56
+ ];
57
+
58
+ /** Common filler phrases that add no routing signal. Unlike the other word lists these are not compiled to patterns: scoreSpecificityCriterion looks for them with a plain substring search on the lowercased description, so entries must be lowercase. */
59
+ const FILLER_PHRASES = [
60
+ "this skill",
61
+ "a tool for",
62
+ "a tool that",
63
+ "helps with",
64
+ "is used for",
65
+ "can be used",
66
+ "is designed to",
67
+ ];
68
+
69
+ /** Action verbs that signal concrete behavior. Each entry is compiled into a case-insensitive whole-word pattern (ACTION_PATTERNS), so only the exact form listed matches — inflected forms such as "runs" or "running" do not. */
70
+ const ACTION_VERBS = [
71
+ "run",
72
+ "execute",
73
+ "analyze",
74
+ "generate",
75
+ "create",
76
+ "deploy",
77
+ "validate",
78
+ "check",
79
+ "build",
80
+ "test",
81
+ "scan",
82
+ "extract",
83
+ "transform",
84
+ "monitor",
85
+ "grade",
86
+ "evolve",
87
+ "sync",
88
+ "watch",
89
+ "review",
90
+ "audit",
91
+ "parse",
92
+ "format",
93
+ "search",
94
+ "fetch",
95
+ "publish",
96
+ "install",
97
+ "configure",
98
+ "diagnose",
99
+ "debug",
100
+ "fix",
101
+ "optimize",
102
+ "measure",
103
+ ];
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Pre-compiled word-boundary patterns
107
+ // ---------------------------------------------------------------------------
108
+
109
/**
 * Compile a word list into case-insensitive, whole-word RegExp patterns.
 *
 * Meant to be called once at module load so the scorers can reuse the
 * compiled patterns instead of rebuilding them per call.
 *
 * Every entry is matched literally: regex metacharacters are escaped first,
 * so an entry such as "node.js" cannot match "nodexjs" and an entry with an
 * unbalanced "(" cannot throw while the module is loading. Any run of
 * whitespace inside a phrase matches one or more whitespace characters.
 *
 * @param words - Words or multi-word phrases to match.
 * @returns One pattern per entry, in the same order as `words`.
 */
function compileWordPatterns(words: string[]): RegExp[] {
  return words.map((word) => {
    // Escape before widening whitespace so the inserted "\s+" is not re-escaped.
    const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const source = literal.replace(/\s+/g, "\\s+");
    return new RegExp(`\\b${source}\\b`, "i");
  });
}
113
+
114
+ const TRIGGER_PATTERNS = compileWordPatterns(TRIGGER_CONTEXT_WORDS); // compiled once at module load; used by scoreTriggerContextCriterion
115
+ const VAGUE_PATTERNS = compileWordPatterns(VAGUE_WORDS); // used by scoreVaguenessCriterion
116
+ const ACTION_PATTERNS = compileWordPatterns(ACTION_VERBS); // used by scoreSpecificityCriterion
117
+
118
/**
 * Tally how many of the given patterns match `text` at least once.
 *
 * A pattern contributes at most 1 to the total, however many times it
 * occurs in the text.
 *
 * @param text - Text to search.
 * @param patterns - Patterns to try, each tested once in order.
 * @returns Number of patterns that matched.
 */
function countWordMatches(text: string, patterns: RegExp[]): number {
  return patterns.reduce((hits, pattern) => (pattern.test(text) ? hits + 1 : hits), 0);
}
126
+
127
+ // ---------------------------------------------------------------------------
128
+ // Criterion scorers
129
+ // ---------------------------------------------------------------------------
130
+
131
/**
 * Score description length on a 0.0–1.0 scale.
 *
 * The score is piecewise-linear and continuous in the character count:
 * - below MIN_LENGTH: rises linearly from 0.0 (empty string) towards 0.7
 * - MIN_LENGTH up to IDEAL_MIN: rises from 0.7 to 1.0
 * - IDEAL_MIN through IDEAL_MAX: 1.0
 * - IDEAL_MAX up to MAX_LENGTH: falls from 1.0 to 0.7
 * - beyond MAX_LENGTH: keeps falling, floored at 0.3
 *
 * @param description - Description text; length is `description.length`.
 * @returns Length score in the range 0.0–1.0.
 */
export function scoreLengthCriterion(description: string): number {
  const len = description.length;
  // The short-description ramp tops out at 0.7 because the next band starts at
  // 0.7 when len === MIN_LENGTH. Ramping to 1.0 here would rank a too-short
  // description (e.g. 39 chars) above an acceptable one (40 chars).
  if (len < MIN_LENGTH) return 0.7 * (len / MIN_LENGTH);
  if (len < IDEAL_MIN) return 0.7 + 0.3 * ((len - MIN_LENGTH) / (IDEAL_MIN - MIN_LENGTH));
  if (len <= IDEAL_MAX) return 1.0;
  if (len <= MAX_LENGTH) return 0.7 + 0.3 * ((MAX_LENGTH - len) / (MAX_LENGTH - IDEAL_MAX));
  return Math.max(0.3, 0.7 - 0.4 * ((len - MAX_LENGTH) / MAX_LENGTH));
}
140
+
141
/**
 * Score how clearly the description states when the skill should fire.
 *
 * Counts the distinct trigger-context patterns (when/if/before/after …)
 * found in the description: none scores 0.0, one scores 0.7, and each
 * further one adds 0.15, capped at 1.0.
 *
 * @param description - Description text to evaluate.
 * @returns Trigger-context score in the range 0.0–1.0.
 */
export function scoreTriggerContextCriterion(description: string): number {
  const hits = countWordMatches(description.toLowerCase(), TRIGGER_PATTERNS);
  if (hits === 0) return 0.0;
  return hits === 1 ? 0.7 : Math.min(1.0, 0.7 + 0.15 * (hits - 1));
}
148
+
149
/**
 * Score how free the description is of vague wording (higher is better).
 *
 * No vague term scores 1.0, one scores 0.6, and every additional distinct
 * term costs another 0.15, never dropping below 0.1.
 *
 * @param description - Description text to evaluate.
 * @returns Vagueness score in the range 0.1–1.0.
 */
export function scoreVaguenessCriterion(description: string): number {
  const vagueHits = countWordMatches(description.toLowerCase(), VAGUE_PATTERNS);
  switch (vagueHits) {
    case 0:
      return 1.0;
    case 1:
      return 0.6;
    default:
      return Math.max(0.1, 0.6 - 0.15 * (vagueHits - 1));
  }
}
156
+
157
/**
 * Score whether the description names a concrete action.
 *
 * Without any recognized action verb the score is a flat 0.2. Otherwise it
 * starts at 1.0 and is reduced in proportion to the number of distinct filler
 * phrases relative to the description's length (per ten whitespace-separated
 * words), never dropping below 0.3.
 *
 * @param description - Description text to evaluate.
 * @returns Specificity score: 0.2, or a value in the range 0.3–1.0.
 */
export function scoreSpecificityCriterion(description: string): number {
  const normalized = description.toLowerCase();

  // No recognized verb: nothing else can raise the score.
  if (!ACTION_PATTERNS.some((pattern) => pattern.test(normalized))) return 0.2;

  let fillerHits = 0;
  for (const phrase of FILLER_PHRASES) {
    if (normalized.includes(phrase)) fillerHits += 1;
  }

  const wordCount = description.split(/\s+/).length;
  // Descriptions of ten words or fewer use a divisor of 1, so each filler phrase costs the full 0.3.
  const fillerRatio = fillerHits / Math.max(1, wordCount / 10);
  return Math.max(0.3, 1.0 - fillerRatio * 0.3);
}
169
+
170
/**
 * Score whether the description says more than the skill's own name.
 *
 * - 1.0 when no skill name is supplied (nothing to compare against).
 * - 0.0 when the description is the name restated. The comparison ignores
 *   case, spacing and every separator or punctuation character, so
 *   "Code  Review", "code_review" and "code.review" all restate "code-review".
 * - 0.3 when the description is fewer than ten characters longer than the name.
 * - 1.0 otherwise.
 *
 * @param description - Description text to evaluate.
 * @param skillName - Optional skill identifier (e.g. kebab-case name).
 * @returns 0.0, 0.3 or 1.0.
 */
export function scoreNotJustNameCriterion(description: string, skillName?: string): number {
  if (!skillName) return 1.0;

  // Letters and digits only: the form used to detect a restated name.
  const compact = (text: string): string => text.toLowerCase().replace(/[^a-z0-9]/g, "");
  // Punctuation removed but spacing kept: the form used for the length check.
  const stripPunctuation = (text: string): string =>
    text
      .toLowerCase()
      .trim()
      .replace(/[^a-z0-9\s]/g, "");

  const nameCompact = compact(skillName);
  // Skip the equality check when the name has no ASCII letters or digits;
  // otherwise any description without them would "equal" the empty name.
  if (nameCompact !== "" && compact(description) === nameCompact) return 0.0;

  const descNorm = stripPunctuation(description);
  const nameNorm = stripPunctuation(skillName);
  if (descNorm.length < nameNorm.length + 10) return 0.3;
  return 1.0;
}
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // Main scoring function
190
+ // ---------------------------------------------------------------------------
191
+
192
+ /** Criterion weights used by scoreDescription to build the composite — trigger context is weighted highest per OpenAI's finding. The five weights sum to 1.0, and their key order is the order in which the weighted terms are summed. */
193
+ const WEIGHTS = {
194
+ length: 0.15,
195
+ trigger_context: 0.3,
196
+ vagueness: 0.2,
197
+ specificity: 0.2,
198
+ not_just_name: 0.15,
199
+ } as const;
200
+
201
/**
 * Evaluate a skill description against every heuristic criterion.
 *
 * Each criterion scorer is run once, and the results are combined into a
 * weighted sum using WEIGHTS, rounded to three decimal places. No I/O and no
 * LLM calls are involved.
 *
 * @param description - Description text to evaluate.
 * @param skillName - Optional skill name, used only by the "not just the name" criterion.
 * @returns The rounded composite score together with the per-criterion breakdown.
 */
export function scoreDescription(description: string, skillName?: string): DescriptionQualityScore {
  const criteria = {
    length: scoreLengthCriterion(description),
    trigger_context: scoreTriggerContextCriterion(description),
    vagueness: scoreVaguenessCriterion(description),
    specificity: scoreSpecificityCriterion(description),
    not_just_name: scoreNotJustNameCriterion(description, skillName),
  };

  // Accumulate in WEIGHTS key order so the floating-point sum is reproducible.
  let weighted = 0;
  for (const key of Object.keys(WEIGHTS) as (keyof typeof WEIGHTS)[]) {
    weighted += criteria[key] * WEIGHTS[key];
  }

  return { composite: Number(weighted.toFixed(3)), criteria };
}
@@ -25,6 +25,7 @@ import type {
25
25
  QueryLogRecord,
26
26
  SkillUsageRecord,
27
27
  } from "../types.js";
28
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
28
29
  import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
29
30
  import { callViaSubagent } from "../utils/llm-call.js";
30
31
  import { appendAuditEntry } from "./audit.js";
@@ -710,8 +711,11 @@ Options:
710
711
  }
711
712
 
712
713
  if (!values.skill || !values["skill-path"]) {
713
- console.error("[ERROR] --skill and --skill-path are required");
714
- process.exit(1);
714
+ throw new CLIError(
715
+ "--skill and --skill-path are required",
716
+ "MISSING_FLAG",
717
+ "selftune evolve body --skill <name> --skill-path <path>",
718
+ );
715
719
  }
716
720
 
717
721
  const { detectAgent } = await import("../utils/llm-call.js");
@@ -719,15 +723,21 @@ Options:
719
723
  const studentAgent = values["student-agent"] ?? teacherAgent;
720
724
 
721
725
  if (!teacherAgent) {
722
- console.error("[ERROR] No agent CLI found. Install Claude Code, Codex, or OpenCode.");
723
- process.exit(1);
726
+ throw new CLIError(
727
+ "No agent CLI found. Install Claude Code, Codex, or OpenCode.",
728
+ "AGENT_NOT_FOUND",
729
+ "Install Claude Code, Codex, or OpenCode.",
730
+ );
724
731
  }
725
732
 
726
733
  // Parse target
727
734
  const targetStr = values.target ?? "body";
728
735
  if (targetStr !== "body" && targetStr !== "routing") {
729
- console.error("[ERROR] --target must be 'body' or 'routing'");
730
- process.exit(1);
736
+ throw new CLIError(
737
+ "--target must be 'body' or 'routing'",
738
+ "INVALID_FLAG",
739
+ "Use --target body or --target routing",
740
+ );
731
741
  }
732
742
 
733
743
  // Parse few-shot examples
@@ -763,8 +773,5 @@ Options:
763
773
  }
764
774
 
765
775
  if (import.meta.main) {
766
- cliMain().catch((err) => {
767
- console.error(`[FATAL] ${err}`);
768
- process.exit(1);
769
- });
776
+ cliMain().catch(handleCLIError);
770
777
  }