selftune 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
- package/apps/local-dashboard/dist/index.html +2 -2
- package/cli/selftune/analytics.ts +13 -11
- package/cli/selftune/badge/badge.ts +13 -9
- package/cli/selftune/canonical-export.ts +6 -6
- package/cli/selftune/contribute/contribute.ts +2 -1
- package/cli/selftune/cron/setup.ts +3 -1
- package/cli/selftune/dashboard-contract.ts +10 -0
- package/cli/selftune/dashboard.ts +10 -5
- package/cli/selftune/eval/baseline.ts +20 -30
- package/cli/selftune/eval/hooks-to-evals.ts +22 -12
- package/cli/selftune/eval/import-skillsbench.ts +21 -8
- package/cli/selftune/eval/unit-test-cli.ts +22 -11
- package/cli/selftune/evolution/description-quality.ts +224 -0
- package/cli/selftune/evolution/evolve-body.ts +17 -10
- package/cli/selftune/evolution/evolve.ts +70 -57
- package/cli/selftune/evolution/rollback.ts +7 -6
- package/cli/selftune/grading/auto-grade.ts +24 -22
- package/cli/selftune/grading/grade-session.ts +21 -17
- package/cli/selftune/hooks/auto-activate.ts +12 -3
- package/cli/selftune/hooks/prompt-log.ts +7 -1
- package/cli/selftune/index.ts +66 -69
- package/cli/selftune/ingestors/claude-replay.ts +29 -14
- package/cli/selftune/ingestors/codex-rollout.ts +6 -1
- package/cli/selftune/init.ts +14 -9
- package/cli/selftune/monitoring/watch.ts +32 -16
- package/cli/selftune/orchestrate.ts +18 -17
- package/cli/selftune/routes/skill-report.ts +17 -0
- package/cli/selftune/schedule.ts +23 -9
- package/cli/selftune/sync.ts +7 -3
- package/cli/selftune/types.ts +44 -10
- package/cli/selftune/utils/cli-error.ts +102 -0
- package/cli/selftune/workflows/workflows.ts +23 -17
- package/package.json +1 -1
- package/skill/SKILL.md +1 -1
- package/skill/Workflows/Evolve.md +4 -0
- package/skill/Workflows/Initialize.md +8 -8
- package/skill/settings_snippet.json +29 -6
- package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
|
@@ -5,10 +5,10 @@
|
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>selftune — Dashboard</title>
|
|
7
7
|
<link rel="icon" type="image/png" href="/favicon.png" />
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-DIrdlu2_.js"></script>
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-DOu3iLD9.js"></script>
|
|
9
9
|
<link rel="modulepreload" crossorigin href="/assets/rolldown-runtime-Dw2cE7zH.js">
|
|
10
10
|
<link rel="modulepreload" crossorigin href="/assets/vendor-react-CKkiCskZ.js">
|
|
11
|
-
<link rel="modulepreload" crossorigin href="/assets/vendor-ui-7xD7fNEU.js">
|
|
11
|
+
<link rel="modulepreload" crossorigin href="/assets/vendor-ui-DIwlrGlb.js">
|
|
12
12
|
<link rel="modulepreload" crossorigin href="/assets/vendor-table-pHbDxq36.js">
|
|
13
13
|
<link rel="stylesheet" crossorigin href="/assets/index-BMIS6uUh.css">
|
|
14
14
|
</head>
|
|
@@ -24,6 +24,7 @@ import { join } from "node:path";
|
|
|
24
24
|
|
|
25
25
|
import { SELFTUNE_CONFIG_DIR, SELFTUNE_CONFIG_PATH } from "./constants.js";
|
|
26
26
|
import type { SelftuneConfig } from "./types.js";
|
|
27
|
+
import { CLIError } from "./utils/cli-error.js";
|
|
27
28
|
|
|
28
29
|
// ---------------------------------------------------------------------------
|
|
29
30
|
// Configuration
|
|
@@ -280,11 +281,11 @@ https://github.com/selftune-dev/selftune#telemetry`);
|
|
|
280
281
|
try {
|
|
281
282
|
writeConfigField("analytics_disabled", true);
|
|
282
283
|
} catch {
|
|
283
|
-
|
|
284
|
-
"Failed to disable telemetry: cannot write ~/.selftune/config.json
|
|
285
|
-
|
|
284
|
+
throw new CLIError(
|
|
285
|
+
"Failed to disable telemetry: cannot write ~/.selftune/config.json",
|
|
286
|
+
"OPERATION_FAILED",
|
|
287
|
+
"Check file permissions, or set SELFTUNE_NO_ANALYTICS=1",
|
|
286
288
|
);
|
|
287
|
-
process.exit(1);
|
|
288
289
|
}
|
|
289
290
|
console.log("Telemetry disabled. No anonymous usage data will be sent.");
|
|
290
291
|
console.log("You can re-enable with: selftune telemetry enable");
|
|
@@ -294,11 +295,11 @@ https://github.com/selftune-dev/selftune#telemetry`);
|
|
|
294
295
|
try {
|
|
295
296
|
writeConfigField("analytics_disabled", false);
|
|
296
297
|
} catch {
|
|
297
|
-
|
|
298
|
-
"Failed to enable telemetry: cannot write ~/.selftune/config.json
|
|
299
|
-
|
|
298
|
+
throw new CLIError(
|
|
299
|
+
"Failed to enable telemetry: cannot write ~/.selftune/config.json",
|
|
300
|
+
"OPERATION_FAILED",
|
|
301
|
+
"Check file permissions",
|
|
300
302
|
);
|
|
301
|
-
process.exit(1);
|
|
302
303
|
}
|
|
303
304
|
console.log("Telemetry enabled. Anonymous usage data will be sent.");
|
|
304
305
|
console.log("Disable anytime with: selftune telemetry disable");
|
|
@@ -331,10 +332,11 @@ https://github.com/selftune-dev/selftune#telemetry`);
|
|
|
331
332
|
break;
|
|
332
333
|
}
|
|
333
334
|
default:
|
|
334
|
-
|
|
335
|
-
`Unknown telemetry subcommand: ${sub}
|
|
335
|
+
throw new CLIError(
|
|
336
|
+
`Unknown telemetry subcommand: ${sub}`,
|
|
337
|
+
"INVALID_FLAG",
|
|
338
|
+
"selftune telemetry --help",
|
|
336
339
|
);
|
|
337
|
-
process.exit(1);
|
|
338
340
|
}
|
|
339
341
|
}
|
|
340
342
|
|
|
@@ -24,6 +24,7 @@ import type {
|
|
|
24
24
|
SessionTelemetryRecord,
|
|
25
25
|
SkillUsageRecord,
|
|
26
26
|
} from "../types.js";
|
|
27
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
27
28
|
import type { BadgeFormat } from "./badge-data.js";
|
|
28
29
|
import { findSkillBadgeData } from "./badge-data.js";
|
|
29
30
|
import { formatBadgeOutput } from "./badge-svg.js";
|
|
@@ -58,15 +59,15 @@ export async function cliMain(): Promise<void> {
|
|
|
58
59
|
}
|
|
59
60
|
|
|
60
61
|
if (!values.skill) {
|
|
61
|
-
|
|
62
|
-
console.error(HELP);
|
|
63
|
-
process.exit(1);
|
|
62
|
+
throw new CLIError("--skill is required", "MISSING_FLAG", "selftune badge --skill <name>");
|
|
64
63
|
}
|
|
65
64
|
|
|
66
65
|
if (values.format && !VALID_FORMATS.has(values.format as BadgeFormat)) {
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
66
|
+
throw new CLIError(
|
|
67
|
+
`Invalid format '${values.format}'. Must be one of: svg, markdown, url`,
|
|
68
|
+
"INVALID_FLAG",
|
|
69
|
+
"selftune badge --skill <name> --format svg",
|
|
70
|
+
);
|
|
70
71
|
}
|
|
71
72
|
|
|
72
73
|
const format: BadgeFormat =
|
|
@@ -90,8 +91,11 @@ export async function cliMain(): Promise<void> {
|
|
|
90
91
|
// Find skill badge data
|
|
91
92
|
const badgeData = findSkillBadgeData(result, values.skill);
|
|
92
93
|
if (!badgeData) {
|
|
93
|
-
|
|
94
|
-
|
|
94
|
+
throw new CLIError(
|
|
95
|
+
`Skill not found: ${values.skill}`,
|
|
96
|
+
"MISSING_DATA",
|
|
97
|
+
"selftune status --json # list available skill names",
|
|
98
|
+
);
|
|
95
99
|
}
|
|
96
100
|
|
|
97
101
|
// Generate output
|
|
@@ -106,5 +110,5 @@ export async function cliMain(): Promise<void> {
|
|
|
106
110
|
}
|
|
107
111
|
|
|
108
112
|
if (import.meta.main) {
|
|
109
|
-
cliMain();
|
|
113
|
+
cliMain().catch(handleCLIError);
|
|
110
114
|
}
|
|
@@ -26,13 +26,14 @@ import {
|
|
|
26
26
|
readCanonicalRecords,
|
|
27
27
|
serializeCanonicalRecords,
|
|
28
28
|
} from "./utils/canonical-log.js";
|
|
29
|
+
import { CLIError, handleCLIError } from "./utils/cli-error.js";
|
|
29
30
|
|
|
30
31
|
function exitWithUsage(message?: string): never {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
throw new CLIError(
|
|
33
|
+
message ?? "Invalid usage.",
|
|
34
|
+
"INVALID_FLAG",
|
|
35
|
+
"Usage: selftune export-canonical [--out FILE] [--platform NAME] [--record-kind KIND] [--pretty] [--log FILE] [--projects-dir PATH] [--push-payload]",
|
|
34
36
|
);
|
|
35
|
-
process.exit(1);
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
function validatePlatform(value: string | undefined): CanonicalPlatform | undefined {
|
|
@@ -195,7 +196,6 @@ if (import.meta.main) {
|
|
|
195
196
|
try {
|
|
196
197
|
cliMain();
|
|
197
198
|
} catch (error) {
|
|
198
|
-
|
|
199
|
-
exitWithUsage(message);
|
|
199
|
+
handleCLIError(error);
|
|
200
200
|
}
|
|
201
201
|
}
|
|
@@ -12,6 +12,7 @@ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
|
12
12
|
import { parseArgs } from "node:util";
|
|
13
13
|
|
|
14
14
|
import { CONTRIBUTIONS_DIR } from "../constants.js";
|
|
15
|
+
import { handleCLIError } from "../utils/cli-error.js";
|
|
15
16
|
import { assembleBundle } from "./bundle.js";
|
|
16
17
|
import { sanitizeBundle } from "./sanitize.js";
|
|
17
18
|
|
|
@@ -211,5 +212,5 @@ function submitToGitHub(json: string, outputPath: string): boolean {
|
|
|
211
212
|
}
|
|
212
213
|
|
|
213
214
|
if (import.meta.main) {
|
|
214
|
-
|
|
215
|
+
cliMain().catch(handleCLIError);
|
|
215
216
|
}
|
|
@@ -18,6 +18,8 @@ import { homedir } from "node:os";
|
|
|
18
18
|
import { join } from "node:path";
|
|
19
19
|
import { parseArgs } from "node:util";
|
|
20
20
|
|
|
21
|
+
import { handleCLIError } from "../utils/cli-error.js";
|
|
22
|
+
|
|
21
23
|
// ---------------------------------------------------------------------------
|
|
22
24
|
// Types & constants
|
|
23
25
|
// ---------------------------------------------------------------------------
|
|
@@ -262,5 +264,5 @@ Subcommands:
|
|
|
262
264
|
}
|
|
263
265
|
|
|
264
266
|
if (import.meta.main) {
|
|
265
|
-
|
|
267
|
+
cliMain().catch(handleCLIError);
|
|
266
268
|
}
|
|
@@ -242,4 +242,14 @@ export interface SkillReportResponse extends SkillReportPayload {
|
|
|
242
242
|
};
|
|
243
243
|
prompt_samples: PromptSample[];
|
|
244
244
|
session_metadata: SessionMeta[];
|
|
245
|
+
description_quality?: {
|
|
246
|
+
composite: number;
|
|
247
|
+
criteria: {
|
|
248
|
+
length: number;
|
|
249
|
+
trigger_context: number;
|
|
250
|
+
vagueness: number;
|
|
251
|
+
specificity: number;
|
|
252
|
+
not_just_name: number;
|
|
253
|
+
};
|
|
254
|
+
} | null;
|
|
245
255
|
}
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
* selftune dashboard --serve — Deprecated alias for the default behavior
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
+
import { CLIError } from "./utils/cli-error.js";
|
|
11
|
+
|
|
10
12
|
export async function cliMain(): Promise<void> {
|
|
11
13
|
const args = process.argv.slice(2);
|
|
12
14
|
|
|
@@ -22,11 +24,11 @@ Usage:
|
|
|
22
24
|
}
|
|
23
25
|
|
|
24
26
|
if (args.includes("--export") || args.includes("--out")) {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
throw new CLIError(
|
|
28
|
+
"Legacy dashboard export was removed.",
|
|
29
|
+
"INVALID_FLAG",
|
|
27
30
|
"Use `selftune dashboard` to run the SPA locally, then share a route or screenshot instead.",
|
|
28
31
|
);
|
|
29
|
-
process.exit(1);
|
|
30
32
|
}
|
|
31
33
|
|
|
32
34
|
const portIdx = args.indexOf("--port");
|
|
@@ -34,8 +36,11 @@ Usage:
|
|
|
34
36
|
if (portIdx !== -1) {
|
|
35
37
|
const parsed = Number.parseInt(args[portIdx + 1], 10);
|
|
36
38
|
if (!Number.isInteger(parsed) || parsed < 1 || parsed > 65535) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
+
throw new CLIError(
|
|
40
|
+
`Invalid port "${args[portIdx + 1]}": must be an integer between 1 and 65535.`,
|
|
41
|
+
"INVALID_FLAG",
|
|
42
|
+
"Provide a port number between 1 and 65535 (e.g., --port 3141).",
|
|
43
|
+
);
|
|
39
44
|
}
|
|
40
45
|
port = parsed;
|
|
41
46
|
}
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import { parseArgs } from "node:util";
|
|
12
12
|
|
|
13
13
|
import type { BaselineResult, EvalEntry } from "../types.js";
|
|
14
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
14
15
|
import { callLlm } from "../utils/llm-call.js";
|
|
15
16
|
import { buildTriggerCheckPrompt, parseTriggerResponse } from "../utils/trigger-check.js";
|
|
16
17
|
|
|
@@ -166,8 +167,11 @@ Options:
|
|
|
166
167
|
}
|
|
167
168
|
|
|
168
169
|
if (!values.skill || !values["skill-path"]) {
|
|
169
|
-
|
|
170
|
-
|
|
170
|
+
throw new CLIError(
|
|
171
|
+
"--skill and --skill-path are required",
|
|
172
|
+
"MISSING_FLAG",
|
|
173
|
+
"selftune grade baseline --skill <name> --skill-path <path>",
|
|
174
|
+
);
|
|
171
175
|
}
|
|
172
176
|
|
|
173
177
|
const { existsSync, readFileSync } = await import("node:fs");
|
|
@@ -175,8 +179,11 @@ Options:
|
|
|
175
179
|
// Read skill description
|
|
176
180
|
const skillPath = values["skill-path"];
|
|
177
181
|
if (!existsSync(skillPath)) {
|
|
178
|
-
|
|
179
|
-
|
|
182
|
+
throw new CLIError(
|
|
183
|
+
`SKILL.md not found at ${skillPath}`,
|
|
184
|
+
"FILE_NOT_FOUND",
|
|
185
|
+
"Provide a valid --skill-path pointing to SKILL.md",
|
|
186
|
+
);
|
|
180
187
|
}
|
|
181
188
|
const skillDescription = readFileSync(skillPath, "utf-8");
|
|
182
189
|
|
|
@@ -204,27 +211,19 @@ Options:
|
|
|
204
211
|
const { detectAgent } = await import("../utils/llm-call.js");
|
|
205
212
|
const requestedAgent = values.agent;
|
|
206
213
|
if (requestedAgent && !Bun.which(requestedAgent)) {
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
message: `Agent CLI '${requestedAgent}' not found in PATH.`,
|
|
212
|
-
action: "Install it or omit --agent to use auto-detection.",
|
|
213
|
-
}),
|
|
214
|
+
throw new CLIError(
|
|
215
|
+
`Agent CLI '${requestedAgent}' not found in PATH`,
|
|
216
|
+
"AGENT_NOT_FOUND",
|
|
217
|
+
"Install it or omit --agent to use auto-detection",
|
|
214
218
|
);
|
|
215
|
-
process.exit(1);
|
|
216
219
|
}
|
|
217
220
|
const agent = requestedAgent ?? detectAgent();
|
|
218
221
|
if (!agent) {
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
message: "No agent CLI (claude/codex/opencode) found in PATH.",
|
|
224
|
-
action: "Install Claude Code, Codex, or OpenCode.",
|
|
225
|
-
}),
|
|
222
|
+
throw new CLIError(
|
|
223
|
+
"No agent CLI (claude/codex/opencode) found in PATH",
|
|
224
|
+
"AGENT_NOT_FOUND",
|
|
225
|
+
"Install Claude Code, Codex, or OpenCode",
|
|
226
226
|
);
|
|
227
|
-
process.exit(1);
|
|
228
227
|
}
|
|
229
228
|
|
|
230
229
|
const result = await measureBaseline({
|
|
@@ -239,14 +238,5 @@ Options:
|
|
|
239
238
|
}
|
|
240
239
|
|
|
241
240
|
if (import.meta.main) {
|
|
242
|
-
cliMain().catch((err) => {
|
|
243
|
-
console.error(
|
|
244
|
-
JSON.stringify({
|
|
245
|
-
level: "fatal",
|
|
246
|
-
message: err instanceof Error ? err.message : String(err),
|
|
247
|
-
stack: err instanceof Error ? err.stack : undefined,
|
|
248
|
-
}),
|
|
249
|
-
);
|
|
250
|
-
process.exit(1);
|
|
251
|
-
});
|
|
241
|
+
cliMain().catch(handleCLIError);
|
|
252
242
|
}
|
|
@@ -36,6 +36,7 @@ import type {
|
|
|
36
36
|
SessionTelemetryRecord,
|
|
37
37
|
SkillUsageRecord,
|
|
38
38
|
} from "../types.js";
|
|
39
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
39
40
|
import { detectAgent } from "../utils/llm-call.js";
|
|
40
41
|
import {
|
|
41
42
|
filterActionableQueryRecords,
|
|
@@ -409,18 +410,27 @@ export async function cliMain(): Promise<void> {
|
|
|
409
410
|
// --- Synthetic mode: generate evals from SKILL.md via LLM ---
|
|
410
411
|
if (values.synthetic) {
|
|
411
412
|
if (!values.skill) {
|
|
412
|
-
|
|
413
|
-
|
|
413
|
+
throw new CLIError(
|
|
414
|
+
"--skill required with --synthetic",
|
|
415
|
+
"MISSING_FLAG",
|
|
416
|
+
"selftune evals --synthetic --skill <name> --skill-path <path>",
|
|
417
|
+
);
|
|
414
418
|
}
|
|
415
419
|
if (!values["skill-path"]) {
|
|
416
|
-
|
|
417
|
-
|
|
420
|
+
throw new CLIError(
|
|
421
|
+
"--skill-path required with --synthetic",
|
|
422
|
+
"MISSING_FLAG",
|
|
423
|
+
"selftune evals --synthetic --skill <name> --skill-path <path>",
|
|
424
|
+
);
|
|
418
425
|
}
|
|
419
426
|
|
|
420
427
|
const agent = detectAgent();
|
|
421
428
|
if (!agent) {
|
|
422
|
-
|
|
423
|
-
|
|
429
|
+
throw new CLIError(
|
|
430
|
+
"No agent CLI found (claude/codex/opencode)",
|
|
431
|
+
"AGENT_NOT_FOUND",
|
|
432
|
+
"Install one of the supported agent CLIs",
|
|
433
|
+
);
|
|
424
434
|
}
|
|
425
435
|
|
|
426
436
|
const maxPerSide = Number.parseInt(values.max ?? "50", 10);
|
|
@@ -479,8 +489,11 @@ export async function cliMain(): Promise<void> {
|
|
|
479
489
|
}
|
|
480
490
|
|
|
481
491
|
if (!values.skill) {
|
|
482
|
-
|
|
483
|
-
|
|
492
|
+
throw new CLIError(
|
|
493
|
+
"--skill required (or use --list-skills)",
|
|
494
|
+
"MISSING_FLAG",
|
|
495
|
+
"selftune evals --skill <name> or selftune evals --list-skills",
|
|
496
|
+
);
|
|
484
497
|
}
|
|
485
498
|
|
|
486
499
|
if (values.stats) {
|
|
@@ -508,8 +521,5 @@ export async function cliMain(): Promise<void> {
|
|
|
508
521
|
}
|
|
509
522
|
|
|
510
523
|
if (import.meta.main) {
|
|
511
|
-
cliMain().catch((err) => {
|
|
512
|
-
console.error(err);
|
|
513
|
-
process.exit(1);
|
|
514
|
-
});
|
|
524
|
+
cliMain().catch(handleCLIError);
|
|
515
525
|
}
|
|
@@ -15,6 +15,7 @@ import { join } from "node:path";
|
|
|
15
15
|
import { parseArgs } from "node:util";
|
|
16
16
|
|
|
17
17
|
import type { EvalEntry, SkillsBenchTask } from "../types.js";
|
|
18
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
18
19
|
|
|
19
20
|
// ---------------------------------------------------------------------------
|
|
20
21
|
// Minimal TOML parser (handles the subset used by SkillsBench task.toml files)
|
|
@@ -175,13 +176,19 @@ export function cliMain(): void {
|
|
|
175
176
|
});
|
|
176
177
|
|
|
177
178
|
if (!values.dir) {
|
|
178
|
-
|
|
179
|
-
|
|
179
|
+
throw new CLIError(
|
|
180
|
+
"--dir required (path to SkillsBench corpus directory)",
|
|
181
|
+
"MISSING_FLAG",
|
|
182
|
+
"selftune import-skillsbench --dir <path> --skill <name>",
|
|
183
|
+
);
|
|
180
184
|
}
|
|
181
185
|
|
|
182
186
|
if (!values.skill) {
|
|
183
|
-
|
|
184
|
-
|
|
187
|
+
throw new CLIError(
|
|
188
|
+
"--skill required (target skill name)",
|
|
189
|
+
"MISSING_FLAG",
|
|
190
|
+
"selftune import-skillsbench --dir <path> --skill <name>",
|
|
191
|
+
);
|
|
185
192
|
}
|
|
186
193
|
|
|
187
194
|
const matchStrategy = values["match-strategy"] === "fuzzy" ? "fuzzy" : "exact";
|
|
@@ -189,9 +196,11 @@ export function cliMain(): void {
|
|
|
189
196
|
const tasks = parseSkillsBenchDir(values.dir);
|
|
190
197
|
|
|
191
198
|
if (tasks.length === 0) {
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
199
|
+
throw new CLIError(
|
|
200
|
+
`No tasks found in ${values.dir}/tasks/`,
|
|
201
|
+
"MISSING_DATA",
|
|
202
|
+
"Expected structure: <dir>/tasks/<task-id>/instruction.md",
|
|
203
|
+
);
|
|
195
204
|
}
|
|
196
205
|
|
|
197
206
|
console.log(`Parsed ${tasks.length} tasks from ${values.dir}`);
|
|
@@ -218,5 +227,9 @@ export function cliMain(): void {
|
|
|
218
227
|
}
|
|
219
228
|
|
|
220
229
|
if (import.meta.main) {
|
|
221
|
-
|
|
230
|
+
try {
|
|
231
|
+
cliMain();
|
|
232
|
+
} catch (err) {
|
|
233
|
+
handleCLIError(err);
|
|
234
|
+
}
|
|
222
235
|
}
|
|
@@ -19,6 +19,7 @@ import { parseArgs } from "node:util";
|
|
|
19
19
|
|
|
20
20
|
import { SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
21
21
|
import type { EvalEntry } from "../types.js";
|
|
22
|
+
import { CLIError } from "../utils/cli-error.js";
|
|
22
23
|
import { callLlm, detectAgent } from "../utils/llm-call.js";
|
|
23
24
|
import { generateUnitTests } from "./generate-unit-tests.js";
|
|
24
25
|
import type { AgentRunner } from "./unit-test.js";
|
|
@@ -43,8 +44,11 @@ export async function cliMain(): Promise<void> {
|
|
|
43
44
|
});
|
|
44
45
|
|
|
45
46
|
if (!values.skill) {
|
|
46
|
-
|
|
47
|
-
|
|
47
|
+
throw new CLIError(
|
|
48
|
+
"--skill <name> is required",
|
|
49
|
+
"MISSING_FLAG",
|
|
50
|
+
"selftune eval unit-test --skill <name>",
|
|
51
|
+
);
|
|
48
52
|
}
|
|
49
53
|
|
|
50
54
|
const skillName = values.skill;
|
|
@@ -56,8 +60,11 @@ export async function cliMain(): Promise<void> {
|
|
|
56
60
|
if (values.generate) {
|
|
57
61
|
const agent = detectAgent();
|
|
58
62
|
if (!agent) {
|
|
59
|
-
|
|
60
|
-
|
|
63
|
+
throw new CLIError(
|
|
64
|
+
"No agent CLI found (claude/codex/opencode). Cannot generate tests",
|
|
65
|
+
"AGENT_NOT_FOUND",
|
|
66
|
+
"Install one of the supported agent CLIs",
|
|
67
|
+
);
|
|
61
68
|
}
|
|
62
69
|
|
|
63
70
|
let skillContent = `Skill: ${skillName}`;
|
|
@@ -86,8 +93,7 @@ export async function cliMain(): Promise<void> {
|
|
|
86
93
|
const tests = await generateUnitTests(skillName, skillContent, evalFailures, llmCaller);
|
|
87
94
|
|
|
88
95
|
if (tests.length === 0) {
|
|
89
|
-
|
|
90
|
-
process.exit(1);
|
|
96
|
+
throw new CLIError("No tests generated", "OPERATION_FAILED", "Check agent/LLM availability");
|
|
91
97
|
}
|
|
92
98
|
|
|
93
99
|
// Ensure output directory exists
|
|
@@ -100,9 +106,11 @@ export async function cliMain(): Promise<void> {
|
|
|
100
106
|
// Load and run tests
|
|
101
107
|
const tests = loadUnitTests(testsPath);
|
|
102
108
|
if (tests.length === 0) {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
109
|
+
throw new CLIError(
|
|
110
|
+
`No tests found at ${testsPath}`,
|
|
111
|
+
"FILE_NOT_FOUND",
|
|
112
|
+
"Use --generate to create tests, or provide --tests <path>",
|
|
113
|
+
);
|
|
106
114
|
}
|
|
107
115
|
|
|
108
116
|
console.log(`Loaded ${tests.length} unit tests for skill '${skillName}'`);
|
|
@@ -112,8 +120,11 @@ export async function cliMain(): Promise<void> {
|
|
|
112
120
|
if (values["run-agent"]) {
|
|
113
121
|
const agent = detectAgent();
|
|
114
122
|
if (!agent) {
|
|
115
|
-
|
|
116
|
-
|
|
123
|
+
throw new CLIError(
|
|
124
|
+
"No agent CLI found. Cannot run agent-based tests",
|
|
125
|
+
"AGENT_NOT_FOUND",
|
|
126
|
+
"Install one of the supported agent CLIs",
|
|
127
|
+
);
|
|
117
128
|
}
|
|
118
129
|
const modelFlag = values.model;
|
|
119
130
|
agentRunner = async (query: string): Promise<string> => {
|