selftune 0.2.14 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
- package/apps/local-dashboard/dist/index.html +2 -2
- package/bin/run-hook.cjs +36 -0
- package/cli/selftune/analytics.ts +13 -11
- package/cli/selftune/badge/badge.ts +13 -9
- package/cli/selftune/canonical-export.ts +6 -6
- package/cli/selftune/contribute/contribute.ts +2 -1
- package/cli/selftune/cron/setup.ts +3 -1
- package/cli/selftune/dashboard-contract.ts +10 -0
- package/cli/selftune/dashboard.ts +10 -5
- package/cli/selftune/eval/baseline.ts +20 -30
- package/cli/selftune/eval/hooks-to-evals.ts +22 -12
- package/cli/selftune/eval/import-skillsbench.ts +21 -8
- package/cli/selftune/eval/unit-test-cli.ts +22 -11
- package/cli/selftune/evolution/description-quality.ts +224 -0
- package/cli/selftune/evolution/evolve-body.ts +17 -10
- package/cli/selftune/evolution/evolve.ts +94 -59
- package/cli/selftune/evolution/rollback.ts +7 -6
- package/cli/selftune/evolution/unblock-suggestions.ts +159 -0
- package/cli/selftune/grading/auto-grade.ts +24 -22
- package/cli/selftune/grading/grade-session.ts +21 -17
- package/cli/selftune/hooks/auto-activate.ts +12 -3
- package/cli/selftune/hooks/prompt-log.ts +7 -1
- package/cli/selftune/index.ts +66 -69
- package/cli/selftune/ingestors/claude-replay.ts +29 -14
- package/cli/selftune/ingestors/codex-rollout.ts +6 -1
- package/cli/selftune/init.ts +212 -36
- package/cli/selftune/monitoring/watch.ts +32 -16
- package/cli/selftune/orchestrate.ts +18 -17
- package/cli/selftune/routes/skill-report.ts +17 -0
- package/cli/selftune/schedule.ts +23 -9
- package/cli/selftune/sync.ts +7 -3
- package/cli/selftune/types.ts +45 -10
- package/cli/selftune/utils/cli-error.ts +102 -0
- package/cli/selftune/utils/hooks.ts +12 -2
- package/cli/selftune/workflows/workflows.ts +23 -17
- package/package.json +1 -1
- package/skill/SKILL.md +1 -1
- package/skill/Workflows/AutoActivation.md +1 -1
- package/skill/Workflows/Evolve.md +4 -0
- package/skill/Workflows/Initialize.md +8 -8
- package/skill/settings_snippet.json +35 -12
- package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* description-quality.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure, deterministic scoring function that evaluates the quality of a skill
|
|
5
|
+
* description for routing accuracy. No LLM calls — heuristic-only.
|
|
6
|
+
*
|
|
7
|
+
* Inspired by OpenAI's finding that "writing better skill descriptions improved
|
|
8
|
+
* routing accuracy more than any change to the underlying skill logic itself."
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { DescriptionQualityScore } from "../types.js";
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Constants
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
/**
 * Description length thresholds, in characters.
 *
 * IDEAL_MIN..IDEAL_MAX is the band that earns a full length score;
 * MIN_LENGTH and MAX_LENGTH are the outer bounds where the falloff changes slope.
 */
const MIN_LENGTH = 40;
const MAX_LENGTH = 500;
const IDEAL_MIN = 80;
const IDEAL_MAX = 300;

/** Trigger-context vocabulary: words signalling that the description states *when* the skill fires. */
const TRIGGER_CONTEXT_WORDS = [
  "when", "if", "after", "before", "during", "while",
  "upon", "whenever", "use when", "trigger", "activate",
];

/** Vague vocabulary that weakens routing precision. */
const VAGUE_WORDS = [
  "various", "general", "misc", "miscellaneous",
  "stuff", "things", "etc", "and more",
  "and so on", "other", "multiple", "several",
  "many", "some", "certain", "related",
];

/** Filler phrases that carry no routing signal. */
const FILLER_PHRASES = [
  "this skill", "a tool for", "a tool that", "helps with",
  "is used for", "can be used", "is designed to",
];

/** Action verbs that indicate concrete behavior. */
const ACTION_VERBS = [
  "run", "execute", "analyze", "generate",
  "create", "deploy", "validate", "check",
  "build", "test", "scan", "extract",
  "transform", "monitor", "grade", "evolve",
  "sync", "watch", "review", "audit",
  "parse", "format", "search", "fetch",
  "publish", "install", "configure", "diagnose",
  "debug", "fix", "optimize", "measure",
];
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Pre-compiled word-boundary patterns
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
/**
 * Compile a word list into case-insensitive, word-boundary RegExp patterns.
 *
 * Each entry is treated as literal text: regex metacharacters are escaped so an
 * entry such as "a.b" matches only "a.b" and an entry such as "c++ (beta" cannot
 * throw while the pattern is being built. Runs of whitespace inside an entry
 * match any run of whitespace in the scanned text.
 *
 * @param words - Literal words or multi-word phrases.
 * @returns One pattern per entry, in the same order.
 */
function compileWordPatterns(words: readonly string[]): RegExp[] {
  return words.map((word) => {
    const source = word
      .split(/\s+/)
      // Escape every regex metacharacter so the entry is matched literally.
      .map((part) => part.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join("\\s+");
    return new RegExp(`\\b${source}\\b`, "i");
  });
}
|
|
113
|
+
|
|
114
|
+
/** Patterns built once at module load from the trigger-context word list. */
const TRIGGER_PATTERNS: RegExp[] = compileWordPatterns(TRIGGER_CONTEXT_WORDS);

/** Patterns built once at module load from the vague word list. */
const VAGUE_PATTERNS: RegExp[] = compileWordPatterns(VAGUE_WORDS);

/** Patterns built once at module load from the action verb list. */
const ACTION_PATTERNS: RegExp[] = compileWordPatterns(ACTION_VERBS);
|
|
117
|
+
|
|
118
|
+
/**
 * Count how many of the given patterns match somewhere in `text`.
 *
 * Each pattern contributes at most 1, no matter how often it occurs.
 *
 * @param text - The string to scan.
 * @param patterns - Patterns to try against `text`.
 * @returns The number of patterns with at least one match.
 */
function countWordMatches(text: string, patterns: readonly RegExp[]): number {
  let count = 0;
  for (const pattern of patterns) {
    // `test` on a global/sticky RegExp resumes from `lastIndex`; rewind before
    // and after so the result never depends on earlier calls.
    pattern.lastIndex = 0;
    if (pattern.test(text)) count++;
    pattern.lastIndex = 0;
  }
  return count;
}
|
|
126
|
+
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
// Criterion scorers
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
/**
 * Score description length on a 0.0-1.0 scale.
 *
 * The curve is continuous and never decreases as a too-short description grows:
 *   - below MIN_LENGTH: linear ramp from 0 up to 0.7
 *   - MIN_LENGTH..IDEAL_MIN: linear ramp from 0.7 up to 1.0
 *   - IDEAL_MIN..IDEAL_MAX: 1.0
 *   - IDEAL_MAX..MAX_LENGTH: linear ramp from 1.0 down to 0.7
 *   - above MAX_LENGTH: keeps falling from 0.7, floored at 0.3
 *
 * @param description - The skill description text; its raw character count is used.
 * @returns The length score in the range 0.0-1.0.
 */
export function scoreLengthCriterion(description: string): number {
  const len = description.length;
  // Scaled to 0.7 so the ramp meets the next segment; an unscaled ratio would
  // rate a 39-character description above a 40-character one.
  if (len < MIN_LENGTH) return 0.7 * (len / MIN_LENGTH);
  if (len < IDEAL_MIN) return 0.7 + 0.3 * ((len - MIN_LENGTH) / (IDEAL_MIN - MIN_LENGTH));
  if (len <= IDEAL_MAX) return 1.0;
  if (len <= MAX_LENGTH) return 0.7 + 0.3 * ((MAX_LENGTH - len) / (MAX_LENGTH - IDEAL_MAX));
  return Math.max(0.3, 0.7 - 0.4 * ((len - MAX_LENGTH) / MAX_LENGTH));
}
|
|
140
|
+
|
|
141
|
+
/**
 * Score how clearly the description says when the skill should fire.
 *
 * No trigger-context pattern matched scores 0.0; the first match scores 0.7 and
 * each further distinct pattern adds 0.15, capped at 1.0.
 *
 * @param description - The skill description text.
 * @returns The trigger-context score in the range 0.0-1.0.
 */
export function scoreTriggerContextCriterion(description: string): number {
  const hits = countWordMatches(description.toLowerCase(), TRIGGER_PATTERNS);
  return hits === 0 ? 0.0 : Math.min(1.0, 0.7 + 0.15 * (hits - 1));
}
|
|
148
|
+
|
|
149
|
+
/**
 * Score the absence of vague wording (higher is better).
 *
 * No vague pattern matched scores 1.0; the first match drops the score to 0.6
 * and each further distinct pattern subtracts 0.15, floored at 0.1.
 *
 * @param description - The skill description text.
 * @returns The vagueness score in the range 0.1-1.0.
 */
export function scoreVaguenessCriterion(description: string): number {
  const hits = countWordMatches(description.toLowerCase(), VAGUE_PATTERNS);
  return hits === 0 ? 1.0 : Math.max(0.1, 0.6 - 0.15 * (hits - 1));
}
|
|
156
|
+
|
|
157
|
+
/**
 * Filler phrases compiled with word boundaries, so text such as
 * "metadata tool for" is not mistaken for the filler phrase "a tool for".
 */
const FILLER_PATTERNS = compileWordPatterns(FILLER_PHRASES);

/**
 * Score whether the description names a concrete action without padding.
 *
 * Returns 0.2 when no action verb is present. Otherwise starts at 1.0 and
 * subtracts 0.3 per filler phrase per ten words (descriptions shorter than ten
 * words count as ten), floored at 0.3.
 *
 * @param description - The skill description text.
 * @returns The specificity score in the range 0.2-1.0.
 */
export function scoreSpecificityCriterion(description: string): number {
  const lower = description.toLowerCase();
  // Without an action verb the filler ratio is irrelevant, so skip that work.
  if (!ACTION_PATTERNS.some((p) => p.test(lower))) return 0.2;

  const fillerCount = countWordMatches(lower, FILLER_PATTERNS);
  if (fillerCount === 0) return 1.0;

  // Drop empty tokens so leading/trailing whitespace does not inflate the word count.
  const wordCount = description.split(/\s+/).filter(Boolean).length;
  const fillerRatio = fillerCount / Math.max(1, wordCount / 10);
  return Math.max(0.3, 1.0 - fillerRatio * 0.3);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Score whether the description is more than the skill name restated.
 *
 * Both strings are normalized the same way before comparison: lowercased,
 * hyphens/underscores turned into spaces, other punctuation removed, whitespace
 * collapsed and trimmed. The comparison then ignores spaces, so "My  Skill",
 * "my_skill" and "my skill -" are all recognized as restating "my-skill".
 *
 * @param description - The skill description text.
 * @param skillName - The skill's name; when omitted or empty the check is skipped.
 * @returns 1.0 when no name is given or the description adds real content,
 *          0.0 when it is just the name, and 0.3 when it is fewer than ten
 *          characters longer than the name.
 */
export function scoreNotJustNameCriterion(description: string, skillName?: string): number {
  if (!skillName) return 1.0;

  const normalize = (text: string): string =>
    text
      .toLowerCase()
      .replace(/[-_]+/g, " ")
      .replace(/[^a-z0-9\s]/g, "")
      .replace(/\s+/g, " ")
      // Trim last: stripping punctuation can expose new edge whitespace.
      .trim();
  const compact = (text: string): string => text.replace(/\s/g, "");

  const descNorm = normalize(description);
  const nameCompact = compact(normalize(skillName));

  if (compact(descNorm) === nameCompact) return 0.0;
  if (descNorm.length < nameCompact.length + 10) return 0.3;
  return 1.0;
}
|
|
187
|
+
|
|
188
|
+
// ---------------------------------------------------------------------------
|
|
189
|
+
// Main scoring function
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
/**
 * Relative weight of each criterion in the composite score.
 * Trigger context carries the largest share, per OpenAI's finding.
 * Key order matters: the composite is summed in this order.
 */
const WEIGHTS = {
  length: 0.15,
  trigger_context: 0.3,
  vagueness: 0.2,
  specificity: 0.2,
  not_just_name: 0.15,
} as const;
|
|
200
|
+
|
|
201
|
+
/**
 * Evaluate a skill description against every heuristic criterion.
 *
 * Pure function: no I/O and no LLM calls.
 *
 * @param description - The skill description text to score.
 * @param skillName - Optional skill name, used only by the "not just the name" check.
 * @returns The per-criterion scores plus their weighted sum, rounded to three decimals.
 */
export function scoreDescription(description: string, skillName?: string): DescriptionQualityScore {
  const criteria = {
    length: scoreLengthCriterion(description),
    trigger_context: scoreTriggerContextCriterion(description),
    vagueness: scoreVaguenessCriterion(description),
    specificity: scoreSpecificityCriterion(description),
    not_just_name: scoreNotJustNameCriterion(description, skillName),
  };

  // Accumulate in WEIGHTS key order so the floating-point sum is stable.
  let weightedSum = 0;
  for (const key of Object.keys(WEIGHTS) as (keyof typeof WEIGHTS)[]) {
    weightedSum = weightedSum + criteria[key] * WEIGHTS[key];
  }

  return {
    composite: Number(weightedSum.toFixed(3)),
    criteria,
  };
}
|
|
@@ -25,6 +25,7 @@ import type {
|
|
|
25
25
|
QueryLogRecord,
|
|
26
26
|
SkillUsageRecord,
|
|
27
27
|
} from "../types.js";
|
|
28
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
28
29
|
import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
|
|
29
30
|
import { callViaSubagent } from "../utils/llm-call.js";
|
|
30
31
|
import { appendAuditEntry } from "./audit.js";
|
|
@@ -710,8 +711,11 @@ Options:
|
|
|
710
711
|
}
|
|
711
712
|
|
|
712
713
|
if (!values.skill || !values["skill-path"]) {
|
|
713
|
-
|
|
714
|
-
|
|
714
|
+
throw new CLIError(
|
|
715
|
+
"--skill and --skill-path are required",
|
|
716
|
+
"MISSING_FLAG",
|
|
717
|
+
"selftune evolve body --skill <name> --skill-path <path>",
|
|
718
|
+
);
|
|
715
719
|
}
|
|
716
720
|
|
|
717
721
|
const { detectAgent } = await import("../utils/llm-call.js");
|
|
@@ -719,15 +723,21 @@ Options:
|
|
|
719
723
|
const studentAgent = values["student-agent"] ?? teacherAgent;
|
|
720
724
|
|
|
721
725
|
if (!teacherAgent) {
|
|
722
|
-
|
|
723
|
-
|
|
726
|
+
throw new CLIError(
|
|
727
|
+
"No agent CLI found. Install Claude Code, Codex, or OpenCode.",
|
|
728
|
+
"AGENT_NOT_FOUND",
|
|
729
|
+
"Install Claude Code, Codex, or OpenCode.",
|
|
730
|
+
);
|
|
724
731
|
}
|
|
725
732
|
|
|
726
733
|
// Parse target
|
|
727
734
|
const targetStr = values.target ?? "body";
|
|
728
735
|
if (targetStr !== "body" && targetStr !== "routing") {
|
|
729
|
-
|
|
730
|
-
|
|
736
|
+
throw new CLIError(
|
|
737
|
+
"--target must be 'body' or 'routing'",
|
|
738
|
+
"INVALID_FLAG",
|
|
739
|
+
"Use --target body or --target routing",
|
|
740
|
+
);
|
|
731
741
|
}
|
|
732
742
|
|
|
733
743
|
// Parse few-shot examples
|
|
@@ -763,8 +773,5 @@ Options:
|
|
|
763
773
|
}
|
|
764
774
|
|
|
765
775
|
if (import.meta.main) {
|
|
766
|
-
cliMain().catch(
|
|
767
|
-
console.error(`[FATAL] ${err}`);
|
|
768
|
-
process.exit(1);
|
|
769
|
-
});
|
|
776
|
+
cliMain().catch(handleCLIError);
|
|
770
777
|
}
|
|
@@ -36,10 +36,12 @@ import type {
|
|
|
36
36
|
SessionTelemetryRecord,
|
|
37
37
|
SkillUsageRecord,
|
|
38
38
|
} from "../types.js";
|
|
39
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
39
40
|
import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
|
|
40
41
|
import { createEvolveTUI } from "../utils/tui.js";
|
|
41
42
|
import { appendAuditEntry } from "./audit.js";
|
|
42
43
|
import { checkConstitution } from "./constitutional.js";
|
|
44
|
+
import { scoreDescription } from "./description-quality.js";
|
|
43
45
|
import { appendEvidenceEntry } from "./evidence.js";
|
|
44
46
|
import { extractFailurePatterns } from "./extract-patterns.js";
|
|
45
47
|
import {
|
|
@@ -49,6 +51,7 @@ import {
|
|
|
49
51
|
selectFromFrontier,
|
|
50
52
|
} from "./pareto.js";
|
|
51
53
|
import { generateMultipleProposals, generateProposal } from "./propose-description.js";
|
|
54
|
+
import { buildUnblockSuggestions } from "./unblock-suggestions.js";
|
|
52
55
|
import type { ValidationResult } from "./validate-proposal.js";
|
|
53
56
|
import {
|
|
54
57
|
TRIGGER_CHECK_BATCH_SIZE,
|
|
@@ -94,6 +97,8 @@ export interface EvolveResult {
|
|
|
94
97
|
baselineResult?: BaselineMeasurement;
|
|
95
98
|
gateValidation?: ValidationResult;
|
|
96
99
|
sync_result?: SyncResult;
|
|
100
|
+
descriptionQualityBefore?: number;
|
|
101
|
+
descriptionQualityAfter?: number;
|
|
97
102
|
}
|
|
98
103
|
|
|
99
104
|
/**
|
|
@@ -247,16 +252,26 @@ export async function evolve(
|
|
|
247
252
|
);
|
|
248
253
|
|
|
249
254
|
/** Stamp every return with pipeline stats so callers always get them. */
|
|
250
|
-
const withStats = (r: Omit<EvolveResult, "llmCallCount" | "elapsedMs">): EvolveResult =>
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
255
|
+
const withStats = (r: Omit<EvolveResult, "llmCallCount" | "elapsedMs">): EvolveResult => {
|
|
256
|
+
const descQualityAfterScore = r.proposal
|
|
257
|
+
? scoreDescription(r.proposal.proposed_description, options.skillName).composite
|
|
258
|
+
: undefined;
|
|
259
|
+
return {
|
|
260
|
+
...r,
|
|
261
|
+
llmCallCount,
|
|
262
|
+
elapsedMs: Date.now() - pipelineStart,
|
|
263
|
+
...(syncResult ? { sync_result: syncResult } : {}),
|
|
264
|
+
...(descQualityBeforeScore != null
|
|
265
|
+
? { descriptionQualityBefore: descQualityBeforeScore }
|
|
266
|
+
: {}),
|
|
267
|
+
...(descQualityAfterScore != null ? { descriptionQualityAfter: descQualityAfterScore } : {}),
|
|
268
|
+
};
|
|
269
|
+
};
|
|
256
270
|
|
|
257
|
-
// Hoisted so catch block can preserve partial results on error
|
|
271
|
+
// Hoisted so catch block and withStats can preserve partial results on error
|
|
258
272
|
let lastProposal: EvolutionProposal | null = null;
|
|
259
273
|
let lastValidation: ValidationResult | null = null;
|
|
274
|
+
let descQualityBeforeScore: number | undefined;
|
|
260
275
|
|
|
261
276
|
try {
|
|
262
277
|
// -----------------------------------------------------------------------
|
|
@@ -281,7 +296,11 @@ export async function evolve(
|
|
|
281
296
|
const versionTag = skillVersion ? `, v${skillVersion}` : "";
|
|
282
297
|
const createdAuditDetails = (message: string) =>
|
|
283
298
|
`original_description:${rawContent}\n${message}`;
|
|
284
|
-
|
|
299
|
+
const descQualityBefore = scoreDescription(currentDescription, skillName);
|
|
300
|
+
descQualityBeforeScore = descQualityBefore.composite;
|
|
301
|
+
tui.done(
|
|
302
|
+
`Loaded SKILL.md (desc: ${currentDescription.length} chars${versionTag}, quality: ${descQualityBefore.composite})`,
|
|
303
|
+
);
|
|
285
304
|
|
|
286
305
|
if (options.syncFirst) {
|
|
287
306
|
tui.step(`Syncing source-truth telemetry${options.syncForce ? " (force)" : ""}...`);
|
|
@@ -1111,38 +1130,36 @@ Options:
|
|
|
1111
1130
|
}
|
|
1112
1131
|
|
|
1113
1132
|
if (!values.skill || !values["skill-path"]) {
|
|
1114
|
-
|
|
1115
|
-
|
|
1133
|
+
throw new CLIError(
|
|
1134
|
+
"--skill and --skill-path are required",
|
|
1135
|
+
"MISSING_FLAG",
|
|
1136
|
+
"selftune evolve --skill <name> --skill-path <path>",
|
|
1137
|
+
);
|
|
1116
1138
|
}
|
|
1117
1139
|
if ((values["sync-force"] ?? false) && !(values["sync-first"] ?? false)) {
|
|
1118
|
-
|
|
1119
|
-
|
|
1140
|
+
throw new CLIError(
|
|
1141
|
+
"--sync-force requires --sync-first",
|
|
1142
|
+
"INVALID_FLAG",
|
|
1143
|
+
"Add --sync-first when using --sync-force",
|
|
1144
|
+
);
|
|
1120
1145
|
}
|
|
1121
1146
|
|
|
1122
1147
|
const { detectAgent } = await import("../utils/llm-call.js");
|
|
1123
1148
|
const requestedAgent = values.agent;
|
|
1124
1149
|
if (requestedAgent && !Bun.which(requestedAgent)) {
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
message: `Agent CLI '${requestedAgent}' not found in PATH.`,
|
|
1130
|
-
action: "Install it or omit --agent to use auto-detection.",
|
|
1131
|
-
}),
|
|
1150
|
+
throw new CLIError(
|
|
1151
|
+
`Agent CLI '${requestedAgent}' not found in PATH.`,
|
|
1152
|
+
"AGENT_NOT_FOUND",
|
|
1153
|
+
"Install it or omit --agent to use auto-detection.",
|
|
1132
1154
|
);
|
|
1133
|
-
process.exit(1);
|
|
1134
1155
|
}
|
|
1135
1156
|
const agent = requestedAgent ?? detectAgent();
|
|
1136
1157
|
if (!agent) {
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
message: "No agent CLI (claude/codex/opencode) found in PATH.",
|
|
1142
|
-
action: "Install Claude Code, Codex, or OpenCode.",
|
|
1143
|
-
}),
|
|
1158
|
+
throw new CLIError(
|
|
1159
|
+
"No agent CLI (claude/codex/opencode) found in PATH.",
|
|
1160
|
+
"AGENT_NOT_FOUND",
|
|
1161
|
+
"Install Claude Code, Codex, or OpenCode.",
|
|
1144
1162
|
);
|
|
1145
|
-
process.exit(1);
|
|
1146
1163
|
}
|
|
1147
1164
|
|
|
1148
1165
|
// -------------------------------------------------------------------------
|
|
@@ -1150,20 +1167,27 @@ Options:
|
|
|
1150
1167
|
// -------------------------------------------------------------------------
|
|
1151
1168
|
const skillPath = values["skill-path"];
|
|
1152
1169
|
if (!skillPath) {
|
|
1153
|
-
|
|
1154
|
-
|
|
1170
|
+
throw new CLIError(
|
|
1171
|
+
"--skill-path is required.",
|
|
1172
|
+
"MISSING_FLAG",
|
|
1173
|
+
"selftune evolve --skill <name> --skill-path <path>",
|
|
1174
|
+
);
|
|
1155
1175
|
}
|
|
1156
1176
|
if (!existsSync(skillPath)) {
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1177
|
+
throw new CLIError(
|
|
1178
|
+
`SKILL.md not found at: ${skillPath}`,
|
|
1179
|
+
"FILE_NOT_FOUND",
|
|
1180
|
+
"Verify the --skill-path argument points to an existing SKILL.md file.",
|
|
1181
|
+
);
|
|
1160
1182
|
}
|
|
1161
1183
|
|
|
1162
1184
|
const evalSetPath = values["eval-set"];
|
|
1163
1185
|
if (evalSetPath && !existsSync(evalSetPath)) {
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1186
|
+
throw new CLIError(
|
|
1187
|
+
`Eval set file not found at: ${evalSetPath}`,
|
|
1188
|
+
"FILE_NOT_FOUND",
|
|
1189
|
+
"Verify the --eval-set argument points to an existing JSON file.",
|
|
1190
|
+
);
|
|
1167
1191
|
}
|
|
1168
1192
|
|
|
1169
1193
|
// If no eval-set provided, check that log files exist for auto-generation
|
|
@@ -1172,12 +1196,11 @@ Options:
|
|
|
1172
1196
|
const hasSkillLog = querySkillUsageRecords(dbCheck).length > 0;
|
|
1173
1197
|
const hasQueryLog = existsSync(QUERY_LOG);
|
|
1174
1198
|
if (!hasSkillLog && !hasQueryLog) {
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
"
|
|
1199
|
+
throw new CLIError(
|
|
1200
|
+
`No eval set provided and no telemetry logs found. Expected logs at: ${SKILL_LOG} and ${QUERY_LOG}`,
|
|
1201
|
+
"MISSING_DATA",
|
|
1202
|
+
"Either pass --eval-set <path> or generate logs first by using selftune-enabled skills.",
|
|
1178
1203
|
);
|
|
1179
|
-
console.error(` Expected logs at: ${SKILL_LOG} and ${QUERY_LOG}`);
|
|
1180
|
-
process.exit(1);
|
|
1181
1204
|
}
|
|
1182
1205
|
}
|
|
1183
1206
|
|
|
@@ -1244,11 +1267,22 @@ Options:
|
|
|
1244
1267
|
rationale: result.proposal?.rationale ?? "",
|
|
1245
1268
|
...(result.skillVersion ? { version: result.skillVersion } : {}),
|
|
1246
1269
|
dashboard_url: `http://localhost:3141/report/${encodeURIComponent(values.skill)}`,
|
|
1270
|
+
...(result.descriptionQualityBefore != null
|
|
1271
|
+
? { description_quality_before: result.descriptionQualityBefore }
|
|
1272
|
+
: {}),
|
|
1273
|
+
...(result.descriptionQualityAfter != null
|
|
1274
|
+
? { description_quality_after: result.descriptionQualityAfter }
|
|
1275
|
+
: {}),
|
|
1276
|
+
...(!result.deployed
|
|
1277
|
+
? {
|
|
1278
|
+
suggestions: buildUnblockSuggestions(result, values.skill),
|
|
1279
|
+
}
|
|
1280
|
+
: {}),
|
|
1247
1281
|
};
|
|
1248
1282
|
console.log(JSON.stringify(summary, null, 2));
|
|
1249
1283
|
}
|
|
1250
1284
|
|
|
1251
|
-
// Print human-readable status to stderr so
|
|
1285
|
+
// Print human-readable status to stderr so agents always see outcome + next steps
|
|
1252
1286
|
if (!result.deployed) {
|
|
1253
1287
|
console.error(`\n[NOT DEPLOYED] ${result.reason}`);
|
|
1254
1288
|
if (result.validation && !result.validation.improved) {
|
|
@@ -1267,29 +1301,30 @@ Options:
|
|
|
1267
1301
|
` Confidence ${result.proposal.confidence.toFixed(2)} below threshold ${values.confidence ?? "0.6"}`,
|
|
1268
1302
|
);
|
|
1269
1303
|
}
|
|
1270
|
-
|
|
1304
|
+
// Targeted suggestions based on specific failure reason
|
|
1305
|
+
const suggestions = buildUnblockSuggestions(result, values.skill);
|
|
1306
|
+
if (suggestions.length > 0) {
|
|
1307
|
+
console.error("\n Next steps:");
|
|
1308
|
+
for (const s of suggestions) {
|
|
1309
|
+
console.error(` → ${s}`);
|
|
1310
|
+
}
|
|
1311
|
+
}
|
|
1271
1312
|
} else {
|
|
1272
1313
|
console.error(`\n[DEPLOYED] ${result.reason}`);
|
|
1314
|
+
// Show quality improvement if available
|
|
1315
|
+
if (result.descriptionQualityBefore != null && result.descriptionQualityAfter != null) {
|
|
1316
|
+
const delta = result.descriptionQualityAfter - result.descriptionQualityBefore;
|
|
1317
|
+
if (delta !== 0) {
|
|
1318
|
+
console.error(
|
|
1319
|
+
` Description quality: ${Math.round(result.descriptionQualityBefore * 100)}% → ${Math.round(result.descriptionQualityAfter * 100)}% (${delta >= 0 ? "+" : ""}${Math.round(delta * 100)}%)`,
|
|
1320
|
+
);
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1273
1323
|
}
|
|
1274
1324
|
|
|
1275
1325
|
process.exit(result.deployed ? 0 : 1);
|
|
1276
1326
|
}
|
|
1277
1327
|
|
|
1278
1328
|
if (import.meta.main) {
|
|
1279
|
-
cliMain().catch(
|
|
1280
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
1281
|
-
const stack = err instanceof Error ? err.stack : undefined;
|
|
1282
|
-
console.error(`[FATAL] ${message}`);
|
|
1283
|
-
if (stack && process.env.SELFTUNE_VERBOSE === "1") {
|
|
1284
|
-
console.error(stack);
|
|
1285
|
-
}
|
|
1286
|
-
console.error(
|
|
1287
|
-
"\nTroubleshooting:\n" +
|
|
1288
|
-
" - Verify --skill-path points to a valid SKILL.md file\n" +
|
|
1289
|
-
" - Ensure eval data exists (run `selftune eval generate` first) or pass --eval-set\n" +
|
|
1290
|
-
" - Check that ANTHROPIC_API_KEY is set if using Claude\n" +
|
|
1291
|
-
" - Re-run with --verbose for full diagnostic output",
|
|
1292
|
-
);
|
|
1293
|
-
process.exit(1);
|
|
1294
|
-
});
|
|
1329
|
+
cliMain().catch(handleCLIError);
|
|
1295
1330
|
}
|
|
@@ -13,6 +13,7 @@ import { parseArgs } from "node:util";
|
|
|
13
13
|
|
|
14
14
|
import { updateContextAfterRollback } from "../memory/writer.js";
|
|
15
15
|
import type { EvolutionAuditEntry } from "../types.js";
|
|
16
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
16
17
|
import { replaceDescription } from "../utils/frontmatter.js";
|
|
17
18
|
import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
|
|
18
19
|
|
|
@@ -233,8 +234,11 @@ Options:
|
|
|
233
234
|
}
|
|
234
235
|
|
|
235
236
|
if (!values.skill || !values["skill-path"]) {
|
|
236
|
-
|
|
237
|
-
|
|
237
|
+
throw new CLIError(
|
|
238
|
+
"--skill and --skill-path are required",
|
|
239
|
+
"MISSING_FLAG",
|
|
240
|
+
"selftune evolve rollback --skill <name> --skill-path <path>",
|
|
241
|
+
);
|
|
238
242
|
}
|
|
239
243
|
|
|
240
244
|
const result = await rollback({
|
|
@@ -248,8 +252,5 @@ Options:
|
|
|
248
252
|
}
|
|
249
253
|
|
|
250
254
|
if (import.meta.main) {
|
|
251
|
-
cliMain().catch(
|
|
252
|
-
console.error(`[FATAL] ${err}`);
|
|
253
|
-
process.exit(1);
|
|
254
|
-
});
|
|
255
|
+
cliMain().catch(handleCLIError);
|
|
255
256
|
}
|