selftune 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +146 -0
- package/.claude/agents/evolution-reviewer.md +167 -0
- package/.claude/agents/integration-guide.md +200 -0
- package/.claude/agents/pattern-analyst.md +147 -0
- package/CHANGELOG.md +38 -1
- package/README.md +96 -256
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +103 -0
- package/cli/selftune/constants.ts +75 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-server.ts +582 -0
- package/cli/selftune/dashboard.ts +31 -12
- package/cli/selftune/eval/baseline.ts +247 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +68 -2
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evolve-body.ts +492 -0
- package/cli/selftune/evolution/evolve.ts +479 -104
- package/cli/selftune/evolution/extract-patterns.ts +32 -1
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +20 -3
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/grade-session.ts +145 -19
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/index.ts +88 -0
- package/cli/selftune/ingestors/claude-replay.ts +351 -0
- package/cli/selftune/ingestors/codex-rollout.ts +1 -1
- package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/init.ts +168 -5
- package/cli/selftune/last.ts +2 -2
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +25 -2
- package/cli/selftune/status.ts +18 -15
- package/cli/selftune/types.ts +377 -5
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/llm-call.ts +29 -3
- package/cli/selftune/utils/transcript.ts +35 -0
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/dashboard/index.html +585 -19
- package/package.json +17 -6
- package/skill/SKILL.md +127 -10
- package/skill/Workflows/AutoActivation.md +144 -0
- package/skill/Workflows/Badge.md +118 -0
- package/skill/Workflows/Baseline.md +121 -0
- package/skill/Workflows/Composability.md +100 -0
- package/skill/Workflows/Contribute.md +91 -0
- package/skill/Workflows/Cron.md +155 -0
- package/skill/Workflows/Dashboard.md +203 -0
- package/skill/Workflows/Doctor.md +37 -1
- package/skill/Workflows/Evals.md +73 -5
- package/skill/Workflows/EvolutionMemory.md +152 -0
- package/skill/Workflows/Evolve.md +111 -6
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/ImportSkillsBench.md +111 -0
- package/skill/Workflows/Ingest.md +129 -15
- package/skill/Workflows/Initialize.md +58 -3
- package/skill/Workflows/Replay.md +70 -0
- package/skill/Workflows/Rollback.md +20 -1
- package/skill/Workflows/UnitTest.md +138 -0
- package/skill/Workflows/Watch.md +22 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* evolve-body.ts
|
|
3
|
+
*
|
|
4
|
+
* Body evolution orchestrator: coordinates full body or routing-table evolution
|
|
5
|
+
* through a pipeline of proposal generation, 3-gate validation, refinement,
|
|
6
|
+
* and deployment.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
10
|
+
import { parseArgs } from "node:util";
|
|
11
|
+
|
|
12
|
+
import { QUERY_LOG, SKILL_LOG } from "../constants.js";
|
|
13
|
+
import { buildEvalSet } from "../eval/hooks-to-evals.js";
|
|
14
|
+
import type {
|
|
15
|
+
BodyEvolutionProposal,
|
|
16
|
+
BodyValidationResult,
|
|
17
|
+
EvalEntry,
|
|
18
|
+
EvolutionAuditEntry,
|
|
19
|
+
EvolutionTarget,
|
|
20
|
+
FailurePattern,
|
|
21
|
+
GradingResult,
|
|
22
|
+
QueryLogRecord,
|
|
23
|
+
SkillUsageRecord,
|
|
24
|
+
} from "../types.js";
|
|
25
|
+
import { readJsonl } from "../utils/jsonl.js";
|
|
26
|
+
import { appendAuditEntry } from "./audit.js";
|
|
27
|
+
import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
|
|
28
|
+
import { extractFailurePatterns } from "./extract-patterns.js";
|
|
29
|
+
import { generateBodyProposal } from "./propose-body.js";
|
|
30
|
+
import { generateRoutingProposal } from "./propose-routing.js";
|
|
31
|
+
import { refineBodyProposal } from "./refine-body.js";
|
|
32
|
+
import { validateBodyProposal } from "./validate-body.js";
|
|
33
|
+
import { validateRoutingProposal } from "./validate-routing.js";
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Types
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
/**
 * Inputs for one run of {@link evolveBody}.
 */
export interface EvolveBodyOptions {
  /** Skill name; used when building the eval set, extracting failure patterns, and labelling audit entries. */
  skillName: string;

  /** Path of the SKILL.md file that is read and, on a successful non-dry run, overwritten. */
  skillPath: string;

  /** What to evolve: `"routing"` targets the "Workflow Routing" section; any other value targets the full body. */
  target: EvolutionTarget;

  /** Agent CLI handed to the proposal generators and to the refiner. */
  teacherAgent: string;

  /** Agent CLI handed to the validators. */
  studentAgent: string;

  /** Optional model flag forwarded together with `teacherAgent`. */
  teacherModel?: string;

  /** Optional model flag forwarded to the validators unless `validationModel` is set. */
  studentModel?: string;

  /**
   * Path to a JSON file containing an array of eval entries. When unset, or when
   * the file does not exist, the eval set is built from the usage/query logs.
   */
  evalSetPath?: string;

  /** When true, SKILL.md is never written. */
  dryRun: boolean;

  /** Upper bound on generate/validate/refine iterations. */
  maxIterations: number;

  /** Proposals whose `confidence` is below this value are rejected without being validated. */
  confidenceThreshold: number;

  /**
   * Optional task description.
   * NOTE(review): `evolveBody` in this file does not read this field — confirm
   * whether it is meant to be forwarded to the proposal generators.
   */
  taskDescription?: string;

  /** Optional few-shot examples forwarded to the body proposal generator (not used for routing). */
  fewShotExamples?: string[];

  /** Optional grading results forwarded to failure-pattern extraction. */
  gradingResults?: GradingResult[];

  /** Optional model flag for validation calls; takes precedence over `studentModel`. */
  validationModel?: string;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Outcome of one {@link evolveBody} run.
 */
export interface EvolveBodyResult {
  /** The most recent proposal, or `null` when none is reported (early exit or caught error). */
  proposal: BodyEvolutionProposal | null;

  /** The validation result reported alongside `proposal`, or `null` when there is none. */
  validation: BodyValidationResult | null;

  /** `true` only when the updated SKILL.md was written to disk. */
  deployed: boolean;

  /** Audit entries recorded during this run, in the order they were created. */
  auditEntries: EvolutionAuditEntry[];

  /** Human-readable explanation of the outcome. */
  reason: string;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Optional dependency overrides for evolveBody().
 *
 * Every member is optional: a missing member falls back to the real
 * implementation imported by this module. Tests can pass stubs here instead
 * of resorting to mock.module().
 */
export interface EvolveBodyDeps {
  /** Derives failure patterns from the eval set and skill-usage records. */
  extractFailurePatterns?: (
    evalEntries: EvalEntry[],
    skillUsage: SkillUsageRecord[],
    skillName: string,
    gradingResults?: GradingResult[],
  ) => FailurePattern[];

  /** Produces a full-body proposal. */
  generateBodyProposal?: typeof generateBodyProposal;

  /** Produces a routing-section proposal. */
  generateRoutingProposal?: typeof generateRoutingProposal;

  /** Validates a full-body proposal against the eval set. */
  validateBodyProposal?: typeof validateBodyProposal;

  /** Validates a routing-section proposal against the eval set. */
  validateRoutingProposal?: typeof validateRoutingProposal;

  /** Refines a proposal using its failed validation result. */
  refineBodyProposal?: typeof refineBodyProposal;

  /** Persists a single audit entry. */
  appendAuditEntry?: typeof appendAuditEntry;

  /** Builds an eval set from skill-usage and query log records. */
  buildEvalSet?: typeof buildEvalSet;

  /** File reader used for SKILL.md and the eval-set file. */
  readFileSync?: typeof readFileSync;

  /** File writer used to deploy the updated SKILL.md. */
  writeFileSync?: (path: string, data: string, encoding: string) => void;
}
|
|
86
|
+
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
// Audit helper
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
/**
 * Build an audit entry stamped with the current time.
 *
 * @param proposalId - Identifier of the proposal the entry refers to.
 * @param action - Audit action to record.
 * @param details - Free-form description of what happened.
 * @param skillName - Optional skill name; stored as `skill_name` (left `undefined` when omitted).
 * @returns A new entry whose `timestamp` is the current time in ISO-8601 form.
 */
function createAuditEntry(
  proposalId: string,
  action: EvolutionAuditEntry["action"],
  details: string,
  skillName?: string,
): EvolutionAuditEntry {
  const timestamp = new Date().toISOString();
  // Key order is kept stable so serialized entries look the same as before.
  const entry: EvolutionAuditEntry = {
    timestamp,
    proposal_id: proposalId,
    skill_name: skillName,
    action,
    details,
  };
  return entry;
}
|
|
105
|
+
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
// Main orchestrator
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
/**
 * Evolve a skill's full body or its "Workflow Routing" section.
 *
 * Pipeline:
 *  1. Read SKILL.md at `skillPath`.
 *  2. Load the eval set from `evalSetPath` (must be a JSON array) or, when that
 *     path is unset or does not exist, build it from the skill-usage and query logs.
 *  3. Extract failure patterns; stop early when there are none.
 *  4. Run up to `maxIterations` attempts. Each attempt creates a proposal,
 *     rejects it when its confidence is below `confidenceThreshold`, and
 *     otherwise validates it. A proposal that fails validation becomes the
 *     basis for refinement in the next attempt; a low-confidence proposal is
 *     discarded and the next attempt starts again from the last validated
 *     basis (or from scratch when there is none).
 *  5. When a proposal passed validation, write the updated SKILL.md unless
 *     `dryRun` is set.
 *
 * Errors thrown inside the pipeline are caught and reported through `reason`
 * with `deployed: false`; audit-log write failures are ignored.
 *
 * @param options - Run configuration; see {@link EvolveBodyOptions}.
 * @param _deps - Optional dependency overrides (primarily for tests).
 * @returns The last proposal/validation pair, whether SKILL.md was written,
 *          the audit entries recorded during the run, and a human-readable reason.
 */
export async function evolveBody(
  options: EvolveBodyOptions,
  _deps: EvolveBodyDeps = {},
): Promise<EvolveBodyResult> {
  const {
    skillName,
    skillPath,
    target,
    teacherAgent,
    studentAgent,
    teacherModel,
    studentModel,
    evalSetPath,
    dryRun,
    maxIterations,
    confidenceThreshold,
    fewShotExamples,
  } = options;

  // Resolve injectable dependencies
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
  const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
  const _generateRoutingProposal = _deps.generateRoutingProposal ?? generateRoutingProposal;
  const _validateBodyProposal = _deps.validateBodyProposal ?? validateBodyProposal;
  const _validateRoutingProposal = _deps.validateRoutingProposal ?? validateRoutingProposal;
  const _refineBodyProposal = _deps.refineBodyProposal ?? refineBodyProposal;
  const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry;
  const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
  const _readFileSync = _deps.readFileSync ?? readFileSync;
  const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;

  const auditEntries: EvolutionAuditEntry[] = [];

  /** Record an audit entry in memory and try to persist it. */
  function recordAudit(
    proposalId: string,
    action: EvolutionAuditEntry["action"],
    details: string,
  ): void {
    const entry = createAuditEntry(proposalId, action, details, skillName);
    auditEntries.push(entry);
    try {
      _appendAuditEntry(entry);
    } catch {
      // Fail-open: a broken audit log must not abort the evolution run.
    }
  }

  /** Build a "not deployed" result that shares this run's audit trail. */
  function notDeployed(
    reason: string,
    proposal: BodyEvolutionProposal | null = null,
    validation: BodyValidationResult | null = null,
  ): EvolveBodyResult {
    return { proposal, validation, deployed: false, auditEntries, reason };
  }

  // Guard numeric options up front. With maxIterations < 1 (or NaN) the loop
  // would never run, and a NaN threshold would make `confidence < threshold`
  // always false, silently disabling the confidence gate.
  if (!Number.isFinite(maxIterations) || maxIterations < 1) {
    return notDeployed(`Invalid maxIterations ${maxIterations}: must be a finite number >= 1`);
  }
  if (Number.isNaN(confidenceThreshold)) {
    return notDeployed("Invalid confidenceThreshold: must be a number");
  }

  try {
    // Step 1: Read current SKILL.md
    if (!existsSync(skillPath)) {
      return notDeployed(`SKILL.md not found at ${skillPath}`);
    }

    const currentContent = _readFileSync(skillPath, "utf-8");
    const skillSections = parseSkillSections(currentContent);

    // Step 2: Load eval set
    let evalSet: EvalEntry[];
    let skillUsage: SkillUsageRecord[] | undefined;
    if (evalSetPath && existsSync(evalSetPath)) {
      const rawEvalSet: unknown = JSON.parse(_readFileSync(evalSetPath, "utf-8"));
      if (!Array.isArray(rawEvalSet)) {
        throw new Error("Eval set must be a JSON array");
      }
      evalSet = rawEvalSet as EvalEntry[];
    } else {
      // No usable eval-set file: derive the eval set from the logs.
      skillUsage = readJsonl<SkillUsageRecord>(SKILL_LOG);
      const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
      evalSet = _buildEvalSet(skillUsage, queryRecords, skillName);
    }

    // Step 3: Load skill usage (reusing the records read above, if any) and
    // extract failure patterns
    skillUsage = skillUsage ?? readJsonl<SkillUsageRecord>(SKILL_LOG);
    const failurePatterns = _extractFailurePatterns(
      evalSet,
      skillUsage,
      skillName,
      options.gradingResults,
    );

    if (failurePatterns.length === 0) {
      return notDeployed("No failure patterns found");
    }

    const missedQueries = failurePatterns.flatMap((p) => p.missed_queries);

    // Step 4: Generate -> validate -> refine loop
    let lastProposal: BodyEvolutionProposal | null = null;
    // Validation of `lastProposal`; null while that proposal has not been validated.
    let lastValidation: BodyValidationResult | null = null;
    // Most recent proposal that was validated and failed, paired with its own
    // validation result, so refinement never mixes results of different proposals.
    let refineFrom: {
      proposal: BodyEvolutionProposal;
      validation: BodyValidationResult;
    } | null = null;
    let failureReason = "Evolution not deployed: validation did not pass";

    // validationModel overrides studentModel for validation calls
    const validationModelFlag = options.validationModel ?? studentModel;

    for (let iteration = 0; iteration < maxIterations; iteration++) {
      let proposal: BodyEvolutionProposal;

      if (refineFrom) {
        // Refine from the previous failed (but validated) attempt
        proposal = await _refineBodyProposal(
          refineFrom.proposal,
          refineFrom.validation,
          teacherAgent,
          teacherModel,
        );
      } else if (target === "routing") {
        const currentRouting = skillSections.sections["Workflow Routing"] ?? "";
        proposal = await _generateRoutingProposal(
          currentRouting,
          currentContent,
          failurePatterns,
          missedQueries,
          skillName,
          skillPath,
          teacherAgent,
          teacherModel,
        );
      } else {
        proposal = await _generateBodyProposal(
          currentContent,
          failurePatterns,
          missedQueries,
          skillName,
          skillPath,
          teacherAgent,
          teacherModel,
          fewShotExamples,
        );
      }

      lastProposal = proposal;
      lastValidation = null;

      recordAudit(
        proposal.proposal_id,
        "created",
        `${target} proposal created for ${skillName} (iteration ${iteration + 1})`,
      );

      // Check confidence threshold
      if (proposal.confidence < confidenceThreshold) {
        failureReason = `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`;
        recordAudit(proposal.proposal_id, "rejected", failureReason);
        // Discard this proposal; the next attempt restarts from `refineFrom`
        // (or generates a fresh proposal when nothing has been validated yet).
        continue;
      }

      // Validate
      const validation =
        target === "routing"
          ? await _validateRoutingProposal(proposal, evalSet, studentAgent, validationModelFlag)
          : await _validateBodyProposal(proposal, evalSet, studentAgent, validationModelFlag);
      lastValidation = validation;

      recordAudit(
        proposal.proposal_id,
        "validated",
        `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
      );

      if (validation.improved) {
        break;
      }

      recordAudit(
        proposal.proposal_id,
        "rejected",
        `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
      );

      refineFrom = { proposal, validation };
      failureReason = `Validation failed after ${maxIterations} iterations: ${validation.gates_passed}/${validation.gates_total} gates`;
    }

    // No attempt passed validation: report why, for dry runs as well.
    if (!lastProposal || !lastValidation || !lastValidation.improved) {
      return notDeployed(failureReason, lastProposal, lastValidation);
    }

    // Step 5: Deploy or dry-run
    if (dryRun) {
      return notDeployed(
        "Dry run - proposal validated but not deployed",
        lastProposal,
        lastValidation,
      );
    }

    // Deploy: write updated SKILL.md
    const updatedContent =
      target === "routing"
        ? replaceSection(currentContent, "Workflow Routing", lastProposal.proposed_body)
        : replaceBody(currentContent, lastProposal.proposed_body);
    _writeFileSync(skillPath, updatedContent, "utf-8");

    recordAudit(
      lastProposal.proposal_id,
      "deployed",
      `Deployed ${target} proposal for ${skillName}`,
    );

    return {
      proposal: lastProposal,
      validation: lastValidation,
      deployed: true,
      auditEntries,
      reason: "Evolution deployed successfully",
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return notDeployed(`Error during body evolution: ${errorMessage}`);
  }
}
|
|
386
|
+
|
|
387
|
+
// ---------------------------------------------------------------------------
|
|
388
|
+
// CLI entry point
|
|
389
|
+
// ---------------------------------------------------------------------------
|
|
390
|
+
|
|
391
|
+
/**
 * CLI entry point for `selftune evolve-body`.
 *
 * Parses command-line flags, validates them, runs {@link evolveBody}, prints
 * the result as JSON on stdout and terminates the process.
 *
 * Exit codes: 0 after `--help` or when the evolution was deployed; 1 for
 * invalid arguments, when no agent CLI is available, or when nothing was
 * deployed.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      skill: { type: "string" },
      "skill-path": { type: "string" },
      target: { type: "string", default: "body" },
      "teacher-agent": { type: "string" },
      "student-agent": { type: "string" },
      "teacher-model": { type: "string" },
      "student-model": { type: "string" },
      "eval-set": { type: "string" },
      "dry-run": { type: "boolean", default: false },
      "max-iterations": { type: "string", default: "3" },
      confidence: { type: "string", default: "0.6" },
      "task-description": { type: "string" },
      "few-shot": { type: "string" },
      "validation-model": { type: "string" },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune evolve-body — Evolve a skill body or routing table

Usage:
  selftune evolve-body --skill <name> --skill-path <path> [options]

Options:
  --skill             Skill name (required)
  --skill-path        Path to SKILL.md (required)
  --target            Evolution target: body, routing (default: body)
  --teacher-agent     Teacher agent CLI (claude, codex, etc.)
  --student-agent     Student agent CLI for validation
  --teacher-model     Model flag for teacher agent
  --student-model     Model flag for student agent
  --eval-set          Path to eval set JSON
  --dry-run           Validate without deploying
  --max-iterations    Max refinement iterations (default: 3)
  --confidence        Confidence threshold 0.0-1.0 (default: 0.6)
  --task-description  Optional task description context
  --few-shot          Comma-separated paths to example skill files
  --validation-model  Model for trigger-check validation calls (overrides --student-model for validation)
  --help              Show this help message`);
    process.exit(0);
  }

  if (!values.skill || !values["skill-path"]) {
    console.error("[ERROR] --skill and --skill-path are required");
    process.exit(1);
  }

  const { detectAgent } = await import("../utils/llm-call.js");
  const teacherAgent = values["teacher-agent"] ?? detectAgent() ?? "";
  // The student defaults to the teacher when not given explicitly.
  const studentAgent = values["student-agent"] ?? teacherAgent;

  if (!teacherAgent) {
    console.error("[ERROR] No agent CLI found. Install Claude Code, Codex, or OpenCode.");
    process.exit(1);
  }

  // Parse target
  const targetStr = values.target ?? "body";
  if (targetStr !== "body" && targetStr !== "routing") {
    console.error("[ERROR] --target must be 'body' or 'routing'");
    process.exit(1);
  }

  // Parse numeric flags strictly. Number() rejects trailing garbage such as
  // "3abc", and an unchecked NaN would either skip the iteration loop entirely
  // or disable the confidence gate.
  const maxIterations = Number(values["max-iterations"] ?? "3");
  if (!Number.isInteger(maxIterations) || maxIterations < 1) {
    console.error("[ERROR] --max-iterations must be a positive integer");
    process.exit(1);
  }

  const confidenceThreshold = Number(values.confidence ?? "0.6");
  if (Number.isNaN(confidenceThreshold) || confidenceThreshold < 0 || confidenceThreshold > 1) {
    console.error("[ERROR] --confidence must be a number between 0.0 and 1.0");
    process.exit(1);
  }

  // Parse few-shot examples: read every listed file that exists and warn (on
  // stderr, so the JSON on stdout stays clean) about the ones that do not.
  let fewShotExamples: string[] | undefined;
  if (values["few-shot"]) {
    const examplePaths = values["few-shot"]
      .split(",")
      .map((p) => p.trim())
      .filter((p) => p.length > 0);
    const missingPaths = examplePaths.filter((p) => !existsSync(p));
    if (missingPaths.length > 0) {
      console.error(`[WARN] Skipping few-shot example(s) not found: ${missingPaths.join(", ")}`);
    }
    fewShotExamples = examplePaths
      .filter((p) => existsSync(p))
      .map((p) => readFileSync(p, "utf-8"));
  }

  const result = await evolveBody({
    skillName: values.skill,
    skillPath: values["skill-path"],
    target: targetStr as EvolutionTarget,
    teacherAgent,
    studentAgent,
    teacherModel: values["teacher-model"],
    studentModel: values["student-model"],
    evalSetPath: values["eval-set"],
    dryRun: values["dry-run"] ?? false,
    maxIterations,
    confidenceThreshold,
    taskDescription: values["task-description"],
    fewShotExamples,
    validationModel: values["validation-model"],
  });

  console.log(JSON.stringify(result, null, 2));
  // NOTE(review): a dry run never deploys, so it always exits with 1 here,
  // even when the proposal validated — confirm that this is intended.
  process.exit(result.deployed ? 0 : 1);
}
|
|
486
|
+
|
|
487
|
+
// Run the CLI only when this module is the program entry point.
if (import.meta.main) {
  // Last-resort handler: report the failure and exit with a non-zero status.
  const reportFatal = (err: unknown): void => {
    console.error(`[FATAL] ${err}`);
    process.exit(1);
  };

  cliMain().catch(reportFatal);
}
|