selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -0,0 +1,586 @@
1
+ /**
2
+ * evolve-body.ts
3
+ *
4
+ * Body evolution orchestrator: coordinates full body or routing-table evolution
5
+ * through a pipeline of proposal generation, 3-gate validation, refinement,
6
+ * and deployment.
7
+ */
8
+
9
+ import { existsSync, readFileSync } from "node:fs";
10
+ import { parseArgs } from "node:util";
11
+
12
+ import { QUERY_LOG } from "../constants.js";
13
+ import { buildEvalSet } from "../eval/hooks-to-evals.js";
14
+ import { readGradingResultsForSkill } from "../grading/results.js";
15
+ import type {
16
+ BodyEvolutionProposal,
17
+ BodyValidationResult,
18
+ EvalEntry,
19
+ EvolutionAuditEntry,
20
+ EvolutionEvidenceEntry,
21
+ EvolutionTarget,
22
+ FailurePattern,
23
+ GradingResult,
24
+ QueryLogRecord,
25
+ SkillUsageRecord,
26
+ } from "../types.js";
27
+ import { readJsonl } from "../utils/jsonl.js";
28
+ import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
29
+ import { appendAuditEntry } from "./audit.js";
30
+ import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
31
+ import { appendEvidenceEntry } from "./evidence.js";
32
+ import { extractFailurePatterns } from "./extract-patterns.js";
33
+ import { generateBodyProposal } from "./propose-body.js";
34
+ import { generateRoutingProposal } from "./propose-routing.js";
35
+ import { refineBodyProposal } from "./refine-body.js";
36
+ import { validateBodyProposal } from "./validate-body.js";
37
+ import { validateRoutingProposal } from "./validate-routing.js";
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Types
41
+ // ---------------------------------------------------------------------------
42
+
43
/**
 * Caller-supplied configuration for one evolveBody() run.
 */
export interface EvolveBodyOptions {
  // --- What to evolve -------------------------------------------------------

  /** Skill name; used to build the eval set, extract failure patterns, and label audit/evidence records. */
  skillName: string;
  /** Path to the SKILL.md that is read at the start of the run and overwritten on deploy. */
  skillPath: string;
  /**
   * Which part of the skill to evolve. `"routing"` rewrites only the
   * "Workflow Routing" section; any other value replaces the whole body.
   */
  target: EvolutionTarget;

  // --- Agents and models ----------------------------------------------------

  /** Agent passed to proposal generation and refinement. */
  teacherAgent: string;
  /** Agent passed to the validators. */
  studentAgent: string;
  /** Optional model passed along with `teacherAgent`. */
  teacherModel?: string;
  /** Optional model used for validation unless `validationModel` is set. */
  studentModel?: string;
  /** Optional model that takes precedence over `studentModel` for validation calls. */
  validationModel?: string;

  // --- Inputs ---------------------------------------------------------------

  /**
   * Path to a JSON file containing an array of eval entries. When unset, or
   * when the file does not exist, the eval set is built from the logs instead.
   */
  evalSetPath?: string;
  /** Grading results forwarded to failure-pattern extraction. */
  gradingResults?: GradingResult[];
  /** Example texts forwarded to body proposal generation (not passed for the routing target). */
  fewShotExamples?: string[];
  /** Accepted for callers' convenience; evolveBody() in this file does not read it. */
  taskDescription?: string;

  // --- Run control ----------------------------------------------------------

  /** When true, nothing is written to `skillPath`. */
  dryRun: boolean;
  /** Upper bound on generate/validate/refine iterations. */
  maxIterations: number;
  /** Proposals whose confidence is below this value are rejected without being validated. */
  confidenceThreshold: number;
}
60
+
61
/**
 * Outcome of an evolveBody() run.
 */
export interface EvolveBodyResult {
  /** True only when the updated content was written to the skill file. */
  deployed: boolean;
  /** Human-readable explanation of why the run ended the way it did. */
  reason: string;
  /** The most recent proposal, or null when none is reported (no proposal generated, or the run ended in an error). */
  proposal: BodyEvolutionProposal | null;
  /** The validation result reported for that proposal, or null when none is reported. */
  validation: BodyValidationResult | null;
  /** Every audit entry created during the run, in the order it was recorded. */
  auditEntries: EvolutionAuditEntry[];
}
68
+
69
/**
 * Test seams for evolveBody(). Every member is optional; any member left out
 * falls back to the real implementation imported by this module, so tests can
 * substitute collaborators without resorting to mock.module().
 *
 * Note that file-existence checks inside evolveBody() are not injectable: they
 * always go through the real `existsSync`.
 */
export interface EvolveBodyDeps {
  // --- Evolution pipeline stages -------------------------------------------

  /** Derives failure patterns from the eval set, skill usage, and optional grading results. */
  extractFailurePatterns?: (
    evalEntries: EvalEntry[],
    skillUsage: SkillUsageRecord[],
    skillName: string,
    gradingResults?: GradingResult[],
  ) => FailurePattern[];
  generateBodyProposal?: typeof import("./propose-body.js").generateBodyProposal;
  generateRoutingProposal?: typeof import("./propose-routing.js").generateRoutingProposal;
  refineBodyProposal?: typeof import("./refine-body.js").refineBodyProposal;
  validateBodyProposal?: typeof import("./validate-body.js").validateBodyProposal;
  validateRoutingProposal?: typeof import("./validate-routing.js").validateRoutingProposal;

  // --- Logging sinks ---------------------------------------------------------

  appendAuditEntry?: typeof import("./audit.js").appendAuditEntry;
  appendEvidenceEntry?: typeof import("./evidence.js").appendEvidenceEntry;

  // --- Data sources ----------------------------------------------------------

  buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet;
  readEffectiveSkillUsageRecords?: typeof import("../utils/skill-log.js").readEffectiveSkillUsageRecords;

  // --- File system -----------------------------------------------------------

  readFileSync?: typeof readFileSync;
  writeFileSync?: (path: string, data: string, encoding: string) => void;
}
92
+
93
+ // ---------------------------------------------------------------------------
94
+ // Audit helper
95
+ // ---------------------------------------------------------------------------
96
+
97
/**
 * Assemble an audit record for one step in a proposal's lifecycle, stamped
 * with the current time in ISO-8601 form.
 *
 * @param proposalId - Identifier of the proposal the entry belongs to.
 * @param action - Lifecycle action being recorded.
 * @param details - Free-form description stored with the entry.
 * @param skillName - Optional skill name; the `skill_name` key is set even when this is undefined.
 * @returns The newly built audit entry (nothing is persisted here).
 */
function createAuditEntry(
  proposalId: string,
  action: EvolutionAuditEntry["action"],
  details: string,
  skillName?: string,
): EvolutionAuditEntry {
  const stampedAt = new Date().toISOString();
  const record: EvolutionAuditEntry = {
    timestamp: stampedAt,
    proposal_id: proposalId,
    skill_name: skillName,
    action,
    details,
  };
  return record;
}
111
+
112
+ // ---------------------------------------------------------------------------
113
+ // Main orchestrator
114
+ // ---------------------------------------------------------------------------
115
+
116
/**
 * Run one body- or routing-evolution cycle for a skill.
 *
 * Pipeline:
 *  1. Read the SKILL.md at `skillPath`.
 *  2. Load the eval set from `evalSetPath`, or build it from the skill-usage
 *     and query logs when that path is unset or does not exist.
 *  3. Extract failure patterns; stop early when there are none.
 *  4. Generate a proposal and validate it. A failed validation is handed to
 *     the refiner, for at most `maxIterations` attempts in total. A proposal
 *     whose confidence is below `confidenceThreshold` is rejected without
 *     being validated.
 *  5. Unless `dryRun` is set, write the validated proposal to `skillPath`.
 *
 * Audit and evidence records are appended best-effort at each stage: a failure
 * to persist them never aborts the run.
 *
 * @param options - Run configuration.
 * @param _deps - Optional overrides for collaborators (intended for tests).
 * @returns The outcome. Errors raised inside the pipeline are caught and
 *   reported through `reason` with `deployed: false` rather than rethrown.
 */
export async function evolveBody(
  options: EvolveBodyOptions,
  _deps: EvolveBodyDeps = {},
): Promise<EvolveBodyResult> {
  const {
    skillName,
    skillPath,
    target,
    teacherAgent,
    studentAgent,
    teacherModel,
    studentModel,
    evalSetPath,
    dryRun,
    maxIterations,
    confidenceThreshold,
    fewShotExamples,
  } = options;

  // Resolve injectable dependencies, falling back to the real implementations.
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
  const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
  const _generateRoutingProposal = _deps.generateRoutingProposal ?? generateRoutingProposal;
  const _validateBodyProposal = _deps.validateBodyProposal ?? validateBodyProposal;
  const _validateRoutingProposal = _deps.validateRoutingProposal ?? validateRoutingProposal;
  const _refineBodyProposal = _deps.refineBodyProposal ?? refineBodyProposal;
  const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry;
  const _appendEvidenceEntry = _deps.appendEvidenceEntry ?? appendEvidenceEntry;
  const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
  const _readEffectiveSkillUsageRecords =
    _deps.readEffectiveSkillUsageRecords ?? readEffectiveSkillUsageRecords;
  const _readFileSync = _deps.readFileSync ?? readFileSync;
  const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;

  const auditEntries: EvolutionAuditEntry[] = [];

  /** Add an audit entry to the result and, best-effort, to the audit log. */
  function recordAudit(
    proposalId: string,
    action: EvolutionAuditEntry["action"],
    details: string,
  ): void {
    const entry = createAuditEntry(proposalId, action, details, skillName);
    auditEntries.push(entry);
    try {
      _appendAuditEntry(entry);
    } catch {
      // Fail-open: losing an audit line must not abort the evolution run.
    }
  }

  /** Append an evidence entry, best-effort. */
  function recordEvidence(entry: EvolutionEvidenceEntry): void {
    try {
      _appendEvidenceEntry(entry);
    } catch {
      // Fail-open: losing an evidence line must not abort the evolution run.
    }
  }

  /** Fields shared by every evidence record written for a proposal at a given stage. */
  function evidenceBase(
    proposal: BodyEvolutionProposal,
    stage: EvolutionEvidenceEntry["stage"],
    details: string,
  ) {
    return {
      timestamp: new Date().toISOString(),
      proposal_id: proposal.proposal_id,
      skill_name: skillName,
      skill_path: skillPath,
      target,
      stage,
      rationale: proposal.rationale,
      confidence: proposal.confidence,
      details,
    };
  }

  /** Copy the validation fields that are stored alongside evidence records. */
  function summarizeValidation(result: BodyValidationResult) {
    return {
      improved: result.improved,
      gates_passed: result.gates_passed,
      gates_total: result.gates_total,
      gate_results: result.gate_results,
      regressions: result.regressions,
    };
  }

  /** Build a non-deployed result carrying the audit trail collected so far. */
  function notDeployed(
    proposal: BodyEvolutionProposal | null,
    validation: BodyValidationResult | null,
    reason: string,
  ): EvolveBodyResult {
    return { proposal, validation, deployed: false, auditEntries, reason };
  }

  try {
    // Step 1: Read current SKILL.md
    if (!existsSync(skillPath)) {
      return notDeployed(null, null, `SKILL.md not found at ${skillPath}`);
    }

    const currentContent = _readFileSync(skillPath, "utf-8");
    const skillSections = parseSkillSections(currentContent);
    // Every "created" audit entry carries the full pre-evolution file content.
    const createdAuditDetails = `original_description:${currentContent}`;
    const skillUsage = _readEffectiveSkillUsageRecords();

    // Step 2: Load eval set
    let evalSet: EvalEntry[];
    if (evalSetPath && existsSync(evalSetPath)) {
      const rawEvalSet = _readFileSync(evalSetPath, "utf-8");
      const decoded: unknown = JSON.parse(rawEvalSet);
      if (!Array.isArray(decoded)) {
        throw new Error("Eval set must be a JSON array");
      }
      evalSet = decoded as EvalEntry[];
    } else {
      // No usable eval-set file: derive the eval set from the logs instead.
      const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
      evalSet = _buildEvalSet(skillUsage, queryRecords, skillName);
    }

    // Step 3: Extract failure patterns
    const failurePatterns = _extractFailurePatterns(
      evalSet,
      skillUsage,
      skillName,
      options.gradingResults,
    );

    if (failurePatterns.length === 0) {
      return notDeployed(null, null, "No failure patterns found");
    }

    const missedQueries = failurePatterns.flatMap((p) => p.missed_queries);

    // Step 4: Generate -> validate -> refine loop
    let lastProposal: BodyEvolutionProposal | null = null;
    let lastValidation: BodyValidationResult | null = null;

    for (let iteration = 0; iteration < maxIterations; iteration++) {
      const isLastIteration = iteration === maxIterations - 1;
      let proposal: BodyEvolutionProposal;

      if (iteration === 0) {
        // First pass: generate a fresh proposal for the requested target.
        if (target === "routing") {
          const currentRouting = skillSections.sections["Workflow Routing"] || "";
          proposal = await _generateRoutingProposal(
            currentRouting,
            currentContent,
            failurePatterns,
            missedQueries,
            skillName,
            skillPath,
            teacherAgent,
            teacherModel,
          );
        } else {
          proposal = await _generateBodyProposal(
            currentContent,
            failurePatterns,
            missedQueries,
            skillName,
            skillPath,
            teacherAgent,
            teacherModel,
            fewShotExamples,
          );
        }
      } else if (lastProposal && lastValidation) {
        // Later passes: refine the previous attempt using its validation feedback.
        proposal = await _refineBodyProposal(
          lastProposal,
          lastValidation,
          teacherAgent,
          teacherModel,
        );
      } else {
        // No validation feedback exists to refine from (the first proposal was
        // rejected on confidence), so further iterations cannot make progress.
        break;
      }

      lastProposal = proposal;

      recordAudit(proposal.proposal_id, "created", createdAuditDetails);
      recordEvidence({
        ...evidenceBase(
          proposal,
          "created",
          `${target} proposal created for ${skillName} (iteration ${iteration + 1})`,
        ),
        original_text: proposal.original_body,
        proposed_text: proposal.proposed_body,
        eval_set: evalSet,
      });

      // Check confidence threshold
      if (proposal.confidence < confidenceThreshold) {
        const lowConfidence = `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`;
        recordAudit(proposal.proposal_id, "rejected", lowConfidence);
        recordEvidence(evidenceBase(proposal, "rejected", lowConfidence));

        if (isLastIteration) {
          return notDeployed(lastProposal, null, lowConfidence);
        }
        continue;
      }

      // Validate (validationModel overrides studentModel for validation calls)
      const validationModelFlag = options.validationModel ?? studentModel;
      const validate = target === "routing" ? _validateRoutingProposal : _validateBodyProposal;
      const validation: BodyValidationResult = await validate(
        proposal,
        evalSet,
        studentAgent,
        validationModelFlag,
      );
      lastValidation = validation;

      const gateSummary = `${validation.gates_passed}/${validation.gates_total} gates`;
      recordAudit(proposal.proposal_id, "validated", `Validation: ${gateSummary} passed`);
      recordEvidence({
        ...evidenceBase(proposal, "validated", `Validation: ${gateSummary} passed`),
        validation: summarizeValidation(validation),
      });

      if (validation.improved) {
        break;
      }

      recordAudit(proposal.proposal_id, "rejected", `Validation failed: ${gateSummary}`);
      recordEvidence({
        ...evidenceBase(proposal, "rejected", `Validation failed: ${gateSummary}`),
        validation: summarizeValidation(validation),
      });

      if (isLastIteration) {
        return notDeployed(
          lastProposal,
          lastValidation,
          `Validation failed after ${maxIterations} iterations: ${gateSummary}`,
        );
      }
    }

    // Step 5: Deploy or dry-run. First rule out every way the loop can end
    // without a validated proposal, so that neither the dry-run message nor a
    // deployment can be reached for a proposal that never passed validation.
    if (lastProposal === null) {
      // The loop body never ran (maxIterations is zero, negative, or NaN).
      return notDeployed(
        null,
        null,
        `No proposal generated: maxIterations (${maxIterations}) must be at least 1`,
      );
    }
    if (lastValidation === null) {
      // The first proposal was rejected on confidence before any validation ran.
      return notDeployed(
        lastProposal,
        null,
        `Confidence ${lastProposal.confidence} below threshold ${confidenceThreshold}`,
      );
    }
    if (!lastValidation.improved) {
      return notDeployed(
        lastProposal,
        lastValidation,
        "Evolution not deployed: validation did not pass",
      );
    }

    if (dryRun) {
      return notDeployed(
        lastProposal,
        lastValidation,
        "Dry run - proposal validated but not deployed",
      );
    }

    // Deploy: write updated SKILL.md
    const updatedContent =
      target === "routing"
        ? replaceSection(currentContent, "Workflow Routing", lastProposal.proposed_body)
        : replaceBody(currentContent, lastProposal.proposed_body);
    _writeFileSync(skillPath, updatedContent, "utf-8");

    const deployedDetails = `Deployed ${target} proposal for ${skillName}`;
    recordAudit(lastProposal.proposal_id, "deployed", deployedDetails);
    recordEvidence({
      ...evidenceBase(lastProposal, "deployed", deployedDetails),
      validation: summarizeValidation(lastValidation),
    });

    return {
      proposal: lastProposal,
      validation: lastValidation,
      deployed: true,
      auditEntries,
      reason: "Evolution deployed successfully",
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return notDeployed(null, null, `Error during body evolution: ${errorMessage}`);
  }
}
478
+
479
+ // ---------------------------------------------------------------------------
480
+ // CLI entry point
481
+ // ---------------------------------------------------------------------------
482
+
483
/**
 * Command-line entry point for `selftune evolve body`.
 *
 * Parses and validates the flags, resolves the teacher/student agents, loads
 * optional few-shot examples and the stored grading results for the skill,
 * runs evolveBody(), prints the result as JSON on stdout, and terminates the
 * process.
 *
 * Exit codes: 0 when the proposal was deployed (or `--help` was requested);
 * 1 for invalid arguments, a missing agent CLI, or any run that did not
 * deploy.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      skill: { type: "string" },
      "skill-path": { type: "string" },
      target: { type: "string", default: "body" },
      "teacher-agent": { type: "string" },
      "student-agent": { type: "string" },
      "teacher-model": { type: "string" },
      "student-model": { type: "string" },
      "eval-set": { type: "string" },
      "dry-run": { type: "boolean", default: false },
      "max-iterations": { type: "string", default: "3" },
      confidence: { type: "string", default: "0.6" },
      "task-description": { type: "string" },
      "few-shot": { type: "string" },
      "validation-model": { type: "string" },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune evolve body — Evolve a skill body or routing table

Usage:
  selftune evolve body --skill <name> --skill-path <path> [options]

Options:
  --skill             Skill name (required)
  --skill-path        Path to SKILL.md (required)
  --target            Evolution target: body, routing (default: body)
  --teacher-agent     Teacher agent CLI (claude, codex, etc.)
  --student-agent     Student agent CLI for validation
  --teacher-model     Model flag for teacher agent
  --student-model     Model flag for student agent
  --eval-set          Path to eval set JSON
  --dry-run           Validate without deploying
  --max-iterations    Max refinement iterations (default: 3)
  --confidence        Confidence threshold 0.0-1.0 (default: 0.6)
  --task-description  Optional task description context
  --few-shot          Comma-separated paths to example skill files
  --validation-model  Model for trigger-check validation calls (overrides --student-model for validation)
  --help              Show this help message`);
    process.exit(0);
  }

  if (!values.skill || !values["skill-path"]) {
    console.error("[ERROR] --skill and --skill-path are required");
    process.exit(1);
  }

  // The student agent defaults to the teacher agent when not given explicitly.
  const { detectAgent } = await import("../utils/llm-call.js");
  const teacherAgent = values["teacher-agent"] ?? detectAgent() ?? "";
  const studentAgent = values["student-agent"] ?? teacherAgent;

  if (!teacherAgent) {
    console.error("[ERROR] No agent CLI found. Install Claude Code, Codex, or OpenCode.");
    process.exit(1);
  }

  // Parse target
  const targetStr = values.target ?? "body";
  if (targetStr !== "body" && targetStr !== "routing") {
    console.error("[ERROR] --target must be 'body' or 'routing'");
    process.exit(1);
  }

  // Parse numeric flags. Both must be validated here: a NaN iteration count
  // would skip the evolution loop entirely, and a NaN threshold would make
  // every `confidence < threshold` comparison false, silently disabling the
  // confidence gate.
  const maxIterations = Number.parseInt(values["max-iterations"] ?? "3", 10);
  if (!Number.isInteger(maxIterations) || maxIterations < 1) {
    console.error("[ERROR] --max-iterations must be a positive integer");
    process.exit(1);
  }

  const confidenceThreshold = Number.parseFloat(values.confidence ?? "0.6");
  if (
    !Number.isFinite(confidenceThreshold) ||
    confidenceThreshold < 0 ||
    confidenceThreshold > 1
  ) {
    console.error("[ERROR] --confidence must be a number between 0.0 and 1.0");
    process.exit(1);
  }

  // Parse few-shot examples: each existing path is replaced by its contents.
  let fewShotExamples: string[] | undefined;
  if (values["few-shot"]) {
    const examplePaths = values["few-shot"]
      .split(",")
      .map((p) => p.trim())
      .filter((p) => p.length > 0);
    fewShotExamples = [];
    for (const examplePath of examplePaths) {
      if (!existsSync(examplePath)) {
        // Still skipped, but say so on stderr (stdout is reserved for the JSON result).
        console.error(`[WARN] --few-shot file not found, skipping: ${examplePath}`);
        continue;
      }
      fewShotExamples.push(readFileSync(examplePath, "utf-8"));
    }
  }
  const gradingResults = readGradingResultsForSkill(values.skill);

  const result = await evolveBody({
    skillName: values.skill,
    skillPath: values["skill-path"],
    target: targetStr as EvolutionTarget,
    teacherAgent,
    studentAgent,
    teacherModel: values["teacher-model"],
    studentModel: values["student-model"],
    evalSetPath: values["eval-set"],
    dryRun: values["dry-run"] ?? false,
    maxIterations,
    confidenceThreshold,
    taskDescription: values["task-description"],
    fewShotExamples,
    gradingResults,
    validationModel: values["validation-model"],
  });

  console.log(JSON.stringify(result, null, 2));
  // NOTE(review): a successful --dry-run never deploys, so it also exits 1 —
  // confirm that this is the intended contract for scripted callers.
  process.exit(result.deployed ? 0 : 1);
}
580
+
581
// Run the CLI only when import.meta.main is truthy. Any rejection that escapes
// cliMain() is reported on stderr and the process exits with code 1.
if (import.meta.main) {
  const reportFatal = (failure: unknown): void => {
    console.error(`[FATAL] ${failure}`);
    process.exit(1);
  };
  cliMain().catch(reportFatal);
}