selftune 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89):
  1. package/.claude/agents/diagnosis-analyst.md +146 -0
  2. package/.claude/agents/evolution-reviewer.md +167 -0
  3. package/.claude/agents/integration-guide.md +200 -0
  4. package/.claude/agents/pattern-analyst.md +147 -0
  5. package/CHANGELOG.md +38 -1
  6. package/README.md +96 -256
  7. package/assets/BeforeAfter.gif +0 -0
  8. package/assets/FeedbackLoop.gif +0 -0
  9. package/assets/logo.svg +9 -0
  10. package/assets/skill-health-badge.svg +20 -0
  11. package/cli/selftune/activation-rules.ts +171 -0
  12. package/cli/selftune/badge/badge-data.ts +108 -0
  13. package/cli/selftune/badge/badge-svg.ts +212 -0
  14. package/cli/selftune/badge/badge.ts +103 -0
  15. package/cli/selftune/constants.ts +75 -1
  16. package/cli/selftune/contribute/bundle.ts +314 -0
  17. package/cli/selftune/contribute/contribute.ts +214 -0
  18. package/cli/selftune/contribute/sanitize.ts +162 -0
  19. package/cli/selftune/cron/setup.ts +266 -0
  20. package/cli/selftune/dashboard-server.ts +582 -0
  21. package/cli/selftune/dashboard.ts +31 -12
  22. package/cli/selftune/eval/baseline.ts +247 -0
  23. package/cli/selftune/eval/composability.ts +117 -0
  24. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  25. package/cli/selftune/eval/hooks-to-evals.ts +68 -2
  26. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  28. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  29. package/cli/selftune/eval/unit-test.ts +196 -0
  30. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  31. package/cli/selftune/evolution/evolve-body.ts +492 -0
  32. package/cli/selftune/evolution/evolve.ts +479 -104
  33. package/cli/selftune/evolution/extract-patterns.ts +32 -1
  34. package/cli/selftune/evolution/pareto.ts +314 -0
  35. package/cli/selftune/evolution/propose-body.ts +171 -0
  36. package/cli/selftune/evolution/propose-description.ts +100 -2
  37. package/cli/selftune/evolution/propose-routing.ts +166 -0
  38. package/cli/selftune/evolution/refine-body.ts +141 -0
  39. package/cli/selftune/evolution/rollback.ts +20 -3
  40. package/cli/selftune/evolution/validate-body.ts +254 -0
  41. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  42. package/cli/selftune/evolution/validate-routing.ts +177 -0
  43. package/cli/selftune/grading/grade-session.ts +145 -19
  44. package/cli/selftune/grading/pre-gates.ts +104 -0
  45. package/cli/selftune/hooks/auto-activate.ts +185 -0
  46. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  47. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  48. package/cli/selftune/index.ts +88 -0
  49. package/cli/selftune/ingestors/claude-replay.ts +351 -0
  50. package/cli/selftune/ingestors/codex-rollout.ts +1 -1
  51. package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
  52. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  53. package/cli/selftune/init.ts +168 -5
  54. package/cli/selftune/last.ts +2 -2
  55. package/cli/selftune/memory/writer.ts +447 -0
  56. package/cli/selftune/monitoring/watch.ts +25 -2
  57. package/cli/selftune/status.ts +18 -15
  58. package/cli/selftune/types.ts +377 -5
  59. package/cli/selftune/utils/frontmatter.ts +217 -0
  60. package/cli/selftune/utils/llm-call.ts +29 -3
  61. package/cli/selftune/utils/transcript.ts +35 -0
  62. package/cli/selftune/utils/trigger-check.ts +89 -0
  63. package/cli/selftune/utils/tui.ts +156 -0
  64. package/dashboard/index.html +585 -19
  65. package/package.json +17 -6
  66. package/skill/SKILL.md +127 -10
  67. package/skill/Workflows/AutoActivation.md +144 -0
  68. package/skill/Workflows/Badge.md +118 -0
  69. package/skill/Workflows/Baseline.md +121 -0
  70. package/skill/Workflows/Composability.md +100 -0
  71. package/skill/Workflows/Contribute.md +91 -0
  72. package/skill/Workflows/Cron.md +155 -0
  73. package/skill/Workflows/Dashboard.md +203 -0
  74. package/skill/Workflows/Doctor.md +37 -1
  75. package/skill/Workflows/Evals.md +73 -5
  76. package/skill/Workflows/EvolutionMemory.md +152 -0
  77. package/skill/Workflows/Evolve.md +111 -6
  78. package/skill/Workflows/EvolveBody.md +159 -0
  79. package/skill/Workflows/ImportSkillsBench.md +111 -0
  80. package/skill/Workflows/Ingest.md +129 -15
  81. package/skill/Workflows/Initialize.md +58 -3
  82. package/skill/Workflows/Replay.md +70 -0
  83. package/skill/Workflows/Rollback.md +20 -1
  84. package/skill/Workflows/UnitTest.md +138 -0
  85. package/skill/Workflows/Watch.md +22 -0
  86. package/skill/settings_snippet.json +23 -0
  87. package/templates/activation-rules-default.json +27 -0
  88. package/templates/multi-skill-settings.json +64 -0
  89. package/templates/single-skill-settings.json +58 -0
@@ -0,0 +1,492 @@
1
+ /**
2
+ * evolve-body.ts
3
+ *
4
+ * Body evolution orchestrator: coordinates full body or routing-table evolution
5
+ * through a pipeline of proposal generation, 3-gate validation, refinement,
6
+ * and deployment.
7
+ */
8
+
9
+ import { existsSync, readFileSync } from "node:fs";
10
+ import { parseArgs } from "node:util";
11
+
12
+ import { QUERY_LOG, SKILL_LOG } from "../constants.js";
13
+ import { buildEvalSet } from "../eval/hooks-to-evals.js";
14
+ import type {
15
+ BodyEvolutionProposal,
16
+ BodyValidationResult,
17
+ EvalEntry,
18
+ EvolutionAuditEntry,
19
+ EvolutionTarget,
20
+ FailurePattern,
21
+ GradingResult,
22
+ QueryLogRecord,
23
+ SkillUsageRecord,
24
+ } from "../types.js";
25
+ import { readJsonl } from "../utils/jsonl.js";
26
+ import { appendAuditEntry } from "./audit.js";
27
+ import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
28
+ import { extractFailurePatterns } from "./extract-patterns.js";
29
+ import { generateBodyProposal } from "./propose-body.js";
30
+ import { generateRoutingProposal } from "./propose-routing.js";
31
+ import { refineBodyProposal } from "./refine-body.js";
32
+ import { validateBodyProposal } from "./validate-body.js";
33
+ import { validateRoutingProposal } from "./validate-routing.js";
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Types
37
+ // ---------------------------------------------------------------------------
38
+
39
/**
 * Inputs for a single evolveBody() run.
 */
export interface EvolveBodyOptions {
  /** Name of the skill being evolved; used in audit details and pattern extraction. */
  skillName: string;
  /** Path of the SKILL.md file that is read and, on a successful non-dry run, rewritten. */
  skillPath: string;
  /** What to evolve; "routing" targets the "Workflow Routing" section, anything else the body. */
  target: EvolutionTarget;
  /** Agent identifier handed to proposal generation and refinement. */
  teacherAgent: string;
  /** Agent identifier handed to proposal validation. */
  studentAgent: string;
  /** Optional model flag forwarded together with `teacherAgent`. */
  teacherModel?: string;
  /** Optional model flag used for validation when `validationModel` is not set. */
  studentModel?: string;
  /**
   * Optional path to a JSON file holding an array of eval entries. When it is
   * unset or the file does not exist, the eval set is built from the logs.
   */
  evalSetPath?: string;
  /** When true, SKILL.md is never written. */
  dryRun: boolean;
  /** Upper bound on propose/validate rounds. */
  maxIterations: number;
  /** Proposals whose `confidence` is below this value are rejected before validation. */
  confidenceThreshold: number;
  /** Optional task description; evolveBody() itself does not read this field. */
  taskDescription?: string;
  /** Optional example skill contents forwarded to body proposal generation. */
  fewShotExamples?: Array<string>;
  /** Optional grading results forwarded to failure-pattern extraction. */
  gradingResults?: Array<GradingResult>;
  /** Optional model flag that takes precedence over `studentModel` for validation calls. */
  validationModel?: string;
}
56
+
57
/**
 * Outcome of an evolveBody() run.
 */
export interface EvolveBodyResult {
  /** Most recent proposal produced, or null when none was generated or an error was caught. */
  proposal: null | BodyEvolutionProposal;
  /** Validation outcome reported alongside `proposal`, or null when no validation is available. */
  validation: null | BodyValidationResult;
  /** True only when the updated SKILL.md was written. */
  deployed: boolean;
  /** Audit entries recorded during this run, in the order they were created. */
  auditEntries: Array<EvolutionAuditEntry>;
  /** Human-readable explanation of the outcome. */
  reason: string;
}
64
+
65
/**
 * Optional overrides for the collaborators evolveBody() calls. Any member left
 * undefined falls back to the real implementation, so tests can substitute
 * individual functions without resorting to mock.module().
 */
export interface EvolveBodyDeps {
  /** Replacement for failure-pattern extraction. */
  extractFailurePatterns?: (
    evalEntries: EvalEntry[],
    skillUsage: SkillUsageRecord[],
    skillName: string,
    gradingResults?: GradingResult[],
  ) => FailurePattern[];
  /** Replacement for full-body proposal generation. */
  generateBodyProposal?: typeof import("./propose-body.js").generateBodyProposal;
  /** Replacement for routing-section proposal generation. */
  generateRoutingProposal?: typeof import("./propose-routing.js").generateRoutingProposal;
  /** Replacement for body proposal validation. */
  validateBodyProposal?: typeof import("./validate-body.js").validateBodyProposal;
  /** Replacement for routing proposal validation. */
  validateRoutingProposal?: typeof import("./validate-routing.js").validateRoutingProposal;
  /** Replacement for refining a proposal from a failed validation. */
  refineBodyProposal?: typeof import("./refine-body.js").refineBodyProposal;
  /** Replacement for persisting a single audit entry. */
  appendAuditEntry?: typeof import("./audit.js").appendAuditEntry;
  /** Replacement for building the eval set from skill and query logs. */
  buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet;
  /** Replacement for reading SKILL.md and the eval-set file. */
  readFileSync?: typeof readFileSync;
  /** Replacement for writing the updated SKILL.md. */
  writeFileSync?: (path: string, data: string, encoding: string) => void;
}
86
+
87
+ // ---------------------------------------------------------------------------
88
+ // Audit helper
89
+ // ---------------------------------------------------------------------------
90
+
91
/**
 * Assembles one audit record for a proposal event, stamped with the current
 * time as an ISO-8601 string.
 *
 * @param proposalId - Identifier of the proposal the event belongs to.
 * @param action - Audit action being recorded.
 * @param details - Free-form description of the event.
 * @param skillName - Optional skill name; stored as `skill_name` (undefined when omitted).
 * @returns The populated audit entry.
 */
function createAuditEntry(
  proposalId: string,
  action: EvolutionAuditEntry["action"],
  details: string,
  skillName?: string,
): EvolutionAuditEntry {
  const timestamp = new Date().toISOString();
  // Key order is kept stable because entries may be serialised as-is.
  const entry: EvolutionAuditEntry = {
    timestamp,
    proposal_id: proposalId,
    skill_name: skillName,
    action,
    details,
  };
  return entry;
}
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // Main orchestrator
108
+ // ---------------------------------------------------------------------------
109
+
110
/**
 * Evolves a skill's full body or its "Workflow Routing" section.
 *
 * Pipeline: read SKILL.md, load the eval set (from `evalSetPath` when that
 * file exists, otherwise built from the skill/query logs), extract failure
 * patterns, then run up to `maxIterations` rounds of propose -> confidence
 * check -> validate. Rounds after the first refine the most recent proposal
 * that was actually validated, using that proposal's own validation result.
 * A proposal whose validation reports `improved` is written back to
 * `skillPath` unless `dryRun` is set.
 *
 * @param options - Run configuration; see EvolveBodyOptions.
 * @param _deps - Optional collaborator overrides, mainly for tests.
 * @returns The last proposal/validation, whether SKILL.md was written, the
 *   audit entries recorded during the run, and a reason string. Errors thrown
 *   inside the pipeline (from reading SKILL.md onward) are caught and reported
 *   via `reason` with `proposal` and `validation` set to null.
 */
export async function evolveBody(
  options: EvolveBodyOptions,
  _deps: EvolveBodyDeps = {},
): Promise<EvolveBodyResult> {
  const {
    skillName,
    skillPath,
    target,
    teacherAgent,
    studentAgent,
    teacherModel,
    studentModel,
    evalSetPath,
    dryRun,
    maxIterations,
    confidenceThreshold,
    fewShotExamples,
  } = options;

  // Resolve injectable dependencies
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
  const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
  const _generateRoutingProposal = _deps.generateRoutingProposal ?? generateRoutingProposal;
  const _validateBodyProposal = _deps.validateBodyProposal ?? validateBodyProposal;
  const _validateRoutingProposal = _deps.validateRoutingProposal ?? validateRoutingProposal;
  const _refineBodyProposal = _deps.refineBodyProposal ?? refineBodyProposal;
  const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry;
  const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
  const _readFileSync = _deps.readFileSync ?? readFileSync;
  const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;

  const auditEntries: EvolutionAuditEntry[] = [];

  // Records an audit entry in the result and tries to persist it.
  function recordAudit(
    proposalId: string,
    action: EvolutionAuditEntry["action"],
    details: string,
  ): void {
    const entry = createAuditEntry(proposalId, action, details, skillName);
    auditEntries.push(entry);
    try {
      _appendAuditEntry(entry);
    } catch {
      // Fail-open: a broken audit sink must not abort the evolution run.
    }
  }

  // Builds the result shared by every exit that does not write SKILL.md.
  function notDeployed(
    proposal: BodyEvolutionProposal | null,
    validation: BodyValidationResult | null,
    reason: string,
  ): EvolveBodyResult {
    return { proposal, validation, deployed: false, auditEntries, reason };
  }

  try {
    // Step 1: Read current SKILL.md
    if (!existsSync(skillPath)) {
      return notDeployed(null, null, `SKILL.md not found at ${skillPath}`);
    }

    const currentContent = _readFileSync(skillPath, "utf-8");
    const parsed = parseSkillSections(currentContent);

    // Step 2: Load eval set. A missing evalSetPath file falls back to the logs.
    let evalSet: EvalEntry[];
    if (evalSetPath && existsSync(evalSetPath)) {
      const rawEvalSet = _readFileSync(evalSetPath, "utf-8");
      const decoded: unknown = JSON.parse(rawEvalSet);
      if (!Array.isArray(decoded)) {
        throw new Error("Eval set must be a JSON array");
      }
      evalSet = decoded as EvalEntry[];
    } else {
      const skillRecords = readJsonl<SkillUsageRecord>(SKILL_LOG);
      const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
      evalSet = _buildEvalSet(skillRecords, queryRecords, skillName);
    }

    // Step 3: Load skill usage and extract failure patterns
    const skillUsage = readJsonl<SkillUsageRecord>(SKILL_LOG);
    const failurePatterns = _extractFailurePatterns(
      evalSet,
      skillUsage,
      skillName,
      options.gradingResults,
    );

    if (failurePatterns.length === 0) {
      return notDeployed(null, null, "No failure patterns found");
    }

    const missedQueries = failurePatterns.flatMap((p) => p.missed_queries);

    // Step 4: Generate -> validate -> refine loop
    // lastProposal/lastValidation always describe the same proposal:
    // lastValidation is null whenever lastProposal has not been validated.
    let lastProposal: BodyEvolutionProposal | null = null;
    let lastValidation: BodyValidationResult | null = null;
    // Most recent proposal that was validated and failed, paired with its own
    // validation result, so refinement never mixes feedback between proposals.
    let refineFrom: {
      proposal: BodyEvolutionProposal;
      validation: BodyValidationResult;
    } | null = null;

    for (let iteration = 0; iteration < maxIterations; iteration++) {
      let proposal: BodyEvolutionProposal;

      if (iteration === 0) {
        if (target === "routing") {
          const currentRouting = parsed.sections["Workflow Routing"] || "";
          proposal = await _generateRoutingProposal(
            currentRouting,
            currentContent,
            failurePatterns,
            missedQueries,
            skillName,
            skillPath,
            teacherAgent,
            teacherModel,
          );
        } else {
          proposal = await _generateBodyProposal(
            currentContent,
            failurePatterns,
            missedQueries,
            skillName,
            skillPath,
            teacherAgent,
            teacherModel,
            fewShotExamples,
          );
        }
      } else if (refineFrom) {
        // Refine from the previous validated-but-failed attempt
        proposal = await _refineBodyProposal(
          refineFrom.proposal,
          refineFrom.validation,
          teacherAgent,
          teacherModel,
        );
      } else {
        break;
      }

      lastProposal = proposal;
      lastValidation = null;

      recordAudit(
        proposal.proposal_id,
        "created",
        `${target} proposal created for ${skillName} (iteration ${iteration + 1})`,
      );

      // Check confidence threshold
      if (proposal.confidence < confidenceThreshold) {
        const lowConfidence = `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`;
        recordAudit(proposal.proposal_id, "rejected", lowConfidence);

        // Stop when out of iterations, or when there is no validated attempt
        // to refine from (refinement needs validation feedback).
        if (iteration === maxIterations - 1 || !refineFrom) {
          return notDeployed(lastProposal, null, lowConfidence);
        }
        continue;
      }

      // Validate (validationModel overrides studentModel for validation calls)
      const validationModelFlag = options.validationModel ?? studentModel;
      const validation: BodyValidationResult =
        target === "routing"
          ? await _validateRoutingProposal(proposal, evalSet, studentAgent, validationModelFlag)
          : await _validateBodyProposal(proposal, evalSet, studentAgent, validationModelFlag);
      lastValidation = validation;

      recordAudit(
        proposal.proposal_id,
        "validated",
        `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
      );

      if (validation.improved) {
        break;
      }

      recordAudit(
        proposal.proposal_id,
        "rejected",
        `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
      );
      refineFrom = { proposal, validation };

      if (iteration === maxIterations - 1) {
        return notDeployed(
          lastProposal,
          lastValidation,
          `Validation failed after ${maxIterations} iterations: ${validation.gates_passed}/${validation.gates_total} gates`,
        );
      }
    }

    // Step 5: Deploy or dry-run, but only for a proposal that passed validation
    if (lastProposal && lastValidation && lastValidation.improved) {
      if (dryRun) {
        return notDeployed(
          lastProposal,
          lastValidation,
          "Dry run - proposal validated but not deployed",
        );
      }

      // Deploy: write updated SKILL.md
      const updatedContent =
        target === "routing"
          ? replaceSection(currentContent, "Workflow Routing", lastProposal.proposed_body)
          : replaceBody(currentContent, lastProposal.proposed_body);
      _writeFileSync(skillPath, updatedContent, "utf-8");

      recordAudit(
        lastProposal.proposal_id,
        "deployed",
        `Deployed ${target} proposal for ${skillName}`,
      );

      return {
        proposal: lastProposal,
        validation: lastValidation,
        deployed: true,
        auditEntries,
        reason: "Evolution deployed successfully",
      };
    }

    // Nothing passed validation. Distinguish "loop never ran" from the rest.
    const fallbackReason = lastProposal
      ? "Evolution not deployed: validation did not pass"
      : `No proposal generated: maxIterations must be at least 1 (got ${maxIterations})`;
    return notDeployed(lastProposal, lastValidation, fallbackReason);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return notDeployed(null, null, `Error during body evolution: ${errorMessage}`);
  }
}
386
+
387
+ // ---------------------------------------------------------------------------
388
+ // CLI entry point
389
+ // ---------------------------------------------------------------------------
390
+
391
/**
 * CLI entry point for `selftune evolve-body`.
 *
 * Parses flags, validates them, runs evolveBody(), prints the result as JSON
 * on stdout, and exits with status 0 when the proposal was deployed and 1
 * otherwise. Invalid or missing flags print an `[ERROR]` line on stderr and
 * exit with status 1; `--help` prints usage and exits with status 0.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      skill: { type: "string" },
      "skill-path": { type: "string" },
      target: { type: "string", default: "body" },
      "teacher-agent": { type: "string" },
      "student-agent": { type: "string" },
      "teacher-model": { type: "string" },
      "student-model": { type: "string" },
      "eval-set": { type: "string" },
      "dry-run": { type: "boolean", default: false },
      "max-iterations": { type: "string", default: "3" },
      confidence: { type: "string", default: "0.6" },
      "task-description": { type: "string" },
      "few-shot": { type: "string" },
      "validation-model": { type: "string" },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune evolve-body — Evolve a skill body or routing table

Usage:
selftune evolve-body --skill <name> --skill-path <path> [options]

Options:
--skill Skill name (required)
--skill-path Path to SKILL.md (required)
--target Evolution target: body, routing (default: body)
--teacher-agent Teacher agent CLI (claude, codex, etc.)
--student-agent Student agent CLI for validation
--teacher-model Model flag for teacher agent
--student-model Model flag for student agent
--eval-set Path to eval set JSON
--dry-run Validate without deploying
--max-iterations Max refinement iterations (default: 3)
--confidence Confidence threshold 0.0-1.0 (default: 0.6)
--task-description Optional task description context
--few-shot Comma-separated paths to example skill files
--validation-model Model for trigger-check validation calls (overrides --student-model for validation)
--help Show this help message`);
    process.exit(0);
  }

  if (!values.skill || !values["skill-path"]) {
    console.error("[ERROR] --skill and --skill-path are required");
    process.exit(1);
  }

  const { detectAgent } = await import("../utils/llm-call.js");
  const teacherAgent = values["teacher-agent"] ?? detectAgent() ?? "";
  const studentAgent = values["student-agent"] ?? teacherAgent;

  if (!teacherAgent) {
    console.error("[ERROR] No agent CLI found. Install Claude Code, Codex, or OpenCode.");
    process.exit(1);
  }

  // Parse target
  const targetStr = values.target ?? "body";
  if (targetStr !== "body" && targetStr !== "routing") {
    console.error("[ERROR] --target must be 'body' or 'routing'");
    process.exit(1);
  }

  // Parse numeric flags strictly. A NaN threshold would make every
  // `confidence < threshold` comparison false and silently disable the gate;
  // a NaN or non-positive iteration count would skip the evolution loop.
  const parseNumberFlag = (raw: string): number =>
    raw.trim() === "" ? Number.NaN : Number(raw);

  const maxIterations = parseNumberFlag(values["max-iterations"] ?? "3");
  if (!Number.isInteger(maxIterations) || maxIterations < 1) {
    console.error("[ERROR] --max-iterations must be a positive integer");
    process.exit(1);
  }

  const confidenceThreshold = parseNumberFlag(values.confidence ?? "0.6");
  if (
    !Number.isFinite(confidenceThreshold) ||
    confidenceThreshold < 0 ||
    confidenceThreshold > 1
  ) {
    console.error("[ERROR] --confidence must be a number between 0.0 and 1.0");
    process.exit(1);
  }

  // Parse few-shot examples: read each listed file, warning about (and
  // skipping) paths that do not exist instead of dropping them silently.
  let fewShotExamples: string[] | undefined;
  if (values["few-shot"]) {
    fewShotExamples = [];
    for (const candidate of values["few-shot"].split(",")) {
      const examplePath = candidate.trim();
      if (examplePath === "") {
        continue;
      }
      if (!existsSync(examplePath)) {
        // stderr keeps stdout reserved for the JSON result.
        console.error(`[WARN] --few-shot file not found, skipping: ${examplePath}`);
        continue;
      }
      fewShotExamples.push(readFileSync(examplePath, "utf-8"));
    }
  }

  const result = await evolveBody({
    skillName: values.skill,
    skillPath: values["skill-path"],
    target: targetStr as EvolutionTarget,
    teacherAgent,
    studentAgent,
    teacherModel: values["teacher-model"],
    studentModel: values["student-model"],
    evalSetPath: values["eval-set"],
    dryRun: values["dry-run"] ?? false,
    maxIterations,
    confidenceThreshold,
    taskDescription: values["task-description"],
    fewShotExamples,
    validationModel: values["validation-model"],
  });

  console.log(JSON.stringify(result, null, 2));
  process.exit(result.deployed ? 0 : 1);
}
486
+
487
// Run the CLI only when `import.meta.main` is set, i.e. when the runtime
// reports this module as the program entry point.
if (import.meta.main) {
  // Any rejection escaping cliMain() is reported and turned into exit status 1.
  const reportFatal = (err: unknown): never => {
    console.error(`[FATAL] ${err}`);
    process.exit(1);
  };
  cliMain().catch(reportFatal);
}