selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -50,6 +50,23 @@ export function buildProposalPrompt(
50
50
 
51
51
  const missedLines = missedQueries.map((q) => ` - "${q}"`).join("\n");
52
52
 
53
+ // Build failure feedback section if any patterns have feedback
54
+ const feedbackLines: string[] = [];
55
+ for (const p of failurePatterns) {
56
+ if (p.feedback && p.feedback.length > 0) {
57
+ for (const fb of p.feedback) {
58
+ feedbackLines.push(` Query: "${fb.query}"`);
59
+ feedbackLines.push(` Failure reason: ${fb.failure_reason}`);
60
+ feedbackLines.push(` Improvement hint: ${fb.improvement_hint}`);
61
+ if (fb.invocation_type) {
62
+ feedbackLines.push(` Invocation type: ${fb.invocation_type}`);
63
+ }
64
+ }
65
+ }
66
+ }
67
+ const feedbackSection =
68
+ feedbackLines.length > 0 ? `\n\nStructured Failure Analysis:\n${feedbackLines.join("\n")}` : "";
69
+
53
70
  return `Skill Name: ${skillName}
54
71
 
55
72
  Current Description:
@@ -59,7 +76,7 @@ Failure Patterns:
59
76
  ${patternLines.join("\n\n")}
60
77
 
61
78
  All Missed Queries:
62
- ${missedLines}
79
+ ${missedLines}${feedbackSection}
63
80
 
64
81
  Propose an improved description for the "${skillName}" skill that would correctly route the missed queries listed above. Output ONLY a JSON object with "proposed_description", "rationale", and "confidence" fields.`;
65
82
  }
@@ -113,6 +130,86 @@ export function parseProposalResponse(raw: string): {
113
130
  // Proposal generator
114
131
  // ---------------------------------------------------------------------------
115
132
 
/**
 * Produce several candidate proposals concurrently, one per prompt variation.
 *
 * The prompts come from `buildPromptVariations`, which appends a different
 * invocation-type focus to each one. All LLM calls are started together and
 * awaited with `Promise.all`, so a single rejected call or unparseable
 * response rejects the whole batch.
 *
 * @param count - Number of prompt variations (and therefore proposals) to request.
 * @param modelFlag - Optional value forwarded unchanged to `callLlm`.
 * @returns One pending proposal per variation, in variation order.
 */
export async function generateMultipleProposals(
  currentDescription: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
  skillPath: string,
  agent: string,
  count = 3,
  modelFlag?: string,
): Promise<EvolutionProposal[]> {
  const prompts = buildPromptVariations(
    currentDescription,
    failurePatterns,
    missedQueries,
    skillName,
    count,
  );

  // Turns a single prompt into a proposal; `index` keeps ids unique when
  // several proposals are created within the same millisecond.
  const proposeFrom = async (prompt: string, index: number) => {
    const llmOutput = await callLlm(PROPOSER_SYSTEM, prompt, agent, modelFlag);
    const parsed = parseProposalResponse(llmOutput);

    return {
      proposal_id: `evo-${skillName}-${Date.now()}-${index}`,
      skill_name: skillName,
      skill_path: skillPath,
      original_description: currentDescription,
      proposed_description: parsed.proposed_description,
      rationale: parsed.rationale,
      failure_patterns: failurePatterns.map((pattern) => pattern.pattern_id),
      // Eval numbers start zeroed at proposal-generation time.
      eval_results: {
        before: { total: 0, passed: 0, failed: 0, pass_rate: 0 },
        after: { total: 0, passed: 0, failed: 0, pass_rate: 0 },
      },
      confidence: parsed.confidence,
      created_at: new Date().toISOString(),
      status: "pending" as const,
    };
  };

  return Promise.all(prompts.map((prompt, index) => proposeFrom(prompt, index)));
}
180
+
/**
 * Create `count` prompts that share one base prompt but each end with a
 * different "Additional focus" instruction.
 *
 * The focus instructions rotate through explicit, implicit, and contextual
 * invocation; when `count` exceeds three the rotation wraps around.
 *
 * @returns The prompt variations, in rotation order.
 */
export function buildPromptVariations(
  currentDescription: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
  count: number,
): string[] {
  const focusHints: string[] = [
    "Focus especially on improving explicit invocation (direct mentions of the skill).",
    "Focus especially on improving implicit invocation (indirect references to skill capabilities).",
    "Focus especially on improving contextual invocation (where the context implies the skill is needed).",
  ];

  // The base prompt is identical for every variation, so build it once.
  const sharedPrompt = buildProposalPrompt(
    currentDescription,
    failurePatterns,
    missedQueries,
    skillName,
  );

  const prompts: string[] = [];
  let index = 0;
  while (index < count) {
    const hint = focusHints[index % focusHints.length];
    prompts.push(`${sharedPrompt}\n\nAdditional focus: ${hint}`);
    index += 1;
  }

  return prompts;
}
212
+
116
213
  /** Generate a complete evolution proposal using LLM. */
117
214
  export async function generateProposal(
118
215
  currentDescription: string,
@@ -121,9 +218,10 @@ export async function generateProposal(
121
218
  skillName: string,
122
219
  skillPath: string,
123
220
  agent: string,
221
+ modelFlag?: string,
124
222
  ): Promise<EvolutionProposal> {
125
223
  const prompt = buildProposalPrompt(currentDescription, failurePatterns, missedQueries, skillName);
126
- const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, agent);
224
+ const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, agent, modelFlag);
127
225
  const { proposed_description, rationale, confidence } = parseProposalResponse(rawResponse);
128
226
 
129
227
  return {
@@ -0,0 +1,166 @@
1
+ /**
2
+ * propose-routing.ts
3
+ *
4
+ * Generates improved routing table proposals using LLM analysis of failure
5
+ * patterns. Targets the `## Workflow Routing` section of a SKILL.md file.
6
+ */
7
+
8
+ import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
9
+ import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // System prompt
13
+ // ---------------------------------------------------------------------------
14
+
15
+ /** System prompt for the routing table proposer LLM; it asks for a JSON-only reply with "proposed_routing", "rationale", and "confidence" fields. */
16
+ export const ROUTING_PROPOSER_SYSTEM = `You are a workflow routing optimizer for an AI agent skill system.
17
+
18
+ Your task is to analyze the current routing table and its failure patterns,
19
+ then propose an improved routing table that would correctly route missed queries
20
+ while preserving correct routing for existing queries.
21
+
22
+ Rules:
23
+ - The routing table must be a valid markdown table with | Trigger | Workflow | columns.
24
+ - Each row maps a trigger pattern to the workflow it should activate.
25
+ - Cover the semantic space of the missed queries without being too broad.
26
+ - Maintain the original intent and scope of the skill routing.
27
+ - Output ONLY valid JSON with exactly these fields:
28
+ - "proposed_routing" (string): the improved routing table in markdown format
29
+ - "rationale" (string): explanation of what changed and why
30
+ - "confidence" (number): 0.0-1.0 how confident you are this improves routing
31
+
32
+ Do NOT include any text outside the JSON object.`;
33
+
34
+ // ---------------------------------------------------------------------------
35
+ // Prompt builder
36
+ // ---------------------------------------------------------------------------
37
+
/**
 * Build the user prompt for a routing table proposal.
 *
 * The prompt contains, in order: the skill name, the current routing table,
 * the full skill content, one entry per failure pattern (with its missed
 * queries), the flat list of all missed queries, and - only when at least one
 * pattern carries feedback - a "Structured Failure Analysis" section.
 *
 * Each feedback entry contributes its query, failure reason and improvement
 * hint, plus its invocation type when one is set, matching what the
 * description prompt builder emits for the same feedback data.
 *
 * @param currentRouting - Routing table text shown under "Current Routing Table".
 * @param fullSkillContent - Skill document text shown under "Full Skill Content".
 * @param failurePatterns - Patterns whose ids, frequencies, types and missed queries are listed.
 * @param missedQueries - Queries listed under "All Missed Queries".
 * @param skillName - Skill name interpolated into the header and the closing instruction.
 * @returns The assembled prompt string.
 */
export function buildRoutingProposalPrompt(
  currentRouting: string,
  fullSkillContent: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
): string {
  const patternLines = failurePatterns.map((p) => {
    const queries = p.missed_queries.map((q) => ` - "${q}"`).join("\n");
    return ` Pattern ${p.pattern_id} (frequency: ${p.frequency}, type: ${p.invocation_type}):\n${queries}`;
  });

  const missedLines = missedQueries.map((q) => ` - "${q}"`).join("\n");

  // Collect structured feedback from every pattern that has any.
  const feedbackLines: string[] = [];
  for (const p of failurePatterns) {
    if (!p.feedback || p.feedback.length === 0) {
      continue;
    }
    for (const fb of p.feedback) {
      feedbackLines.push(` Query: "${fb.query}"`);
      feedbackLines.push(` Failure reason: ${fb.failure_reason}`);
      feedbackLines.push(` Improvement hint: ${fb.improvement_hint}`);
      // Included only when present so entries without a type stay unchanged.
      if (fb.invocation_type) {
        feedbackLines.push(` Invocation type: ${fb.invocation_type}`);
      }
    }
  }

  // The section carries its own leading blank line, so it is appended
  // directly after the missed-query list and vanishes when there is no feedback.
  const feedbackSection =
    feedbackLines.length > 0 ? `\n\nStructured Failure Analysis:\n${feedbackLines.join("\n")}` : "";

  return `Skill Name: ${skillName}

Current Routing Table:
${currentRouting}

Full Skill Content:
${fullSkillContent}

Failure Patterns:
${patternLines.join("\n\n")}

All Missed Queries:
${missedLines}${feedbackSection}

Propose an improved routing table for the "${skillName}" skill that would correctly route the missed queries listed above. Output ONLY a JSON object with "proposed_routing", "rationale", and "confidence" fields.`;
}
83
+
84
+ // ---------------------------------------------------------------------------
85
+ // Response parser
86
+ // ---------------------------------------------------------------------------
87
+
/**
 * Parse LLM response text into structured routing proposal data.
 *
 * The raw text is first passed through `stripMarkdownFences`, then parsed as
 * JSON and validated field by field.
 *
 * @param raw - Raw text returned by the LLM.
 * @returns The proposed routing table, the rationale, and a confidence
 *   clamped to the range 0.0-1.0.
 * @throws Error when the text is not valid JSON, when the JSON value is not a
 *   plain object (arrays and null are rejected), or when `proposed_routing`,
 *   `rationale`, or `confidence` is missing or has the wrong type.
 */
export function parseRoutingProposalResponse(raw: string): {
  proposed_routing: string;
  rationale: string;
  confidence: number;
} {
  const cleaned = stripMarkdownFences(raw);

  let parsed: unknown;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    throw new Error(`Failed to parse LLM response as JSON: ${cleaned.slice(0, 200)}`);
  }

  // `typeof [] === "object"`, so arrays need an explicit check; without it a
  // top-level array would be reported as a missing field instead.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("LLM response is not a JSON object");
  }

  const obj = parsed as Record<string, unknown>;

  if (typeof obj.proposed_routing !== "string") {
    throw new Error("Missing or invalid 'proposed_routing' field in LLM response");
  }
  if (typeof obj.rationale !== "string") {
    throw new Error("Missing or invalid 'rationale' field in LLM response");
  }
  // JSON.parse turns overflowing literals such as 1e999 into Infinity; treat
  // that as invalid rather than silently clamping it to full confidence.
  if (typeof obj.confidence !== "number" || !Number.isFinite(obj.confidence)) {
    throw new Error("Missing or invalid 'confidence' field in LLM response");
  }

  const confidence = Math.max(0.0, Math.min(1.0, obj.confidence));

  return {
    proposed_routing: obj.proposed_routing,
    rationale: obj.rationale,
    confidence,
  };
}
127
+
128
+ // ---------------------------------------------------------------------------
129
+ // Proposal generator
130
+ // ---------------------------------------------------------------------------
131
+
/**
 * Ask the LLM for an improved routing table and wrap the answer in a pending
 * body-evolution proposal targeting "routing".
 *
 * The current routing table is stored as `original_body` and the LLM's
 * proposed table as `proposed_body`.
 *
 * @param modelFlag - Optional value forwarded unchanged to `callLlm`.
 * @returns The new proposal; errors from the LLM call or from response
 *   parsing propagate to the caller.
 */
export async function generateRoutingProposal(
  currentRouting: string,
  fullSkillContent: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
  skillPath: string,
  agent: string,
  modelFlag?: string,
): Promise<BodyEvolutionProposal> {
  const userPrompt = buildRoutingProposalPrompt(
    currentRouting,
    fullSkillContent,
    failurePatterns,
    missedQueries,
    skillName,
  );

  const llmOutput = await callLlm(ROUTING_PROPOSER_SYSTEM, userPrompt, agent, modelFlag);
  const parsed = parseRoutingProposalResponse(llmOutput);

  return {
    proposal_id: `evo-routing-${skillName}-${Date.now()}`,
    skill_name: skillName,
    skill_path: skillPath,
    original_body: currentRouting,
    proposed_body: parsed.proposed_routing,
    rationale: parsed.rationale,
    target: "routing" as EvolutionTarget,
    failure_patterns: failurePatterns.map((pattern) => pattern.pattern_id),
    confidence: parsed.confidence,
    created_at: new Date().toISOString(),
    status: "pending",
  };
}
@@ -0,0 +1,141 @@
1
+ /**
2
+ * refine-body.ts
3
+ *
4
+ * Takes failure feedback from a validation pass and asks the teacher LLM
5
+ * to revise specific sections of a body proposal.
6
+ */
7
+
8
+ import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
9
+ import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // System prompt
13
+ // ---------------------------------------------------------------------------
14
+
15
+ /** System prompt for the body refiner (teacher) LLM; it asks for a JSON-only reply with "refined_body", "changes_made", and "confidence" fields. */
16
+ export const BODY_REFINER_SYSTEM = `You are an expert skill document refiner for an AI agent routing system.
17
+
18
+ You are given a proposed SKILL.md body that failed one or more validation gates.
19
+ Your task is to revise the body to address the specific failures while preserving
20
+ the parts that passed validation.
21
+
22
+ Rules:
23
+ - Address each failure reason specifically.
24
+ - Preserve structural elements: ## Workflow Routing table, ## sections.
25
+ - Keep the routing table as a valid markdown table with | Trigger | Workflow | columns.
26
+ - Do not make unnecessary changes to parts that passed validation.
27
+ - Output ONLY valid JSON with exactly these fields:
28
+ - "refined_body" (string): the revised skill body (markdown, everything below the title)
29
+ - "changes_made" (string): summary of what was changed
30
+ - "confidence" (number): 0.0-1.0 how confident you are this addresses the failures
31
+
32
+ Do NOT include any text outside the JSON object.`;
33
+
34
+ // ---------------------------------------------------------------------------
35
+ // Prompt builder
36
+ // ---------------------------------------------------------------------------
37
+
/**
 * Build the refinement prompt from validation feedback.
 *
 * The prompt lists the skill name, the body that was proposed, every gate in
 * `validationResult.gate_results` that did not pass (with its reason) and,
 * when supplied, the regression queries.
 *
 * The optional regression section carries its own leading blank line and is
 * appended directly after the gate list, so the prompt has no run of empty
 * lines whether or not regressions are present. When no gate failed, a
 * placeholder bullet is emitted so the heading is never left empty.
 *
 * @param proposedBody - The body text that failed validation.
 * @param validationResult - Validation outcome; only its `gate_results` are read here.
 * @param skillName - Skill name placed in the prompt header.
 * @param regressionQueries - Optional queries to list as regressions; omitted or empty adds no section.
 * @returns The assembled prompt string.
 */
export function buildRefinementPrompt(
  proposedBody: string,
  validationResult: BodyValidationResult,
  skillName: string,
  regressionQueries?: string[],
): string {
  const failedGateLines = validationResult.gate_results
    .filter((g) => !g.passed)
    .map((g) => ` - ${g.gate}: ${g.reason}`);

  // An empty list would leave a bare heading in the prompt.
  const failedGates = failedGateLines.length > 0 ? failedGateLines.join("\n") : " - (none reported)";

  const regressionSection =
    regressionQueries && regressionQueries.length > 0
      ? `\n\nRegression Queries (these worked before but broke after):\n${regressionQueries.map((q) => ` - "${q}"`).join("\n")}`
      : "";

  return `Skill Name: ${skillName}

Current Proposed Body:
${proposedBody}

Failed Validation Gates:
${failedGates}${regressionSection}

Revise the proposed body to address the failed validation gates. Preserve what works, fix what doesn't. Output ONLY a JSON object with "refined_body", "changes_made", and "confidence" fields.`;
}
66
+
67
+ // ---------------------------------------------------------------------------
68
+ // Response parser
69
+ // ---------------------------------------------------------------------------
70
+
/**
 * Parse LLM response text into structured refinement data.
 *
 * The raw text is first passed through `stripMarkdownFences`, then parsed as
 * JSON and validated field by field.
 *
 * @param raw - Raw text returned by the LLM.
 * @returns The refined body, the summary of changes, and a confidence clamped
 *   to the range 0.0-1.0.
 * @throws Error when the text is not valid JSON, when the JSON value is not a
 *   plain object (arrays and null are rejected), or when `refined_body`,
 *   `changes_made`, or `confidence` is missing or has the wrong type.
 */
export function parseRefinementResponse(raw: string): {
  refined_body: string;
  changes_made: string;
  confidence: number;
} {
  const cleaned = stripMarkdownFences(raw);

  let parsed: unknown;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    throw new Error(`Failed to parse LLM response as JSON: ${cleaned.slice(0, 200)}`);
  }

  // `typeof [] === "object"`, so arrays need an explicit check; without it a
  // top-level array would be reported as a missing field instead.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("LLM response is not a JSON object");
  }

  const obj = parsed as Record<string, unknown>;

  if (typeof obj.refined_body !== "string") {
    throw new Error("Missing or invalid 'refined_body' field in LLM response");
  }
  if (typeof obj.changes_made !== "string") {
    throw new Error("Missing or invalid 'changes_made' field in LLM response");
  }
  // JSON.parse turns overflowing literals such as 1e999 into Infinity; treat
  // that as invalid rather than silently clamping it to full confidence.
  if (typeof obj.confidence !== "number" || !Number.isFinite(obj.confidence)) {
    throw new Error("Missing or invalid 'confidence' field in LLM response");
  }

  const confidence = Math.max(0.0, Math.min(1.0, obj.confidence));

  return {
    refined_body: obj.refined_body,
    changes_made: obj.changes_made,
    confidence,
  };
}
110
+
111
+ // ---------------------------------------------------------------------------
112
+ // Refinement function
113
+ // ---------------------------------------------------------------------------
114
+
/**
 * Ask the refiner LLM to revise a body proposal using validation feedback.
 *
 * The prompt is built from the proposal's current body and skill name, the
 * validation result, and that result's `regressions`. The returned proposal
 * is a copy of the input with a new id (the old id plus `-refined-` and a
 * timestamp), the refined body, the rationale extended with a "Refinement:"
 * note, the new confidence, a fresh `created_at`, and status "pending".
 *
 * @param modelFlag - Optional value forwarded unchanged to `callLlm`.
 * @returns The refined proposal; the input object is not modified.
 */
export async function refineBodyProposal(
  proposal: BodyEvolutionProposal,
  validationResult: BodyValidationResult,
  agent: string,
  modelFlag?: string,
): Promise<BodyEvolutionProposal> {
  const userPrompt = buildRefinementPrompt(
    proposal.proposed_body,
    validationResult,
    proposal.skill_name,
    validationResult.regressions,
  );

  const llmOutput = await callLlm(BODY_REFINER_SYSTEM, userPrompt, agent, modelFlag);
  const refinement = parseRefinementResponse(llmOutput);

  return {
    ...proposal,
    proposal_id: `${proposal.proposal_id}-refined-${Date.now()}`,
    proposed_body: refinement.refined_body,
    rationale: `${proposal.rationale}\n\nRefinement: ${refinement.changes_made}`,
    confidence: refinement.confidence,
    created_at: new Date().toISOString(),
    status: "pending",
  };
}
@@ -11,6 +11,7 @@ import { existsSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from
11
11
  import { basename, dirname, join } from "node:path";
12
12
  import { parseArgs } from "node:util";
13
13
 
14
+ import { updateContextAfterRollback } from "../memory/writer.js";
14
15
  import type { EvolutionAuditEntry } from "../types.js";
15
16
  import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
16
17
  import { replaceDescription } from "./deploy-proposal.js";
@@ -153,11 +154,19 @@ export async function rollback(options: RollbackOptions): Promise<RollbackResult
153
154
  };
154
155
  appendAuditEntry(auditEntry, logPath);
155
156
 
156
- return {
157
+ const backupResult: RollbackResult = {
157
158
  rolledBack: true,
158
159
  restoredDescription: originalContent,
159
160
  reason: "Restored from backup file",
160
161
  };
162
+
163
+ try {
164
+ updateContextAfterRollback(skillName, backupResult);
165
+ } catch {
166
+ // Memory writes should never fail the main operation
167
+ }
168
+
169
+ return backupResult;
161
170
  }
162
171
 
163
172
  // Strategy 2: Restore from audit trail's created entry (description only)
@@ -177,11 +186,19 @@ export async function rollback(options: RollbackOptions): Promise<RollbackResult
177
186
  };
178
187
  appendAuditEntry(auditEntry, logPath);
179
188
 
180
- return {
189
+ const auditResult: RollbackResult = {
181
190
  rolledBack: true,
182
191
  restoredDescription: originalFromAudit,
183
192
  reason: "Restored from audit trail",
184
193
  };
194
+
195
+ try {
196
+ updateContextAfterRollback(skillName, auditResult);
197
+ } catch {
198
+ // Memory writes should never fail the main operation
199
+ }
200
+
201
+ return auditResult;
185
202
  }
186
203
 
187
204
  // No restoration source available
@@ -206,10 +223,10 @@ export async function cliMain(): Promise<void> {
206
223
  });
207
224
 
208
225
  if (values.help) {
209
- console.log(`selftune rollback — Rollback a skill to its pre-evolution state
226
+ console.log(`selftune evolve rollback — Rollback a skill to its pre-evolution state
210
227
 
211
228
  Usage:
212
- selftune rollback --skill <name> --skill-path <path> [options]
229
+ selftune evolve rollback --skill <name> --skill-path <path> [options]
213
230
 
214
231
  Options:
215
232
  --skill Skill name (required)