selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
|
@@ -50,6 +50,23 @@ export function buildProposalPrompt(
|
|
|
50
50
|
|
|
51
51
|
const missedLines = missedQueries.map((q) => ` - "${q}"`).join("\n");
|
|
52
52
|
|
|
53
|
+
// Build failure feedback section if any patterns have feedback
|
|
54
|
+
const feedbackLines: string[] = [];
|
|
55
|
+
for (const p of failurePatterns) {
|
|
56
|
+
if (p.feedback && p.feedback.length > 0) {
|
|
57
|
+
for (const fb of p.feedback) {
|
|
58
|
+
feedbackLines.push(` Query: "${fb.query}"`);
|
|
59
|
+
feedbackLines.push(` Failure reason: ${fb.failure_reason}`);
|
|
60
|
+
feedbackLines.push(` Improvement hint: ${fb.improvement_hint}`);
|
|
61
|
+
if (fb.invocation_type) {
|
|
62
|
+
feedbackLines.push(` Invocation type: ${fb.invocation_type}`);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
const feedbackSection =
|
|
68
|
+
feedbackLines.length > 0 ? `\n\nStructured Failure Analysis:\n${feedbackLines.join("\n")}` : "";
|
|
69
|
+
|
|
53
70
|
return `Skill Name: ${skillName}
|
|
54
71
|
|
|
55
72
|
Current Description:
|
|
@@ -59,7 +76,7 @@ Failure Patterns:
|
|
|
59
76
|
${patternLines.join("\n\n")}
|
|
60
77
|
|
|
61
78
|
All Missed Queries:
|
|
62
|
-
${missedLines}
|
|
79
|
+
${missedLines}${feedbackSection}
|
|
63
80
|
|
|
64
81
|
Propose an improved description for the "${skillName}" skill that would correctly route the missed queries listed above. Output ONLY a JSON object with "proposed_description", "rationale", and "confidence" fields.`;
|
|
65
82
|
}
|
|
@@ -113,6 +130,86 @@ export function parseProposalResponse(raw: string): {
|
|
|
113
130
|
// Proposal generator
|
|
114
131
|
// ---------------------------------------------------------------------------
|
|
115
132
|
|
|
133
|
+
/**
 * Produce several candidate description proposals concurrently.
 *
 * One prompt per candidate is obtained from `buildPromptVariations`. Each prompt is
 * sent through `callLlm` and the reply is decoded with `parseProposalResponse`.
 * The calls are awaited together with `Promise.all`, so one rejected call or one
 * unparsable reply rejects the whole batch.
 *
 * @param currentDescription - Description text the proposals aim to replace.
 * @param failurePatterns - Patterns forwarded to the prompt builder; their ids are recorded on every proposal.
 * @param missedQueries - Missed queries forwarded to the prompt builder.
 * @param skillName - Skill name, used in the prompt and inside each `proposal_id`.
 * @param skillPath - Stored verbatim as `skill_path` on every proposal.
 * @param agent - Forwarded to `callLlm`.
 * @param count - Number of prompt variations (and therefore proposals) to request; defaults to 3.
 * @param modelFlag - Optional value forwarded to `callLlm`.
 * @returns Pending proposals, in the same order as the prompt variations, with zeroed eval results.
 */
export async function generateMultipleProposals(
  currentDescription: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
  skillPath: string,
  agent: string,
  count = 3,
  modelFlag?: string,
): Promise<EvolutionProposal[]> {
  const prompts = buildPromptVariations(
    currentDescription,
    failurePatterns,
    missedQueries,
    skillName,
    count,
  );

  // Turns a single prompt into a pending proposal; `index` keeps ids distinct within one batch.
  const requestProposal = async (prompt: string, index: number) => {
    const reply = await callLlm(PROPOSER_SYSTEM, prompt, agent, modelFlag);
    const parsed = parseProposalResponse(reply);

    return {
      proposal_id: `evo-${skillName}-${Date.now()}-${index}`,
      skill_name: skillName,
      skill_path: skillPath,
      original_description: currentDescription,
      proposed_description: parsed.proposed_description,
      rationale: parsed.rationale,
      failure_patterns: failurePatterns.map(({ pattern_id }) => pattern_id),
      // Eval numbers are placeholders here; nothing in this function measures them.
      eval_results: {
        before: { total: 0, passed: 0, failed: 0, pass_rate: 0 },
        after: { total: 0, passed: 0, failed: 0, pass_rate: 0 },
      },
      confidence: parsed.confidence,
      created_at: new Date().toISOString(),
      status: "pending" as const,
    };
  };

  return Promise.all(prompts.map((prompt, index) => requestProposal(prompt, index)));
}
|
|
180
|
+
|
|
181
|
+
/**
 * Derive `count` prompts from one shared base prompt, each ending with an extra focus line.
 *
 * The base prompt comes from `buildProposalPrompt`. Three focus instructions exist
 * (explicit, implicit and contextual invocation) and are handed out round-robin, so a
 * `count` above three reuses them in the same order.
 *
 * @param currentDescription - Forwarded to `buildProposalPrompt`.
 * @param failurePatterns - Forwarded to `buildProposalPrompt`.
 * @param missedQueries - Forwarded to `buildProposalPrompt`.
 * @param skillName - Forwarded to `buildProposalPrompt`.
 * @param count - How many prompts to produce.
 * @returns The prompts in generation order; empty when `count` is not positive.
 */
export function buildPromptVariations(
  currentDescription: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
  count: number,
): string[] {
  const focusHints: string[] = [
    "Focus especially on improving explicit invocation (direct mentions of the skill).",
    "Focus especially on improving implicit invocation (indirect references to skill capabilities).",
    "Focus especially on improving contextual invocation (where the context implies the skill is needed).",
  ];

  // Built once: every variation shares the same body and differs only in its last line.
  const sharedPrompt = buildProposalPrompt(
    currentDescription,
    failurePatterns,
    missedQueries,
    skillName,
  );

  const prompts: string[] = [];
  let slot = 0;
  while (slot < count) {
    const hint = focusHints[slot % focusHints.length];
    prompts.push(`${sharedPrompt}\n\nAdditional focus: ${hint}`);
    slot += 1;
  }

  return prompts;
}
|
|
212
|
+
|
|
116
213
|
/** Generate a complete evolution proposal using LLM. */
|
|
117
214
|
export async function generateProposal(
|
|
118
215
|
currentDescription: string,
|
|
@@ -121,9 +218,10 @@ export async function generateProposal(
|
|
|
121
218
|
skillName: string,
|
|
122
219
|
skillPath: string,
|
|
123
220
|
agent: string,
|
|
221
|
+
modelFlag?: string,
|
|
124
222
|
): Promise<EvolutionProposal> {
|
|
125
223
|
const prompt = buildProposalPrompt(currentDescription, failurePatterns, missedQueries, skillName);
|
|
126
|
-
const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, agent);
|
|
224
|
+
const rawResponse = await callLlm(PROPOSER_SYSTEM, prompt, agent, modelFlag);
|
|
127
225
|
const { proposed_description, rationale, confidence } = parseProposalResponse(rawResponse);
|
|
128
226
|
|
|
129
227
|
return {
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* propose-routing.ts
|
|
3
|
+
*
|
|
4
|
+
* Generates improved routing table proposals using LLM analysis of failure
|
|
5
|
+
* patterns. Targets the `## Workflow Routing` section of a SKILL.md file.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
|
|
9
|
+
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// System prompt
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
/**
 * System prompt for the routing table proposer LLM.
 *
 * Instructs the model to reply with nothing but a JSON object carrying
 * "proposed_routing", "rationale" and "confidence" — the three fields that
 * parseRoutingProposalResponse in this file checks for.
 */
|
|
16
|
+
export const ROUTING_PROPOSER_SYSTEM = `You are a workflow routing optimizer for an AI agent skill system.
|
|
17
|
+
|
|
18
|
+
Your task is to analyze the current routing table and its failure patterns,
|
|
19
|
+
then propose an improved routing table that would correctly route missed queries
|
|
20
|
+
while preserving correct routing for existing queries.
|
|
21
|
+
|
|
22
|
+
Rules:
|
|
23
|
+
- The routing table must be a valid markdown table with | Trigger | Workflow | columns.
|
|
24
|
+
- Each row maps a trigger pattern to the workflow it should activate.
|
|
25
|
+
- Cover the semantic space of the missed queries without being too broad.
|
|
26
|
+
- Maintain the original intent and scope of the skill routing.
|
|
27
|
+
- Output ONLY valid JSON with exactly these fields:
|
|
28
|
+
- "proposed_routing" (string): the improved routing table in markdown format
|
|
29
|
+
- "rationale" (string): explanation of what changed and why
|
|
30
|
+
- "confidence" (number): 0.0-1.0 how confident you are this improves routing
|
|
31
|
+
|
|
32
|
+
Do NOT include any text outside the JSON object.`;
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Prompt builder
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Build the user prompt for a routing table proposal.
 *
 * The prompt lists, in order: the skill name, the current routing table, the full
 * skill content, one entry per failure pattern (with its missed queries), the flat
 * list of missed queries, an optional "Structured Failure Analysis" section, and the
 * closing instruction that asks for a JSON object.
 *
 * The structured section is emitted only when at least one pattern carries feedback.
 * Each feedback entry contributes its query, failure reason and improvement hint,
 * plus its invocation type when that field is set.
 *
 * @param currentRouting - Current routing table text, inserted verbatim.
 * @param fullSkillContent - Full skill document text, inserted verbatim.
 * @param failurePatterns - Patterns to describe; `feedback` on a pattern is optional.
 * @param missedQueries - All missed queries, listed one per line.
 * @param skillName - Skill name used in the header and in the closing instruction.
 * @returns The assembled prompt text.
 */
export function buildRoutingProposalPrompt(
  currentRouting: string,
  fullSkillContent: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
): string {
  const patternLines = failurePatterns.map((p) => {
    const queries = p.missed_queries.map((q) => ` - "${q}"`).join("\n");
    return ` Pattern ${p.pattern_id} (frequency: ${p.frequency}, type: ${p.invocation_type}):\n${queries}`;
  });

  const missedLines = missedQueries.map((q) => ` - "${q}"`).join("\n");

  // Build failure feedback section if any patterns have feedback
  const feedbackLines: string[] = [];
  for (const p of failurePatterns) {
    if (p.feedback && p.feedback.length > 0) {
      for (const fb of p.feedback) {
        feedbackLines.push(` Query: "${fb.query}"`);
        feedbackLines.push(` Failure reason: ${fb.failure_reason}`);
        feedbackLines.push(` Improvement hint: ${fb.improvement_hint}`);
        // Carry the invocation type through when present, so this prompt exposes the
        // same structured feedback as the description prompt builder does.
        if (fb.invocation_type) {
          feedbackLines.push(` Invocation type: ${fb.invocation_type}`);
        }
      }
    }
  }
  const feedbackSection =
    feedbackLines.length > 0 ? `\n\nStructured Failure Analysis:\n${feedbackLines.join("\n")}` : "";

  return `Skill Name: ${skillName}

Current Routing Table:
${currentRouting}

Full Skill Content:
${fullSkillContent}

Failure Patterns:
${patternLines.join("\n\n")}

All Missed Queries:
${missedLines}${feedbackSection}

Propose an improved routing table for the "${skillName}" skill that would correctly route the missed queries listed above. Output ONLY a JSON object with "proposed_routing", "rationale", and "confidence" fields.`;
}
|
|
83
|
+
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
// Response parser
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
/**
 * Decode an LLM reply into the fields of a routing proposal.
 *
 * The raw text is first passed through `stripMarkdownFences`, then parsed as JSON.
 * The result must be a non-null object whose `proposed_routing` and `rationale` are
 * strings and whose `confidence` is a number; the confidence is clamped to 0.0-1.0.
 *
 * @param raw - Raw reply text from the LLM.
 * @returns The validated `proposed_routing`, `rationale` and clamped `confidence`.
 * @throws Error when the text is not valid JSON (the message quotes the first 200
 *   characters), when the JSON value is not an object, or when a field is missing
 *   or has the wrong type.
 */
export function parseRoutingProposalResponse(raw: string): {
  proposed_routing: string;
  rationale: string;
  confidence: number;
} {
  const jsonText = stripMarkdownFences(raw);

  let decoded: unknown;
  try {
    decoded = JSON.parse(jsonText);
  } catch {
    throw new Error(`Failed to parse LLM response as JSON: ${jsonText.slice(0, 200)}`);
  }

  if (decoded === null || typeof decoded !== "object") {
    throw new Error("LLM response is not a JSON object");
  }

  const { proposed_routing, rationale, confidence } = decoded as Record<string, unknown>;

  if (typeof proposed_routing !== "string") {
    throw new Error("Missing or invalid 'proposed_routing' field in LLM response");
  }
  if (typeof rationale !== "string") {
    throw new Error("Missing or invalid 'rationale' field in LLM response");
  }
  if (typeof confidence !== "number") {
    throw new Error("Missing or invalid 'confidence' field in LLM response");
  }

  return {
    proposed_routing,
    rationale,
    // Out-of-range values are pulled back into the 0.0-1.0 interval rather than rejected.
    confidence: Math.max(0.0, Math.min(1.0, confidence)),
  };
}
|
|
127
|
+
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// Proposal generator
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
/**
 * Ask the LLM for an improved routing table and wrap the answer as a pending proposal.
 *
 * The prompt is assembled by `buildRoutingProposalPrompt`, sent with
 * `ROUTING_PROPOSER_SYSTEM` through `callLlm`, and the reply is decoded by
 * `parseRoutingProposalResponse` (whose errors propagate to the caller).
 *
 * @param currentRouting - Current routing table; also stored as `original_body`.
 * @param fullSkillContent - Full skill document text, included in the prompt.
 * @param failurePatterns - Patterns described in the prompt; their ids are recorded on the proposal.
 * @param missedQueries - Missed queries listed in the prompt.
 * @param skillName - Skill name, used in the prompt and in `proposal_id`.
 * @param skillPath - Stored verbatim as `skill_path`.
 * @param agent - Forwarded to `callLlm`.
 * @param modelFlag - Optional value forwarded to `callLlm`.
 * @returns A proposal with target "routing" and status "pending".
 */
export async function generateRoutingProposal(
  currentRouting: string,
  fullSkillContent: string,
  failurePatterns: FailurePattern[],
  missedQueries: string[],
  skillName: string,
  skillPath: string,
  agent: string,
  modelFlag?: string,
): Promise<BodyEvolutionProposal> {
  const userPrompt = buildRoutingProposalPrompt(
    currentRouting,
    fullSkillContent,
    failurePatterns,
    missedQueries,
    skillName,
  );
  const reply = await callLlm(ROUTING_PROPOSER_SYSTEM, userPrompt, agent, modelFlag);
  const parsed = parseRoutingProposalResponse(reply);
  const patternIds = failurePatterns.map(({ pattern_id }) => pattern_id);

  return {
    proposal_id: `evo-routing-${skillName}-${Date.now()}`,
    skill_name: skillName,
    skill_path: skillPath,
    original_body: currentRouting,
    proposed_body: parsed.proposed_routing,
    rationale: parsed.rationale,
    target: "routing" as EvolutionTarget,
    failure_patterns: patternIds,
    confidence: parsed.confidence,
    created_at: new Date().toISOString(),
    status: "pending",
  };
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* refine-body.ts
|
|
3
|
+
*
|
|
4
|
+
* Takes failure feedback from a validation pass and asks the teacher LLM
|
|
5
|
+
* to revise specific sections of a body proposal.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
|
|
9
|
+
import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// System prompt
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
/**
 * System prompt for the body refiner (teacher) LLM.
 *
 * Instructs the model to revise a proposed body that failed validation and to reply
 * with nothing but a JSON object carrying "refined_body", "changes_made" and
 * "confidence" — the three fields that parseRefinementResponse in this file checks for.
 */
|
|
16
|
+
export const BODY_REFINER_SYSTEM = `You are an expert skill document refiner for an AI agent routing system.
|
|
17
|
+
|
|
18
|
+
You are given a proposed SKILL.md body that failed one or more validation gates.
|
|
19
|
+
Your task is to revise the body to address the specific failures while preserving
|
|
20
|
+
the parts that passed validation.
|
|
21
|
+
|
|
22
|
+
Rules:
|
|
23
|
+
- Address each failure reason specifically.
|
|
24
|
+
- Preserve structural elements: ## Workflow Routing table, ## sections.
|
|
25
|
+
- Keep the routing table as a valid markdown table with | Trigger | Workflow | columns.
|
|
26
|
+
- Do not make unnecessary changes to parts that passed validation.
|
|
27
|
+
- Output ONLY valid JSON with exactly these fields:
|
|
28
|
+
- "refined_body" (string): the revised skill body (markdown, everything below the title)
|
|
29
|
+
- "changes_made" (string): summary of what was changed
|
|
30
|
+
- "confidence" (number): 0.0-1.0 how confident you are this addresses the failures
|
|
31
|
+
|
|
32
|
+
Do NOT include any text outside the JSON object.`;
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Prompt builder
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Assemble the user prompt that asks the refiner LLM to repair a failed body proposal.
 *
 * The prompt contains the skill name, the proposed body, one line per validation gate
 * whose `passed` flag is falsy (formatted as "gate: reason"), an optional list of
 * regression queries, and the closing instruction that asks for a JSON object.
 *
 * @param proposedBody - Body text that failed validation, inserted verbatim.
 * @param validationResult - Validation outcome; only its `gate_results` are read here.
 * @param skillName - Skill name shown in the prompt header.
 * @param regressionQueries - Optional queries listed under a "Regression Queries"
 *   heading; the heading is omitted when the list is absent or empty.
 * @returns The assembled prompt text.
 */
export function buildRefinementPrompt(
  proposedBody: string,
  validationResult: BodyValidationResult,
  skillName: string,
  regressionQueries?: string[],
): string {
  // Collect one line per gate that did not pass.
  const gateLines: string[] = [];
  for (const gate of validationResult.gate_results) {
    if (!gate.passed) {
      gateLines.push(` - ${gate.gate}: ${gate.reason}`);
    }
  }

  // The regression block carries its own leading blank line; it stays empty when unused.
  let regressionBlock = "";
  if (regressionQueries && regressionQueries.length > 0) {
    const bullets = regressionQueries.map((query) => ` - "${query}"`).join("\n");
    regressionBlock = `\n\nRegression Queries (these worked before but broke after):\n${bullets}`;
  }

  const promptLines = [
    `Skill Name: ${skillName}`,
    "",
    "Current Proposed Body:",
    `${proposedBody}`,
    "",
    "Failed Validation Gates:",
    gateLines.join("\n"),
    regressionBlock,
    "",
    `Revise the proposed body to address the failed validation gates. Preserve what works, fix what doesn't. Output ONLY a JSON object with "refined_body", "changes_made", and "confidence" fields.`,
  ];

  return promptLines.join("\n");
}
|
|
66
|
+
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Response parser
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
/**
 * Decode an LLM reply into the fields of a body refinement.
 *
 * The raw text is first passed through `stripMarkdownFences`, then parsed as JSON.
 * The result must be a non-null object whose `refined_body` and `changes_made` are
 * strings and whose `confidence` is a number; the confidence is clamped to 0.0-1.0.
 *
 * @param raw - Raw reply text from the LLM.
 * @returns The validated `refined_body`, `changes_made` and clamped `confidence`.
 * @throws Error when the text is not valid JSON (the message quotes the first 200
 *   characters), when the JSON value is not an object, or when a field is missing
 *   or has the wrong type.
 */
export function parseRefinementResponse(raw: string): {
  refined_body: string;
  changes_made: string;
  confidence: number;
} {
  const jsonText = stripMarkdownFences(raw);

  let decoded: unknown;
  try {
    decoded = JSON.parse(jsonText);
  } catch {
    throw new Error(`Failed to parse LLM response as JSON: ${jsonText.slice(0, 200)}`);
  }

  if (decoded === null || typeof decoded !== "object") {
    throw new Error("LLM response is not a JSON object");
  }

  const { refined_body, changes_made, confidence } = decoded as Record<string, unknown>;

  if (typeof refined_body !== "string") {
    throw new Error("Missing or invalid 'refined_body' field in LLM response");
  }
  if (typeof changes_made !== "string") {
    throw new Error("Missing or invalid 'changes_made' field in LLM response");
  }
  if (typeof confidence !== "number") {
    throw new Error("Missing or invalid 'confidence' field in LLM response");
  }

  return {
    refined_body,
    changes_made,
    // Out-of-range values are pulled back into the 0.0-1.0 interval rather than rejected.
    confidence: Math.max(0.0, Math.min(1.0, confidence)),
  };
}
|
|
110
|
+
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
// Refinement function
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
/**
 * Ask the refiner LLM to revise a body proposal that failed validation.
 *
 * The prompt is built by `buildRefinementPrompt` from the proposal's current body,
 * the validation result and `validationResult.regressions`. It is sent with
 * `BODY_REFINER_SYSTEM` through `callLlm`, and the reply is decoded by
 * `parseRefinementResponse` (whose errors propagate to the caller).
 *
 * @param proposal - The proposal to refine; it is copied, not mutated.
 * @param validationResult - Validation outcome describing what failed.
 * @param agent - Forwarded to `callLlm`.
 * @param modelFlag - Optional value forwarded to `callLlm`.
 * @returns A copy of `proposal` with a new id (original id plus `-refined-<ms>`),
 *   the refined body, the rationale extended with the refinement summary, the new
 *   confidence, a fresh `created_at`, and status "pending".
 */
export async function refineBodyProposal(
  proposal: BodyEvolutionProposal,
  validationResult: BodyValidationResult,
  agent: string,
  modelFlag?: string,
): Promise<BodyEvolutionProposal> {
  const { proposed_body: failingBody, skill_name: skillName } = proposal;
  const userPrompt = buildRefinementPrompt(
    failingBody,
    validationResult,
    skillName,
    validationResult.regressions,
  );

  const reply = await callLlm(BODY_REFINER_SYSTEM, userPrompt, agent, modelFlag);
  const refinement = parseRefinementResponse(reply);

  return {
    ...proposal,
    proposal_id: `${proposal.proposal_id}-refined-${Date.now()}`,
    proposed_body: refinement.refined_body,
    // Keep the original reasoning and append what this refinement pass changed.
    rationale: `${proposal.rationale}\n\nRefinement: ${refinement.changes_made}`,
    confidence: refinement.confidence,
    created_at: new Date().toISOString(),
    status: "pending",
  };
}
|
|
@@ -11,6 +11,7 @@ import { existsSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from
|
|
|
11
11
|
import { basename, dirname, join } from "node:path";
|
|
12
12
|
import { parseArgs } from "node:util";
|
|
13
13
|
|
|
14
|
+
import { updateContextAfterRollback } from "../memory/writer.js";
|
|
14
15
|
import type { EvolutionAuditEntry } from "../types.js";
|
|
15
16
|
import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
|
|
16
17
|
import { replaceDescription } from "./deploy-proposal.js";
|
|
@@ -153,11 +154,19 @@ export async function rollback(options: RollbackOptions): Promise<RollbackResult
|
|
|
153
154
|
};
|
|
154
155
|
appendAuditEntry(auditEntry, logPath);
|
|
155
156
|
|
|
156
|
-
|
|
157
|
+
const backupResult: RollbackResult = {
|
|
157
158
|
rolledBack: true,
|
|
158
159
|
restoredDescription: originalContent,
|
|
159
160
|
reason: "Restored from backup file",
|
|
160
161
|
};
|
|
162
|
+
|
|
163
|
+
try {
|
|
164
|
+
updateContextAfterRollback(skillName, backupResult);
|
|
165
|
+
} catch {
|
|
166
|
+
// Memory writes should never fail the main operation
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
return backupResult;
|
|
161
170
|
}
|
|
162
171
|
|
|
163
172
|
// Strategy 2: Restore from audit trail's created entry (description only)
|
|
@@ -177,11 +186,19 @@ export async function rollback(options: RollbackOptions): Promise<RollbackResult
|
|
|
177
186
|
};
|
|
178
187
|
appendAuditEntry(auditEntry, logPath);
|
|
179
188
|
|
|
180
|
-
|
|
189
|
+
const auditResult: RollbackResult = {
|
|
181
190
|
rolledBack: true,
|
|
182
191
|
restoredDescription: originalFromAudit,
|
|
183
192
|
reason: "Restored from audit trail",
|
|
184
193
|
};
|
|
194
|
+
|
|
195
|
+
try {
|
|
196
|
+
updateContextAfterRollback(skillName, auditResult);
|
|
197
|
+
} catch {
|
|
198
|
+
// Memory writes should never fail the main operation
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
return auditResult;
|
|
185
202
|
}
|
|
186
203
|
|
|
187
204
|
// No restoration source available
|
|
@@ -206,10 +223,10 @@ export async function cliMain(): Promise<void> {
|
|
|
206
223
|
});
|
|
207
224
|
|
|
208
225
|
if (values.help) {
|
|
209
|
-
console.log(`selftune rollback — Rollback a skill to its pre-evolution state
|
|
226
|
+
console.log(`selftune evolve rollback — Rollback a skill to its pre-evolution state
|
|
210
227
|
|
|
211
228
|
Usage:
|
|
212
|
-
selftune rollback --skill <name> --skill-path <path> [options]
|
|
229
|
+
selftune evolve rollback --skill <name> --skill-path <path> [options]
|
|
213
230
|
|
|
214
231
|
Options:
|
|
215
232
|
--skill Skill name (required)
|