selftune 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +146 -0
- package/.claude/agents/evolution-reviewer.md +167 -0
- package/.claude/agents/integration-guide.md +200 -0
- package/.claude/agents/pattern-analyst.md +147 -0
- package/CHANGELOG.md +38 -1
- package/README.md +96 -256
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +103 -0
- package/cli/selftune/constants.ts +75 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-server.ts +582 -0
- package/cli/selftune/dashboard.ts +31 -12
- package/cli/selftune/eval/baseline.ts +247 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +68 -2
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evolve-body.ts +492 -0
- package/cli/selftune/evolution/evolve.ts +479 -104
- package/cli/selftune/evolution/extract-patterns.ts +32 -1
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +20 -3
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/grade-session.ts +145 -19
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/index.ts +88 -0
- package/cli/selftune/ingestors/claude-replay.ts +351 -0
- package/cli/selftune/ingestors/codex-rollout.ts +1 -1
- package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/init.ts +168 -5
- package/cli/selftune/last.ts +2 -2
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +25 -2
- package/cli/selftune/status.ts +18 -15
- package/cli/selftune/types.ts +377 -5
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/llm-call.ts +29 -3
- package/cli/selftune/utils/transcript.ts +35 -0
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/dashboard/index.html +585 -19
- package/package.json +17 -6
- package/skill/SKILL.md +127 -10
- package/skill/Workflows/AutoActivation.md +144 -0
- package/skill/Workflows/Badge.md +118 -0
- package/skill/Workflows/Baseline.md +121 -0
- package/skill/Workflows/Composability.md +100 -0
- package/skill/Workflows/Contribute.md +91 -0
- package/skill/Workflows/Cron.md +155 -0
- package/skill/Workflows/Dashboard.md +203 -0
- package/skill/Workflows/Doctor.md +37 -1
- package/skill/Workflows/Evals.md +73 -5
- package/skill/Workflows/EvolutionMemory.md +152 -0
- package/skill/Workflows/Evolve.md +111 -6
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/ImportSkillsBench.md +111 -0
- package/skill/Workflows/Ingest.md +129 -15
- package/skill/Workflows/Initialize.md +58 -3
- package/skill/Workflows/Replay.md +70 -0
- package/skill/Workflows/Rollback.md +20 -1
- package/skill/Workflows/UnitTest.md +138 -0
- package/skill/Workflows/Watch.md +22 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill unit test runner.
|
|
3
|
+
*
|
|
4
|
+
* Loads, runs, and reports on skill-level unit tests.
|
|
5
|
+
* Tests are stored as JSON arrays of SkillUnitTest objects.
|
|
6
|
+
*
|
|
7
|
+
* Assertion types:
|
|
8
|
+
* - contains / not_contains: check transcript for substring
|
|
9
|
+
* - regex: check transcript against a regex pattern
|
|
10
|
+
* - tool_called / tool_not_called: check transcript for tool usage
|
|
11
|
+
* - json_path: check key=value in parsed JSON from transcript
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
15
|
+
import type {
|
|
16
|
+
SkillAssertion,
|
|
17
|
+
SkillUnitTest,
|
|
18
|
+
UnitTestResult,
|
|
19
|
+
UnitTestSuiteResult,
|
|
20
|
+
} from "../types.js";
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Assertion checker (deterministic, no agent needed)
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
/**
 * Check a single assertion against a transcript string.
 *
 * Supported assertion types:
 * - `contains` / `not_contains`: substring presence or absence
 * - `regex`: first match of `assertion.value` compiled as a RegExp
 * - `tool_called` / `tool_not_called`: substring check on `assertion.value`
 * - `json_path`: `key=value` compared against a top-level key of a JSON
 *   object found in the transcript
 *
 * An invalid regex pattern is reported as a failed assertion instead of
 * being thrown, so one bad pattern does not error the whole test.
 *
 * @param assertion - The assertion to evaluate; `type` selects the check and `value` is its operand.
 * @param transcript - Transcript text to inspect.
 * @returns `passed`, plus `actual` describing what was observed.
 */
export function checkAssertion(
  assertion: SkillAssertion,
  transcript: string,
): { passed: boolean; actual?: string } {
  switch (assertion.type) {
    case "contains": {
      const found = transcript.includes(assertion.value);
      return { passed: found, actual: found ? assertion.value : "(not found)" };
    }

    case "not_contains": {
      const found = transcript.includes(assertion.value);
      return { passed: !found, actual: found ? `found: ${assertion.value}` : "(absent)" };
    }

    case "regex": {
      let pattern: RegExp;
      try {
        pattern = new RegExp(assertion.value);
      } catch (err) {
        // The pattern comes from a user-authored test file; a typo there
        // should fail this assertion, not throw.
        const reason = err instanceof Error ? err.message : String(err);
        return { passed: false, actual: `(invalid regex: ${reason})` };
      }
      const match = pattern.exec(transcript);
      return { passed: match !== null, actual: match ? match[0] : "(no match)" };
    }

    case "tool_called": {
      const found = transcript.includes(assertion.value);
      return { passed: found, actual: found ? assertion.value : "(tool not found)" };
    }

    case "tool_not_called": {
      const found = transcript.includes(assertion.value);
      return { passed: !found, actual: found ? `found: ${assertion.value}` : "(absent)" };
    }

    case "json_path":
      return checkJsonPath(assertion.value, transcript);

    default:
      return { passed: false, actual: `unknown assertion type: ${assertion.type}` };
  }
}

/**
 * Evaluate a `key=value` spec against JSON found in the transcript.
 *
 * The whole transcript is tried as JSON first. Otherwise the text is scanned
 * for brace-balanced `{...}` spans, and the first parseable object that has
 * `key` as an own top-level property decides the result. If objects were
 * found but none has the key, the observed value is the empty string.
 */
function checkJsonPath(spec: string, transcript: string): { passed: boolean; actual?: string } {
  const eqIdx = spec.indexOf("=");
  if (eqIdx < 0) {
    return { passed: false, actual: "invalid json_path format (expected key=value)" };
  }
  const key = spec.slice(0, eqIdx);
  const expected = spec.slice(eqIdx + 1);

  try {
    const whole: unknown = JSON.parse(transcript);
    if (typeof whole === "object" && whole !== null) {
      const actual = readTopLevelKey(whole, key) ?? "";
      return { passed: actual === expected, actual };
    }
  } catch {
    // Transcript is not pure JSON; fall through to scanning for embedded objects.
  }

  let sawBalancedSpan = false;
  let sawObject = false;
  let cursor = transcript.indexOf("{");
  while (cursor !== -1) {
    const end = findJsonObjectEnd(transcript, cursor);
    let next = cursor + 1;
    if (end !== -1) {
      sawBalancedSpan = true;
      try {
        const parsed: unknown = JSON.parse(transcript.slice(cursor, end));
        if (typeof parsed === "object" && parsed !== null) {
          sawObject = true;
          const actual = readTopLevelKey(parsed, key);
          if (actual !== undefined) {
            return { passed: actual === expected, actual };
          }
          // Only top-level keys are checked, so skip past this whole object.
          next = end;
        }
      } catch {
        // Balanced braces but not valid JSON; try the next "{".
      }
    }
    cursor = transcript.indexOf("{", next);
  }

  if (sawObject) {
    return { passed: expected === "", actual: "" };
  }
  return { passed: false, actual: sawBalancedSpan ? "(json parse error)" : "(no json found)" };
}

/**
 * Return the index just past the `}` that closes the `{` at `start`, or -1 if
 * the braces never balance. Braces inside double-quoted strings are ignored.
 */
function findJsonObjectEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  for (let i = start; i < text.length; i++) {
    const ch = text.charAt(i);
    if (inString) {
      if (ch === "\\") {
        i++; // skip the escaped character
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return i + 1;
    }
  }
  return -1;
}

/**
 * Read an own property of a parsed JSON container as a string.
 *
 * Returns `undefined` when the key is absent, `""` for a null value, and
 * JSON text for plain-object values.
 */
function readTopLevelKey(container: object, key: string): string | undefined {
  if (!Object.prototype.hasOwnProperty.call(container, key)) return undefined;
  const value: unknown = (container as Record<string, unknown>)[key];
  if (value === null || value === undefined) return "";
  if (typeof value === "object" && !Array.isArray(value)) return JSON.stringify(value);
  return String(value);
}
|
|
97
|
+
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
// Load unit tests from JSON file
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
/**
 * Load unit tests from a JSON file.
 *
 * The file must contain a JSON array. Entries that are not objects with a
 * string `query` and an array `assertions` are skipped with a warning, since
 * the runner reads both fields.
 *
 * @param testsPath - Path to the JSON test file.
 * @returns The usable tests, or an empty array when the file is missing,
 *   unreadable, not valid JSON, or not an array. Never throws.
 */
export function loadUnitTests(testsPath: string): SkillUnitTest[] {
  try {
    if (!existsSync(testsPath)) {
      console.warn(`[WARN] Unit test file not found: ${testsPath}`);
      return [];
    }
    const parsed: unknown = JSON.parse(readFileSync(testsPath, "utf-8"));
    if (!Array.isArray(parsed)) {
      console.warn(`[WARN] Unit test file is not an array: ${testsPath}`);
      return [];
    }
    const entries: unknown[] = parsed;
    const runnable = entries.filter(isRunnableTestEntry);
    const skipped = entries.length - runnable.length;
    if (skipped > 0) {
      console.warn(
        `[WARN] Skipped ${skipped} malformed unit test(s) in ${testsPath} (each needs a string "query" and an "assertions" array)`,
      );
    }
    return runnable as SkillUnitTest[];
  } catch (err) {
    console.warn(`[WARN] Failed to load unit tests from ${testsPath}:`, err);
    return [];
  }
}

/** True when a parsed JSON entry has the fields the runner reads: a string `query` and an array `assertions`. */
function isRunnableTestEntry(entry: unknown): boolean {
  if (typeof entry !== "object" || entry === null) return false;
  const candidate = entry as { query?: unknown; assertions?: unknown };
  return typeof candidate.query === "string" && Array.isArray(candidate.assertions);
}
|
|
121
|
+
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// Run a single unit test
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
/** Agent function type: takes a query, returns transcript text. */
export type AgentRunner = (query: string) => Promise<string>;

/**
 * Run a single unit test against an agent runner.
 *
 * The agent is called once with `test.query`, and every assertion is checked
 * against the returned transcript. The test passes only if all assertions
 * pass.
 *
 * This function does not reject. If the agent (or an assertion check) throws,
 * the result is a failure with `error` set and every assertion marked failed
 * with `actual: "error"`. A test whose `assertions` is not an array is
 * reported as a failure without calling the agent.
 *
 * @param test - The unit test to run.
 * @param agent - Produces the transcript for the test's query.
 * @returns The per-test result, including wall-clock `duration_ms`.
 */
export async function runUnitTest(
  test: SkillUnitTest,
  agent: AgentRunner,
): Promise<UnitTestResult> {
  const start = Date.now();

  // Tests may be cast straight from parsed JSON, so check the one field that
  // both the success and failure paths iterate. Otherwise the error handler
  // below would itself throw and abort the whole suite.
  if (!Array.isArray(test.assertions)) {
    return {
      test_id: test.id,
      passed: false,
      assertion_results: [],
      duration_ms: Date.now() - start,
      error: 'invalid unit test: "assertions" must be an array',
    };
  }

  try {
    const transcript = await agent(test.query);
    const assertionResults = test.assertions.map((assertion) => {
      const result = checkAssertion(assertion, transcript);
      return { assertion, passed: result.passed, actual: result.actual };
    });

    return {
      test_id: test.id,
      passed: assertionResults.every((r) => r.passed),
      assertion_results: assertionResults,
      duration_ms: Date.now() - start,
    };
  } catch (err) {
    return {
      test_id: test.id,
      passed: false,
      assertion_results: test.assertions.map((assertion) => ({
        assertion,
        passed: false,
        actual: "error",
      })),
      duration_ms: Date.now() - start,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
165
|
+
|
|
166
|
+
// ---------------------------------------------------------------------------
|
|
167
|
+
// Run a full unit test suite
|
|
168
|
+
// ---------------------------------------------------------------------------
|
|
169
|
+
|
|
170
|
+
/**
 * Run every unit test in order and aggregate the outcomes.
 *
 * Tests are run one at a time: each `runUnitTest` call is awaited before the
 * next begins.
 *
 * @param tests - Tests to run.
 * @param skillName - Name recorded on the suite result as `skill_name`.
 * @param agent - Agent runner passed to each test.
 * @returns Totals, pass rate (0 for an empty suite), per-test results, and an
 *   ISO-8601 `run_at` timestamp taken when the suite finishes.
 */
export async function runUnitTestSuite(
  tests: SkillUnitTest[],
  skillName: string,
  agent: AgentRunner,
): Promise<UnitTestSuiteResult> {
  const outcomes: UnitTestResult[] = [];
  for (const unitTest of tests) {
    outcomes.push(await runUnitTest(unitTest, agent));
  }

  let passCount = 0;
  let failCount = 0;
  for (const outcome of outcomes) {
    if (outcome.passed) {
      passCount++;
    } else {
      failCount++;
    }
  }

  const total = outcomes.length;
  const passRate = total > 0 ? passCount / total : 0;

  return {
    skill_name: skillName,
    total,
    passed: passCount,
    failed: failCount,
    pass_rate: passRate,
    results: outcomes,
    run_at: new Date().toISOString(),
  };
}
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
10
|
-
import type { EvolutionProposal } from "../types.js";
|
|
10
|
+
import type { EvolutionProposal, SkillSections } from "../types.js";
|
|
11
11
|
import type { ValidationResult } from "./validate-proposal.js";
|
|
12
12
|
|
|
13
13
|
// ---------------------------------------------------------------------------
|
|
@@ -93,6 +93,147 @@ export function replaceDescription(currentContent: string, newDescription: strin
|
|
|
93
93
|
return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Structured SKILL.md parsing
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
/**
 * Parse a SKILL.md file into named sections.
 *
 * Splits the content into:
 * - frontmatter: YAML frontmatter block (if present, including delimiters)
 * - title: the first `# Heading` line after the frontmatter
 * - description: content between the title and the first `## ` heading
 * - sections: map of `## Name` -> content (up to next `##` or EOF)
 *
 * Both LF and CRLF line endings are accepted; returned text uses `\n`.
 * The closing frontmatter delimiter may carry trailing whitespace.
 * If the document has no `# ` title line, `title` is empty and parsing
 * continues from the end of the frontmatter, so text before the first `## `
 * heading becomes the description and sections are still collected.
 * When two sections share a name, the later one overwrites the earlier.
 *
 * @param content - Full text of the SKILL.md file.
 * @returns The parsed parts; missing parts are empty strings / an empty map.
 */
export function parseSkillSections(content: string): SkillSections {
  const lines = content.split(/\r?\n/);
  let idx = 0;

  // --- frontmatter ---
  let frontmatter = "";
  if (lines[0]?.trim() === "---") {
    // Tolerate trailing whitespace on the closing delimiter; an exact match
    // would lose the frontmatter for such files.
    const endIdx = lines.findIndex((line, i) => i > 0 && line.trimEnd() === "---");
    if (endIdx > 0) {
      frontmatter = lines.slice(0, endIdx + 1).join("\n");
      idx = endIdx + 1;
      // skip blank line after frontmatter
      if (idx < lines.length && (lines[idx] ?? "").trim() === "") idx++;
    }
  }

  // --- title ---
  let title = "";
  const bodyStart = idx;
  const titleIdx = lines.findIndex((line, i) => i >= bodyStart && line.startsWith("# "));
  if (titleIdx !== -1) {
    title = lines[titleIdx] ?? "";
    idx = titleIdx + 1;
  }
  // With no title, idx stays at bodyStart so the rest of the document is
  // still parsed rather than consumed by the title search.

  // --- description (between title and first ## heading) ---
  const descLines: string[] = [];
  while (idx < lines.length && !(lines[idx] ?? "").startsWith("## ")) {
    descLines.push(lines[idx] ?? "");
    idx++;
  }
  // Trim leading/trailing blank lines from description
  const description = descLines.join("\n").trim();

  // --- remaining ## sections ---
  const sections: Record<string, string> = {};
  let currentSection = "";
  const sectionLines: string[] = [];

  for (; idx < lines.length; idx++) {
    const line = lines[idx] ?? "";
    if (line.startsWith("## ")) {
      // Flush previous section
      if (currentSection) {
        sections[currentSection] = sectionLines.join("\n").trim();
        sectionLines.length = 0;
      }
      currentSection = line.replace(/^## /, "").trim();
    } else {
      sectionLines.push(line);
    }
  }
  // Flush last section
  if (currentSection) {
    sections[currentSection] = sectionLines.join("\n").trim();
  }

  return { frontmatter, title, description, sections };
}
|
|
171
|
+
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// Section replacement
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
/**
 * Replace a named `## Section` block in a SKILL.md file.
 *
 * The section is located by its exact heading name: the text after `## `,
 * with surrounding whitespace (including a trailing `\r`) ignored. A heading
 * that merely starts with the name, such as `## Usage Notes` for `Usage`,
 * is not a match. Only the first matching heading is replaced. The section
 * runs up to the next `## ` heading or EOF.
 *
 * If the section does not exist, appends it at the end.
 *
 * @param content - Full text of the SKILL.md file.
 * @param sectionName - Section name without the leading `## `.
 * @param newContent - Replacement body for the section (heading excluded).
 * @returns The updated file text.
 */
export function replaceSection(content: string, sectionName: string, newContent: string): string {
  const lines = content.split("\n");
  const heading = `## ${sectionName}`;
  const wantedName = sectionName.trim();
  const isSectionHeading = (line: string): boolean => line.startsWith("## ");

  const startIdx = lines.findIndex(
    (line) => isSectionHeading(line) && line.slice(3).trim() === wantedName,
  );

  if (startIdx === -1) {
    // Section not found — append
    const trimmed = content.trimEnd();
    return `${trimmed}\n\n${heading}\n\n${newContent}\n`;
  }

  // Find end: next ## heading or EOF
  const nextHeadingIdx = lines.findIndex((line, i) => i > startIdx && isSectionHeading(line));
  const endIdx = nextHeadingIdx === -1 ? lines.length : nextHeadingIdx;

  const before = lines.slice(0, startIdx);
  const after = lines.slice(endIdx);
  return [...before, heading, "", newContent, "", ...after].join("\n");
}
|
|
214
|
+
|
|
215
|
+
/**
 * Swap everything below the header of a SKILL.md file for a proposed body.
 *
 * The frontmatter block and the `# Title` line, as reported by
 * `parseSkillSections`, are carried over when present, each followed by one
 * blank line. The result has trailing whitespace removed and ends with a
 * single newline.
 *
 * @param currentContent - Existing SKILL.md text.
 * @param proposedBody - New body to place after the preserved header.
 * @returns The rebuilt file text.
 */
export function replaceBody(currentContent: string, proposedBody: string): string {
  const { frontmatter, title } = parseSkillSections(currentContent);

  // Each header piece that exists contributes itself plus a blank separator line.
  const headerLines = [frontmatter, title]
    .filter((piece) => Boolean(piece))
    .flatMap((piece) => [piece, ""]);

  const rebuilt = [...headerLines, proposedBody].join("\n");
  return `${rebuilt.trimEnd()}\n`;
}
|
|
236
|
+
|
|
96
237
|
// ---------------------------------------------------------------------------
|
|
97
238
|
// Commit message builder
|
|
98
239
|
// ---------------------------------------------------------------------------
|