selftune 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/.claude/agents/diagnosis-analyst.md +146 -0
  2. package/.claude/agents/evolution-reviewer.md +167 -0
  3. package/.claude/agents/integration-guide.md +200 -0
  4. package/.claude/agents/pattern-analyst.md +147 -0
  5. package/CHANGELOG.md +38 -1
  6. package/README.md +96 -256
  7. package/assets/BeforeAfter.gif +0 -0
  8. package/assets/FeedbackLoop.gif +0 -0
  9. package/assets/logo.svg +9 -0
  10. package/assets/skill-health-badge.svg +20 -0
  11. package/cli/selftune/activation-rules.ts +171 -0
  12. package/cli/selftune/badge/badge-data.ts +108 -0
  13. package/cli/selftune/badge/badge-svg.ts +212 -0
  14. package/cli/selftune/badge/badge.ts +103 -0
  15. package/cli/selftune/constants.ts +75 -1
  16. package/cli/selftune/contribute/bundle.ts +314 -0
  17. package/cli/selftune/contribute/contribute.ts +214 -0
  18. package/cli/selftune/contribute/sanitize.ts +162 -0
  19. package/cli/selftune/cron/setup.ts +266 -0
  20. package/cli/selftune/dashboard-server.ts +582 -0
  21. package/cli/selftune/dashboard.ts +31 -12
  22. package/cli/selftune/eval/baseline.ts +247 -0
  23. package/cli/selftune/eval/composability.ts +117 -0
  24. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  25. package/cli/selftune/eval/hooks-to-evals.ts +68 -2
  26. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  28. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  29. package/cli/selftune/eval/unit-test.ts +196 -0
  30. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  31. package/cli/selftune/evolution/evolve-body.ts +492 -0
  32. package/cli/selftune/evolution/evolve.ts +479 -104
  33. package/cli/selftune/evolution/extract-patterns.ts +32 -1
  34. package/cli/selftune/evolution/pareto.ts +314 -0
  35. package/cli/selftune/evolution/propose-body.ts +171 -0
  36. package/cli/selftune/evolution/propose-description.ts +100 -2
  37. package/cli/selftune/evolution/propose-routing.ts +166 -0
  38. package/cli/selftune/evolution/refine-body.ts +141 -0
  39. package/cli/selftune/evolution/rollback.ts +20 -3
  40. package/cli/selftune/evolution/validate-body.ts +254 -0
  41. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  42. package/cli/selftune/evolution/validate-routing.ts +177 -0
  43. package/cli/selftune/grading/grade-session.ts +145 -19
  44. package/cli/selftune/grading/pre-gates.ts +104 -0
  45. package/cli/selftune/hooks/auto-activate.ts +185 -0
  46. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  47. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  48. package/cli/selftune/index.ts +88 -0
  49. package/cli/selftune/ingestors/claude-replay.ts +351 -0
  50. package/cli/selftune/ingestors/codex-rollout.ts +1 -1
  51. package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
  52. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  53. package/cli/selftune/init.ts +168 -5
  54. package/cli/selftune/last.ts +2 -2
  55. package/cli/selftune/memory/writer.ts +447 -0
  56. package/cli/selftune/monitoring/watch.ts +25 -2
  57. package/cli/selftune/status.ts +18 -15
  58. package/cli/selftune/types.ts +377 -5
  59. package/cli/selftune/utils/frontmatter.ts +217 -0
  60. package/cli/selftune/utils/llm-call.ts +29 -3
  61. package/cli/selftune/utils/transcript.ts +35 -0
  62. package/cli/selftune/utils/trigger-check.ts +89 -0
  63. package/cli/selftune/utils/tui.ts +156 -0
  64. package/dashboard/index.html +585 -19
  65. package/package.json +17 -6
  66. package/skill/SKILL.md +127 -10
  67. package/skill/Workflows/AutoActivation.md +144 -0
  68. package/skill/Workflows/Badge.md +118 -0
  69. package/skill/Workflows/Baseline.md +121 -0
  70. package/skill/Workflows/Composability.md +100 -0
  71. package/skill/Workflows/Contribute.md +91 -0
  72. package/skill/Workflows/Cron.md +155 -0
  73. package/skill/Workflows/Dashboard.md +203 -0
  74. package/skill/Workflows/Doctor.md +37 -1
  75. package/skill/Workflows/Evals.md +73 -5
  76. package/skill/Workflows/EvolutionMemory.md +152 -0
  77. package/skill/Workflows/Evolve.md +111 -6
  78. package/skill/Workflows/EvolveBody.md +159 -0
  79. package/skill/Workflows/ImportSkillsBench.md +111 -0
  80. package/skill/Workflows/Ingest.md +129 -15
  81. package/skill/Workflows/Initialize.md +58 -3
  82. package/skill/Workflows/Replay.md +70 -0
  83. package/skill/Workflows/Rollback.md +20 -1
  84. package/skill/Workflows/UnitTest.md +138 -0
  85. package/skill/Workflows/Watch.md +22 -0
  86. package/skill/settings_snippet.json +23 -0
  87. package/templates/activation-rules-default.json +27 -0
  88. package/templates/multi-skill-settings.json +64 -0
  89. package/templates/single-skill-settings.json +58 -0
@@ -0,0 +1,196 @@
1
+ /**
2
+ * Skill unit test runner.
3
+ *
4
+ * Loads, runs, and reports on skill-level unit tests.
5
+ * Tests are stored as JSON arrays of SkillUnitTest objects.
6
+ *
7
+ * Assertion types:
8
+ * - contains / not_contains: check transcript for substring
9
+ * - regex: check transcript against a regex pattern
10
+ * - tool_called / tool_not_called: check transcript for tool usage
11
+ * - json_path: check key=value in parsed JSON from transcript
12
+ */
13
+
14
+ import { existsSync, readFileSync } from "node:fs";
15
+ import type {
16
+ SkillAssertion,
17
+ SkillUnitTest,
18
+ UnitTestResult,
19
+ UnitTestSuiteResult,
20
+ } from "../types.js";
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Assertion checker (deterministic, no agent needed)
24
+ // ---------------------------------------------------------------------------
25
+
26
/** Outcome of searching free text for an embedded JSON object. */
type EmbeddedJsonLookup =
  | { status: "parsed"; value: unknown }
  | { status: "unparseable" }
  | { status: "absent" };

/**
 * Return the index of the `}` that closes the `{` at `openIdx`, or -1 when the
 * braces never balance. Braces inside double-quoted strings are ignored so that
 * values such as `"a}b"` do not end the object early.
 */
function matchingBraceIndex(text: string, openIdx: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = openIdx; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Find the first brace-balanced span in `text` that parses as JSON.
 * Nested objects are supported; spans that balance but do not parse are skipped.
 */
function firstEmbeddedJsonObject(text: string): EmbeddedJsonLookup {
  let sawCandidate = false;
  for (let start = text.indexOf("{"); start !== -1; start = text.indexOf("{", start + 1)) {
    const end = matchingBraceIndex(text, start);
    if (end === -1) continue;
    sawCandidate = true;
    try {
      const value: unknown = JSON.parse(text.slice(start, end + 1));
      return { status: "parsed", value };
    } catch {
      // Balanced but not JSON (e.g. prose in braces) — keep looking.
    }
  }
  return sawCandidate ? { status: "unparseable" } : { status: "absent" };
}

/**
 * Check a single assertion against a transcript string.
 *
 * The check is deterministic and never calls an agent. Supported types:
 * - `contains` / `tool_called`: pass when `assertion.value` is a substring of the transcript.
 * - `not_contains` / `tool_not_called`: pass when it is not a substring.
 * - `regex`: pass when the pattern in `assertion.value` matches. An invalid
 *   pattern fails the assertion (reported in `actual`) rather than throwing.
 * - `json_path`: `key=value` comparison against a top-level key. The whole
 *   transcript is parsed as JSON first; otherwise the first JSON object
 *   embedded in the text (nested objects included) is used. The value is
 *   compared after `String()` conversion, and a missing key compares as `""`.
 *
 * @param assertion - The assertion to evaluate.
 * @param transcript - Transcript text to check.
 * @returns `passed`, plus an `actual` string describing what was found.
 */
export function checkAssertion(
  assertion: SkillAssertion,
  transcript: string,
): { passed: boolean; actual?: string } {
  switch (assertion.type) {
    case "contains": {
      const found = transcript.includes(assertion.value);
      return { passed: found, actual: found ? assertion.value : "(not found)" };
    }

    case "not_contains": {
      const found = transcript.includes(assertion.value);
      return { passed: !found, actual: found ? `found: ${assertion.value}` : "(absent)" };
    }

    case "regex": {
      let re: RegExp;
      try {
        re = new RegExp(assertion.value);
      } catch (err) {
        // A malformed pattern is a failed assertion, not a crashed run.
        const reason = err instanceof Error ? err.message : String(err);
        return { passed: false, actual: `invalid regex: ${reason}` };
      }
      const match = re.exec(transcript);
      return {
        passed: match !== null,
        actual: match ? match[0] : "(no match)",
      };
    }

    case "tool_called": {
      const found = transcript.includes(assertion.value);
      return { passed: found, actual: found ? assertion.value : "(tool not found)" };
    }

    case "tool_not_called": {
      const found = transcript.includes(assertion.value);
      return { passed: !found, actual: found ? `found: ${assertion.value}` : "(absent)" };
    }

    case "json_path": {
      // Simple key=value check: "status=ok" looks for {"status":"ok"} in transcript
      const eqIdx = assertion.value.indexOf("=");
      if (eqIdx < 0) {
        return { passed: false, actual: "invalid json_path format (expected key=value)" };
      }
      const key = assertion.value.slice(0, eqIdx);
      const expected = assertion.value.slice(eqIdx + 1);
      const compare = (parsed: unknown): { passed: boolean; actual: string } => {
        const actual = String((parsed as Record<string, unknown>)[key] ?? "");
        return { passed: actual === expected, actual };
      };

      let whole: unknown;
      let wholeParsed = true;
      try {
        whole = JSON.parse(transcript);
      } catch {
        wholeParsed = false;
      }
      // A bare `null` cannot be indexed, so it falls through to the embedded search.
      if (wholeParsed && whole != null) {
        return compare(whole);
      }

      const embedded = firstEmbeddedJsonObject(transcript);
      if (embedded.status === "parsed") {
        return compare(embedded.value);
      }
      return {
        passed: false,
        actual: embedded.status === "unparseable" ? "(json parse error)" : "(no json found)",
      };
    }

    default:
      return { passed: false, actual: `unknown assertion type: ${assertion.type}` };
  }
}
97
+
98
+ // ---------------------------------------------------------------------------
99
+ // Load unit tests from JSON file
100
+ // ---------------------------------------------------------------------------
101
+
102
/**
 * Structural check for one parsed entry: it must be a non-null object with a
 * string `query` and an `assertions` array — the fields the runner dereferences.
 */
function isRunnableUnitTest(entry: unknown): boolean {
  if (typeof entry !== "object" || entry === null) return false;
  const candidate = entry as Record<string, unknown>;
  return typeof candidate.query === "string" && Array.isArray(candidate.assertions);
}

/**
 * Load unit tests from a JSON file.
 *
 * Never throws. A missing file, unreadable file, invalid JSON, or a non-array
 * top-level value logs a warning and yields an empty array. Entries that are
 * not runnable test objects (for example `null`, a number, or an object
 * without an `assertions` array) are skipped with a warning so one bad entry
 * does not break the rest of the file.
 *
 * @param testsPath - Path to a JSON file holding an array of unit tests.
 * @returns The runnable tests found in the file, in file order.
 */
export function loadUnitTests(testsPath: string): SkillUnitTest[] {
  try {
    if (!existsSync(testsPath)) {
      console.warn(`[WARN] Unit test file not found: ${testsPath}`);
      return [];
    }
    const parsed: unknown = JSON.parse(readFileSync(testsPath, "utf-8"));
    if (!Array.isArray(parsed)) {
      console.warn(`[WARN] Unit test file is not an array: ${testsPath}`);
      return [];
    }
    const runnable = parsed.filter(isRunnableUnitTest);
    const skipped = parsed.length - runnable.length;
    if (skipped > 0) {
      console.warn(`[WARN] Skipped ${skipped} malformed unit test entries in ${testsPath}`);
    }
    return runnable as SkillUnitTest[];
  } catch (err) {
    console.warn(`[WARN] Failed to load unit tests from ${testsPath}:`, err);
    return [];
  }
}
121
+
122
+ // ---------------------------------------------------------------------------
123
+ // Run a single unit test
124
+ // ---------------------------------------------------------------------------
125
+
126
/** Agent function type: takes a query, returns transcript text. */
export type AgentRunner = (query: string) => Promise<string>;

/**
 * Run a single unit test against an agent runner.
 *
 * Sends `test.query` to the agent, checks every assertion against the returned
 * transcript, and passes only when all assertions pass. The returned promise
 * does not reject for agent or assertion failures: any error thrown while
 * running the agent or checking assertions is captured in the result's
 * `error` field, and every declared assertion is reported as failed with
 * `actual: "error"`.
 *
 * @param test - The unit test to run.
 * @param agent - Function that runs the query and resolves to transcript text.
 * @returns The per-assertion results, overall pass flag, and elapsed milliseconds.
 */
export async function runUnitTest(
  test: SkillUnitTest,
  agent: AgentRunner,
): Promise<UnitTestResult> {
  const start = Date.now();

  try {
    const transcript = await agent(test.query);
    const assertionResults = test.assertions.map((assertion) => {
      const result = checkAssertion(assertion, transcript);
      return { assertion, passed: result.passed, actual: result.actual };
    });

    const allPassed = assertionResults.every((r) => r.passed);

    return {
      test_id: test.id,
      passed: allPassed,
      assertion_results: assertionResults,
      duration_ms: Date.now() - start,
    };
  } catch (err) {
    // The failure may itself be a missing/invalid `assertions` field, so the
    // handler must not assume it can be mapped.
    const declaredAssertions = Array.isArray(test.assertions) ? test.assertions : [];
    return {
      test_id: test.id,
      passed: false,
      assertion_results: declaredAssertions.map((assertion) => ({
        assertion,
        passed: false,
        actual: "error",
      })),
      duration_ms: Date.now() - start,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // Run a full unit test suite
168
+ // ---------------------------------------------------------------------------
169
+
170
/**
 * Run every unit test in order and summarise the outcome.
 *
 * Tests are executed one at a time; each one is awaited before the next starts.
 *
 * @param tests - Unit tests to execute.
 * @param skillName - Skill name recorded on the suite result.
 * @param agent - Agent runner handed to each test.
 * @returns Counts, pass rate (0 for an empty suite), individual results, and
 *   an ISO-8601 timestamp taken when the summary is built.
 */
export async function runUnitTestSuite(
  tests: SkillUnitTest[],
  skillName: string,
  agent: AgentRunner,
): Promise<UnitTestSuiteResult> {
  const outcomes: UnitTestResult[] = [];
  for (const unitTest of tests) {
    outcomes.push(await runUnitTest(unitTest, agent));
  }

  const total = outcomes.length;
  const passed = outcomes.reduce((count, outcome) => (outcome.passed ? count + 1 : count), 0);
  // Every result either passed or did not, so the remainder is the failure count.
  const failed = total - passed;
  const passRate = total > 0 ? passed / total : 0;

  return {
    skill_name: skillName,
    total,
    passed,
    failed,
    pass_rate: passRate,
    results: outcomes,
    run_at: new Date().toISOString(),
  };
}
@@ -7,7 +7,7 @@
7
7
  */
8
8
 
9
9
  import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
10
- import type { EvolutionProposal } from "../types.js";
10
+ import type { EvolutionProposal, SkillSections } from "../types.js";
11
11
  import type { ValidationResult } from "./validate-proposal.js";
12
12
 
13
13
  // ---------------------------------------------------------------------------
@@ -93,6 +93,147 @@ export function replaceDescription(currentContent: string, newDescription: strin
93
93
  return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
94
94
  }
95
95
 
96
+ // ---------------------------------------------------------------------------
97
+ // Structured SKILL.md parsing
98
+ // ---------------------------------------------------------------------------
99
+
100
/**
 * Parse a SKILL.md file into named sections.
 *
 * Splits the content into:
 * - frontmatter: YAML frontmatter block (if present, including delimiters)
 * - title: the first `# Heading` line after the frontmatter
 * - description: content between the title and the first `## ` heading
 * - sections: map of `## Name` -> content (up to next `##` or EOF)
 *
 * Details:
 * - Both LF and CRLF line endings are accepted; returned text uses LF.
 * - The opening and closing `---` delimiters are both matched after trimming,
 *   so trailing whitespace on either line does not hide the frontmatter.
 * - When the file has no `# ` title, `title` is `""` and the description and
 *   sections are still parsed from the content following the frontmatter.
 * - Description and section bodies are trimmed.
 * - If a section name repeats, the last occurrence wins.
 * - Headings are detected line by line; `## ` lines inside fenced code blocks
 *   are not treated specially.
 *
 * @param content - Full text of the SKILL.md file.
 * @returns The frontmatter, title, description, and `##` sections.
 */
export function parseSkillSections(content: string): SkillSections {
  const lines = content.split(/\r?\n/);
  let idx = 0;

  // --- frontmatter ---
  let frontmatter = "";
  if (lines[0]?.trim() === "---") {
    const endIdx = lines.findIndex((line, i) => i > 0 && line.trim() === "---");
    if (endIdx > 0) {
      frontmatter = lines.slice(0, endIdx + 1).join("\n");
      idx = endIdx + 1;
      // skip blank line after frontmatter
      if (lines[idx]?.trim() === "") idx++;
    }
  }

  // --- title ---
  // Only advance past the title when one exists; otherwise the body is parsed
  // from where the frontmatter ended.
  let title = "";
  const bodyStart = idx;
  const titleIdx = lines.findIndex((line, i) => i >= bodyStart && line.startsWith("# "));
  if (titleIdx !== -1) {
    title = lines[titleIdx] ?? "";
    idx = titleIdx + 1;
  }

  // --- description (between title and first ## heading) ---
  const descStart = idx;
  let firstHeadingIdx = lines.findIndex((line, i) => i >= descStart && line.startsWith("## "));
  if (firstHeadingIdx === -1) firstHeadingIdx = lines.length;
  const description = lines.slice(descStart, firstHeadingIdx).join("\n").trim();

  // --- remaining ## sections ---
  const sections: Record<string, string> = {};
  let currentSection: string | null = null;
  let sectionLines: string[] = [];
  const flush = (): void => {
    if (currentSection !== null) {
      sections[currentSection] = sectionLines.join("\n").trim();
    }
    // Always reset so lines never carry over into the next section.
    sectionLines = [];
  };

  for (const line of lines.slice(firstHeadingIdx)) {
    if (line.startsWith("## ")) {
      flush();
      currentSection = line.replace(/^## /, "").trim();
    } else {
      sectionLines.push(line);
    }
  }
  flush();

  return { frontmatter, title, description, sections };
}
171
+
172
+ // ---------------------------------------------------------------------------
173
+ // Section replacement
174
+ // ---------------------------------------------------------------------------
175
+
176
/**
 * Replace a named `## Section` block in a SKILL.md file.
 *
 * The first heading line that equals `## <sectionName>` (ignoring trailing
 * whitespace, including a trailing `\r`) is replaced, together with everything
 * up to the next `## ` heading or EOF. Headings that merely start with the
 * same words (e.g. `## Setup Guide` when replacing `Setup`) are left alone.
 *
 * If the section does not exist, appends it at the end.
 *
 * @param content - Full text of the SKILL.md file.
 * @param sectionName - Section name without the leading `## `.
 * @param newContent - New body for the section (heading excluded).
 * @returns The updated file content.
 */
export function replaceSection(content: string, sectionName: string, newContent: string): string {
  const lines = content.split("\n");
  const heading = `## ${sectionName}`;

  // Exact heading match, so a longer heading sharing the prefix is not clobbered.
  const startIdx = lines.findIndex((line) => line.trimEnd() === heading);

  if (startIdx === -1) {
    // Section not found — append
    const trimmed = content.trimEnd();
    return `${trimmed}\n\n${heading}\n\n${newContent}\n`;
  }

  // Find end: next ## heading or EOF
  let endIdx = lines.findIndex((line, i) => i > startIdx && line.startsWith("## "));
  if (endIdx === -1) endIdx = lines.length;

  const before = lines.slice(0, startIdx);
  const after = lines.slice(endIdx);
  return [...before, heading, "", newContent, "", ...after].join("\n");
}
214
+
215
/**
 * Swap out everything below the title for a proposed body.
 *
 * The existing frontmatter block and `# Title` line (whichever are present)
 * are kept, each followed by a blank line, and `proposedBody` is placed after
 * them. Trailing whitespace is stripped and the result ends with exactly one
 * newline.
 *
 * @param currentContent - Current full text of the SKILL.md file.
 * @param proposedBody - Replacement text for everything after the title.
 * @returns The rebuilt file content.
 */
export function replaceBody(currentContent: string, proposedBody: string): string {
  const { frontmatter, title } = parseSkillSections(currentContent);

  const pieces: string[] = [];
  for (const header of [frontmatter, title]) {
    // Each retained header line/block is followed by one blank line.
    if (header) pieces.push(header, "");
  }
  pieces.push(proposedBody);

  const assembled = pieces.join("\n").trimEnd();
  return `${assembled}\n`;
}
236
+
96
237
  // ---------------------------------------------------------------------------
97
238
  // Commit message builder
98
239
  // ---------------------------------------------------------------------------