selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -0,0 +1,152 @@
1
+ /**
2
+ * CLI entrypoint for skill unit tests.
3
+ *
4
+ * Usage:
5
+ * selftune eval unit-test --skill <name> --tests <path> [--run-agent] [--generate]
6
+ *
7
+ * --skill <name> Skill name (required)
8
+ * --tests <path> Path to unit test JSON file (default: ~/.selftune/unit-tests/<skill>.json)
9
+ * --run-agent Actually run tests through an agent (otherwise dry-run with static checks)
10
+ * --generate Generate tests from skill content using LLM (requires agent)
11
+ * --skill-path <p> Path to skill file (used with --generate for content)
12
+ * --eval-set <p> Path to eval set JSON (used with --generate for failure context)
13
+ * --model <m> Model flag for LLM calls
14
+ */
15
+
16
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
17
+ import { join } from "node:path";
18
+ import { parseArgs } from "node:util";
19
+
20
+ import { SELFTUNE_CONFIG_DIR } from "../constants.js";
21
+ import type { EvalEntry } from "../types.js";
22
+ import { callLlm, detectAgent } from "../utils/llm-call.js";
23
+ import { generateUnitTests } from "./generate-unit-tests.js";
24
+ import type { AgentRunner } from "./unit-test.js";
25
+ import { loadUnitTests, runUnitTestSuite } from "./unit-test.js";
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // CLI
29
+ // ---------------------------------------------------------------------------
30
+
31
/**
 * CLI entry point for the skill unit-test command.
 *
 * Two modes, selected by `--generate`:
 *
 * - **Generate**: asks the detected agent CLI to produce unit tests for the
 *   skill (optionally seeded with the skill file content and an eval set),
 *   writes them as JSON to the tests path, and returns.
 * - **Run** (default): loads the tests file, runs every test through either a
 *   real agent (`--run-agent`) or a dry-run runner that echoes the query back
 *   as the transcript, prints a summary plus the full suite result as JSON,
 *   and exits with code 1 if any test failed (0 otherwise).
 *
 * The process exits with code 1 when `--skill` is missing, when an agent is
 * required but none is detected, when generation yields no tests, or when no
 * tests can be loaded.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      skill: { type: "string" },
      tests: { type: "string" },
      "run-agent": { type: "boolean", default: false },
      generate: { type: "boolean", default: false },
      "skill-path": { type: "string" },
      "eval-set": { type: "string" },
      model: { type: "string" },
    },
    strict: true,
  });

  if (!values.skill) {
    console.error("[ERROR] --skill <name> is required.");
    process.exit(1);
  }

  const skillName = values.skill;
  const unitTestDir = join(SELFTUNE_CONFIG_DIR, "unit-tests");
  const defaultTestsPath = join(unitTestDir, `${skillName}.json`);
  const testsPath = values.tests ?? defaultTestsPath;

  // --generate: create tests from skill content
  if (values.generate) {
    const agent = detectAgent();
    if (!agent) {
      console.error("[ERROR] No agent CLI found (claude/codex/opencode). Cannot generate tests.");
      process.exit(1);
    }

    // Fall back to a minimal prompt when no readable skill file is supplied.
    let skillContent = `Skill: ${skillName}`;
    const skillPath = values["skill-path"];
    if (skillPath && existsSync(skillPath)) {
      skillContent = readFileSync(skillPath, "utf-8");
    } else if (skillPath) {
      console.warn(`[WARN] Skill path not found: ${skillPath}. Using skill name only.`);
    }

    // NOTE(review): entries with `should_trigger` set are forwarded as the
    // "failure context" — confirm this is the intended selection.
    let evalFailures: EvalEntry[] = [];
    const evalSetPath = values["eval-set"];
    if (evalSetPath && existsSync(evalSetPath)) {
      try {
        const entries: EvalEntry[] = JSON.parse(readFileSync(evalSetPath, "utf-8"));
        evalFailures = entries.filter((e) => e.should_trigger);
      } catch {
        console.warn("[WARN] Failed to parse eval set. Proceeding without failure context.");
      }
    } else if (evalSetPath) {
      // Mirror the --skill-path handling: an explicit path that does not
      // exist should be reported instead of being silently ignored.
      console.warn(
        `[WARN] Eval set not found: ${evalSetPath}. Proceeding without failure context.`,
      );
    }

    const modelFlag = values.model;
    const llmCaller = (systemPrompt: string, userPrompt: string) =>
      callLlm(systemPrompt, userPrompt, agent, modelFlag);

    console.log(`Generating unit tests for skill '${skillName}'...`);
    const tests = await generateUnitTests(skillName, skillContent, evalFailures, llmCaller);

    if (tests.length === 0) {
      console.error("[ERROR] No tests generated. Check agent/LLM availability.");
      process.exit(1);
    }

    // Create the directory that will actually receive the file. With a custom
    // --tests path this is not necessarily the default unit-test directory, so
    // derive it from testsPath (join(..., "..") normalizes to the parent).
    mkdirSync(join(testsPath, ".."), { recursive: true });
    writeFileSync(testsPath, JSON.stringify(tests, null, 2), "utf-8");
    console.log(`Generated ${tests.length} unit tests -> ${testsPath}`);
    return;
  }

  // Load and run tests
  const tests = loadUnitTests(testsPath);
  if (tests.length === 0) {
    console.error(`[ERROR] No tests found at ${testsPath}`);
    console.error("  Use --generate to create tests, or provide --tests <path>.");
    process.exit(1);
  }

  console.log(`Loaded ${tests.length} unit tests for skill '${skillName}'`);

  let agentRunner: AgentRunner;

  if (values["run-agent"]) {
    const agent = detectAgent();
    if (!agent) {
      console.error("[ERROR] No agent CLI found. Cannot run agent-based tests.");
      process.exit(1);
    }
    const modelFlag = values.model;
    agentRunner = async (query: string): Promise<string> => {
      return callLlm("You are a helpful assistant.", query, agent, modelFlag);
    };
  } else {
    // Dry-run: use query as transcript (only static assertions like contains work meaningfully)
    console.log("(dry-run mode — use --run-agent for full agent execution)\n");
    agentRunner = async (query: string): Promise<string> => query;
  }

  const suite = await runUnitTestSuite(tests, skillName, agentRunner);

  // Print results
  console.log(`\nResults for '${suite.skill_name}':`);
  console.log(`  Total: ${suite.total}  Passed: ${suite.passed}  Failed: ${suite.failed}`);
  console.log(`  Pass rate: ${(suite.pass_rate * 100).toFixed(1)}%`);

  if (suite.failed > 0) {
    console.log("\nFailed tests:");
    for (const r of suite.results.filter((r) => !r.passed)) {
      console.log(`  [FAIL] ${r.test_id} (${r.duration_ms}ms)`);
      if (r.error) {
        console.log(`    Error: ${r.error}`);
      }
      for (const a of r.assertion_results.filter((a) => !a.passed)) {
        console.log(
          `    - ${a.assertion.type}: expected "${a.assertion.value}", got "${a.actual}"`,
        );
      }
    }
  }

  // Emit the full suite result as JSON, then signal failure via exit code.
  console.log(`\n${JSON.stringify(suite, null, 2)}`);
  process.exit(suite.failed > 0 ? 1 : 0);
}
@@ -0,0 +1,196 @@
1
+ /**
2
+ * Skill unit test runner.
3
+ *
4
+ * Loads, runs, and reports on skill-level unit tests.
5
+ * Tests are stored as JSON arrays of SkillUnitTest objects.
6
+ *
7
+ * Assertion types:
8
+ * - contains / not_contains: check transcript for substring
9
+ * - regex: check transcript against a regex pattern
10
+ * - tool_called / tool_not_called: check transcript for tool usage
11
+ * - json_path: check key=value in parsed JSON from transcript
12
+ */
13
+
14
+ import { existsSync, readFileSync } from "node:fs";
15
+ import type {
16
+ SkillAssertion,
17
+ SkillUnitTest,
18
+ UnitTestResult,
19
+ UnitTestSuiteResult,
20
+ } from "../types.js";
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Assertion checker (deterministic, no agent needed)
24
+ // ---------------------------------------------------------------------------
25
+
26
/**
 * Check a single assertion against a transcript string.
 *
 * Supported assertion types:
 * - `contains` / `tool_called`: pass when the transcript includes `value`.
 * - `not_contains` / `tool_not_called`: pass when it does not.
 * - `regex`: pass when `value`, compiled as a regular expression, matches.
 *   An invalid pattern fails the assertion instead of throwing.
 * - `json_path`: `value` has the form `key=expected`; passes when the
 *   top-level `key` of the JSON object found in the transcript stringifies to
 *   `expected` (a missing key is treated as the empty string).
 *
 * @param assertion - The assertion to evaluate (`type` and `value` are read).
 * @param transcript - Agent output to check.
 * @returns `passed`, plus `actual`: a short description of what was observed.
 */
export function checkAssertion(
  assertion: SkillAssertion,
  transcript: string,
): { passed: boolean; actual?: string } {
  switch (assertion.type) {
    case "contains": {
      const found = transcript.includes(assertion.value);
      return { passed: found, actual: found ? assertion.value : "(not found)" };
    }

    case "not_contains": {
      const found = transcript.includes(assertion.value);
      return { passed: !found, actual: found ? `found: ${assertion.value}` : "(absent)" };
    }

    case "regex": {
      let re: RegExp;
      try {
        re = new RegExp(assertion.value);
      } catch (err) {
        // A malformed pattern is a problem with this assertion only; report it
        // here rather than letting the exception fail the whole test.
        const reason = err instanceof Error ? err.message : String(err);
        return { passed: false, actual: `invalid regex: ${reason}` };
      }
      const match = re.exec(transcript);
      return {
        passed: match !== null,
        actual: match ? match[0] : "(no match)",
      };
    }

    case "tool_called": {
      const found = transcript.includes(assertion.value);
      return { passed: found, actual: found ? assertion.value : "(tool not found)" };
    }

    case "tool_not_called": {
      const found = transcript.includes(assertion.value);
      return { passed: !found, actual: found ? `found: ${assertion.value}` : "(absent)" };
    }

    case "json_path": {
      // Simple key=value check: "status=ok" looks for {"status":"ok"} in transcript
      const eqIdx = assertion.value.indexOf("=");
      if (eqIdx < 0) {
        return { passed: false, actual: "invalid json_path format (expected key=value)" };
      }
      const key = assertion.value.slice(0, eqIdx);
      const expected = assertion.value.slice(eqIdx + 1);

      // Parse `text` and compare the key; undefined means "not usable JSON".
      const lookup = (text: string): { passed: boolean; actual: string } | undefined => {
        let parsed: unknown;
        try {
          parsed = JSON.parse(text);
        } catch {
          return undefined;
        }
        // JSON null and primitives have no keys to look up.
        if (parsed === null || typeof parsed !== "object") return undefined;
        const actual = String((parsed as Record<string, unknown>)[key] ?? "");
        return { passed: actual === expected, actual };
      };

      // 1) The whole transcript is JSON.
      const whole = lookup(transcript);
      if (whole) return whole;

      // 2) First flat `{...}` object embedded in surrounding text.
      const flat = transcript.match(/\{[^}]+\}/);
      if (!flat) {
        return { passed: false, actual: "(no json found)" };
      }
      const fromFlat = lookup(flat[0]);
      if (fromFlat) return fromFlat;

      // 3) The flat pattern stops at the first "}", which truncates nested
      //    objects; retry with the span from the first "{" to the last "}".
      const span = transcript.slice(transcript.indexOf("{"), transcript.lastIndexOf("}") + 1);
      return lookup(span) ?? { passed: false, actual: "(json parse error)" };
    }

    default:
      return { passed: false, actual: `unknown assertion type: ${assertion.type}` };
  }
}
97
+
98
+ // ---------------------------------------------------------------------------
99
+ // Load unit tests from JSON file
100
+ // ---------------------------------------------------------------------------
101
+
102
/**
 * Read a unit-test definition file and return its contents.
 *
 * The file must contain a JSON array; its elements are returned as-is
 * (no per-element validation is performed here). Any problem — missing file,
 * unreadable file, invalid JSON, or a non-array top-level value — is reported
 * with a `[WARN]` message and yields an empty array.
 *
 * @param testsPath - Path to the JSON file holding the tests.
 * @returns The parsed tests, or `[]` when nothing could be loaded.
 */
export function loadUnitTests(testsPath: string): SkillUnitTest[] {
  let data: unknown;

  try {
    if (!existsSync(testsPath)) {
      console.warn(`[WARN] Unit test file not found: ${testsPath}`);
      return [];
    }
    data = JSON.parse(readFileSync(testsPath, "utf-8"));
  } catch (err) {
    console.warn(`[WARN] Failed to load unit tests from ${testsPath}:`, err);
    return [];
  }

  if (Array.isArray(data)) {
    return data as SkillUnitTest[];
  }

  console.warn(`[WARN] Unit test file is not an array: ${testsPath}`);
  return [];
}
121
+
122
+ // ---------------------------------------------------------------------------
123
+ // Run a single unit test
124
+ // ---------------------------------------------------------------------------
125
+
126
/** Agent function type: takes a query, returns transcript text. */
export type AgentRunner = (query: string) => Promise<string>;

/**
 * Run a single unit test against an agent runner.
 *
 * The test's query is sent to `agent`, and every assertion is checked against
 * the returned transcript. The test passes only when all assertions pass (a
 * test with an empty assertion list therefore passes).
 *
 * This function does not reject: a failing agent call is reported as a failed
 * result whose `error` holds the message and whose assertions are all marked
 * failed with `actual: "error"`. A test definition whose `assertions` is not
 * an array is reported as a failed result without invoking the agent.
 *
 * @param test - The unit test to run (`id`, `query`, `assertions` are read).
 * @param agent - Produces the transcript for the test's query.
 * @returns The per-test result, including elapsed wall-clock milliseconds.
 */
export async function runUnitTest(
  test: SkillUnitTest,
  agent: AgentRunner,
): Promise<UnitTestResult> {
  const start = Date.now();

  // Test definitions come from parsed JSON, so the declared type is not a
  // runtime guarantee. Reject a malformed entry up front; otherwise the
  // error handler below would itself throw and abort the whole suite.
  if (!Array.isArray(test.assertions)) {
    return {
      test_id: test.id,
      passed: false,
      assertion_results: [],
      duration_ms: Date.now() - start,
      error: "invalid test definition: 'assertions' must be an array",
    };
  }

  try {
    const transcript = await agent(test.query);
    const assertionResults = test.assertions.map((assertion) => {
      const result = checkAssertion(assertion, transcript);
      return { assertion, passed: result.passed, actual: result.actual };
    });

    const allPassed = assertionResults.every((r) => r.passed);

    return {
      test_id: test.id,
      passed: allPassed,
      assertion_results: assertionResults,
      duration_ms: Date.now() - start,
    };
  } catch (err) {
    // Agent (or assertion) failure: mark every assertion failed and surface
    // the error message on the result.
    return {
      test_id: test.id,
      passed: false,
      assertion_results: test.assertions.map((assertion) => ({
        assertion,
        passed: false,
        actual: "error",
      })),
      duration_ms: Date.now() - start,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // Run a full unit test suite
168
+ // ---------------------------------------------------------------------------
169
+
170
/**
 * Execute every test in order and summarize the outcome.
 *
 * Tests run one at a time (each is awaited before the next starts).
 *
 * @param tests - Tests to execute.
 * @param skillName - Copied into the result as `skill_name`.
 * @param agent - Runner handed to each individual test.
 * @returns Counts, the pass rate (0 when there are no tests), the individual
 *   results in execution order, and an ISO-8601 `run_at` timestamp.
 */
export async function runUnitTestSuite(
  tests: SkillUnitTest[],
  skillName: string,
  agent: AgentRunner,
): Promise<UnitTestSuiteResult> {
  const results: UnitTestResult[] = [];
  for (let i = 0; i < tests.length; i++) {
    results.push(await runUnitTest(tests[i], agent));
  }

  let passed = 0;
  for (const outcome of results) {
    if (outcome.passed) passed += 1;
  }

  const total = results.length;
  // Every result is either passing or not, so the remainder is the failures.
  const failed = total - passed;
  const passRate = total > 0 ? passed / total : 0;

  return {
    skill_name: skillName,
    total,
    passed,
    failed,
    pass_rate: passRate,
    results,
    run_at: new Date().toISOString(),
  };
}
@@ -7,7 +7,7 @@
7
7
  */
8
8
 
9
9
  import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
10
- import type { EvolutionProposal } from "../types.js";
10
+ import type { EvolutionProposal, SkillSections } from "../types.js";
11
11
  import type { ValidationResult } from "./validate-proposal.js";
12
12
 
13
13
  // ---------------------------------------------------------------------------
@@ -93,6 +93,147 @@ export function replaceDescription(currentContent: string, newDescription: strin
93
93
  return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
94
94
  }
95
95
 
96
+ // ---------------------------------------------------------------------------
97
+ // Structured SKILL.md parsing
98
+ // ---------------------------------------------------------------------------
99
+
100
/**
 * Parse a SKILL.md file into named sections.
 *
 * Splits the content into:
 * - frontmatter: YAML frontmatter block (if present, including delimiters).
 *   Both `---` delimiters are recognized ignoring surrounding whitespace.
 * - title: the first `# Heading` line after the frontmatter ("" if none).
 *   Any lines between the frontmatter and the title are skipped.
 * - description: content between the title and the first `## ` heading
 * - sections: map of `## Name` -> content (up to next `##` or EOF)
 *
 * When the document has no `# ` title, the description and sections are still
 * parsed, starting right after the frontmatter. Content under a `## ` heading
 * with an empty name is discarded. If a section name repeats, the last
 * occurrence wins.
 *
 * @param content - Full text of the SKILL.md file.
 * @returns The parsed parts; description and section bodies are trimmed.
 */
export function parseSkillSections(content: string): SkillSections {
  const lines = content.split("\n");
  let idx = 0;

  // --- frontmatter ---
  let frontmatter = "";
  if (lines[0]?.trim() === "---") {
    // Compare trimmed, like the opening delimiter, so a closing "--- " with
    // trailing whitespace still terminates the block.
    const endIdx = lines.findIndex((line, i) => i > 0 && line.trim() === "---");
    if (endIdx > 0) {
      frontmatter = lines.slice(0, endIdx + 1).join("\n");
      idx = endIdx + 1;
      // skip blank line after frontmatter
      if (idx < lines.length && lines[idx].trim() === "") idx++;
    }
  }

  // --- title ---
  let title = "";
  const bodyStart = idx;
  const titleIdx = lines.findIndex((line, i) => i >= bodyStart && line.startsWith("# "));
  if (titleIdx >= 0) {
    title = lines[titleIdx];
    idx = titleIdx + 1;
  }
  // else: no title — keep idx at bodyStart so the rest of the document is
  // still parsed instead of being consumed by the title search.

  // --- description (between title and first ## heading) ---
  const descLines: string[] = [];
  while (idx < lines.length && !lines[idx].startsWith("## ")) {
    descLines.push(lines[idx]);
    idx++;
  }
  const description = descLines.join("\n").trim();

  // --- remaining ## sections ---
  const sections: Record<string, string> = {};
  let currentSection = "";
  let sectionLines: string[] = [];

  // Store the buffered lines under the current name (if any) and always
  // reset the buffer so one section's lines never leak into the next.
  const flush = (): void => {
    if (currentSection) {
      sections[currentSection] = sectionLines.join("\n").trim();
    }
    sectionLines = [];
  };

  for (; idx < lines.length; idx++) {
    const line = lines[idx];
    if (line.startsWith("## ")) {
      flush();
      currentSection = line.slice(3).trim();
    } else {
      sectionLines.push(line);
    }
  }
  flush();

  return { frontmatter, title, description, sections };
}
171
+
172
+ // ---------------------------------------------------------------------------
173
+ // Section replacement
174
+ // ---------------------------------------------------------------------------
175
+
176
/**
 * Replace a named `## Section` block in a SKILL.md file.
 *
 * The section is located by its exact name: a line that starts with `## `
 * and whose remaining text, trimmed, equals `sectionName` (trimmed). A
 * heading that merely begins with the name (e.g. `## Usage Notes` when
 * replacing `Usage`) is not a match. The first matching heading wins.
 *
 * The matched block — heading through the line before the next `## ` heading
 * or EOF — is replaced by the heading, a blank line, `newContent`, and a
 * blank line. If the section does not exist, it is appended at the end.
 *
 * @param content - Full text of the SKILL.md file.
 * @param sectionName - Section name without the `## ` prefix.
 * @param newContent - Replacement body for the section.
 * @returns The updated file content.
 */
export function replaceSection(content: string, sectionName: string, newContent: string): string {
  const lines = content.split("\n");
  const heading = `## ${sectionName}`;
  const wanted = sectionName.trim();

  const isSectionHeading = (line: string): boolean => line.startsWith("## ");

  const startIdx = lines.findIndex(
    (line) => isSectionHeading(line) && line.slice(3).trim() === wanted,
  );

  if (startIdx === -1) {
    // Section not found — append
    const trimmed = content.trimEnd();
    return `${trimmed}\n\n${heading}\n\n${newContent}\n`;
  }

  // End of the block: next ## heading, or EOF when this is the last section.
  const nextHeadingIdx = lines.findIndex((line, i) => i > startIdx && isSectionHeading(line));
  const endIdx = nextHeadingIdx === -1 ? lines.length : nextHeadingIdx;

  const before = lines.slice(0, startIdx);
  const after = lines.slice(endIdx);
  return [...before, heading, "", newContent, "", ...after].join("\n");
}
214
+
215
/**
 * Swap out everything below the title for a proposed body.
 *
 * The frontmatter block and the `# Title` line of the current content are
 * carried over unchanged (each followed by a blank line) when present; the
 * proposed body is placed after them. Trailing whitespace is stripped and the
 * result ends with exactly one newline.
 *
 * @param currentContent - Existing SKILL.md text to take frontmatter/title from.
 * @param proposedBody - New body text.
 * @returns The reassembled file content.
 */
export function replaceBody(currentContent: string, proposedBody: string): string {
  const { frontmatter, title } = parseSkillSections(currentContent);

  const segments: string[] = [
    ...(frontmatter ? [frontmatter, ""] : []),
    ...(title ? [title, ""] : []),
    proposedBody,
  ];

  const assembled = segments.join("\n").trimEnd();
  return `${assembled}\n`;
}
236
+
96
237
  // ---------------------------------------------------------------------------
97
238
  // Commit message builder
98
239
  // ---------------------------------------------------------------------------
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Evolution evidence trail: append and read proposal/eval artifacts that power
3
+ * explainable dashboard drill-downs.
4
+ */
5
+
6
+ import { EVOLUTION_EVIDENCE_LOG } from "../constants.js";
7
+ import type { EvolutionEvidenceEntry } from "../types.js";
8
+ import { appendJsonl, readJsonl } from "../utils/jsonl.js";
9
+
10
/**
 * Record one evidence artifact in the evolution evidence log.
 *
 * @param entry - The evidence entry to append.
 * @param logPath - Target log file; defaults to `EVOLUTION_EVIDENCE_LOG`.
 */
export function appendEvidenceEntry(
  entry: EvolutionEvidenceEntry,
  logPath: string = EVOLUTION_EVIDENCE_LOG,
): void {
  // Delegates entirely to the shared JSONL append helper.
  appendJsonl(logPath, entry);
}
17
+
18
/**
 * Load the evidence trail from the log.
 *
 * @param skillName - When provided (and non-empty), only entries whose
 *   `skill_name` equals it exactly are returned; otherwise all entries are.
 * @param logPath - Log file to read; defaults to `EVOLUTION_EVIDENCE_LOG`.
 * @returns The matching evidence entries as read from the log.
 */
export function readEvidenceTrail(
  skillName?: string,
  logPath: string = EVOLUTION_EVIDENCE_LOG,
): EvolutionEvidenceEntry[] {
  const trail = readJsonl<EvolutionEvidenceEntry>(logPath);
  return skillName ? trail.filter(({ skill_name }) => skill_name === skillName) : trail;
}