selftune 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +35 -35
  2. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +16 -0
  3. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +2 -0
  4. package/apps/local-dashboard/dist/assets/rolldown-runtime-Dw2cE7zH.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +11 -0
  6. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +8 -0
  7. package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +12 -0
  8. package/apps/local-dashboard/dist/index.html +16 -15
  9. package/bin/selftune.cjs +1 -1
  10. package/cli/selftune/activation-rules.ts +1 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +94 -0
  13. package/cli/selftune/auth/device-code.ts +32 -0
  14. package/cli/selftune/auto-update.ts +12 -0
  15. package/cli/selftune/badge/badge.ts +1 -0
  16. package/cli/selftune/canonical-export.ts +5 -0
  17. package/cli/selftune/claude-agents.ts +154 -0
  18. package/cli/selftune/contribute/bundle.ts +1 -0
  19. package/cli/selftune/contribute/contribute.ts +1 -0
  20. package/cli/selftune/cron/setup.ts +2 -2
  21. package/cli/selftune/dashboard-server.ts +1 -0
  22. package/cli/selftune/eval/hooks-to-evals.ts +1 -0
  23. package/cli/selftune/eval/import-skillsbench.ts +1 -0
  24. package/cli/selftune/eval/synthetic-evals.ts +2 -3
  25. package/cli/selftune/eval/unit-test.ts +1 -0
  26. package/cli/selftune/evolution/deploy-proposal.ts +9 -238
  27. package/cli/selftune/evolution/evolve-body.ts +93 -6
  28. package/cli/selftune/evolution/evolve.ts +3 -7
  29. package/cli/selftune/evolution/propose-body.ts +3 -2
  30. package/cli/selftune/evolution/propose-routing.ts +3 -2
  31. package/cli/selftune/evolution/refine-body.ts +3 -2
  32. package/cli/selftune/evolution/rollback.ts +1 -1
  33. package/cli/selftune/export.ts +1 -0
  34. package/cli/selftune/grading/grade-session.ts +8 -0
  35. package/cli/selftune/hooks/auto-activate.ts +1 -0
  36. package/cli/selftune/hooks/evolution-guard.ts +1 -1
  37. package/cli/selftune/hooks/prompt-log.ts +1 -0
  38. package/cli/selftune/hooks/session-stop.ts +34 -40
  39. package/cli/selftune/hooks/skill-change-guard.ts +1 -0
  40. package/cli/selftune/hooks/skill-eval.ts +1 -1
  41. package/cli/selftune/index.ts +23 -14
  42. package/cli/selftune/ingestors/claude-replay.ts +1 -0
  43. package/cli/selftune/ingestors/codex-rollout.ts +1 -0
  44. package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
  45. package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
  46. package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
  47. package/cli/selftune/init.ts +121 -29
  48. package/cli/selftune/localdb/db.ts +1 -0
  49. package/cli/selftune/localdb/direct-write.ts +39 -0
  50. package/cli/selftune/localdb/materialize.ts +2 -0
  51. package/cli/selftune/localdb/queries.ts +53 -0
  52. package/cli/selftune/localdb/schema.ts +28 -0
  53. package/cli/selftune/normalization.ts +1 -0
  54. package/cli/selftune/observability.ts +1 -0
  55. package/cli/selftune/repair/skill-usage.ts +1 -0
  56. package/cli/selftune/routes/orchestrate-runs.ts +1 -0
  57. package/cli/selftune/routes/overview.ts +1 -0
  58. package/cli/selftune/routes/report.ts +1 -1
  59. package/cli/selftune/routes/skill-report.ts +2 -1
  60. package/cli/selftune/status.ts +1 -1
  61. package/cli/selftune/sync.ts +30 -1
  62. package/cli/selftune/uninstall.ts +412 -0
  63. package/cli/selftune/utils/canonical-log.ts +2 -0
  64. package/cli/selftune/utils/frontmatter.ts +50 -7
  65. package/cli/selftune/utils/jsonl.ts +1 -0
  66. package/cli/selftune/utils/llm-call.ts +131 -3
  67. package/cli/selftune/utils/skill-log.ts +1 -0
  68. package/cli/selftune/utils/transcript.ts +1 -0
  69. package/cli/selftune/utils/trigger-check.ts +1 -1
  70. package/cli/selftune/workflows/skill-md-writer.ts +5 -5
  71. package/cli/selftune/workflows/workflows.ts +1 -0
  72. package/package.json +37 -33
  73. package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
  74. package/packages/telemetry-contract/package.json +1 -1
  75. package/packages/telemetry-contract/src/schemas.ts +1 -0
  76. package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
  77. package/packages/ui/README.md +35 -34
  78. package/packages/ui/package.json +3 -3
  79. package/packages/ui/src/components/ActivityTimeline.tsx +50 -43
  80. package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
  81. package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
  82. package/packages/ui/src/components/InfoTip.tsx +4 -3
  83. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
  84. package/packages/ui/src/components/section-cards.tsx +20 -25
  85. package/packages/ui/src/components/skill-health-grid.tsx +213 -193
  86. package/packages/ui/src/lib/constants.tsx +1 -0
  87. package/packages/ui/src/primitives/badge.tsx +12 -15
  88. package/packages/ui/src/primitives/button.tsx +7 -7
  89. package/packages/ui/src/primitives/card.tsx +15 -26
  90. package/packages/ui/src/primitives/checkbox.tsx +7 -8
  91. package/packages/ui/src/primitives/collapsible.tsx +5 -5
  92. package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
  93. package/packages/ui/src/primitives/label.tsx +6 -6
  94. package/packages/ui/src/primitives/select.tsx +28 -37
  95. package/packages/ui/src/primitives/table.tsx +17 -44
  96. package/packages/ui/src/primitives/tabs.tsx +14 -21
  97. package/packages/ui/src/primitives/tooltip.tsx +10 -22
  98. package/skill/SKILL.md +70 -57
  99. package/skill/Workflows/AlphaUpload.md +4 -4
  100. package/skill/Workflows/AutoActivation.md +11 -6
  101. package/skill/Workflows/Badge.md +22 -16
  102. package/skill/Workflows/Baseline.md +34 -36
  103. package/skill/Workflows/Composability.md +16 -11
  104. package/skill/Workflows/Contribute.md +26 -21
  105. package/skill/Workflows/Cron.md +23 -22
  106. package/skill/Workflows/Dashboard.md +32 -27
  107. package/skill/Workflows/Doctor.md +33 -27
  108. package/skill/Workflows/Evals.md +48 -47
  109. package/skill/Workflows/EvolutionMemory.md +31 -21
  110. package/skill/Workflows/Evolve.md +84 -82
  111. package/skill/Workflows/EvolveBody.md +58 -47
  112. package/skill/Workflows/Grade.md +16 -13
  113. package/skill/Workflows/ImportSkillsBench.md +9 -6
  114. package/skill/Workflows/Ingest.md +36 -21
  115. package/skill/Workflows/Initialize.md +108 -40
  116. package/skill/Workflows/Orchestrate.md +22 -16
  117. package/skill/Workflows/Replay.md +12 -7
  118. package/skill/Workflows/Rollback.md +13 -6
  119. package/skill/Workflows/Schedule.md +6 -6
  120. package/skill/Workflows/Sync.md +18 -11
  121. package/skill/Workflows/UnitTest.md +28 -17
  122. package/skill/Workflows/Watch.md +28 -21
  123. package/skill/agents/diagnosis-analyst.md +11 -0
  124. package/skill/agents/evolution-reviewer.md +15 -1
  125. package/skill/agents/integration-guide.md +10 -0
  126. package/skill/agents/pattern-analyst.md +12 -1
  127. package/skill/references/grading-methodology.md +23 -24
  128. package/skill/references/interactive-config.md +7 -7
  129. package/skill/references/invocation-taxonomy.md +22 -20
  130. package/skill/references/logs.md +14 -6
  131. package/skill/references/setup-patterns.md +4 -2
  132. package/.claude/agents/diagnosis-analyst.md +0 -156
  133. package/.claude/agents/evolution-reviewer.md +0 -180
  134. package/.claude/agents/integration-guide.md +0 -212
  135. package/.claude/agents/pattern-analyst.md +0 -160
  136. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +0 -1
  137. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +0 -15
  138. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +0 -60
  139. package/apps/local-dashboard/dist/assets/vendor-table-dK1QMLq9.js +0 -26
  140. package/apps/local-dashboard/dist/assets/vendor-ui-CO2mrx6e.js +0 -341
@@ -1,97 +1,18 @@
1
1
  /**
2
2
  * deploy-proposal.ts
3
3
  *
4
- * Deploys a validated evolution proposal by updating SKILL.md, creating a
5
- * backup, building a commit message with metrics, and optionally creating
6
- * a git branch and PR via `gh pr create`.
7
- */
8
-
9
- import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
10
- import type { EvolutionProposal, SkillSections } from "../types.js";
11
- import type { ValidationResult } from "./validate-proposal.js";
12
-
13
- // ---------------------------------------------------------------------------
14
- // Types
15
- // ---------------------------------------------------------------------------
16
-
17
- export interface DeployOptions {
18
- proposal: EvolutionProposal;
19
- validation: ValidationResult;
20
- skillPath: string;
21
- createPr: boolean;
22
- branchPrefix?: string; // default "selftune/evolve"
23
- }
24
-
25
- export interface DeployResult {
26
- skillMdUpdated: boolean;
27
- backupPath: string | null;
28
- branchName: string | null;
29
- commitMessage: string;
30
- }
31
-
32
- // ---------------------------------------------------------------------------
33
- // SKILL.md reading
34
- // ---------------------------------------------------------------------------
35
-
36
- /** Read the contents of a SKILL.md file. Throws if the file does not exist. */
37
- export function readSkillMd(skillPath: string): string {
38
- if (!existsSync(skillPath)) {
39
- throw new Error(`SKILL.md not found at ${skillPath}`);
40
- }
41
- return readFileSync(skillPath, "utf-8");
42
- }
43
-
44
- // ---------------------------------------------------------------------------
45
- // Description replacement
46
- // ---------------------------------------------------------------------------
47
-
48
- /**
49
- * Replace the description section of a SKILL.md file.
4
+ * SKILL.md manipulation utilities for the evolution pipeline: description
5
+ * replacement, structured section parsing, section replacement, and full
6
+ * body replacement.
50
7
  *
51
- * The description is defined as the content between the first `#` heading
52
- * and the first `##` heading. If no `##` heading exists, the entire body
53
- * after the first heading is replaced.
8
+ * Evolution is a local personalization — the evolved description reflects how
9
+ * *this user* works, not a change the skill creator should adopt. A future
10
+ * upstream feedback channel (anonymized patterns, not raw descriptions) may
11
+ * let end-users send useful signal back to skill creators, but that's a
12
+ * separate concern from deploy. See TD-019 in tech-debt-tracker.md.
54
13
  */
55
- export function replaceDescription(currentContent: string, newDescription: string): string {
56
- const lines = currentContent.split("\n");
57
-
58
- // Find the first # heading line
59
- let headingIndex = -1;
60
- for (let i = 0; i < lines.length; i++) {
61
- if (lines[i].startsWith("# ") && !lines[i].startsWith("## ")) {
62
- headingIndex = i;
63
- break;
64
- }
65
- }
66
14
 
67
- // If no heading found, just prepend the description
68
- if (headingIndex === -1) {
69
- return `${newDescription}\n${currentContent}`;
70
- }
71
-
72
- // Find the first ## heading after the main heading
73
- let subHeadingIndex = -1;
74
- for (let i = headingIndex + 1; i < lines.length; i++) {
75
- if (lines[i].startsWith("## ")) {
76
- subHeadingIndex = i;
77
- break;
78
- }
79
- }
80
-
81
- // Build the new content, preserving any preamble before the first heading
82
- const preamble = headingIndex > 0 ? `${lines.slice(0, headingIndex).join("\n")}\n` : "";
83
- const headingLine = lines[headingIndex];
84
- const descriptionBlock = newDescription.length > 0 ? `\n${newDescription}\n` : "\n";
85
-
86
- if (subHeadingIndex === -1) {
87
- // No sub-heading: preamble + heading + new description + trailing newline
88
- return `${preamble}${headingLine}\n${descriptionBlock}\n`;
89
- }
90
-
91
- // Preamble + heading + description + everything from the first ## onward
92
- const afterSubHeading = lines.slice(subHeadingIndex).join("\n");
93
- return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
94
- }
15
+ import type { SkillSections } from "../types.js";
95
16
 
96
17
  // ---------------------------------------------------------------------------
97
18
  // Structured SKILL.md parsing
@@ -233,153 +154,3 @@ export function replaceBody(currentContent: string, proposedBody: string): strin
233
154
 
234
155
  return `${parts.join("\n").trimEnd()}\n`;
235
156
  }
236
-
237
- // ---------------------------------------------------------------------------
238
- // Commit message builder
239
- // ---------------------------------------------------------------------------
240
-
241
- /** Build a commit message that includes the skill name and pass rate change. */
242
- export function buildCommitMessage(
243
- proposal: EvolutionProposal,
244
- validation: ValidationResult,
245
- ): string {
246
- const changePercent = Math.round(validation.net_change * 100);
247
- const sign = changePercent >= 0 ? "+" : "";
248
- const passRateStr = `${sign}${changePercent}% pass rate`;
249
-
250
- return `evolve(${proposal.skill_name}): ${passRateStr}`;
251
- }
252
-
253
- // ---------------------------------------------------------------------------
254
- // Git/GH operations (PR creation)
255
- // ---------------------------------------------------------------------------
256
-
257
- /** Sanitize a string for use in a git branch name. */
258
- function sanitizeForGitRef(name: string): string {
259
- return name
260
- .replace(/[^a-zA-Z0-9._-]/g, "-")
261
- .replace(/\.{2,}/g, ".")
262
- .replace(/^[.-]|[.-]$/g, "")
263
- .replace(/-{2,}/g, "-");
264
- }
265
-
266
- /** Generate a branch name from the prefix and skill name. */
267
- function makeBranchName(prefix: string, skillName: string): string {
268
- const timestamp = Date.now();
269
- const safeName = sanitizeForGitRef(skillName) || "untitled";
270
- return `${prefix}/${safeName}-${timestamp}`;
271
- }
272
-
273
- /**
274
- * Run a git/gh command via Bun.spawn. Returns stdout on success.
275
- * Throws on non-zero exit code or if the command exceeds timeoutMs.
276
- */
277
- async function runCommand(args: string[], cwd?: string, timeoutMs = 30_000): Promise<string> {
278
- const proc = Bun.spawn(args, {
279
- cwd,
280
- stdout: "pipe",
281
- stderr: "pipe",
282
- });
283
-
284
- let timedOut = false;
285
- const timer = setTimeout(() => {
286
- timedOut = true;
287
- proc.kill();
288
- }, timeoutMs);
289
-
290
- try {
291
- // Read stdout and stderr concurrently to avoid deadlock when both pipes fill.
292
- const [stdout, stderr] = await Promise.all([
293
- new Response(proc.stdout).text(),
294
- new Response(proc.stderr).text(),
295
- ]);
296
- const exitCode = await proc.exited;
297
-
298
- if (timedOut) {
299
- throw new Error(`Command timed out after ${timeoutMs}ms: ${args.join(" ")}`);
300
- }
301
-
302
- if (exitCode !== 0) {
303
- throw new Error(`Command failed (exit ${exitCode}): ${args.join(" ")}\n${stderr}`);
304
- }
305
-
306
- return stdout.trim();
307
- } finally {
308
- clearTimeout(timer);
309
- }
310
- }
311
-
312
- // ---------------------------------------------------------------------------
313
- // Main deploy function
314
- // ---------------------------------------------------------------------------
315
-
316
- /** Deploy a validated evolution proposal to SKILL.md and optionally create a PR. */
317
- export async function deployProposal(options: DeployOptions): Promise<DeployResult> {
318
- const { proposal, validation, skillPath, createPr, branchPrefix = "selftune/evolve" } = options;
319
-
320
- // Step 1: Read current SKILL.md
321
- const currentContent = readSkillMd(skillPath);
322
-
323
- // Step 2: Create backup (unique per deploy to avoid overwriting previous backups)
324
- const backupTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
325
- const backupPath = `${skillPath}.${backupTimestamp}.bak`;
326
- copyFileSync(skillPath, backupPath);
327
-
328
- // Step 3: Replace description and write
329
- const updatedContent = replaceDescription(currentContent, proposal.proposed_description);
330
- writeFileSync(skillPath, updatedContent, "utf-8");
331
-
332
- // Step 4: Build commit message
333
- const commitMessage = buildCommitMessage(proposal, validation);
334
-
335
- // Step 5: Optionally create branch and PR
336
- let branchName: string | null = null;
337
-
338
- if (createPr) {
339
- branchName = makeBranchName(branchPrefix, proposal.skill_name);
340
-
341
- try {
342
- // Create and checkout branch
343
- await runCommand(["git", "checkout", "-b", branchName]);
344
-
345
- // Stage the SKILL.md
346
- await runCommand(["git", "add", skillPath]);
347
-
348
- // Commit
349
- await runCommand(["git", "commit", "-m", commitMessage]);
350
-
351
- // Push
352
- await runCommand(["git", "push", "-u", "origin", branchName]);
353
-
354
- // Create PR
355
- await runCommand([
356
- "gh",
357
- "pr",
358
- "create",
359
- "--title",
360
- commitMessage,
361
- "--body",
362
- `Proposal: ${proposal.proposal_id}\nRationale: ${proposal.rationale}\nNet change: ${validation.net_change > 0 ? "+" : ""}${Math.round(validation.net_change * 100)}%`,
363
- ]);
364
- } catch (err) {
365
- // Git/GH operations are best-effort in test environments.
366
- // The branch name is still returned for tracking.
367
- console.error(`[WARN] Git/GH operation failed: ${err instanceof Error ? err.message : err}`);
368
- }
369
- }
370
-
371
- return {
372
- skillMdUpdated: true,
373
- backupPath,
374
- branchName,
375
- commitMessage,
376
- };
377
- }
378
-
379
- // ---------------------------------------------------------------------------
380
- // CLI entry guard
381
- // ---------------------------------------------------------------------------
382
-
383
- if (import.meta.main) {
384
- console.log("deploy-proposal: use deployProposal() programmatically or via evolve CLI");
385
- }
@@ -25,7 +25,8 @@ import type {
25
25
  QueryLogRecord,
26
26
  SkillUsageRecord,
27
27
  } from "../types.js";
28
-
28
+ import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
29
+ import { callViaSubagent } from "../utils/llm-call.js";
29
30
  import { appendAuditEntry } from "./audit.js";
30
31
  import { checkConstitutionSizeOnly } from "./constitutional.js";
31
32
  import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
@@ -57,6 +58,9 @@ export interface EvolveBodyOptions {
57
58
  fewShotExamples?: string[];
58
59
  gradingResults?: GradingResult[];
59
60
  validationModel?: string;
61
+ teacherEffort?: EffortLevel;
62
+ /** Run evolution-reviewer subagent as Gate 4 before deployment. */
63
+ useReviewer?: boolean;
60
64
  }
61
65
 
62
66
  export interface EvolveBodyResult {
@@ -89,6 +93,7 @@ export interface EvolveBodyDeps {
89
93
  readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
90
94
  readFileSync?: typeof readFileSync;
91
95
  writeFileSync?: (path: string, data: string, encoding: string) => void;
96
+ callViaSubagent?: (options: SubagentCallOptions) => Promise<string>;
92
97
  }
93
98
 
94
99
  // ---------------------------------------------------------------------------
@@ -110,6 +115,19 @@ function createAuditEntry(
110
115
  };
111
116
  }
112
117
 
118
+ // ---------------------------------------------------------------------------
119
+ // Pipeline defaults — enforced even when the calling agent omits flags
120
+ // ---------------------------------------------------------------------------
121
+
122
+ /** Default teacher model: Opus 4.6 for highest-quality proposals. */
123
+ const DEFAULT_TEACHER_MODEL = "opus";
124
+
125
+ /** Default student model: Haiku for cheap, fast validation gates. */
126
+ const DEFAULT_STUDENT_MODEL = "haiku";
127
+
128
+ /** Default teacher effort: extended thinking for multi-constraint reasoning. */
129
+ const DEFAULT_TEACHER_EFFORT: EffortLevel = "high";
130
+
113
131
  // ---------------------------------------------------------------------------
114
132
  // Main orchestrator
115
133
  // ---------------------------------------------------------------------------
@@ -124,8 +142,6 @@ export async function evolveBody(
124
142
  target,
125
143
  teacherAgent,
126
144
  studentAgent,
127
- teacherModel,
128
- studentModel,
129
145
  evalSetPath,
130
146
  dryRun,
131
147
  maxIterations,
@@ -133,6 +149,11 @@ export async function evolveBody(
133
149
  fewShotExamples,
134
150
  } = options;
135
151
 
152
+ // Apply pipeline defaults for models/effort when not explicitly provided
153
+ const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
154
+ const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
155
+ const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
156
+
136
157
  // Resolve injectable dependencies
137
158
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
138
159
  const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
@@ -151,6 +172,7 @@ export async function evolveBody(
151
172
  });
152
173
  const _readFileSync = _deps.readFileSync ?? readFileSync;
153
174
  const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
175
+ const _callViaSubagent = _deps.callViaSubagent ?? callViaSubagent;
154
176
 
155
177
  const auditEntries: EvolutionAuditEntry[] = [];
156
178
 
@@ -306,6 +328,7 @@ export async function evolveBody(
306
328
  skillPath,
307
329
  teacherAgent,
308
330
  teacherModel,
331
+ teacherEffort,
309
332
  );
310
333
  } else {
311
334
  proposal = await _generateBodyProposal(
@@ -318,6 +341,7 @@ export async function evolveBody(
318
341
  teacherModel,
319
342
  fewShotExamples,
320
343
  executionContext,
344
+ teacherEffort,
321
345
  );
322
346
  }
323
347
  } else if (lastProposal && lastValidation) {
@@ -327,6 +351,7 @@ export async function evolveBody(
327
351
  lastValidation,
328
352
  teacherAgent,
329
353
  teacherModel,
354
+ options.teacherEffort,
330
355
  );
331
356
  } else {
332
357
  break;
@@ -496,7 +521,63 @@ export async function evolveBody(
496
521
  }
497
522
  }
498
523
 
499
- // Step 5: Deploy or dry-run
524
+ // Step 5: Optional evolution-reviewer gate (Gate 4)
525
+ if (options.useReviewer && lastProposal && lastValidation?.improved) {
526
+ try {
527
+ const reviewPrompt = [
528
+ `Review this ${target} evolution proposal for the "${skillName}" skill.`,
529
+ ``,
530
+ `Proposal ID: ${lastProposal.proposal_id}`,
531
+ `Skill path: ${skillPath}`,
532
+ `Target: ${target}`,
533
+ `Confidence: ${lastProposal.confidence}`,
534
+ `Validation: ${lastValidation.gates_passed}/${lastValidation.gates_total} gates passed`,
535
+ `Regressions: ${lastValidation.regressions.length > 0 ? lastValidation.regressions.join(", ") : "none"}`,
536
+ ``,
537
+ `Original content:`,
538
+ lastProposal.original_body,
539
+ ``,
540
+ `Proposed content:`,
541
+ lastProposal.proposed_body,
542
+ ``,
543
+ `Rationale: ${lastProposal.rationale}`,
544
+ ].join("\n");
545
+
546
+ const reviewOutput = await _callViaSubagent({
547
+ agentName: "evolution-reviewer",
548
+ prompt: reviewPrompt,
549
+ maxTurns: 8,
550
+ allowedTools: ["Read", "Grep", "Glob", "Bash"],
551
+ });
552
+
553
+ const isRejected = /\bREJECT\b/.test(reviewOutput) && !/\bAPPROVE\b/.test(reviewOutput);
554
+ recordAudit(
555
+ lastProposal.proposal_id,
556
+ isRejected ? "rejected" : "validated",
557
+ `Evolution reviewer: ${isRejected ? "REJECTED" : "APPROVED"}`,
558
+ );
559
+
560
+ if (isRejected) {
561
+ return {
562
+ proposal: lastProposal,
563
+ validation: lastValidation,
564
+ deployed: false,
565
+ auditEntries,
566
+ reason: `Evolution reviewer rejected proposal: ${reviewOutput.slice(0, 500)}`,
567
+ };
568
+ }
569
+ } catch (reviewError) {
570
+ // Fail-open: if reviewer crashes, log it and continue to deploy
571
+ const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
572
+ recordAudit(
573
+ lastProposal.proposal_id,
574
+ "validated",
575
+ `Evolution reviewer failed (fail-open): ${msg}`,
576
+ );
577
+ }
578
+ }
579
+
580
+ // Step 6: Deploy or dry-run
500
581
  if (dryRun) {
501
582
  return {
502
583
  proposal: lastProposal,
@@ -594,6 +675,8 @@ export async function cliMain(): Promise<void> {
594
675
  "task-description": { type: "string" },
595
676
  "few-shot": { type: "string" },
596
677
  "validation-model": { type: "string" },
678
+ "teacher-effort": { type: "string", default: "high" },
679
+ review: { type: "boolean", default: false },
597
680
  help: { type: "boolean", default: false },
598
681
  },
599
682
  strict: true,
@@ -611,8 +694,8 @@ Options:
611
694
  --target Evolution target: body, routing (default: body)
612
695
  --teacher-agent Teacher agent CLI (claude, codex, etc.)
613
696
  --student-agent Student agent CLI for validation
614
- --teacher-model Model flag for teacher agent
615
- --student-model Model flag for student agent
697
+ --teacher-model Model flag for teacher agent (default: opus)
698
+ --student-model Model flag for student agent (default: haiku)
616
699
  --eval-set Path to eval set JSON
617
700
  --dry-run Validate without deploying
618
701
  --max-iterations Max refinement iterations (default: 3)
@@ -620,6 +703,8 @@ Options:
620
703
  --task-description Optional task description context
621
704
  --few-shot Comma-separated paths to example skill files
622
705
  --validation-model Model for trigger-check validation calls (overrides --student-model for validation)
706
+ --teacher-effort Effort level for teacher LLM: low, medium, high, max (default: high)
707
+ --review Run evolution-reviewer subagent before deployment (Gate 4)
623
708
  --help Show this help message`);
624
709
  process.exit(0);
625
710
  }
@@ -669,6 +754,8 @@ Options:
669
754
  fewShotExamples,
670
755
  gradingResults,
671
756
  validationModel: values["validation-model"],
757
+ teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
758
+ useReviewer: values.review ?? false,
672
759
  });
673
760
 
674
761
  console.log(JSON.stringify(result, null, 2));
@@ -36,8 +36,7 @@ import type {
36
36
  SessionTelemetryRecord,
37
37
  SkillUsageRecord,
38
38
  } from "../types.js";
39
- import { parseFrontmatter, replaceFrontmatterDescription } from "../utils/frontmatter.js";
40
-
39
+ import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
41
40
  import { createEvolveTUI } from "../utils/tui.js";
42
41
  import { appendAuditEntry } from "./audit.js";
43
42
  import { checkConstitution } from "./constitutional.js";
@@ -959,11 +958,8 @@ export async function evolve(
959
958
  copyFileSync(skillPath, backupPath);
960
959
  tui.done(`Backup created at ${backupPath}`);
961
960
 
962
- // Replace the frontmatter description
963
- const updatedContent = replaceFrontmatterDescription(
964
- rawContent,
965
- lastProposal.proposed_description,
966
- );
961
+ // Replace the description (handles both frontmatter and plain markdown)
962
+ const updatedContent = replaceDescription(rawContent, lastProposal.proposed_description);
967
963
  writeFileSync(skillPath, updatedContent, "utf-8");
968
964
  tui.done(`Deployed updated description to ${skillPath}`);
969
965
 
@@ -7,7 +7,7 @@
7
7
  */
8
8
 
9
9
  import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
10
- import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
+ import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
11
11
 
12
12
  // ---------------------------------------------------------------------------
13
13
  // System prompt
@@ -160,6 +160,7 @@ export async function generateBodyProposal(
160
160
  modelFlag?: string,
161
161
  fewShotExamples?: string[],
162
162
  executionContext?: ExecutionContext,
163
+ effort?: EffortLevel,
163
164
  ): Promise<BodyEvolutionProposal> {
164
165
  const prompt = buildBodyGenerationPrompt(
165
166
  currentContent,
@@ -169,7 +170,7 @@ export async function generateBodyProposal(
169
170
  fewShotExamples,
170
171
  executionContext,
171
172
  );
172
- const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag);
173
+ const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag, effort);
173
174
  const { proposed_body, rationale, confidence } = parseBodyProposalResponse(rawResponse);
174
175
 
175
176
  return {
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
9
- import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
9
+ import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
10
 
11
11
  // ---------------------------------------------------------------------------
12
12
  // System prompt
@@ -139,6 +139,7 @@ export async function generateRoutingProposal(
139
139
  skillPath: string,
140
140
  agent: string,
141
141
  modelFlag?: string,
142
+ effort?: EffortLevel,
142
143
  ): Promise<BodyEvolutionProposal> {
143
144
  const prompt = buildRoutingProposalPrompt(
144
145
  currentRouting,
@@ -147,7 +148,7 @@ export async function generateRoutingProposal(
147
148
  missedQueries,
148
149
  skillName,
149
150
  );
150
- const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag);
151
+ const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag, effort);
151
152
  const { proposed_routing, rationale, confidence } = parseRoutingProposalResponse(rawResponse);
152
153
 
153
154
  return {
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
9
- import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
9
+ import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
10
 
11
11
  // ---------------------------------------------------------------------------
12
12
  // System prompt
@@ -118,6 +118,7 @@ export async function refineBodyProposal(
118
118
  validationResult: BodyValidationResult,
119
119
  agent: string,
120
120
  modelFlag?: string,
121
+ effort?: EffortLevel,
121
122
  ): Promise<BodyEvolutionProposal> {
122
123
  const prompt = buildRefinementPrompt(
123
124
  proposal.proposed_body,
@@ -126,7 +127,7 @@ export async function refineBodyProposal(
126
127
  validationResult.regressions,
127
128
  );
128
129
 
129
- const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag);
130
+ const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag, effort);
130
131
  const { refined_body, changes_made, confidence } = parseRefinementResponse(rawResponse);
131
132
 
132
133
  return {
@@ -13,8 +13,8 @@ import { parseArgs } from "node:util";
13
13
 
14
14
  import { updateContextAfterRollback } from "../memory/writer.js";
15
15
  import type { EvolutionAuditEntry } from "../types.js";
16
+ import { replaceDescription } from "../utils/frontmatter.js";
16
17
  import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
17
- import { replaceDescription } from "./deploy-proposal.js";
18
18
 
19
19
  // ---------------------------------------------------------------------------
20
20
  // Types
@@ -5,6 +5,7 @@
5
5
  */
6
6
  import { mkdirSync, writeFileSync } from "node:fs";
7
7
  import { join } from "node:path";
8
+
8
9
  import { getDb } from "./localdb/db.js";
9
10
  import {
10
11
  getOrchestrateRuns,
@@ -884,6 +884,14 @@ Options:
884
884
  }
885
885
  writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
886
886
 
887
+ // Persist to SQLite for upload staging (fail-open)
888
+ try {
889
+ const { writeGradingResultToDb } = await import("../localdb/direct-write.js");
890
+ writeGradingResultToDb(result);
891
+ } catch {
892
+ // fail-open: grading file is already written above
893
+ }
894
+
887
895
  printSummary(result);
888
896
  console.log(`\nWrote ${outputPath}`);
889
897
  }
@@ -11,6 +11,7 @@
11
11
 
12
12
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
13
13
  import { dirname } from "node:path";
14
+
14
15
  import {
15
16
  CLAUDE_SETTINGS_PATH,
16
17
  EVOLUTION_AUDIT_LOG,
@@ -16,8 +16,8 @@
16
16
 
17
17
  import { existsSync, readFileSync } from "node:fs";
18
18
  import { basename, dirname, join } from "node:path";
19
- import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
20
19
 
20
+ import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
21
21
  import type { PreToolUsePayload } from "../types.js";
22
22
  import { readJsonl } from "../utils/jsonl.js";
23
23
 
@@ -11,6 +11,7 @@
11
11
  import { readdirSync } from "node:fs";
12
12
  import { homedir } from "node:os";
13
13
  import { join } from "node:path";
14
+
14
15
  import { CANONICAL_LOG, QUERY_LOG, SKIP_PREFIXES } from "../constants.js";
15
16
  import {
16
17
  appendCanonicalRecord,