npm - selftune - Versions diffs - 0.2.9 → 0.2.12 - Mend

selftune 0.2.9 → 0.2.12

This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (140)

package/README.md +35 -35
package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +16 -0
package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +2 -0
package/apps/local-dashboard/dist/assets/rolldown-runtime-Dw2cE7zH.js +1 -0
package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +11 -0
package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +8 -0
package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +12 -0
package/apps/local-dashboard/dist/index.html +16 -15
package/bin/selftune.cjs +1 -1
package/cli/selftune/activation-rules.ts +1 -0
package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
package/cli/selftune/alpha-upload/stage-canonical.ts +94 -0
package/cli/selftune/auth/device-code.ts +32 -0
package/cli/selftune/auto-update.ts +12 -0
package/cli/selftune/badge/badge.ts +1 -0
package/cli/selftune/canonical-export.ts +5 -0
package/cli/selftune/claude-agents.ts +154 -0
package/cli/selftune/contribute/bundle.ts +1 -0
package/cli/selftune/contribute/contribute.ts +1 -0
package/cli/selftune/cron/setup.ts +2 -2
package/cli/selftune/dashboard-server.ts +1 -0
package/cli/selftune/eval/hooks-to-evals.ts +1 -0
package/cli/selftune/eval/import-skillsbench.ts +1 -0
package/cli/selftune/eval/synthetic-evals.ts +2 -3
package/cli/selftune/eval/unit-test.ts +1 -0
package/cli/selftune/evolution/deploy-proposal.ts +9 -238
package/cli/selftune/evolution/evolve-body.ts +93 -6
package/cli/selftune/evolution/evolve.ts +3 -7
package/cli/selftune/evolution/propose-body.ts +3 -2
package/cli/selftune/evolution/propose-routing.ts +3 -2
package/cli/selftune/evolution/refine-body.ts +3 -2
package/cli/selftune/evolution/rollback.ts +1 -1
package/cli/selftune/export.ts +1 -0
package/cli/selftune/grading/grade-session.ts +8 -0
package/cli/selftune/hooks/auto-activate.ts +1 -0
package/cli/selftune/hooks/evolution-guard.ts +1 -1
package/cli/selftune/hooks/prompt-log.ts +1 -0
package/cli/selftune/hooks/session-stop.ts +34 -40
package/cli/selftune/hooks/skill-change-guard.ts +1 -0
package/cli/selftune/hooks/skill-eval.ts +1 -1
package/cli/selftune/index.ts +23 -14
package/cli/selftune/ingestors/claude-replay.ts +1 -0
package/cli/selftune/ingestors/codex-rollout.ts +1 -0
package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
package/cli/selftune/init.ts +121 -29
package/cli/selftune/localdb/db.ts +1 -0
package/cli/selftune/localdb/direct-write.ts +39 -0
package/cli/selftune/localdb/materialize.ts +2 -0
package/cli/selftune/localdb/queries.ts +53 -0
package/cli/selftune/localdb/schema.ts +28 -0
package/cli/selftune/normalization.ts +1 -0
package/cli/selftune/observability.ts +1 -0
package/cli/selftune/repair/skill-usage.ts +1 -0
package/cli/selftune/routes/orchestrate-runs.ts +1 -0
package/cli/selftune/routes/overview.ts +1 -0
package/cli/selftune/routes/report.ts +1 -1
package/cli/selftune/routes/skill-report.ts +2 -1
package/cli/selftune/status.ts +1 -1
package/cli/selftune/sync.ts +30 -1
package/cli/selftune/uninstall.ts +412 -0
package/cli/selftune/utils/canonical-log.ts +2 -0
package/cli/selftune/utils/frontmatter.ts +50 -7
package/cli/selftune/utils/jsonl.ts +1 -0
package/cli/selftune/utils/llm-call.ts +131 -3
package/cli/selftune/utils/skill-log.ts +1 -0
package/cli/selftune/utils/transcript.ts +1 -0
package/cli/selftune/utils/trigger-check.ts +1 -1
package/cli/selftune/workflows/skill-md-writer.ts +5 -5
package/cli/selftune/workflows/workflows.ts +1 -0
package/package.json +37 -33
package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
package/packages/telemetry-contract/package.json +1 -1
package/packages/telemetry-contract/src/schemas.ts +1 -0
package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
package/packages/ui/README.md +35 -34
package/packages/ui/package.json +3 -3
package/packages/ui/src/components/ActivityTimeline.tsx +50 -43
package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
package/packages/ui/src/components/InfoTip.tsx +4 -3
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
package/packages/ui/src/components/section-cards.tsx +20 -25
package/packages/ui/src/components/skill-health-grid.tsx +213 -193
package/packages/ui/src/lib/constants.tsx +1 -0
package/packages/ui/src/primitives/badge.tsx +12 -15
package/packages/ui/src/primitives/button.tsx +7 -7
package/packages/ui/src/primitives/card.tsx +15 -26
package/packages/ui/src/primitives/checkbox.tsx +7 -8
package/packages/ui/src/primitives/collapsible.tsx +5 -5
package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
package/packages/ui/src/primitives/label.tsx +6 -6
package/packages/ui/src/primitives/select.tsx +28 -37
package/packages/ui/src/primitives/table.tsx +17 -44
package/packages/ui/src/primitives/tabs.tsx +14 -21
package/packages/ui/src/primitives/tooltip.tsx +10 -22
package/skill/SKILL.md +70 -57
package/skill/Workflows/AlphaUpload.md +4 -4
package/skill/Workflows/AutoActivation.md +11 -6
package/skill/Workflows/Badge.md +22 -16
package/skill/Workflows/Baseline.md +34 -36
package/skill/Workflows/Composability.md +16 -11
package/skill/Workflows/Contribute.md +26 -21
package/skill/Workflows/Cron.md +23 -22
package/skill/Workflows/Dashboard.md +32 -27
package/skill/Workflows/Doctor.md +33 -27
package/skill/Workflows/Evals.md +48 -47
package/skill/Workflows/EvolutionMemory.md +31 -21
package/skill/Workflows/Evolve.md +84 -82
package/skill/Workflows/EvolveBody.md +58 -47
package/skill/Workflows/Grade.md +16 -13
package/skill/Workflows/ImportSkillsBench.md +9 -6
package/skill/Workflows/Ingest.md +36 -21
package/skill/Workflows/Initialize.md +108 -40
package/skill/Workflows/Orchestrate.md +22 -16
package/skill/Workflows/Replay.md +12 -7
package/skill/Workflows/Rollback.md +13 -6
package/skill/Workflows/Schedule.md +6 -6
package/skill/Workflows/Sync.md +18 -11
package/skill/Workflows/UnitTest.md +28 -17
package/skill/Workflows/Watch.md +28 -21
package/skill/agents/diagnosis-analyst.md +11 -0
package/skill/agents/evolution-reviewer.md +15 -1
package/skill/agents/integration-guide.md +10 -0
package/skill/agents/pattern-analyst.md +12 -1
package/skill/references/grading-methodology.md +23 -24
package/skill/references/interactive-config.md +7 -7
package/skill/references/invocation-taxonomy.md +22 -20
package/skill/references/logs.md +14 -6
package/skill/references/setup-patterns.md +4 -2
package/.claude/agents/diagnosis-analyst.md +0 -156
package/.claude/agents/evolution-reviewer.md +0 -180
package/.claude/agents/integration-guide.md +0 -212
package/.claude/agents/pattern-analyst.md +0 -160
package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +0 -1
package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +0 -15
package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +0 -60
package/apps/local-dashboard/dist/assets/vendor-table-dK1QMLq9.js +0 -26
package/apps/local-dashboard/dist/assets/vendor-ui-CO2mrx6e.js +0 -341

package/cli/selftune/evolution/deploy-proposal.ts CHANGED Viewed

@@ -1,97 +1,18 @@
 /**
  * deploy-proposal.ts
  *
- * Deploys a validated evolution proposal by updating SKILL.md, creating a
- * backup, building a commit message with metrics, and optionally creating
- * a git branch and PR via `gh pr create`.
- */
-import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
-import type { EvolutionProposal, SkillSections } from "../types.js";
-import type { ValidationResult } from "./validate-proposal.js";
-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-export interface DeployOptions {
-  proposal: EvolutionProposal;
-  validation: ValidationResult;
-  skillPath: string;
-  createPr: boolean;
-  branchPrefix?: string; // default "selftune/evolve"
-}
-export interface DeployResult {
-  skillMdUpdated: boolean;
-  backupPath: string | null;
-  branchName: string | null;
-  commitMessage: string;
-}
-// ---------------------------------------------------------------------------
-// SKILL.md reading
-// ---------------------------------------------------------------------------
-/** Read the contents of a SKILL.md file. Throws if the file does not exist. */
-export function readSkillMd(skillPath: string): string {
-  if (!existsSync(skillPath)) {
-    throw new Error(`SKILL.md not found at ${skillPath}`);
-  }
-  return readFileSync(skillPath, "utf-8");
-}
-// ---------------------------------------------------------------------------
-// Description replacement
-// ---------------------------------------------------------------------------
-/**
- * Replace the description section of a SKILL.md file.
+ * SKILL.md manipulation utilities for the evolution pipeline: description
+ * replacement, structured section parsing, section replacement, and full
+ * body replacement.
  *
- * The description is defined as the content between the first `#` heading
- * and the first `##` heading. If no `##` heading exists, the entire body
- * after the first heading is replaced.
+ * Evolution is a local personalization — the evolved description reflects how
+ * *this user* works, not a change the skill creator should adopt. A future
+ * upstream feedback channel (anonymized patterns, not raw descriptions) may
+ * let end-users send useful signal back to skill creators, but that's a
+ * separate concern from deploy. See TD-019 in tech-debt-tracker.md.
  */
-export function replaceDescription(currentContent: string, newDescription: string): string {
-  const lines = currentContent.split("\n");
-  // Find the first # heading line
-  let headingIndex = -1;
-  for (let i = 0; i < lines.length; i++) {
-    if (lines[i].startsWith("# ") && !lines[i].startsWith("## ")) {
-      headingIndex = i;
-      break;
-    }
-  }
-  // If no heading found, just prepend the description
-  if (headingIndex === -1) {
-    return `${newDescription}\n${currentContent}`;
-  }
-  // Find the first ## heading after the main heading
-  let subHeadingIndex = -1;
-  for (let i = headingIndex + 1; i < lines.length; i++) {
-    if (lines[i].startsWith("## ")) {
-      subHeadingIndex = i;
-      break;
-    }
-  }
-  // Build the new content, preserving any preamble before the first heading
-  const preamble = headingIndex > 0 ? `${lines.slice(0, headingIndex).join("\n")}\n` : "";
-  const headingLine = lines[headingIndex];
-  const descriptionBlock = newDescription.length > 0 ? `\n${newDescription}\n` : "\n";
-  if (subHeadingIndex === -1) {
-    // No sub-heading: preamble + heading + new description + trailing newline
-    return `${preamble}${headingLine}\n${descriptionBlock}\n`;
-  }
-  // Preamble + heading + description + everything from the first ## onward
-  const afterSubHeading = lines.slice(subHeadingIndex).join("\n");
-  return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
-}
+import type { SkillSections } from "../types.js";
 // ---------------------------------------------------------------------------
 // Structured SKILL.md parsing
@@ -233,153 +154,3 @@ export function replaceBody(currentContent: string, proposedBody: string): strin
   return `${parts.join("\n").trimEnd()}\n`;
 }
-// ---------------------------------------------------------------------------
-// Commit message builder
-// ---------------------------------------------------------------------------
-/** Build a commit message that includes the skill name and pass rate change. */
-export function buildCommitMessage(
-  proposal: EvolutionProposal,
-  validation: ValidationResult,
-): string {
-  const changePercent = Math.round(validation.net_change * 100);
-  const sign = changePercent >= 0 ? "+" : "";
-  const passRateStr = `${sign}${changePercent}% pass rate`;
-  return `evolve(${proposal.skill_name}): ${passRateStr}`;
-}
-// ---------------------------------------------------------------------------
-// Git/GH operations (PR creation)
-// ---------------------------------------------------------------------------
-/** Sanitize a string for use in a git branch name. */
-function sanitizeForGitRef(name: string): string {
-  return name
-    .replace(/[^a-zA-Z0-9._-]/g, "-")
-    .replace(/\.{2,}/g, ".")
-    .replace(/^[.-]|[.-]$/g, "")
-    .replace(/-{2,}/g, "-");
-}
-/** Generate a branch name from the prefix and skill name. */
-function makeBranchName(prefix: string, skillName: string): string {
-  const timestamp = Date.now();
-  const safeName = sanitizeForGitRef(skillName) || "untitled";
-  return `${prefix}/${safeName}-${timestamp}`;
-}
-/**
- * Run a git/gh command via Bun.spawn. Returns stdout on success.
- * Throws on non-zero exit code or if the command exceeds timeoutMs.
- */
-async function runCommand(args: string[], cwd?: string, timeoutMs = 30_000): Promise<string> {
-  const proc = Bun.spawn(args, {
-    cwd,
-    stdout: "pipe",
-    stderr: "pipe",
-  });
-  let timedOut = false;
-  const timer = setTimeout(() => {
-    timedOut = true;
-    proc.kill();
-  }, timeoutMs);
-  try {
-    // Read stdout and stderr concurrently to avoid deadlock when both pipes fill.
-    const [stdout, stderr] = await Promise.all([
-      new Response(proc.stdout).text(),
-      new Response(proc.stderr).text(),
-    ]);
-    const exitCode = await proc.exited;
-    if (timedOut) {
-      throw new Error(`Command timed out after ${timeoutMs}ms: ${args.join(" ")}`);
-    }
-    if (exitCode !== 0) {
-      throw new Error(`Command failed (exit ${exitCode}): ${args.join(" ")}\n${stderr}`);
-    }
-    return stdout.trim();
-  } finally {
-    clearTimeout(timer);
-  }
-}
-// ---------------------------------------------------------------------------
-// Main deploy function
-// ---------------------------------------------------------------------------
-/** Deploy a validated evolution proposal to SKILL.md and optionally create a PR. */
-export async function deployProposal(options: DeployOptions): Promise<DeployResult> {
-  const { proposal, validation, skillPath, createPr, branchPrefix = "selftune/evolve" } = options;
-  // Step 1: Read current SKILL.md
-  const currentContent = readSkillMd(skillPath);
-  // Step 2: Create backup (unique per deploy to avoid overwriting previous backups)
-  const backupTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
-  const backupPath = `${skillPath}.${backupTimestamp}.bak`;
-  copyFileSync(skillPath, backupPath);
-  // Step 3: Replace description and write
-  const updatedContent = replaceDescription(currentContent, proposal.proposed_description);
-  writeFileSync(skillPath, updatedContent, "utf-8");
-  // Step 4: Build commit message
-  const commitMessage = buildCommitMessage(proposal, validation);
-  // Step 5: Optionally create branch and PR
-  let branchName: string | null = null;
-  if (createPr) {
-    branchName = makeBranchName(branchPrefix, proposal.skill_name);
-    try {
-      // Create and checkout branch
-      await runCommand(["git", "checkout", "-b", branchName]);
-      // Stage the SKILL.md
-      await runCommand(["git", "add", skillPath]);
-      // Commit
-      await runCommand(["git", "commit", "-m", commitMessage]);
-      // Push
-      await runCommand(["git", "push", "-u", "origin", branchName]);
-      // Create PR
-      await runCommand([
-        "gh",
-        "pr",
-        "create",
-        "--title",
-        commitMessage,
-        "--body",
-        `Proposal: ${proposal.proposal_id}\nRationale: ${proposal.rationale}\nNet change: ${validation.net_change > 0 ? "+" : ""}${Math.round(validation.net_change * 100)}%`,
-      ]);
-    } catch (err) {
-      // Git/GH operations are best-effort in test environments.
-      // The branch name is still returned for tracking.
-      console.error(`[WARN] Git/GH operation failed: ${err instanceof Error ? err.message : err}`);
-    }
-  }
-  return {
-    skillMdUpdated: true,
-    backupPath,
-    branchName,
-    commitMessage,
-  };
-}
-// ---------------------------------------------------------------------------
-// CLI entry guard
-// ---------------------------------------------------------------------------
-if (import.meta.main) {
-  console.log("deploy-proposal: use deployProposal() programmatically or via evolve CLI");
-}

package/cli/selftune/evolution/evolve-body.ts CHANGED Viewed

@@ -25,7 +25,8 @@ import type {
   QueryLogRecord,
   SkillUsageRecord,
 } from "../types.js";
+import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
+import { callViaSubagent } from "../utils/llm-call.js";
 import { appendAuditEntry } from "./audit.js";
 import { checkConstitutionSizeOnly } from "./constitutional.js";
 import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
@@ -57,6 +58,9 @@ export interface EvolveBodyOptions {
   fewShotExamples?: string[];
   gradingResults?: GradingResult[];
   validationModel?: string;
+  teacherEffort?: EffortLevel;
+  /** Run evolution-reviewer subagent as Gate 4 before deployment. */
+  useReviewer?: boolean;
 }
 export interface EvolveBodyResult {
@@ -89,6 +93,7 @@ export interface EvolveBodyDeps {
   readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
   readFileSync?: typeof readFileSync;
   writeFileSync?: (path: string, data: string, encoding: string) => void;
+  callViaSubagent?: (options: SubagentCallOptions) => Promise<string>;
 }
 // ---------------------------------------------------------------------------
@@ -110,6 +115,19 @@ function createAuditEntry(
   };
 }
+// ---------------------------------------------------------------------------
+// Pipeline defaults — enforced even when the calling agent omits flags
+// ---------------------------------------------------------------------------
+/** Default teacher model: Opus 4.6 for highest-quality proposals. */
+const DEFAULT_TEACHER_MODEL = "opus";
+/** Default student model: Haiku for cheap, fast validation gates. */
+const DEFAULT_STUDENT_MODEL = "haiku";
+/** Default teacher effort: extended thinking for multi-constraint reasoning. */
+const DEFAULT_TEACHER_EFFORT: EffortLevel = "high";
 // ---------------------------------------------------------------------------
 // Main orchestrator
 // ---------------------------------------------------------------------------
@@ -124,8 +142,6 @@ export async function evolveBody(
     target,
     teacherAgent,
     studentAgent,
-    teacherModel,
-    studentModel,
     evalSetPath,
     dryRun,
     maxIterations,
@@ -133,6 +149,11 @@ export async function evolveBody(
     fewShotExamples,
   } = options;
+  // Apply pipeline defaults for models/effort when not explicitly provided
+  const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
+  const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
+  const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
   // Resolve injectable dependencies
   const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
   const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
@@ -151,6 +172,7 @@ export async function evolveBody(
     });
   const _readFileSync = _deps.readFileSync ?? readFileSync;
   const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
+  const _callViaSubagent = _deps.callViaSubagent ?? callViaSubagent;
   const auditEntries: EvolutionAuditEntry[] = [];
@@ -306,6 +328,7 @@ export async function evolveBody(
             skillPath,
             teacherAgent,
             teacherModel,
+            teacherEffort,
           );
         } else {
           proposal = await _generateBodyProposal(
@@ -318,6 +341,7 @@ export async function evolveBody(
             teacherModel,
             fewShotExamples,
             executionContext,
+            teacherEffort,
           );
         }
       } else if (lastProposal && lastValidation) {
@@ -327,6 +351,7 @@ export async function evolveBody(
           lastValidation,
           teacherAgent,
           teacherModel,
+          options.teacherEffort,
         );
       } else {
         break;
@@ -496,7 +521,63 @@ export async function evolveBody(
       }
     }
-    // Step 5: Deploy or dry-run
+    // Step 5: Optional evolution-reviewer gate (Gate 4)
+    if (options.useReviewer && lastProposal && lastValidation?.improved) {
+      try {
+        const reviewPrompt = [
+          `Review this ${target} evolution proposal for the "${skillName}" skill.`,
+          ``,
+          `Proposal ID: ${lastProposal.proposal_id}`,
+          `Skill path: ${skillPath}`,
+          `Target: ${target}`,
+          `Confidence: ${lastProposal.confidence}`,
+          `Validation: ${lastValidation.gates_passed}/${lastValidation.gates_total} gates passed`,
+          `Regressions: ${lastValidation.regressions.length > 0 ? lastValidation.regressions.join(", ") : "none"}`,
+          ``,
+          `Original content:`,
+          lastProposal.original_body,
+          ``,
+          `Proposed content:`,
+          lastProposal.proposed_body,
+          ``,
+          `Rationale: ${lastProposal.rationale}`,
+        ].join("\n");
+        const reviewOutput = await _callViaSubagent({
+          agentName: "evolution-reviewer",
+          prompt: reviewPrompt,
+          maxTurns: 8,
+          allowedTools: ["Read", "Grep", "Glob", "Bash"],
+        });
+        const isRejected = /\bREJECT\b/.test(reviewOutput) && !/\bAPPROVE\b/.test(reviewOutput);
+        recordAudit(
+          lastProposal.proposal_id,
+          isRejected ? "rejected" : "validated",
+          `Evolution reviewer: ${isRejected ? "REJECTED" : "APPROVED"}`,
+        );
+        if (isRejected) {
+          return {
+            proposal: lastProposal,
+            validation: lastValidation,
+            deployed: false,
+            auditEntries,
+            reason: `Evolution reviewer rejected proposal: ${reviewOutput.slice(0, 500)}`,
+          };
+        }
+      } catch (reviewError) {
+        // Fail-open: if reviewer crashes, log it and continue to deploy
+        const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
+        recordAudit(
+          lastProposal.proposal_id,
+          "validated",
+          `Evolution reviewer failed (fail-open): ${msg}`,
+        );
+      }
+    }
+    // Step 6: Deploy or dry-run
     if (dryRun) {
       return {
         proposal: lastProposal,
@@ -594,6 +675,8 @@ export async function cliMain(): Promise<void> {
       "task-description": { type: "string" },
       "few-shot": { type: "string" },
       "validation-model": { type: "string" },
+      "teacher-effort": { type: "string", default: "high" },
+      review: { type: "boolean", default: false },
       help: { type: "boolean", default: false },
     },
     strict: true,
@@ -611,8 +694,8 @@ Options:
   --target            Evolution target: body, routing (default: body)
   --teacher-agent     Teacher agent CLI (claude, codex, etc.)
   --student-agent     Student agent CLI for validation
-  --teacher-model     Model flag for teacher agent
-  --student-model     Model flag for student agent
+  --teacher-model     Model flag for teacher agent (default: opus)
+  --student-model     Model flag for student agent (default: haiku)
   --eval-set          Path to eval set JSON
   --dry-run           Validate without deploying
   --max-iterations    Max refinement iterations (default: 3)
@@ -620,6 +703,8 @@ Options:
   --task-description  Optional task description context
   --few-shot          Comma-separated paths to example skill files
   --validation-model  Model for trigger-check validation calls (overrides --student-model for validation)
+  --teacher-effort    Effort level for teacher LLM: low, medium, high, max (default: high)
+  --review            Run evolution-reviewer subagent before deployment (Gate 4)
   --help              Show this help message`);
     process.exit(0);
   }
@@ -669,6 +754,8 @@ Options:
     fewShotExamples,
     gradingResults,
     validationModel: values["validation-model"],
+    teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
+    useReviewer: values.review ?? false,
   });
   console.log(JSON.stringify(result, null, 2));

package/cli/selftune/evolution/evolve.ts CHANGED Viewed

@@ -36,8 +36,7 @@ import type {
   SessionTelemetryRecord,
   SkillUsageRecord,
 } from "../types.js";
-import { parseFrontmatter, replaceFrontmatterDescription } from "../utils/frontmatter.js";
+import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
 import { createEvolveTUI } from "../utils/tui.js";
 import { appendAuditEntry } from "./audit.js";
 import { checkConstitution } from "./constitutional.js";
@@ -959,11 +958,8 @@ export async function evolve(
       copyFileSync(skillPath, backupPath);
       tui.done(`Backup created at ${backupPath}`);
-      // Replace the frontmatter description
-      const updatedContent = replaceFrontmatterDescription(
-        rawContent,
-        lastProposal.proposed_description,
-      );
+      // Replace the description (handles both frontmatter and plain markdown)
+      const updatedContent = replaceDescription(rawContent, lastProposal.proposed_description);
       writeFileSync(skillPath, updatedContent, "utf-8");
       tui.done(`Deployed updated description to ${skillPath}`);

package/cli/selftune/evolution/propose-body.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  */
 import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
-import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
 // ---------------------------------------------------------------------------
 // System prompt
@@ -160,6 +160,7 @@ export async function generateBodyProposal(
   modelFlag?: string,
   fewShotExamples?: string[],
   executionContext?: ExecutionContext,
+  effort?: EffortLevel,
 ): Promise<BodyEvolutionProposal> {
   const prompt = buildBodyGenerationPrompt(
     currentContent,
@@ -169,7 +170,7 @@ export async function generateBodyProposal(
     fewShotExamples,
     executionContext,
   );
-  const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag);
+  const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag, effort);
   const { proposed_body, rationale, confidence } = parseBodyProposalResponse(rawResponse);
   return {

package/cli/selftune/evolution/propose-routing.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
-import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
 // ---------------------------------------------------------------------------
 // System prompt
@@ -139,6 +139,7 @@ export async function generateRoutingProposal(
   skillPath: string,
   agent: string,
   modelFlag?: string,
+  effort?: EffortLevel,
 ): Promise<BodyEvolutionProposal> {
   const prompt = buildRoutingProposalPrompt(
     currentRouting,
@@ -147,7 +148,7 @@ export async function generateRoutingProposal(
     missedQueries,
     skillName,
   );
-  const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag);
+  const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag, effort);
   const { proposed_routing, rationale, confidence } = parseRoutingProposalResponse(rawResponse);
   return {

package/cli/selftune/evolution/refine-body.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
-import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
 // ---------------------------------------------------------------------------
 // System prompt
@@ -118,6 +118,7 @@ export async function refineBodyProposal(
   validationResult: BodyValidationResult,
   agent: string,
   modelFlag?: string,
+  effort?: EffortLevel,
 ): Promise<BodyEvolutionProposal> {
   const prompt = buildRefinementPrompt(
     proposal.proposed_body,
@@ -126,7 +127,7 @@ export async function refineBodyProposal(
     validationResult.regressions,
   );
-  const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag);
+  const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag, effort);
   const { refined_body, changes_made, confidence } = parseRefinementResponse(rawResponse);
   return {

package/cli/selftune/evolution/rollback.ts CHANGED Viewed

@@ -13,8 +13,8 @@ import { parseArgs } from "node:util";
 import { updateContextAfterRollback } from "../memory/writer.js";
 import type { EvolutionAuditEntry } from "../types.js";
+import { replaceDescription } from "../utils/frontmatter.js";
 import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
-import { replaceDescription } from "./deploy-proposal.js";
 // ---------------------------------------------------------------------------
 // Types

package/cli/selftune/export.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  */
 import { mkdirSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { getDb } from "./localdb/db.js";
 import {
   getOrchestrateRuns,

package/cli/selftune/grading/grade-session.ts CHANGED Viewed

@@ -884,6 +884,14 @@ Options:
   }
   writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
+  // Persist to SQLite for upload staging (fail-open)
+  try {
+    const { writeGradingResultToDb } = await import("../localdb/direct-write.js");
+    writeGradingResultToDb(result);
+  } catch {
+    // fail-open: grading file is already written above
+  }
   printSummary(result);
   console.log(`\nWrote ${outputPath}`);
 }

package/cli/selftune/hooks/auto-activate.ts CHANGED Viewed

@@ -11,6 +11,7 @@
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { dirname } from "node:path";
 import {
   CLAUDE_SETTINGS_PATH,
   EVOLUTION_AUDIT_LOG,

package/cli/selftune/hooks/evolution-guard.ts CHANGED Viewed

@@ -16,8 +16,8 @@
 import { existsSync, readFileSync } from "node:fs";
 import { basename, dirname, join } from "node:path";
-import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
+import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
 import type { PreToolUsePayload } from "../types.js";
 import { readJsonl } from "../utils/jsonl.js";

package/cli/selftune/hooks/prompt-log.ts CHANGED Viewed

@@ -11,6 +11,7 @@
 import { readdirSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
 import { CANONICAL_LOG, QUERY_LOG, SKIP_PREFIXES } from "../constants.js";
 import {
   appendCanonicalRecord,