npm - selftune - Versions diffs - 0.2.0 → 0.2.1 - Mend

selftune 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122)

package/.claude/agents/diagnosis-analyst.md +20 -10
package/.claude/agents/evolution-reviewer.md +14 -1
package/.claude/agents/integration-guide.md +18 -6
package/.claude/agents/pattern-analyst.md +18 -5
package/CHANGELOG.md +12 -4
package/README.md +43 -35
package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
package/apps/local-dashboard/dist/favicon.png +0 -0
package/apps/local-dashboard/dist/index.html +17 -0
package/apps/local-dashboard/dist/logo.png +0 -0
package/apps/local-dashboard/dist/logo.svg +9 -0
package/cli/selftune/badge/badge-data.ts +1 -1
package/cli/selftune/badge/badge.ts +4 -8
package/cli/selftune/canonical-export.ts +183 -0
package/cli/selftune/constants.ts +28 -0
package/cli/selftune/contribute/contribute.ts +1 -1
package/cli/selftune/cron/setup.ts +17 -17
package/cli/selftune/dashboard-contract.ts +202 -0
package/cli/selftune/dashboard-server.ts +653 -186
package/cli/selftune/dashboard.ts +41 -176
package/cli/selftune/eval/baseline.ts +5 -4
package/cli/selftune/eval/composability-v2.ts +273 -0
package/cli/selftune/eval/hooks-to-evals.ts +34 -15
package/cli/selftune/eval/unit-test-cli.ts +1 -1
package/cli/selftune/evolution/evidence.ts +26 -0
package/cli/selftune/evolution/evolve-body.ts +105 -11
package/cli/selftune/evolution/evolve.ts +371 -25
package/cli/selftune/evolution/extract-patterns.ts +87 -29
package/cli/selftune/evolution/rollback.ts +2 -2
package/cli/selftune/grading/auto-grade.ts +200 -0
package/cli/selftune/grading/grade-session.ts +448 -97
package/cli/selftune/grading/results.ts +42 -0
package/cli/selftune/hooks/prompt-log.ts +172 -2
package/cli/selftune/hooks/session-stop.ts +123 -3
package/cli/selftune/hooks/skill-eval.ts +119 -3
package/cli/selftune/index.ts +395 -116
package/cli/selftune/ingestors/claude-replay.ts +140 -114
package/cli/selftune/ingestors/codex-rollout.ts +345 -46
package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
package/cli/selftune/init.ts +227 -14
package/cli/selftune/last.ts +14 -5
package/cli/selftune/localdb/db.ts +63 -0
package/cli/selftune/localdb/materialize.ts +428 -0
package/cli/selftune/localdb/queries.ts +376 -0
package/cli/selftune/localdb/schema.ts +204 -0
package/cli/selftune/monitoring/watch.ts +66 -15
package/cli/selftune/normalization.ts +682 -0
package/cli/selftune/observability.ts +19 -44
package/cli/selftune/orchestrate.ts +1073 -0
package/cli/selftune/quickstart.ts +203 -0
package/cli/selftune/repair/skill-usage.ts +576 -0
package/cli/selftune/schedule.ts +561 -0
package/cli/selftune/status.ts +48 -26
package/cli/selftune/sync.ts +627 -0
package/cli/selftune/types.ts +148 -0
package/cli/selftune/utils/canonical-log.ts +45 -0
package/cli/selftune/utils/hooks.ts +41 -0
package/cli/selftune/utils/html.ts +27 -0
package/cli/selftune/utils/llm-call.ts +78 -20
package/cli/selftune/utils/math.ts +10 -0
package/cli/selftune/utils/query-filter.ts +139 -0
package/cli/selftune/utils/skill-discovery.ts +340 -0
package/cli/selftune/utils/skill-log.ts +68 -0
package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
package/cli/selftune/utils/transcript.ts +272 -26
package/cli/selftune/workflows/discover.ts +254 -0
package/cli/selftune/workflows/skill-md-writer.ts +288 -0
package/cli/selftune/workflows/workflows.ts +188 -0
package/package.json +21 -8
package/packages/telemetry-contract/README.md +11 -0
package/packages/telemetry-contract/fixtures/golden.json +87 -0
package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
package/packages/telemetry-contract/index.ts +1 -0
package/packages/telemetry-contract/package.json +19 -0
package/packages/telemetry-contract/src/index.ts +2 -0
package/packages/telemetry-contract/src/types.ts +163 -0
package/packages/telemetry-contract/src/validators.ts +109 -0
package/skill/SKILL.md +84 -53
package/skill/Workflows/AutoActivation.md +17 -16
package/skill/Workflows/Badge.md +6 -0
package/skill/Workflows/Baseline.md +46 -23
package/skill/Workflows/Composability.md +12 -5
package/skill/Workflows/Contribute.md +17 -14
package/skill/Workflows/Cron.md +56 -79
package/skill/Workflows/Dashboard.md +45 -34
package/skill/Workflows/Doctor.md +30 -17
package/skill/Workflows/Evals.md +64 -40
package/skill/Workflows/EvolutionMemory.md +2 -0
package/skill/Workflows/Evolve.md +102 -47
package/skill/Workflows/EvolveBody.md +6 -6
package/skill/Workflows/Grade.md +36 -31
package/skill/Workflows/ImportSkillsBench.md +11 -5
package/skill/Workflows/Ingest.md +43 -36
package/skill/Workflows/Initialize.md +44 -30
package/skill/Workflows/Orchestrate.md +139 -0
package/skill/Workflows/Replay.md +39 -18
package/skill/Workflows/Rollback.md +3 -3
package/skill/Workflows/Schedule.md +61 -0
package/skill/Workflows/Sync.md +88 -0
package/skill/Workflows/UnitTest.md +34 -22
package/skill/Workflows/Watch.md +14 -4
package/skill/Workflows/Workflows.md +129 -0
package/skill/assets/activation-rules-default.json +26 -0
package/skill/assets/multi-skill-settings.json +63 -0
package/skill/assets/single-skill-settings.json +57 -0
package/skill/references/invocation-taxonomy.md +2 -2
package/skill/references/logs.md +164 -2
package/skill/references/setup-patterns.md +65 -0
package/skill/references/version-history.md +40 -0
package/skill/settings_snippet.json +1 -1
package/templates/multi-skill-settings.json +7 -7
package/templates/single-skill-settings.json +6 -6
package/dashboard/index.html +0 -1680

package/cli/selftune/eval/hooks-to-evals.ts CHANGED

@@ -27,7 +27,13 @@ import type {
 } from "../types.js";
 import { readJsonl } from "../utils/jsonl.js";
 import { detectAgent } from "../utils/llm-call.js";
+import {
+  filterActionableQueryRecords,
+  filterActionableSkillUsageRecords,
+} from "../utils/query-filter.js";
 import { seededShuffle } from "../utils/seeded-random.js";
+import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
+import { isHighConfidencePositiveSkillRecord } from "../utils/skill-usage-confidence.js";
 import { generateSyntheticEvals } from "./synthetic-evals.js";
 // ---------------------------------------------------------------------------
@@ -116,14 +122,16 @@ export function buildEvalSet(
   seed = 42,
   annotateTaxonomy = true,
 ): EvalEntry[] {
+  const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
+  const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
   const effectiveMaxPerSide = Number.isNaN(maxPerSide) || maxPerSide <= 0 ? 50 : maxPerSide;
   const effectiveSeed = Number.isNaN(seed) ? 42 : seed;
   // Build set of positive query texts (for exclusion from negatives)
   const positiveQueries = new Set<string>();
-  for (const r of skillRecords) {
+  for (const r of actionableSkillRecords) {
     if (!r || typeof r.skill_name !== "string" || typeof r.query !== "string") continue;
-    if (r.skill_name === skillName) {
+    if (isHighConfidencePositiveSkillRecord(r, skillName)) {
       const q = (r.query ?? "").trim();
       if (q && q !== "(query not found)") {
         positiveQueries.add(q);
@@ -134,9 +142,9 @@ export function buildEvalSet(
   // Build deduplicated positives with taxonomy classification
   const seen = new Set<string>();
   const positives: EvalEntry[] = [];
-  for (const r of skillRecords) {
+  for (const r of actionableSkillRecords) {
     if (!r || typeof r.skill_name !== "string" || typeof r.query !== "string") continue;
-    if (r.skill_name !== skillName) continue;
+    if (!isHighConfidencePositiveSkillRecord(r, skillName)) continue;
     const q = (r.query ?? "").trim();
     if (!q || q === "(query not found)" || seen.has(q)) continue;
     seen.add(q);
@@ -153,7 +161,7 @@ export function buildEvalSet(
   if (includeNegatives) {
     const negCandidates: string[] = [];
     const negSeen = new Set<string>();
-    for (const r of queryRecords) {
+    for (const r of actionableQueryRecords) {
       if (!r || typeof r.query !== "string") continue;
       const q = (r.query ?? "").trim();
       if (!q || positiveQueries.has(q) || negSeen.has(q)) continue;
@@ -198,13 +206,17 @@ export function listSkills(
   queryRecords: QueryLogRecord[],
   telemetryRecords: SessionTelemetryRecord[],
 ): void {
+  const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
+  const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
   const counts = new Map<string, number>();
-  for (const r of skillRecords) {
+  for (const r of actionableSkillRecords) {
     const name = r.skill_name ?? "unknown";
     counts.set(name, (counts.get(name) ?? 0) + 1);
   }
-  console.log(`Skill triggers in skill_usage_log (${skillRecords.length} total records):`);
+  console.log(
+    `Skill triggers in skill_usage_log (${actionableSkillRecords.length} actionable records):`,
+  );
   if (counts.size > 0) {
     const sorted = [...counts.entries()].sort((a, b) => b[1] - a[1]);
     for (const [name, count] of sorted) {
@@ -214,8 +226,8 @@ export function listSkills(
     console.log("  (none yet -- trigger some skills in Claude Code to populate)");
   }
-  console.log(`\nAll queries in all_queries_log: ${queryRecords.length}`);
-  if (queryRecords.length === 0) {
+  console.log(`\nActionable queries in all_queries_log: ${actionableQueryRecords.length}`);
+  if (actionableQueryRecords.length === 0) {
     console.log("  (none yet -- make sure prompt_log_hook is installed)");
   }
@@ -303,14 +315,16 @@ export function printEvalStats(
 ): void {
   const pos = evalSet.filter((e) => e.should_trigger);
   const neg = evalSet.filter((e) => !e.should_trigger);
-  const totalTriggers = skillRecords.filter((r) => r.skill_name === skillName).length;
+  const actionableSkillRecords = filterActionableSkillUsageRecords(skillRecords);
+  const actionableQueryRecords = filterActionableQueryRecords(queryRecords);
+  const totalTriggers = actionableSkillRecords.filter((r) => r.skill_name === skillName).length;
   console.log(`Wrote ${evalSet.length} eval entries to ${outputPath}`);
   console.log(
     `  Positives (should_trigger=true) : ${pos.length}  (from ${totalTriggers} logged triggers)`,
   );
   console.log(
-    `  Negatives (should_trigger=false): ${neg.length}  (from ${queryRecords.length} total logged queries)`,
+    `  Negatives (should_trigger=false): ${neg.length}  (from ${actionableQueryRecords.length} actionable logged queries)`,
   );
   if (annotateTaxonomy && pos.length > 0) {
@@ -336,7 +350,7 @@ export function printEvalStats(
   console.log();
   if (pos.length === 0) {
     console.log(`[WARN] No positives for skill '${skillName}'.`);
-    const names = [...new Set(skillRecords.map((r) => r.skill_name))].sort();
+    const names = [...new Set(actionableSkillRecords.map((r) => r.skill_name))].sort();
     if (names.length > 0) {
       console.log(`       Known skills: ${names.join(", ")}`);
     }
@@ -366,6 +380,7 @@ export async function cliMain(): Promise<void> {
     options: {
       skill: { type: "string" },
       output: { type: "string" },
+      out: { type: "string" },
       max: { type: "string", default: "50" },
       seed: { type: "string", default: "42" },
       "list-skills": { type: "boolean", default: false },
@@ -409,7 +424,7 @@ export async function cliMain(): Promise<void> {
       modelFlag: values.model,
     });
-    const outputPath = values.output ?? `${values.skill}_trigger_eval.json`;
+    const outputPath = values.output ?? values.out ?? `${values.skill}_trigger_eval.json`;
     writeFileSync(outputPath, JSON.stringify(evalSet, null, 2), "utf-8");
     const pos = evalSet.filter((e) => e.should_trigger);
@@ -440,7 +455,11 @@ export async function cliMain(): Promise<void> {
   }
   // --- Log-based mode (original behavior) ---
-  const skillRecords = readJsonl<SkillUsageRecord>(values["skill-log"] ?? SKILL_LOG);
+  const skillLogPath = values["skill-log"] ?? SKILL_LOG;
+  const skillRecords =
+    skillLogPath === SKILL_LOG
+      ? readEffectiveSkillUsageRecords()
+      : readJsonl<SkillUsageRecord>(skillLogPath);
   const queryRecords = readJsonl<QueryLogRecord>(values["query-log"] ?? QUERY_LOG);
   const telemetryRecords = readJsonl<SessionTelemetryRecord>(
     values["telemetry-log"] ?? TELEMETRY_LOG,
@@ -475,7 +494,7 @@ export async function cliMain(): Promise<void> {
     annotateTaxonomy,
   );
-  const outputPath = values.output ?? `${values.skill}_trigger_eval.json`;
+  const outputPath = values.output ?? values.out ?? `${values.skill}_trigger_eval.json`;
   writeFileSync(outputPath, JSON.stringify(evalSet, null, 2), "utf-8");
   printEvalStats(evalSet, values.skill, outputPath, skillRecords, queryRecords, annotateTaxonomy);
 }

package/cli/selftune/eval/unit-test-cli.ts CHANGED

@@ -2,7 +2,7 @@
  * CLI entrypoint for skill unit tests.
  *
  * Usage:
- *   selftune unit-test --skill <name> --tests <path> [--run-agent] [--generate]
+ *   selftune eval unit-test --skill <name> --tests <path> [--run-agent] [--generate]
  *
  *   --skill <name>    Skill name (required)
  *   --tests <path>    Path to unit test JSON file (default: ~/.selftune/unit-tests/<skill>.json)

package/cli/selftune/evolution/evidence.ts ADDED

@@ -0,0 +1,26 @@
+/**
+ * Evolution evidence trail: append and read proposal/eval artifacts that power
+ * explainable dashboard drill-downs.
+ */
+import { EVOLUTION_EVIDENCE_LOG } from "../constants.js";
+import type { EvolutionEvidenceEntry } from "../types.js";
+import { appendJsonl, readJsonl } from "../utils/jsonl.js";
+/** Append a structured evidence artifact to the evolution evidence log. */
+export function appendEvidenceEntry(
+  entry: EvolutionEvidenceEntry,
+  logPath: string = EVOLUTION_EVIDENCE_LOG,
+): void {
+  appendJsonl(logPath, entry);
+}
+/** Read all evidence entries, optionally filtered by exact skill name. */
+export function readEvidenceTrail(
+  skillName?: string,
+  logPath: string = EVOLUTION_EVIDENCE_LOG,
+): EvolutionEvidenceEntry[] {
+  const entries = readJsonl<EvolutionEvidenceEntry>(logPath);
+  if (!skillName) return entries;
+  return entries.filter((entry) => entry.skill_name === skillName);
+}

package/cli/selftune/evolution/evolve-body.ts CHANGED

@@ -9,13 +9,15 @@
 import { existsSync, readFileSync } from "node:fs";
 import { parseArgs } from "node:util";
-import { QUERY_LOG, SKILL_LOG } from "../constants.js";
+import { QUERY_LOG } from "../constants.js";
 import { buildEvalSet } from "../eval/hooks-to-evals.js";
+import { readGradingResultsForSkill } from "../grading/results.js";
 import type {
   BodyEvolutionProposal,
   BodyValidationResult,
   EvalEntry,
   EvolutionAuditEntry,
+  EvolutionEvidenceEntry,
   EvolutionTarget,
   FailurePattern,
   GradingResult,
@@ -23,8 +25,10 @@ import type {
   SkillUsageRecord,
 } from "../types.js";
 import { readJsonl } from "../utils/jsonl.js";
+import { readEffectiveSkillUsageRecords } from "../utils/skill-log.js";
 import { appendAuditEntry } from "./audit.js";
 import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
+import { appendEvidenceEntry } from "./evidence.js";
 import { extractFailurePatterns } from "./extract-patterns.js";
 import { generateBodyProposal } from "./propose-body.js";
 import { generateRoutingProposal } from "./propose-routing.js";
@@ -79,7 +83,9 @@ export interface EvolveBodyDeps {
   validateRoutingProposal?: typeof import("./validate-routing.js").validateRoutingProposal;
   refineBodyProposal?: typeof import("./refine-body.js").refineBodyProposal;
   appendAuditEntry?: typeof import("./audit.js").appendAuditEntry;
+  appendEvidenceEntry?: typeof import("./evidence.js").appendEvidenceEntry;
   buildEvalSet?: typeof import("../eval/hooks-to-evals.js").buildEvalSet;
+  readEffectiveSkillUsageRecords?: typeof import("../utils/skill-log.js").readEffectiveSkillUsageRecords;
   readFileSync?: typeof readFileSync;
   writeFileSync?: (path: string, data: string, encoding: string) => void;
 }
@@ -134,7 +140,10 @@ export async function evolveBody(
   const _validateRoutingProposal = _deps.validateRoutingProposal ?? validateRoutingProposal;
   const _refineBodyProposal = _deps.refineBodyProposal ?? refineBodyProposal;
   const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry;
+  const _appendEvidenceEntry = _deps.appendEvidenceEntry ?? appendEvidenceEntry;
   const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;
+  const _readEffectiveSkillUsageRecords =
+    _deps.readEffectiveSkillUsageRecords ?? readEffectiveSkillUsageRecords;
   const _readFileSync = _deps.readFileSync ?? readFileSync;
   const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
@@ -154,6 +163,14 @@ export async function evolveBody(
     }
   }
+  function recordEvidence(entry: EvolutionEvidenceEntry): void {
+    try {
+      _appendEvidenceEntry(entry);
+    } catch {
+      // Fail-open
+    }
+  }
   try {
     // Step 1: Read current SKILL.md
     if (!existsSync(skillPath)) {
@@ -168,6 +185,8 @@ export async function evolveBody(
     const currentContent = _readFileSync(skillPath, "utf-8");
     const parsed = parseSkillSections(currentContent);
+    const createdAuditDetails = (): string => `original_description:${currentContent}`;
+    const skillUsage = _readEffectiveSkillUsageRecords();
     // Step 2: Load eval set
     let evalSet: EvalEntry[];
@@ -179,13 +198,11 @@ export async function evolveBody(
       }
       evalSet = parsed as EvalEntry[];
     } else {
-      const skillRecords = readJsonl<SkillUsageRecord>(SKILL_LOG);
       const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
-      evalSet = _buildEvalSet(skillRecords, queryRecords, skillName);
+      evalSet = _buildEvalSet(skillUsage, queryRecords, skillName);
     }
     // Step 3: Load skill usage and extract failure patterns
-    const skillUsage = readJsonl<SkillUsageRecord>(SKILL_LOG);
     const failurePatterns = _extractFailurePatterns(
       evalSet,
       skillUsage,
@@ -252,11 +269,21 @@ export async function evolveBody(
       lastProposal = proposal;
-      recordAudit(
-        proposal.proposal_id,
-        "created",
-        `${target} proposal created for ${skillName} (iteration ${iteration + 1})`,
-      );
+      recordAudit(proposal.proposal_id, "created", createdAuditDetails());
+      recordEvidence({
+        timestamp: new Date().toISOString(),
+        proposal_id: proposal.proposal_id,
+        skill_name: skillName,
+        skill_path: skillPath,
+        target,
+        stage: "created",
+        rationale: proposal.rationale,
+        confidence: proposal.confidence,
+        details: `${target} proposal created for ${skillName} (iteration ${iteration + 1})`,
+        original_text: proposal.original_body,
+        proposed_text: proposal.proposed_body,
+        eval_set: evalSet,
+      });
       // Check confidence threshold
       if (proposal.confidence < confidenceThreshold) {
@@ -265,6 +292,17 @@ export async function evolveBody(
           "rejected",
           `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
         );
+        recordEvidence({
+          timestamp: new Date().toISOString(),
+          proposal_id: proposal.proposal_id,
+          skill_name: skillName,
+          skill_path: skillPath,
+          target,
+          stage: "rejected",
+          rationale: proposal.rationale,
+          confidence: proposal.confidence,
+          details: `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
+        });
         if (iteration === maxIterations - 1) {
           return {
@@ -303,6 +341,24 @@ export async function evolveBody(
         "validated",
         `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
       );
+      recordEvidence({
+        timestamp: new Date().toISOString(),
+        proposal_id: proposal.proposal_id,
+        skill_name: skillName,
+        skill_path: skillPath,
+        target,
+        stage: "validated",
+        rationale: proposal.rationale,
+        confidence: proposal.confidence,
+        details: `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
+        validation: {
+          improved: validation.improved,
+          gates_passed: validation.gates_passed,
+          gates_total: validation.gates_total,
+          gate_results: validation.gate_results,
+          regressions: validation.regressions,
+        },
+      });
       if (validation.improved) {
         break;
@@ -313,6 +369,24 @@ export async function evolveBody(
         "rejected",
         `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
       );
+      recordEvidence({
+        timestamp: new Date().toISOString(),
+        proposal_id: proposal.proposal_id,
+        skill_name: skillName,
+        skill_path: skillPath,
+        target,
+        stage: "rejected",
+        rationale: proposal.rationale,
+        confidence: proposal.confidence,
+        details: `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
+        validation: {
+          improved: validation.improved,
+          gates_passed: validation.gates_passed,
+          gates_total: validation.gates_total,
+          gate_results: validation.gate_results,
+          regressions: validation.regressions,
+        },
+      });
       if (iteration === maxIterations - 1) {
         return {
@@ -355,6 +429,24 @@ export async function evolveBody(
         "deployed",
         `Deployed ${target} proposal for ${skillName}`,
       );
+      recordEvidence({
+        timestamp: new Date().toISOString(),
+        proposal_id: lastProposal.proposal_id,
+        skill_name: skillName,
+        skill_path: skillPath,
+        target,
+        stage: "deployed",
+        rationale: lastProposal.rationale,
+        confidence: lastProposal.confidence,
+        details: `Deployed ${target} proposal for ${skillName}`,
+        validation: {
+          improved: lastValidation.improved,
+          gates_passed: lastValidation.gates_passed,
+          gates_total: lastValidation.gates_total,
+          gate_results: lastValidation.gate_results,
+          regressions: lastValidation.regressions,
+        },
+      });
       return {
         proposal: lastProposal,
@@ -411,10 +503,10 @@ export async function cliMain(): Promise<void> {
   });
   if (values.help) {
-    console.log(`selftune evolve-body — Evolve a skill body or routing table
+    console.log(`selftune evolve body — Evolve a skill body or routing table
 Usage:
-  selftune evolve-body --skill <name> --skill-path <path> [options]
+  selftune evolve body --skill <name> --skill-path <path> [options]
 Options:
   --skill             Skill name (required)
@@ -462,6 +554,7 @@ Options:
     const paths = values["few-shot"].split(",").map((p) => p.trim());
     fewShotExamples = paths.filter((p) => existsSync(p)).map((p) => readFileSync(p, "utf-8"));
   }
+  const gradingResults = readGradingResultsForSkill(values.skill);
   const result = await evolveBody({
     skillName: values.skill,
@@ -477,6 +570,7 @@ Options:
     confidenceThreshold: Number.parseFloat(values.confidence ?? "0.6"),
     taskDescription: values["task-description"],
     fewShotExamples,
+    gradingResults,
     validationModel: values["validation-model"],
   });