npm - selftune - Versions diffs - 0.2.19 → 0.2.20 - Mend

selftune 0.2.19 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/apps/local-dashboard/dist/assets/{index-DnhnXQm6.js → index-D8O-RG1I.js} +2 -2
package/apps/local-dashboard/dist/index.html +1 -1
package/cli/selftune/dashboard-contract.ts +4 -0
package/cli/selftune/eval/family-overlap.ts +320 -1
package/cli/selftune/evolution/evidence.ts +5 -0
package/cli/selftune/evolution/evolve-body.ts +62 -2
package/cli/selftune/evolution/evolve.ts +58 -1
package/cli/selftune/evolution/validate-body.ts +10 -0
package/cli/selftune/evolution/validate-host-replay.ts +236 -0
package/cli/selftune/evolution/validate-proposal.ts +10 -0
package/cli/selftune/evolution/validate-routing.ts +112 -5
package/cli/selftune/localdb/direct-write.ts +8 -3
package/cli/selftune/localdb/materialize.ts +7 -2
package/cli/selftune/localdb/queries.ts +11 -1
package/cli/selftune/localdb/schema.ts +10 -1
package/cli/selftune/routes/skill-report.ts +6 -1
package/cli/selftune/types.ts +54 -0
package/cli/selftune/utils/text-similarity.ts +73 -0
package/package.json +1 -1
package/packages/ui/src/components/EvidenceViewer.tsx +85 -2
package/packages/ui/src/components/EvolutionTimeline.tsx +23 -1
package/packages/ui/src/types.ts +4 -0
package/skill/Workflows/Composability.md +15 -1
package/skill/Workflows/Evolve.md +23 -0

package/cli/selftune/evolution/evolve.ts CHANGED Viewed

@@ -43,7 +43,7 @@ import { createEvolveTUI } from "../utils/tui.js";
 import { appendAuditEntry } from "./audit.js";
 import { checkConstitution } from "./constitutional.js";
 import { scoreDescription } from "./description-quality.js";
-import { appendEvidenceEntry } from "./evidence.js";
+import { appendEvidenceEntry, buildValidationEvidenceRef } from "./evidence.js";
 import { extractFailurePatterns } from "./extract-patterns.js";
 import {
   computeInvocationScores,
@@ -139,6 +139,10 @@ function createAuditEntry(
   evalSnapshot?: EvalPassRate,
   skillName?: string,
   iterationsUsed?: number,
+  provenance?: Pick<
+    EvolutionAuditEntry,
+    "validation_mode" | "validation_agent" | "validation_fixture_id" | "validation_evidence_ref"
+  >,
 ): EvolutionAuditEntry {
   return {
     timestamp: new Date().toISOString(),
@@ -148,6 +152,14 @@ function createAuditEntry(
     ...(skillName ? { skill_name: skillName } : {}),
     ...(evalSnapshot ? { eval_snapshot: evalSnapshot } : {}),
     ...(iterationsUsed != null ? { iterations_used: iterationsUsed } : {}),
+    ...(provenance?.validation_mode ? { validation_mode: provenance.validation_mode } : {}),
+    ...(provenance?.validation_agent ? { validation_agent: provenance.validation_agent } : {}),
+    ...(provenance?.validation_fixture_id
+      ? { validation_fixture_id: provenance.validation_fixture_id }
+      : {}),
+    ...(provenance?.validation_evidence_ref
+      ? { validation_evidence_ref: provenance.validation_evidence_ref }
+      : {}),
   };
 }
@@ -289,6 +301,10 @@ export async function evolve(
     details: string,
     evalSnapshot?: EvalPassRate,
     iterationsUsed?: number,
+    provenance?: Pick<
+      EvolutionAuditEntry,
+      "validation_mode" | "validation_agent" | "validation_fixture_id" | "validation_evidence_ref"
+    >,
   ): void {
     const entry = createAuditEntry(
       proposalId,
@@ -297,6 +313,7 @@ export async function evolve(
       evalSnapshot,
       skillName,
       iterationsUsed,
+      provenance,
     );
     auditEntries.push(entry);
     try {
@@ -637,10 +654,18 @@ export async function evolve(
           options.validationModel,
         );
         llmCallCount += countValidationLlmCalls(evalSet.length);
+        const evidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "validated");
         recordAudit(
           proposal.proposal_id,
           "validated",
           `Pareto validation: improved=${validation.improved}`,
+          undefined,
+          undefined,
+          {
+            validation_mode: validation.validation_mode,
+            validation_agent: validation.validation_agent,
+            validation_evidence_ref: evidenceRef,
+          },
         );
         recordEvidence({
           timestamp: new Date().toISOString(),
@@ -660,6 +685,9 @@ export async function evolve(
             regressions: validation.regressions,
             new_passes: validation.new_passes,
             per_entry_results: validation.per_entry_results,
+            validation_mode: validation.validation_mode,
+            validation_agent: validation.validation_agent,
+            validation_evidence_ref: evidenceRef,
           },
         });
@@ -866,11 +894,18 @@ export async function evolve(
           failed: evalSet.length - Math.round(validation.after_pass_rate * evalSet.length),
           pass_rate: validation.after_pass_rate,
         };
+        const validatedEvidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "validated");
         recordAudit(
           proposal.proposal_id,
           "validated",
           `Validation complete: improved=${validation.improved}`,
           evalSnapshot,
+          undefined,
+          {
+            validation_mode: validation.validation_mode,
+            validation_agent: validation.validation_agent,
+            validation_evidence_ref: validatedEvidenceRef,
+          },
         );
         recordEvidence({
           timestamp: new Date().toISOString(),
@@ -890,6 +925,9 @@ export async function evolve(
             regressions: validation.regressions,
             new_passes: validation.new_passes,
             per_entry_results: validation.per_entry_results,
+            validation_mode: validation.validation_mode,
+            validation_agent: validation.validation_agent,
+            validation_evidence_ref: validatedEvidenceRef,
           },
         });
@@ -906,10 +944,18 @@ export async function evolve(
         if (!validation.improved) {
           feedbackReason = `Validation failed: net_change=${validation.net_change.toFixed(3)}, improved=false`;
+          const rejectedEvidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "rejected");
           recordAudit(
             proposal.proposal_id,
             "rejected",
             `Validation failed: net_change=${validation.net_change.toFixed(3)} (stopping: ${stopping.reason})`,
+            undefined,
+            undefined,
+            {
+              validation_mode: validation.validation_mode,
+              validation_agent: validation.validation_agent,
+              validation_evidence_ref: rejectedEvidenceRef,
+            },
           );
           recordEvidence({
             timestamp: new Date().toISOString(),
@@ -929,6 +975,9 @@ export async function evolve(
               regressions: validation.regressions,
               new_passes: validation.new_passes,
               per_entry_results: validation.per_entry_results,
+              validation_mode: validation.validation_mode,
+              validation_agent: validation.validation_agent,
+              validation_evidence_ref: rejectedEvidenceRef,
             },
           });
@@ -1138,6 +1187,11 @@ export async function evolve(
           pass_rate: lastValidation.after_pass_rate,
         },
         iterationsCompleted,
+        {
+          validation_mode: lastValidation.validation_mode,
+          validation_agent: lastValidation.validation_agent,
+          validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
+        },
       );
       recordEvidence({
         timestamp: new Date().toISOString(),
@@ -1157,6 +1211,9 @@ export async function evolve(
           regressions: lastValidation.regressions,
           new_passes: lastValidation.new_passes,
           per_entry_results: lastValidation.per_entry_results,
+          validation_mode: lastValidation.validation_mode,
+          validation_agent: lastValidation.validation_agent,
+          validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
         },
       });
     }

package/cli/selftune/evolution/validate-body.ts CHANGED Viewed

@@ -209,6 +209,8 @@ export async function validateBodyProposal(
       gate_results: gateResults,
       improved: false,
       regressions: [],
+      validation_mode: "structural_guard",
+      validation_agent: agent,
     };
   }
@@ -250,5 +252,13 @@ export async function validateBodyProposal(
     gate_results: gateResults,
     improved: gatesPassed === 3,
     regressions: accuracy.regressions,
+    validation_mode: "llm_judge",
+    validation_agent: agent,
+    ...(evalSet.length > 0
+      ? {
+          before_pass_rate: accuracy.before_pass_rate,
+          after_pass_rate: accuracy.after_pass_rate,
+        }
+      : {}),
   };
 }

package/cli/selftune/evolution/validate-host-replay.ts ADDED Viewed

@@ -0,0 +1,236 @@
+import { existsSync, readFileSync, readdirSync, realpathSync, statSync } from "node:fs";
+import { basename, dirname, join } from "node:path";
+import type { EvalEntry, RoutingReplayEntryResult, RoutingReplayFixture } from "../types.js";
+import { parseFrontmatter } from "../utils/frontmatter.js";
+import { containsWholeSkillMention } from "../utils/skill-discovery.js";
+import { findGitRepositoryRoot } from "../utils/skill-discovery.js";
+import {
+  extractWhenToUseLines,
+  jaccardSimilarity,
+  tokenizeText,
+} from "../utils/text-similarity.js";
+/**
+ * Tokenized view of a single skill used when scoring a query during replay:
+ * the skill's name plus the token sets built from its description and from
+ * its "when to use" lines.
+ */
+interface ReplaySkillSurface {
+  skillName: string;
+  descriptionTokens: Set<string>;
+  whenToUseTokens: Set<string>;
+}
+/**
+ * Minimum score needed before replay treats routing text or skill-surface overlap
+ * as a real match. Tuned to suppress weak false positives without killing recall
+ * for short routing phrases and sparse skill surfaces.
+ *
+ * evaluateReplayTrigger compares this against the larger of the trigger-phrase
+ * score and the target skill-surface score; a value strictly below the
+ * threshold is reported as "not triggered".
+ */
+const HOST_REPLAY_MATCH_THRESHOLD = 0.18;
+/**
+ * Resolves `path` with `realpathSync`.
+ *
+ * @param path - Filesystem path to resolve.
+ * @returns The resolved path, or `path` unchanged when `realpathSync` throws.
+ */
+function resolveReplayPath(path: string): string {
+  try {
+    return realpathSync(path);
+  } catch {
+    // Best effort: fall back to the caller's path instead of failing.
+    return path;
+  }
+}
+/**
+ * Lists the `SKILL.md` files of sibling skills that sit next to the target
+ * skill's directory.
+ *
+ * The target path is resolved first; its directory is the target skill
+ * directory and that directory's parent is scanned as the registry. Entries
+ * are skipped when they have the same name as the target directory, when they
+ * are not directories (or cannot be stat'ed), or when they contain no
+ * `SKILL.md`.
+ *
+ * @param targetSkillPath - Path to the target skill's file.
+ * @returns Resolved `SKILL.md` paths of the siblings, sorted with
+ *   `localeCompare`. If reading the registry directory throws, whatever was
+ *   collected up to that point is returned (empty when the listing itself fails).
+ */
+function listCompetingSkillPaths(targetSkillPath: string): string[] {
+  const normalizedTargetPath = resolveReplayPath(targetSkillPath);
+  const targetSkillDir = dirname(normalizedTargetPath);
+  const registryDir = dirname(targetSkillDir);
+  const targetDirName = basename(targetSkillDir);
+  const competingPaths: string[] = [];
+  try {
+    for (const entry of readdirSync(registryDir)) {
+      // The target skill does not compete with itself.
+      if (entry === targetDirName) continue;
+      const candidateDir = join(registryDir, entry);
+      try {
+        if (!statSync(candidateDir).isDirectory()) continue;
+      } catch {
+        // Entry could not be stat'ed; skip it and keep scanning.
+        continue;
+      }
+      const candidateSkillPath = join(candidateDir, "SKILL.md");
+      if (!existsSync(candidateSkillPath)) continue;
+      competingPaths.push(resolveReplayPath(candidateSkillPath));
+    }
+  } catch {
+    // Ignore unreadable registries and treat the fixture as target-only.
+  }
+  // Sorting makes the result independent of directory listing order.
+  return competingPaths.sort((a, b) => a.localeCompare(b));
+}
+/**
+ * Builds a routing replay fixture for one skill.
+ *
+ * @param options.skillName - Stored as `target_skill_name` and used in the
+ *   default fixture id.
+ * @param options.skillPath - Path to the target skill file; resolved via
+ *   `resolveReplayPath` before use.
+ * @param options.platform - Defaults to `"claude_code"` when omitted.
+ * @param options.fixtureId - Defaults to `auto-<platform>-<skillName>`.
+ * @param options.workspaceRoot - When omitted, `findGitRepositoryRoot` is
+ *   called on the directory two levels above the resolved skill path.
+ * @returns The fixture, including the sibling `SKILL.md` paths found by
+ *   `listCompetingSkillPaths`. `workspace_root` is only present when a truthy
+ *   root was supplied or found.
+ */
+export function buildRoutingReplayFixture(options: {
+  skillName: string;
+  skillPath: string;
+  platform?: RoutingReplayFixture["platform"];
+  fixtureId?: string;
+  workspaceRoot?: string;
+}): RoutingReplayFixture {
+  const targetSkillPath = resolveReplayPath(options.skillPath);
+  // NOTE(review): presumably findGitRepositoryRoot returns a falsy value when
+  // no repository is found - confirm in utils/skill-discovery.
+  const workspaceRoot =
+    options.workspaceRoot ?? findGitRepositoryRoot(dirname(dirname(targetSkillPath)));
+  const platform = options.platform ?? "claude_code";
+  return {
+    fixture_id: options.fixtureId ?? `auto-${platform}-${options.skillName}`,
+    platform,
+    target_skill_name: options.skillName,
+    target_skill_path: targetSkillPath,
+    competing_skill_paths: listCompetingSkillPaths(targetSkillPath),
+    ...(workspaceRoot ? { workspace_root: workspaceRoot } : {}),
+  };
+}
+/**
+ * Reads a skill file and turns it into a `ReplaySkillSurface`.
+ *
+ * The skill name comes from the trimmed frontmatter `name`, falling back to
+ * the name of the file's parent directory (or `"unknown-skill"` when that is
+ * empty). Description tokens come from the frontmatter description, and
+ * when-to-use tokens from the lines returned by `extractWhenToUseLines` for
+ * the body, joined with spaces.
+ *
+ * @param skillPath - Path of the skill file to read as UTF-8.
+ * @returns The parsed surface. If anything in the read/parse path throws, a
+ *   surface with the fallback name and two empty token sets is returned.
+ */
+function loadReplaySkillSurface(skillPath: string): ReplaySkillSurface {
+  const fallbackName = basename(dirname(skillPath)) || "unknown-skill";
+  try {
+    const raw = readFileSync(skillPath, "utf8");
+    const parsed = parseFrontmatter(raw);
+    return {
+      skillName: parsed.name.trim() || fallbackName,
+      descriptionTokens: tokenizeText(parsed.description),
+      whenToUseTokens: tokenizeText(extractWhenToUseLines(parsed.body).join(" ")),
+    };
+  } catch {
+    // Unreadable or unparsable skill: keep it in the fixture, but with empty
+    // token sets.
+    return {
+      skillName: fallbackName,
+      descriptionTokens: new Set<string>(),
+      whenToUseTokens: new Set<string>(),
+    };
+  }
+}
+/**
+ * Extracts candidate trigger phrases from routing text laid out as
+ * pipe-delimited rows.
+ *
+ * Lines are trimmed and blank lines dropped. The first two remaining lines
+ * are skipped (presumably a markdown table header and separator row - confirm
+ * against the routing format). For every later row that both starts and ends
+ * with `|`, the first cell is split on `,`, `/`, or the word " or "
+ * (case-insensitive); each part is trimmed and has one leading and one
+ * trailing quote or backtick removed.
+ *
+ * @param routing - Routing text to scan.
+ * @returns Phrases of at least 3 characters, in row order; an empty array
+ *   when fewer than three non-blank lines are present.
+ */
+function extractRoutingTriggerPhrases(routing: string): string[] {
+  const lines = routing
+    .trim()
+    .split("\n")
+    .map((line) => line.trim())
+    .filter(Boolean);
+  if (lines.length < 3) return [];
+  const phrases: string[] = [];
+  for (const row of lines.slice(2)) {
+    if (!row.startsWith("|") || !row.endsWith("|")) continue;
+    // Splitting "| a | b |" on "|" yields an empty string at index 0, so
+    // index 1 is the first visible cell.
+    const cells = row.split("|").map((cell) => cell.trim());
+    const triggerCell = cells[1];
+    if (!triggerCell) continue;
+    for (const part of triggerCell.split(/,|\/| or /i)) {
+      const phrase = part.trim().replace(/^["'`]|["'`]$/g, "");
+      if (phrase.length >= 3) phrases.push(phrase);
+    }
+  }
+  return phrases;
+}
+/**
+ * Scores a query against a list of trigger phrases and returns the best score.
+ *
+ * A phrase that occurs inside the lowercased query scores 1. This is a plain
+ * substring check, so a phrase can match in the middle of a longer word. Any
+ * other phrase scores the Jaccard similarity between the query tokens and the
+ * phrase tokens.
+ *
+ * @param query - Query text to score.
+ * @param triggerPhrases - Candidate phrases to compare against.
+ * @returns The highest score over all phrases; 0 when the list is empty.
+ */
+function scoreQueryAgainstTriggerPhrases(query: string, triggerPhrases: string[]): number {
+  const normalizedQuery = query.toLowerCase();
+  const queryTokens = tokenizeText(query);
+  let best = 0;
+  for (const phrase of triggerPhrases) {
+    const normalizedPhrase = phrase.toLowerCase();
+    if (normalizedQuery.includes(normalizedPhrase)) {
+      best = Math.max(best, 1);
+      continue;
+    }
+    best = Math.max(best, jaccardSimilarity(queryTokens, tokenizeText(phrase)));
+  }
+  return best;
+}
+/**
+ * Scores a query against a skill surface.
+ *
+ * @param query - Query text; tokenized with `tokenizeText`.
+ * @param surface - Skill surface to compare against.
+ * @returns The larger of the Jaccard similarities between the query tokens
+ *   and the surface's description tokens and when-to-use tokens.
+ */
+function scoreQueryAgainstSkillSurface(query: string, surface: ReplaySkillSurface): number {
+  const queryTokens = tokenizeText(query);
+  return Math.max(
+    jaccardSimilarity(queryTokens, surface.descriptionTokens),
+    jaccardSimilarity(queryTokens, surface.whenToUseTokens),
+  );
+}
+/**
+ * Decides whether a query would trigger the target skill under the given
+ * routing text, and explains the decision.
+ *
+ * Checks run in this order and the first one that applies wins:
+ *  1. The trimmed query mentions the target skill by name: triggered.
+ *  2. The query mentions any competing skill by name: not triggered.
+ *  3. The target score (the larger of the trigger-phrase score and the target
+ *     surface score) is below HOST_REPLAY_MATCH_THRESHOLD: not triggered.
+ *  4. The best competing surface score is greater than or equal to the target
+ *     score: not triggered. A tie therefore goes to the competitor.
+ *  5. Otherwise: triggered, with evidence naming whichever of the routing
+ *     trigger language or the skill surface contributed the score.
+ *
+ * @param query - Query text to evaluate.
+ * @param routing - Routing text from which trigger phrases are extracted.
+ * @param targetSurface - Surface of the skill under test.
+ * @param competingSurfaces - Surfaces of the sibling skills.
+ * @returns `triggered` plus a human-readable `evidence` string.
+ */
+function evaluateReplayTrigger(
+  query: string,
+  routing: string,
+  targetSurface: ReplaySkillSurface,
+  competingSurfaces: ReplaySkillSurface[],
+): { triggered: boolean; evidence: string } {
+  const normalizedQuery = query.trim();
+  // NOTE(review): presumably containsWholeSkillMention matches the skill name
+  // as a whole word - confirm in utils/skill-discovery.
+  if (containsWholeSkillMention(normalizedQuery, targetSurface.skillName)) {
+    return {
+      triggered: true,
+      evidence: `explicit target mention: ${targetSurface.skillName}`,
+    };
+  }
+  for (const competingSurface of competingSurfaces) {
+    if (containsWholeSkillMention(normalizedQuery, competingSurface.skillName)) {
+      return {
+        triggered: false,
+        evidence: `explicit competing skill mention: ${competingSurface.skillName}`,
+      };
+    }
+  }
+  const triggerPhrases = extractRoutingTriggerPhrases(routing);
+  const triggerScore = scoreQueryAgainstTriggerPhrases(normalizedQuery, triggerPhrases);
+  const targetSurfaceScore = scoreQueryAgainstSkillSurface(normalizedQuery, targetSurface);
+  const targetScore = Math.max(triggerScore, targetSurfaceScore);
+  // Competitors are scored on their skill surface only; the routing text is
+  // not applied to them. `bestCompetitor` is undefined when there are none.
+  const bestCompetitor = competingSurfaces
+    .map((surface) => ({
+      skillName: surface.skillName,
+      score: scoreQueryAgainstSkillSurface(normalizedQuery, surface),
+    }))
+    .sort((a, b) => b.score - a.score)[0];
+  if (targetScore < HOST_REPLAY_MATCH_THRESHOLD) {
+    return {
+      triggered: false,
+      evidence: "target routing and skill surface did not clear replay threshold",
+    };
+  }
+  if (bestCompetitor && bestCompetitor.score >= targetScore) {
+    return {
+      triggered: false,
+      evidence: `competing skill surface scored higher: ${bestCompetitor.skillName}`,
+    };
+  }
+  if (triggerScore >= targetSurfaceScore) {
+    return {
+      triggered: true,
+      evidence:
+        // A score of 1 is what scoreQueryAgainstTriggerPhrases assigns when
+        // the query contains a trigger phrase as a substring.
+        triggerScore === 1
+          ? "query matched a routing trigger phrase exactly"
+          : "query aligned with routing trigger language",
+    };
+  }
+  return {
+    triggered: true,
+    evidence: "query aligned with target skill surface in replay fixture",
+  };
+}
+/**
+ * Replays every eval entry against a fixture and reports, per entry, whether
+ * the target skill was triggered and whether that matches the expectation.
+ *
+ * The target and competing skill files named by the fixture are loaded once,
+ * then each entry's query is evaluated with `evaluateReplayTrigger`.
+ *
+ * @param options.routing - Routing text to evaluate.
+ * @param options.evalSet - Entries providing `query` and `should_trigger`.
+ * @param options.fixture - Fixture naming the target and competing skill paths.
+ * @returns One result per eval entry, in input order; `passed` is true when
+ *   `triggered` equals the entry's `should_trigger`.
+ */
+export function runHostReplayFixture(options: {
+  routing: string;
+  evalSet: EvalEntry[];
+  fixture: RoutingReplayFixture;
+}): RoutingReplayEntryResult[] {
+  const targetSurface = loadReplaySkillSurface(options.fixture.target_skill_path);
+  const competingSurfaces = options.fixture.competing_skill_paths.map(loadReplaySkillSurface);
+  return options.evalSet.map((entry) => {
+    const evaluated = evaluateReplayTrigger(
+      entry.query,
+      options.routing,
+      targetSurface,
+      competingSurfaces,
+    );
+    return {
+      query: entry.query,
+      should_trigger: entry.should_trigger,
+      triggered: evaluated.triggered,
+      passed: evaluated.triggered === entry.should_trigger,
+      evidence: evaluated.evidence,
+    };
+  });
+}

package/cli/selftune/evolution/validate-proposal.ts CHANGED Viewed

@@ -40,6 +40,8 @@ export interface ValidationResult {
   net_change: number; // after - before pass rate
   by_invocation_type?: InvocationTypeScores;
   per_entry_results?: Array<{ entry: EvalEntry; before_pass: boolean; after_pass: boolean }>;
+  validation_mode?: "llm_judge";
+  validation_agent?: string;
 }
 // ---------------------------------------------------------------------------
@@ -63,6 +65,8 @@ export async function validateProposalSequential(
       regressions: [],
       new_passes: [],
       net_change: 0,
+      validation_mode: "llm_judge",
+      validation_agent: agent,
     };
   }
@@ -174,6 +178,8 @@ export async function validateProposalSequential(
     net_change: netChange,
     by_invocation_type: invocationScores,
     per_entry_results: perEntryResults,
+    validation_mode: "llm_judge",
+    validation_agent: agent,
   };
 }
@@ -220,6 +226,8 @@ export async function validateProposalBatched(
       regressions: [],
       new_passes: [],
       net_change: 0,
+      validation_mode: "llm_judge",
+      validation_agent: agent,
     };
   }
@@ -342,6 +350,8 @@ export async function validateProposalBatched(
     net_change: netChange,
     by_invocation_type: invocationScores,
     per_entry_results: perEntryResults,
+    validation_mode: "llm_judge",
+    validation_agent: agent,
   };
 }

package/cli/selftune/evolution/validate-routing.ts CHANGED Viewed

@@ -5,9 +5,43 @@
  * and running trigger accuracy checks against an eval set.
  */
-import type { BodyEvolutionProposal, BodyValidationResult, EvalEntry } from "../types.js";
+import type {
+  BodyEvolutionProposal,
+  BodyValidationResult,
+  EvalEntry,
+  RoutingReplayEntryResult,
+  RoutingReplayFixture,
+  ValidationMode,
+} from "../types.js";
 import { callLlm } from "../utils/llm-call.js";
 import { buildTriggerCheckPrompt, parseTriggerResponse } from "../utils/trigger-check.js";
+import { runHostReplayFixture } from "./validate-host-replay.js";
+/**
+ * Arguments handed to a custom replay runner: the routing text to evaluate,
+ * the eval entries, the agent identifier, and the replay fixture.
+ */
+export interface RoutingReplayRunnerInput {
+  routing: string;
+  evalSet: EvalEntry[];
+  agent: string;
+  fixture: RoutingReplayFixture;
+}
+/**
+ * Asynchronous replay runner. validateRoutingTriggerAccuracy calls it once
+ * with the original routing and once with the proposed routing, and counts
+ * the entries whose `passed` flag is set in each returned list.
+ */
+export type RoutingReplayRunner = (
+  input: RoutingReplayRunnerInput,
+) => Promise<RoutingReplayEntryResult[]>;
+/**
+ * Optional replay configuration for routing validation.
+ *
+ * In validateRoutingTriggerAccuracy: when both fields are set the custom
+ * `replayRunner` is used; when only `replayFixture` is set the built-in
+ * `runHostReplayFixture` is used; when `replayFixture` is absent, replay is
+ * skipped and the LLM-based check runs instead.
+ */
+export interface RoutingValidationOptions {
+  replayFixture?: RoutingReplayFixture;
+  replayRunner?: RoutingReplayRunner;
+}
+/**
+ * Outcome of a routing trigger-accuracy check: pass rates for the original
+ * and proposed routing, whether the proposal improved on the original, and
+ * provenance describing how the check was performed.
+ *
+ * In the host-replay branches of validateRoutingTriggerAccuracy,
+ * `validation_fixture_id` is set to the fixture's id and `per_entry_results`
+ * holds the results obtained with the proposed routing.
+ */
+export interface RoutingTriggerAccuracyResult {
+  before_pass_rate: number;
+  after_pass_rate: number;
+  improved: boolean;
+  validation_mode: ValidationMode;
+  validation_agent: string;
+  validation_fixture_id?: string;
+  per_entry_results?: RoutingReplayEntryResult[];
+}
 // ---------------------------------------------------------------------------
 // Structural validation
@@ -77,9 +111,70 @@ export async function validateRoutingTriggerAccuracy(
   evalSet: EvalEntry[],
   agent: string,
   modelFlag?: string,
-): Promise<{ before_pass_rate: number; after_pass_rate: number; improved: boolean }> {
+  options: RoutingValidationOptions = {},
+): Promise<RoutingTriggerAccuracyResult> {
   if (evalSet.length === 0) {
-    return { before_pass_rate: 0, after_pass_rate: 0, improved: false };
+    return {
+      before_pass_rate: 0,
+      after_pass_rate: 0,
+      improved: false,
+      validation_mode: "structural_guard",
+      validation_agent: agent,
+    };
+  }
+  if (options.replayFixture && options.replayRunner) {
+    const beforeResults = await options.replayRunner({
+      routing: originalRouting,
+      evalSet,
+      agent,
+      fixture: options.replayFixture,
+    });
+    const afterResults = await options.replayRunner({
+      routing: proposedRouting,
+      evalSet,
+      agent,
+      fixture: options.replayFixture,
+    });
+    const beforePassed = beforeResults.filter((result) => result.passed).length;
+    const afterPassed = afterResults.filter((result) => result.passed).length;
+    const total = evalSet.length;
+    return {
+      before_pass_rate: beforePassed / total,
+      after_pass_rate: afterPassed / total,
+      improved: afterPassed > beforePassed,
+      validation_mode: "host_replay",
+      validation_agent: agent,
+      validation_fixture_id: options.replayFixture.fixture_id,
+      per_entry_results: afterResults,
+    };
+  }
+  if (options.replayFixture) {
+    const beforeResults = runHostReplayFixture({
+      routing: originalRouting,
+      evalSet,
+      fixture: options.replayFixture,
+    });
+    const afterResults = runHostReplayFixture({
+      routing: proposedRouting,
+      evalSet,
+      fixture: options.replayFixture,
+    });
+    const beforePassed = beforeResults.filter((result) => result.passed).length;
+    const afterPassed = afterResults.filter((result) => result.passed).length;
+    const total = evalSet.length;
+    return {
+      before_pass_rate: beforePassed / total,
+      after_pass_rate: afterPassed / total,
+      improved: afterPassed > beforePassed,
+      validation_mode: "host_replay",
+      validation_agent: agent,
+      validation_fixture_id: options.replayFixture.fixture_id,
+      per_entry_results: afterResults,
+    };
   }
   const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";
@@ -113,6 +208,8 @@ export async function validateRoutingTriggerAccuracy(
     before_pass_rate: beforePassRate,
     after_pass_rate: afterPassRate,
     improved: afterPassRate > beforePassRate,
+    validation_mode: "llm_judge",
+    validation_agent: agent,
   };
 }
@@ -126,6 +223,7 @@ export async function validateRoutingProposal(
   evalSet: EvalEntry[],
   agent: string,
   modelFlag?: string,
+  options: RoutingValidationOptions = {},
 ): Promise<BodyValidationResult> {
   const gateResults: Array<{ gate: string; passed: boolean; reason: string }> = [];
@@ -145,6 +243,8 @@ export async function validateRoutingProposal(
       gate_results: gateResults,
       improved: false,
       regressions: [],
+      validation_mode: "structural_guard",
+      validation_agent: agent,
     };
   }
@@ -155,13 +255,14 @@ export async function validateRoutingProposal(
     evalSet,
     agent,
     modelFlag,
+    options,
   );
   gateResults.push({
     gate: "trigger_accuracy",
     passed: accuracy.improved,
     reason: accuracy.improved
-      ? `Improved: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`
-      : `Not improved: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`,
+      ? `Improved via ${accuracy.validation_mode}: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`
+      : `Not improved via ${accuracy.validation_mode}: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`,
   });
   const gatesPassed = gateResults.filter((g) => g.passed).length;
@@ -173,5 +274,11 @@ export async function validateRoutingProposal(
     gate_results: gateResults,
     improved: gatesPassed === 2,
     regressions: [],
+    validation_mode: accuracy.validation_mode,
+    validation_agent: accuracy.validation_agent,
+    validation_fixture_id: accuracy.validation_fixture_id,
+    before_pass_rate: accuracy.before_pass_rate,
+    after_pass_rate: accuracy.after_pass_rate,
+    per_entry_results: accuracy.per_entry_results,
   };
 }

package/cli/selftune/localdb/direct-write.ts CHANGED Viewed

@@ -285,11 +285,12 @@ export function writeEvolutionAuditToDb(record: EvolutionAuditEntry): boolean {
   return safeWrite("evolution-audit", (db) => {
     getStmt(
       db,
-      "evolution-audit-v2",
+      "evolution-audit-v3",
       `
       INSERT OR IGNORE INTO evolution_audit
-        (timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used)
-      VALUES (?, ?, ?, ?, ?, ?, ?)
+        (timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used,
+         validation_mode, validation_agent, validation_fixture_id, validation_evidence_ref)
+      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
     `,
     ).run(
       record.timestamp,
@@ -299,6 +300,10 @@ export function writeEvolutionAuditToDb(record: EvolutionAuditEntry): boolean {
       record.details,
       record.eval_snapshot ? JSON.stringify(record.eval_snapshot) : null,
       record.iterations_used ?? null,
+      record.validation_mode ?? null,
+      record.validation_agent ?? null,
+      record.validation_fixture_id ?? null,
+      record.validation_evidence_ref ?? null,
     );
   });
 }

package/cli/selftune/localdb/materialize.ts CHANGED Viewed

@@ -600,8 +600,9 @@ function insertEvolutionAudit(db: Database, records: EvolutionAuditEntry[]): num
   // (idx_evo_audit_dedup defined in schema.ts).
   const stmt = db.prepare(`
     INSERT OR IGNORE INTO evolution_audit
-      (timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used)
-    VALUES (?, ?, ?, ?, ?, ?, ?)
+      (timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used,
+       validation_mode, validation_agent, validation_fixture_id, validation_evidence_ref)
+    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
   `);
   let count = 0;
@@ -614,6 +615,10 @@ function insertEvolutionAudit(db: Database, records: EvolutionAuditEntry[]): num
       r.details,
       r.eval_snapshot ? JSON.stringify(r.eval_snapshot) : null,
       r.iterations_used ?? null,
+      r.validation_mode ?? null,
+      r.validation_agent ?? null,
+      r.validation_fixture_id ?? null,
+      r.validation_evidence_ref ?? null,
     );
     count++;
   }