npm - selftune - Versions diffs - 0.2.21 → 0.2.23 - Mend

selftune 0.2.21 → 0.2.23

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (108)

package/README.md +15 -8
package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +1 -0
package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +59 -0
package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +12 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/adapters/cline/hook.ts +167 -0
package/cli/selftune/adapters/cline/install.ts +197 -0
package/cli/selftune/adapters/codex/hook.ts +296 -0
package/cli/selftune/adapters/codex/install.ts +289 -0
package/cli/selftune/adapters/opencode/hook.ts +222 -0
package/cli/selftune/adapters/opencode/install.ts +543 -0
package/cli/selftune/adapters/pi/hook.ts +273 -0
package/cli/selftune/adapters/pi/install.ts +207 -0
package/cli/selftune/constants.ts +10 -1
package/cli/selftune/dashboard-contract.ts +14 -0
package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
package/cli/selftune/evolution/engines/replay-engine.ts +158 -0
package/cli/selftune/evolution/evidence.ts +2 -6
package/cli/selftune/evolution/evolve-body.ts +73 -20
package/cli/selftune/evolution/validate-body.ts +78 -42
package/cli/selftune/evolution/validate-routing.ts +45 -104
package/cli/selftune/hooks/auto-activate.ts +43 -37
package/cli/selftune/hooks/skill-eval.ts +2 -1
package/cli/selftune/hooks-shared/git-metadata.ts +149 -0
package/cli/selftune/hooks-shared/hook-output.ts +105 -0
package/cli/selftune/hooks-shared/normalize.ts +196 -0
package/cli/selftune/hooks-shared/session-state.ts +76 -0
package/cli/selftune/hooks-shared/skill-paths.ts +50 -0
package/cli/selftune/hooks-shared/stdin-dispatch.ts +59 -0
package/cli/selftune/hooks-shared/types.ts +91 -0
package/cli/selftune/index.ts +76 -6
package/cli/selftune/ingestors/pi-ingest.ts +726 -0
package/cli/selftune/init.ts +11 -1
package/cli/selftune/localdb/direct-write.ts +85 -0
package/cli/selftune/localdb/materialize.ts +6 -7
package/cli/selftune/localdb/queries.ts +126 -0
package/cli/selftune/localdb/schema.ts +38 -0
package/cli/selftune/observability.ts +8 -1
package/cli/selftune/orchestrate.ts +43 -0
package/cli/selftune/registry/client.ts +74 -0
package/cli/selftune/registry/history.ts +54 -0
package/cli/selftune/registry/index.ts +90 -0
package/cli/selftune/registry/install.ts +141 -0
package/cli/selftune/registry/list.ts +44 -0
package/cli/selftune/registry/push.ts +171 -0
package/cli/selftune/registry/rollback.ts +49 -0
package/cli/selftune/registry/status.ts +62 -0
package/cli/selftune/registry/sync.ts +125 -0
package/cli/selftune/repair/skill-usage.ts +4 -1
package/cli/selftune/status.ts +31 -0
package/cli/selftune/sync.ts +127 -23
package/cli/selftune/types.ts +2 -1
package/cli/selftune/utils/jsonl.ts +1 -30
package/cli/selftune/utils/llm-call.ts +99 -34
package/cli/selftune/utils/skill-discovery.ts +22 -0
package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
package/node_modules/@selftune/telemetry-contract/package.json +1 -1
package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
package/node_modules/@selftune/telemetry-contract/src/schemas.ts +22 -4
package/node_modules/@selftune/telemetry-contract/src/types.ts +1 -12
package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
package/package.json +1 -1
package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
package/packages/telemetry-contract/package.json +1 -1
package/packages/telemetry-contract/src/index.ts +1 -0
package/packages/telemetry-contract/src/schemas.ts +22 -4
package/packages/telemetry-contract/src/types.ts +1 -12
package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
package/packages/ui/AGENTS.md +16 -0
package/packages/ui/README.md +1 -1
package/packages/ui/package.json +1 -1
package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
package/packages/ui/src/components/EvidenceViewer.tsx +153 -443
package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
package/packages/ui/src/components/InfoTip.tsx +1 -2
package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
package/packages/ui/src/components/OverviewPanels.tsx +652 -0
package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
package/packages/ui/src/components/index.ts +56 -1
package/packages/ui/src/components/section-cards.tsx +18 -35
package/packages/ui/src/components/skill-health-grid.tsx +47 -37
package/packages/ui/src/lib/constants.tsx +0 -1
package/packages/ui/src/primitives/card.tsx +1 -1
package/packages/ui/src/primitives/checkbox.tsx +1 -1
package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
package/packages/ui/src/primitives/select.tsx +2 -2
package/packages/ui/src/types.ts +172 -4
package/skill/SKILL.md +26 -2
package/skill/Workflows/Ingest.md +60 -2
package/skill/Workflows/Initialize.md +54 -9
package/skill/Workflows/PlatformHooks.md +109 -0
package/skill/Workflows/Registry.md +99 -0
package/skill/Workflows/Sync.md +3 -1
package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
package/cli/selftune/utils/html.ts +0 -27
package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117

package/cli/selftune/evolution/engines/replay-engine.ts ADDED

@@ -0,0 +1,158 @@
+/**
+ * replay-engine.ts
+ *
+ * Cohesive module for all replay-based validation logic:
+ *   - Host/runtime replay (PRIMARY path — real agent routing decisions)
+ *   - Fixture-backed replay (FALLBACK — surface similarity matching)
+ *   - Custom replay runner support
+ *
+ * Host/runtime replay is preferred because it captures actual agent routing
+ * behavior. Fixture-backed replay is used as a fallback when no invoker is
+ * provided or when the invoker returns an error.
+ *
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
+ * replay-specific concerns from judge-specific concerns.
+ */
+import type {
+  EvalEntry,
+  RoutingReplayEntryResult,
+  RoutingReplayFixture,
+  ValidationMode,
+} from "../../types.js";
+import { runHostReplayFixture } from "../validate-host-replay.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+export interface ReplayRunnerInput {
+  routing: string;
+  evalSet: EvalEntry[];
+  agent: string;
+  fixture: RoutingReplayFixture;
+}
+export type ReplayRunner = (input: ReplayRunnerInput) => Promise<RoutingReplayEntryResult[]>;
+export interface ReplayValidationOptions {
+  replayFixture?: RoutingReplayFixture;
+  /** Host/runtime replay runner — PRIMARY validation path when provided. */
+  replayRunner?: ReplayRunner;
+}
+export interface ReplayValidationResult {
+  before_pass_rate: number;
+  after_pass_rate: number;
+  improved: boolean;
+  validation_mode: ValidationMode;
+  validation_agent: string;
+  validation_fixture_id?: string;
+  per_entry_results?: RoutingReplayEntryResult[];
+  /** Before-phase per-entry results for structured persistence. */
+  before_entry_results?: RoutingReplayEntryResult[];
+}
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+function computeReplayResult(
+  beforeResults: RoutingReplayEntryResult[],
+  afterResults: RoutingReplayEntryResult[],
+  total: number,
+  mode: ValidationMode,
+  agent: string,
+  fixtureId: string,
+): ReplayValidationResult {
+  const beforePassed = beforeResults.filter((result) => result.passed).length;
+  const afterPassed = afterResults.filter((result) => result.passed).length;
+  return {
+    before_pass_rate: beforePassed / total,
+    after_pass_rate: afterPassed / total,
+    improved: afterPassed > beforePassed,
+    validation_mode: mode,
+    validation_agent: agent,
+    validation_fixture_id: fixtureId,
+    per_entry_results: afterResults,
+    before_entry_results: beforeResults,
+  };
+}
+// ---------------------------------------------------------------------------
+// Replay validation engine
+// ---------------------------------------------------------------------------
+/**
+ * Attempt replay-backed validation. Prefers host/runtime replay when a
+ * replayRunner is provided; falls back to fixture-based replay when:
+ *   - No replayRunner is provided
+ *   - The replayRunner throws an error
+ *
+ * Returns null if no replay path is available (no fixture provided).
+ */
+export async function runReplayValidation(
+  originalContent: string,
+  proposedContent: string,
+  evalSet: EvalEntry[],
+  agent: string,
+  options: ReplayValidationOptions = {},
+): Promise<ReplayValidationResult | null> {
+  if (evalSet.length === 0 || !options.replayFixture) {
+    return null;
+  }
+  const fixture = options.replayFixture;
+  const total = evalSet.length;
+  // PRIMARY path: Host/runtime replay when a runner is provided
+  if (options.replayRunner) {
+    try {
+      const beforeResults = await options.replayRunner({
+        routing: originalContent,
+        evalSet,
+        agent,
+        fixture,
+      });
+      const afterResults = await options.replayRunner({
+        routing: proposedContent,
+        evalSet,
+        agent,
+        fixture,
+      });
+      return computeReplayResult(
+        beforeResults,
+        afterResults,
+        total,
+        "host_replay",
+        agent,
+        fixture.fixture_id,
+      );
+    } catch {
+      // Host replay failed — fall through to fixture-based fallback
+    }
+  }
+  // FALLBACK path: Fixture-backed replay (surface similarity matching)
+  const beforeResults = runHostReplayFixture({
+    routing: originalContent,
+    evalSet,
+    fixture,
+  });
+  const afterResults = runHostReplayFixture({
+    routing: proposedContent,
+    evalSet,
+    fixture,
+  });
+  return computeReplayResult(
+    beforeResults,
+    afterResults,
+    total,
+    "fixture_replay",
+    agent,
+    fixture.fixture_id,
+  );
+}

package/cli/selftune/evolution/evidence.ts CHANGED

@@ -12,11 +12,7 @@ import { queryEvolutionEvidence } from "../localdb/queries.js";
 import type { EvolutionEvidenceEntry } from "../types.js";
 /** Append a structured evidence artifact to the evolution evidence log (SQLite). */
-export function appendEvidenceEntry(
-  entry: EvolutionEvidenceEntry,
-  /** @deprecated Unused; retained for API compatibility during migration */
-  _logPath?: string,
-): void {
+export function appendEvidenceEntry(entry: EvolutionEvidenceEntry): void {
   writeEvolutionEvidenceToDb(entry);
 }
@@ -25,7 +21,7 @@ export function appendEvidenceEntry(
  *
  * @param skillName - Optional skill name to filter by
  */
-export function readEvidenceTrail(skillName?: string, _logPath?: string): EvolutionEvidenceEntry[] {
+export function readEvidenceTrail(skillName?: string): EvolutionEvidenceEntry[] {
   const db = getDb();
   return queryEvolutionEvidence(db, skillName) as EvolutionEvidenceEntry[];
 }

package/cli/selftune/evolution/evolve-body.ts CHANGED

@@ -12,6 +12,10 @@ import { parseArgs } from "node:util";
 import { buildEvalSet } from "../eval/hooks-to-evals.js";
 import { readGradingResultsForSkill } from "../grading/results.js";
 import { getDb } from "../localdb/db.js";
+import {
+  type ReplayEntryResultInput,
+  writeReplayEntryResultsToDb,
+} from "../localdb/direct-write.js";
 import { queryQueryLog, querySkillUsageRecords } from "../localdb/queries.js";
 import type {
   BodyEvolutionProposal,
@@ -37,6 +41,7 @@ import { extractFailurePatterns } from "./extract-patterns.js";
 import { type ExecutionContext, generateBodyProposal } from "./propose-body.js";
 import { generateRoutingProposal } from "./propose-routing.js";
 import { refineBodyProposal } from "./refine-body.js";
+import type { BodyValidationOptions } from "./validate-body.js";
 import { validateBodyProposal } from "./validate-body.js";
 import {
   buildRoutingReplayFixture,
@@ -463,29 +468,32 @@ export async function evolveBody(
       // Validate (validationModel overrides studentModel for validation calls)
       const validationModelFlag = options.validationModel ?? studentModel;
       let validation: BodyValidationResult;
-      if (target === "routing") {
-        const replayFixture = buildRoutingReplayFixture({
-          skillName,
-          skillPath,
-          platform: studentAgent === "codex" ? "codex" : "claude_code",
-        });
-        const replayRunner =
-          replayFixture.platform === "claude_code" && studentAgent === "claude"
-            ? async ({
+      // Build replay fixture + runner for ALL targets (not just routing)
+      const replayFixture = buildRoutingReplayFixture({
+        skillName,
+        skillPath,
+        platform: studentAgent === "codex" ? "codex" : "claude_code",
+      });
+      const replayRunner =
+        replayFixture.platform === "claude_code" && studentAgent === "claude"
+          ? async ({
+              routing,
+              evalSet,
+              fixture,
+            }: {
+              routing: string;
+              evalSet: EvalEntry[];
+              fixture: RoutingReplayFixture;
+            }) =>
+              await runClaudeRuntimeReplayFixture({
                 routing,
                 evalSet,
                 fixture,
-              }: {
-                routing: string;
-                evalSet: EvalEntry[];
-                fixture: RoutingReplayFixture;
-              }) =>
-                await runClaudeRuntimeReplayFixture({
-                  routing,
-                  evalSet,
-                  fixture,
-                })
-            : undefined;
+              })
+          : undefined;
+      if (target === "routing") {
         validation = await _validateRoutingProposal(
           proposal,
           evalSet,
@@ -497,11 +505,16 @@ export async function evolveBody(
           },
         );
       } else {
+        const bodyReplayOptions: BodyValidationOptions = replayRunner
+          ? { replay: { replayFixture, replayRunner } }
+          : {};
         validation = await _validateBodyProposal(
           proposal,
           evalSet,
           studentAgent,
           validationModelFlag,
+          undefined,
+          bodyReplayOptions,
         );
       }
       lastValidation = validation;
@@ -543,6 +556,46 @@ export async function evolveBody(
         },
       });
+      // Persist per-entry replay results to SQLite
+      try {
+        const entryResults: ReplayEntryResultInput[] = [];
+        if (validation.before_entry_results) {
+          for (const r of validation.before_entry_results) {
+            entryResults.push({
+              proposal_id: proposal.proposal_id,
+              skill_name: skillName,
+              validation_mode: validation.validation_mode ?? "llm_judge",
+              phase: "before",
+              query: r.query,
+              should_trigger: r.should_trigger,
+              triggered: r.triggered,
+              passed: r.passed,
+              evidence: r.evidence,
+            });
+          }
+        }
+        if (validation.per_entry_results) {
+          for (const r of validation.per_entry_results) {
+            entryResults.push({
+              proposal_id: proposal.proposal_id,
+              skill_name: skillName,
+              validation_mode: validation.validation_mode ?? "llm_judge",
+              phase: "after",
+              query: r.query,
+              should_trigger: r.should_trigger,
+              triggered: r.triggered,
+              passed: r.passed,
+              evidence: r.evidence,
+            });
+          }
+        }
+        if (entryResults.length > 0) {
+          writeReplayEntryResultsToDb(entryResults);
+        }
+      } catch {
+        // Fail-open: replay entry persistence is non-blocking
+      }
       if (validation.improved) {
         break;
       }

package/cli/selftune/evolution/validate-body.ts CHANGED

@@ -3,13 +3,32 @@
  *
  * 3-gate validation for full body evolution proposals:
  *   Gate 1 (structural): Pure code — YAML frontmatter, # Title, ## Workflow Routing preserved
- *   Gate 2 (trigger accuracy): Student model YES/NO per eval entry
+ *   Gate 2 (trigger accuracy): Replay-backed or student model YES/NO per eval entry
  *   Gate 3 (quality): Student model rates body clarity/completeness 0.0-1.0
+ *
+ * Gate 2 now supports replay-backed validation (via replay engine) in addition
+ * to LLM-judge-based checking. When replay options are provided and succeed,
+ * the replay path is preferred. Falls back to LLM judge otherwise.
  */
-import type { BodyEvolutionProposal, BodyValidationResult, EvalEntry } from "../types.js";
+import type {
+  BodyEvolutionProposal,
+  BodyValidationResult,
+  EvalEntry,
+  ValidationMode,
+} from "../types.js";
 import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
-import { buildTriggerCheckPrompt, parseTriggerResponse } from "../utils/trigger-check.js";
+import { runJudgeValidation } from "./engines/judge-engine.js";
+import { runReplayValidation, type ReplayValidationOptions } from "./engines/replay-engine.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+export interface BodyValidationOptions {
+  /** Replay options for Gate 2 trigger accuracy. */
+  replay?: ReplayValidationOptions;
+}
 // ---------------------------------------------------------------------------
 // Gate 1: Structural validation (pure code, no LLM)
@@ -57,12 +76,15 @@ export function validateBodyStructure(proposedBody: string): { valid: boolean; r
 }
 // ---------------------------------------------------------------------------
-// Gate 2: Trigger accuracy (student model YES/NO)
+// Gate 2: Trigger accuracy (replay-backed or student model YES/NO)
 // ---------------------------------------------------------------------------
 /**
  * Run trigger checks on the eval set using the proposed body content.
  * Returns before/after pass rates.
+ *
+ * When replay options are provided, attempts replay-backed validation first.
+ * Falls back to LLM judge when replay is unavailable or no options given.
  */
 export async function validateBodyTriggerAccuracy(
   originalBody: string,
@@ -70,54 +92,64 @@ export async function validateBodyTriggerAccuracy(
   evalSet: EvalEntry[],
   agent: string,
   modelFlag?: string,
+  options?: BodyValidationOptions,
 ): Promise<{
   before_pass_rate: number;
   after_pass_rate: number;
   improved: boolean;
   regressions: string[];
+  validation_mode: ValidationMode;
+  per_entry_results?: import("../types.js").RoutingReplayEntryResult[];
+  before_entry_results?: import("../types.js").RoutingReplayEntryResult[];
 }> {
   if (evalSet.length === 0) {
-    return { before_pass_rate: 0, after_pass_rate: 0, improved: false, regressions: [] };
+    return {
+      before_pass_rate: 0,
+      after_pass_rate: 0,
+      improved: false,
+      regressions: [],
+      validation_mode: "llm_judge",
+    };
   }
-  const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";
-  let beforePassed = 0;
-  let afterPassed = 0;
-  const regressions: string[] = [];
-  for (const entry of evalSet) {
-    // Check with original body
-    const beforePrompt = buildTriggerCheckPrompt(originalBody, entry.query);
-    const beforeRaw = await callLlm(systemPrompt, beforePrompt, agent, modelFlag);
-    const beforeTriggered = parseTriggerResponse(beforeRaw);
-    const beforePass =
-      (entry.should_trigger && beforeTriggered) || (!entry.should_trigger && !beforeTriggered);
-    // Check with proposed body
-    const afterPrompt = buildTriggerCheckPrompt(proposedBody, entry.query);
-    const afterRaw = await callLlm(systemPrompt, afterPrompt, agent, modelFlag);
-    const afterTriggered = parseTriggerResponse(afterRaw);
-    const afterPass =
-      (entry.should_trigger && afterTriggered) || (!entry.should_trigger && !afterTriggered);
-    if (beforePass) beforePassed++;
-    if (afterPass) afterPassed++;
-    // Track regressions
-    if (beforePass && !afterPass) {
-      regressions.push(entry.query);
+  // Try replay-backed validation when options are provided
+  if (options?.replay) {
+    const replayResult = await runReplayValidation(
+      originalBody,
+      proposedBody,
+      evalSet,
+      agent,
+      options.replay,
+    );
+    if (replayResult) {
+      return {
+        before_pass_rate: replayResult.before_pass_rate,
+        after_pass_rate: replayResult.after_pass_rate,
+        improved: replayResult.improved,
+        regressions: [],
+        validation_mode: replayResult.validation_mode,
+        per_entry_results: replayResult.per_entry_results,
+        before_entry_results: replayResult.before_entry_results,
+      };
     }
   }
-  const total = evalSet.length;
-  const beforePassRate = beforePassed / total;
-  const afterPassRate = afterPassed / total;
+  // Fall back to LLM judge
+  const judgeResult = await runJudgeValidation(
+    originalBody,
+    proposedBody,
+    evalSet,
+    agent,
+    modelFlag,
+  );
   return {
-    before_pass_rate: beforePassRate,
-    after_pass_rate: afterPassRate,
-    improved: afterPassRate > beforePassRate,
-    regressions,
+    before_pass_rate: judgeResult.before_pass_rate,
+    after_pass_rate: judgeResult.after_pass_rate,
+    improved: judgeResult.improved,
+    regressions: judgeResult.regressions,
+    validation_mode: judgeResult.validation_mode,
   };
 }
@@ -190,6 +222,7 @@ export async function validateBodyProposal(
   agent: string,
   modelFlag?: string,
   qualityThreshold = QUALITY_THRESHOLD,
+  options?: BodyValidationOptions,
 ): Promise<BodyValidationResult> {
   const gateResults: Array<{ gate: string; passed: boolean; reason: string }> = [];
@@ -214,20 +247,21 @@ export async function validateBodyProposal(
     };
   }
-  // Gate 2: Trigger accuracy (student model)
+  // Gate 2: Trigger accuracy (replay-backed or student model)
   const accuracy = await validateBodyTriggerAccuracy(
     proposal.original_body,
     proposal.proposed_body,
     evalSet,
     agent,
     modelFlag,
+    options,
   );
   gateResults.push({
     gate: "trigger_accuracy",
     passed: accuracy.improved,
     reason: accuracy.improved
-      ? `Improved: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`
-      : `Not improved: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`,
+      ? `Improved via ${accuracy.validation_mode}: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`
+      : `Not improved via ${accuracy.validation_mode}: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`,
   });
   // Gate 3: Quality assessment (student model)
@@ -252,7 +286,7 @@ export async function validateBodyProposal(
     gate_results: gateResults,
     improved: gatesPassed === 3,
     regressions: accuracy.regressions,
-    validation_mode: "llm_judge",
+    validation_mode: accuracy.validation_mode,
     validation_agent: agent,
     ...(evalSet.length > 0
       ? {
@@ -260,5 +294,7 @@ export async function validateBodyProposal(
           after_pass_rate: accuracy.after_pass_rate,
         }
       : {}),
+    per_entry_results: accuracy.per_entry_results,
+    before_entry_results: accuracy.before_entry_results,
   };
 }