npm - codetrap - Versions diffs - 0.1.7 → 0.1.8 - Mend

codetrap 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/README.md +151 -52
package/docs/installation.md +113 -29
package/package.json +4 -3
package/plugins/codetrap-agent/.codex-plugin/plugin.json +1 -2
package/plugins/codetrap-agent/hooks/post-flight-capture.example.md +19 -17
package/plugins/codetrap-agent/hooks.json +2 -2
package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md +10 -4
package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md +14 -3
package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md +52 -9
package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md +74 -6
package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md +6 -5
package/plugins/codetrap-agent/templates/AGENTS.codetrap.md +31 -5
package/scripts/search-policy-sweep.ts +131 -0
package/src/commands/workflow.ts +144 -68
package/src/db/embedding-queries.ts +230 -48
package/src/db/queries.ts +0 -25
package/src/db/repository.ts +32 -21
package/src/db/schema.ts +80 -0
package/src/index.ts +28 -3
package/src/lib/command-requests.ts +112 -1
package/src/lib/config.ts +57 -7
package/src/lib/constants.ts +1 -1
package/src/lib/doctor.ts +42 -12
package/src/lib/embedder.ts +118 -3
package/src/lib/embedding-health.ts +3 -1
package/src/lib/embedding-job.ts +3 -0
package/src/lib/embedding-management.ts +65 -0
package/src/lib/embedding-runtime.ts +177 -0
package/src/lib/output-json.ts +0 -2
package/src/lib/scope-context.ts +12 -6
package/src/lib/scope-migration.ts +2 -1
package/src/lib/scope.ts +0 -2
package/src/lib/search-eval.ts +38 -18
package/src/lib/search-policy-sweep.ts +563 -0
package/src/lib/search-policy.ts +0 -4
package/src/lib/search-service.ts +14 -15
package/src/lib/session-candidate-document.ts +175 -0
package/src/lib/session-candidate-scope.ts +6 -0
package/src/lib/session-capture.ts +298 -32
package/src/lib/session-codec.ts +1 -8
package/src/lib/session-operations.ts +83 -60
package/src/lib/session-review.ts +327 -0
package/src/lib/session-store.ts +87 -73
package/src/lib/store.ts +74 -10
package/src/lib/string-list.ts +3 -0
package/src/lib/text-lines.ts +7 -0
package/src/lib/trap-search-document.ts +2 -1
package/src/lib/value-types.ts +3 -0
package/src/web/client-review.ts +171 -0
package/src/web/client-script.ts +426 -51
package/src/web/client-shell.ts +414 -0
package/src/web/client-text.ts +112 -0
package/src/web/project-registry.ts +3 -5
package/src/web/server.ts +117 -103
package/src/web/static.ts +364 -19
package/skills/codetrap-capture-external/SKILL.md +0 -62
package/skills/codetrap-check/SKILL.md +0 -69
package/src/lib/embedding-index.ts +0 -53

package/src/lib/search-eval.ts CHANGED Viewed

@@ -3,13 +3,19 @@ import { openDatabase } from "../db/connection";
 import { TrapRepository } from "../db/repository";
 import type { TrapInput, TrapSearchResult } from "../domain/trap";
 import { SEARCH_MODES, type SearchMode } from "./constants";
+import { isRecord } from "./value-types";
 import {
-  createDefaultEmbeddingProvider,
-  embeddingConfig,
   type EmbeddingConfig,
   type EmbeddingProvider,
   type EmbeddingTask,
 } from "./embedder";
+import {
+  EmbeddingRuntime,
+  defaultEmbeddingRuntime,
+  embeddingRuntimeFrom,
+  type EmbeddingRuntimeInput,
+} from "./embedding-runtime";
+import type { RankingConfig } from "./search-policy";
 export type PhaseGate = "phase0" | "phase1" | "phase4" | "dogfood";
 export const DOGFOOD_JUDGMENTS = ["useful_hit", "miss", "noisy_hit", "no_relevant_trap"] as const;
@@ -79,6 +85,10 @@ export type SearchEvalReport = {
   next_actions: SearchEvalNextAction[];
 };
+export type SearchEvalDetailedReport = Omit<SearchEvalReport, "mode" | "fixture" | "next_actions"> & {
+  cases: EvalCaseReport[];
+};
 export type RecordDogfoodResult = {
   success: true;
   fixture: string;
@@ -115,8 +125,8 @@ export function recordDogfoodCase(fixturePath: string, jsonInput: string | undef
 export async function reportDogfood(fixturePath: string, live: boolean): Promise<SearchEvalReport> {
   const fixture = readEvalFixture(fixturePath);
-  const provider = live ? createDefaultEmbeddingProvider() : new EvalEmbedder();
-  const evaluated = await evaluateSearchFixture(fixture, provider);
+  const runtime = live ? defaultEmbeddingRuntime() : new EmbeddingRuntime(new EvalEmbedder());
+  const evaluated = await evaluateSearchFixture(fixture, runtime);
   const mode: SearchEvalReport["mode"] = live ? "live" : "deterministic";
   const report: Omit<SearchEvalReport, "next_actions"> = {
     mode,
@@ -131,19 +141,30 @@ export async function reportDogfood(fixturePath: string, live: boolean): Promise
 export async function evaluateSearchFixture(
   fixture: EvalFixture,
-  provider: EmbeddingProvider | undefined
+  embeddings: EmbeddingRuntimeInput,
+  ranking?: RankingConfig
 ): Promise<Omit<SearchEvalReport, "mode" | "fixture" | "next_actions">> {
-  const repo = fixtureRepository(fixture, provider);
+  const { cases: _cases, ...report } = await evaluateSearchFixtureCases(fixture, embeddings, ranking);
+  return report;
+}
+export async function evaluateSearchFixtureCases(
+  fixture: EvalFixture,
+  embeddings: EmbeddingRuntimeInput,
+  ranking?: RankingConfig
+): Promise<SearchEvalDetailedReport> {
+  const runtime = embeddingRuntimeFrom(embeddings);
+  const repo = fixtureRepository(fixture, runtime, ranking);
   let providerError: string | null = null;
-  if (provider) {
+  if (runtime.available()) {
     try {
       await repo.ensureEmbeddings();
     } catch (error) {
       providerError = errorMessage(error);
     }
   }
-  const searchRepo = providerError ? fixtureRepository(fixture, undefined) : repo;
+  const searchRepo = providerError ? fixtureRepository(fixture, undefined, ranking) : repo;
   const cases: EvalCaseReport[] = [];
   let hybridFallbackCount = 0;
@@ -154,7 +175,7 @@ export async function evaluateSearchFixture(
       const results = await searchRepo.search(item.query, { mode: item.mode, limit: 5 });
       const report = caseReport(item, fixture, results);
       cases.push(report);
-      if (item.mode === "hybrid" && (!provider || hasSemanticFallback(results))) {
+      if (item.mode === "hybrid" && (!runtime.available() || hasSemanticFallback(results))) {
         hybridFallbackCount++;
       }
     } catch (error) {
@@ -169,8 +190,8 @@ export async function evaluateSearchFixture(
   const noisyHits = cases.filter((item) => item.judgment === "noisy_hit");
   const metrics = aggregateMetrics(cases);
   return {
-    provider: provider ? embeddingConfig(provider) : null,
-    semantic_available: Boolean(provider && providerError === null),
+    provider: runtime.config(),
+    semantic_available: runtime.available() && providerError === null,
     provider_error: providerError,
     total_cases: cases.length,
     metrics: {
@@ -190,6 +211,7 @@ export async function evaluateSearchFixture(
     failures,
     misses,
     noisy_hits: noisyHits,
+    cases,
   };
 }
@@ -240,8 +262,9 @@ function buildSearchEvalNextActions(
 ): SearchEvalNextAction[] {
   const actions: SearchEvalNextAction[] = [];
   if (report.mode === "live" && !report.semantic_available) {
-    actions.push({
-      command: "export JINA_API_KEY=<your-jina-api-key>",
+    const action = defaultEmbeddingRuntime().setupAction();
+    if (action) actions.push({
+      command: action.command,
       reason: "Enable live semantic checks, then rerun bun run eval:dogfood -- report --live.",
     });
   }
@@ -309,8 +332,8 @@ function formatNextActions(actions: SearchEvalNextAction[]): string[] {
   return actions.map((action) => `  - ${action.command} # ${action.reason}`);
 }
-function fixtureRepository(fixture: EvalFixture, provider: EmbeddingProvider | undefined): TrapRepository {
-  const repo = new TrapRepository(openDatabase(":memory:"), provider);
+function fixtureRepository(fixture: EvalFixture, embeddings: EmbeddingRuntimeInput, ranking?: RankingConfig): TrapRepository {
+  const repo = new TrapRepository(openDatabase(":memory:"), embeddings, ranking);
   for (const trap of fixture.traps) repo.add(trap);
   return repo;
 }
@@ -471,9 +494,6 @@ function providerLabel(provider: EmbeddingConfig | null): string {
   return `${provider.provider}/${provider.model}`;
 }
-function isRecord(value: unknown): value is Record<string, unknown> {
-  return typeof value === "object" && value !== null && !Array.isArray(value);
-}
 function round(value: number): number {
   return Math.round(value * 10000) / 10000;

package/src/lib/search-policy-sweep.ts ADDED Viewed

@@ -0,0 +1,563 @@
+import { readFileSync } from "node:fs";
+import type { TrapSearchResult } from "../domain/trap";
+import { SCOPES, SEARCH_MODES, type Scope, type SearchMode } from "./constants";
+import { isRecord } from "./value-types";
+import {
+  defaultEmbeddingRuntime,
+  embeddingRuntimeFrom,
+  type EmbeddingRuntimeInput,
+} from "./embedding-runtime";
+import {
+  DEFAULT_SEARCH_EVAL_FIXTURE,
+  EvalEmbedder,
+  evaluateSearchFixtureCases,
+  readEvalFixture,
+  type EvalCaseReport,
+  type SearchEvalMetrics,
+} from "./search-eval";
+import { ScopedRepositoryContext } from "./scope-context";
+import {
+  DEFAULT_RANKING_CONFIG,
+  type RankingConfig,
+} from "./search-policy";
+export type RankingCandidate = {
+  name: string;
+  config: RankingConfig;
+};
+export type GoldTarget = {
+  scope?: Scope;
+  id?: number;
+  title?: string;
+};
+export type LiveEvalCase = {
+  query: string;
+  mode?: SearchMode;
+  scope?: Scope;
+  limit?: number;
+  gold?: GoldTarget[];
+  minRecallAt3?: number;
+  minRecallAt5?: number;
+};
+export type ComparableSearchCase = {
+  key: string;
+  query: string;
+  mode: SearchMode;
+  scope?: Scope;
+  scored: boolean;
+  recallAt3: number;
+  recallAt5: number;
+  reciprocalRank: number;
+  passed: boolean;
+  topResults: { id: number; scope?: Scope; title: string; sources: string[]; diagnostics: string[] }[];
+  warnings: string[];
+  error?: string;
+};
+export type CaseDelta = {
+  query: string;
+  mode: SearchMode;
+  scope?: Scope;
+  before: number;
+  after: number;
+  beforeTop: string[];
+  afterTop: string[];
+};
+export type SweepCandidateReport = {
+  name: string;
+  config: RankingConfig;
+  total_cases: number;
+  scored_cases: number;
+  metrics: SearchEvalMetrics;
+  cases: ComparableSearchCase[];
+  failures: ComparableSearchCase[];
+  regressions: CaseDelta[];
+  improvements: CaseDelta[];
+};
+export type PolicySweepReport = {
+  mode: "fixture" | "live";
+  source: string;
+  cwd?: string;
+  candidate_count: number;
+  baseline: SweepCandidateReport;
+  candidates: SweepCandidateReport[];
+  best: SweepCandidateReport;
+  recommendation: string;
+};
+type FixtureSweepOptions = {
+  fixturePath?: string;
+  candidates?: RankingCandidate[];
+};
+type LiveSweepOptions = {
+  cwd: string;
+  cases: LiveEvalCase[];
+  candidates?: RankingCandidate[];
+  embeddings?: EmbeddingRuntimeInput;
+  defaultScope?: Scope;
+  home?: string;
+};
+export const DEFAULT_POLICY_SWEEP_CANDIDATES: RankingCandidate[] = [
+  candidate("default", {}),
+  candidate("title-tag-heavy", {
+    titleTokenBoost: 0.24,
+    tagTokenBoost: 0.28,
+    maxBoost: 0.55,
+  }),
+  candidate("identifier-heavy", {
+    identifierBoost: 0.3,
+    maxBoost: 0.55,
+  }),
+  candidate("scope-heavy", {
+    pathMatchBoost: 0.2,
+    moduleMatchBoost: 0.14,
+    ownerMatchBoost: 0.08,
+    maxBoost: 0.55,
+  }),
+  candidate("severity-light", {
+    severityBoost: {
+      warning: 0,
+      error: 0.02,
+      critical: 0.03,
+    },
+  }),
+  candidate("semantic-loose", {
+    semanticMinScore: 0.2,
+  }),
+  candidate("semantic-strict", {
+    semanticMinScore: 0.4,
+  }),
+];
+export async function runFixturePolicySweep(options: FixtureSweepOptions = {}): Promise<PolicySweepReport> {
+  const fixturePath = options.fixturePath ?? DEFAULT_SEARCH_EVAL_FIXTURE;
+  const fixture = readEvalFixture(fixturePath);
+  const candidates = options.candidates ?? DEFAULT_POLICY_SWEEP_CANDIDATES;
+  const reports: SweepCandidateReport[] = [];
+  let baselineCases: ComparableSearchCase[] | undefined;
+  for (const item of candidates) {
+    const detailed = await evaluateSearchFixtureCases(fixture, new EvalEmbedder(), item.config);
+    const cases = detailed.cases.map(fromEvalCaseReport);
+    if (!baselineCases) baselineCases = cases;
+    reports.push(candidateReport(item, cases, baselineCases));
+  }
+  return buildSweepReport("fixture", fixturePath, reports);
+}
+export async function runLivePolicySweep(options: LiveSweepOptions): Promise<PolicySweepReport> {
+  if (options.cases.length === 0) throw new Error("Live sweep requires at least one query case.");
+  const candidates = options.candidates ?? DEFAULT_POLICY_SWEEP_CANDIDATES;
+  const embeddings = options.embeddings ?? defaultEmbeddingRuntime();
+  const reports: SweepCandidateReport[] = [];
+  let baselineCases: ComparableSearchCase[] | undefined;
+  for (const item of candidates) {
+    const cases = await evaluateLiveCases({
+      ...options,
+      embeddings,
+      defaultScope: options.defaultScope ?? "project",
+      ranking: item.config,
+    });
+    if (!baselineCases) baselineCases = cases;
+    reports.push(candidateReport(item, cases, baselineCases));
+  }
+  return buildSweepReport("live", "live project", reports, options.cwd);
+}
+export function readLiveEvalCases(path: string): LiveEvalCase[] {
+  const parsed = JSON.parse(readFileSync(path, "utf-8")) as unknown;
+  const records = Array.isArray(parsed)
+    ? parsed
+    : isRecord(parsed) && Array.isArray(parsed.queries)
+      ? parsed.queries
+      : null;
+  if (!records) throw new Error("Live queries file must be an array or an object with a queries array.");
+  return records.map(normalizeLiveEvalCase);
+}
+export function formatPolicySweepReport(report: PolicySweepReport): string {
+  const lines = [
+    `Search policy sweep (${report.mode})`,
+    `Source: ${report.source}`,
+  ];
+  if (report.cwd) lines.push(`cwd: ${report.cwd}`);
+  lines.push(
+    `Candidates: ${report.candidate_count}`,
+    `Baseline: ${summaryLine(report.baseline)}`,
+    `Best: ${summaryLine(report.best)}`,
+    `Recommendation: ${report.recommendation}`,
+    "Results:"
+  );
+  for (const candidate of report.candidates) {
+    lines.push(`  - ${summaryLine(candidate)}`);
+    if (candidate.regressions.length > 0) {
+      lines.push(`    regressions: ${formatDeltas(candidate.regressions)}`);
+    }
+    if (candidate.improvements.length > 0) {
+      lines.push(`    improvements: ${formatDeltas(candidate.improvements)}`);
+    }
+    if (candidate.failures.length > 0) {
+      lines.push(`    failures: ${candidate.failures.slice(0, 3).map((item) => item.query).join("; ")}`);
+    }
+  }
+  return lines.join("\n");
+}
+function candidate(name: string, patch: Partial<RankingConfig>): RankingCandidate {
+  return {
+    name,
+    config: {
+      ...DEFAULT_RANKING_CONFIG,
+      ...patch,
+      severityBoost: {
+        ...DEFAULT_RANKING_CONFIG.severityBoost,
+        ...(patch.severityBoost ?? {}),
+      },
+    },
+  };
+}
+async function evaluateLiveCases(options: LiveSweepOptions & { ranking: RankingConfig }): Promise<ComparableSearchCase[]> {
+  const defaultScope = options.defaultScope ?? "project";
+  const scopes = new ScopedRepositoryContext(
+    options.cwd,
+    embeddingRuntimeFrom(options.embeddings),
+    options.home,
+    options.ranking
+  );
+  const out: ComparableSearchCase[] = [];
+  for (const input of options.cases) {
+    const scope = input.scope ?? defaultScope;
+    const mode = input.mode ?? "hybrid";
+    try {
+      const results = await scopes.repositoryFor(scope).search(input.query, {
+        mode,
+        scope,
+        limit: input.limit ?? 5,
+      });
+      out.push(liveCaseReport(input, mode, scope, results));
+    } catch (error) {
+      out.push({
+        key: caseKey(input.query, mode, scope),
+        query: input.query,
+        mode,
+        scope,
+        scored: (input.gold ?? []).length > 0,
+        recallAt3: 0,
+        recallAt5: 0,
+        reciprocalRank: 0,
+        passed: false,
+        topResults: [],
+        warnings: [],
+        error: error instanceof Error ? error.message : String(error),
+      });
+    }
+  }
+  return out;
+}
+function liveCaseReport(
+  input: LiveEvalCase,
+  mode: SearchMode,
+  scope: Scope,
+  results: TrapSearchResult[]
+): ComparableSearchCase {
+  const gold = input.gold ?? [];
+  const warnings = new Set<string>();
+  const recallAt3 = gold.length > 0 ? recall(gold, results.slice(0, 3), warnings, scope) : 1;
+  const recallAt5 = gold.length > 0 ? recall(gold, results.slice(0, 5), warnings, scope) : 1;
+  const firstRank = gold.length > 0 ? firstMatchRank(gold, results, warnings, scope) : -1;
+  const reciprocalRank = firstRank >= 0 ? 1 / (firstRank + 1) : 0;
+  const minRecallAt3 = input.minRecallAt3 ?? (gold.length > 0 ? 1 : 0);
+  const minRecallAt5 = input.minRecallAt5 ?? (gold.length > 0 ? 1 : 0);
+  return {
+    key: caseKey(input.query, mode, scope),
+    query: input.query,
+    mode,
+    scope,
+    scored: gold.length > 0,
+    recallAt3,
+    recallAt5,
+    reciprocalRank,
+    passed: recallAt3 >= minRecallAt3 && recallAt5 >= minRecallAt5,
+    topResults: results.map((result) => ({
+      id: result.trap.id,
+      scope: result.trap.scope === "project" || result.trap.scope === "global" ? result.trap.scope : scope,
+      title: result.trap.title,
+      sources: result.sources ?? [],
+      diagnostics: (result.diagnostics ?? []).map((diagnostic) => diagnostic.code),
+    })),
+    warnings: [...warnings],
+  };
+}
+function recall(gold: GoldTarget[], results: TrapSearchResult[], warnings: Set<string>, defaultScope: Scope): number {
+  return gold.filter((target) => targetMatches(target, results, warnings, defaultScope)).length / gold.length;
+}
+function firstMatchRank(
+  gold: GoldTarget[],
+  results: TrapSearchResult[],
+  warnings: Set<string>,
+  defaultScope: Scope
+): number {
+  return results.findIndex((result) =>
+    gold.some((target) => targetMatches(target, [result], warnings, defaultScope))
+  );
+}
+function targetMatches(
+  target: GoldTarget,
+  results: TrapSearchResult[],
+  warnings: Set<string>,
+  defaultScope: Scope
+): boolean {
+  const scope = target.scope ?? defaultScope;
+  const scoped = results.filter((result) => result.trap.scope === scope);
+  if (target.id !== undefined) {
+    const idMatch = scoped.find((result) => result.trap.id === target.id);
+    if (idMatch) {
+      if (target.title && idMatch.trap.title !== target.title) {
+        warnings.add(`gold_title_mismatch:${target.id}`);
+      }
+      return true;
+    }
+  }
+  if (!target.title) return false;
+  const titleMatches = scoped.filter((result) => result.trap.title === target.title);
+  if (titleMatches.length > 1) warnings.add(`gold_title_ambiguous:${target.title}`);
+  if (titleMatches.length > 0) {
+    if (target.id !== undefined && titleMatches.every((result) => result.trap.id !== target.id)) {
+      warnings.add(`gold_id_drift:${target.id}->${titleMatches.map((result) => result.trap.id).join(",")}`);
+    }
+    return true;
+  }
+  return false;
+}
+function fromEvalCaseReport(item: EvalCaseReport): ComparableSearchCase {
+  return {
+    key: caseKey(item.query, item.mode),
+    query: item.query,
+    mode: item.mode,
+    scored: item.goldTrapIds.length > 0,
+    recallAt3: item.recallAt3,
+    recallAt5: item.recallAt5,
+    reciprocalRank: item.reciprocalRank,
+    passed: item.passed,
+    topResults: item.topResults.map((result) => ({
+      id: result.id,
+      title: result.title,
+      sources: result.sources,
+      diagnostics: result.diagnostics,
+    })),
+    warnings: [],
+    error: item.error,
+  };
+}
+function candidateReport(
+  candidate: RankingCandidate,
+  cases: ComparableSearchCase[],
+  baselineCases: ComparableSearchCase[]
+): SweepCandidateReport {
+  return {
+    name: candidate.name,
+    config: candidate.config,
+    total_cases: cases.length,
+    scored_cases: cases.filter((item) => item.scored).length,
+    metrics: aggregateMetrics(cases),
+    cases,
+    failures: cases.filter((item) => !item.passed),
+    regressions: deltas(cases, baselineCases, "regression"),
+    improvements: deltas(cases, baselineCases, "improvement"),
+  };
+}
+function aggregateMetrics(cases: ComparableSearchCase[]): SearchEvalMetrics {
+  const scored = cases.filter((item) => item.scored);
+  const total = scored.length || 1;
+  return {
+    recall_at_3: round(scored.reduce((sum, item) => sum + item.recallAt3, 0) / total),
+    recall_at_5: round(scored.reduce((sum, item) => sum + item.recallAt5, 0) / total),
+    mrr: round(scored.reduce((sum, item) => sum + item.reciprocalRank, 0) / total),
+    hybrid_fallback_count: cases.filter((item) =>
+      item.topResults.some((result) =>
+        result.diagnostics.some((code) =>
+          ["semantic_unavailable", "semantic_no_candidates", "semantic_failed"].includes(code)
+        )
+      )
+    ).length,
+    semantic_error_count: cases.filter((item) => item.error).length,
+  };
+}
+function deltas(
+  cases: ComparableSearchCase[],
+  baselineCases: ComparableSearchCase[],
+  direction: "regression" | "improvement"
+): CaseDelta[] {
+  const baselineByKey = new Map(baselineCases.map((item) => [item.key, item]));
+  const out: CaseDelta[] = [];
+  for (const item of cases) {
+    const baseline = baselineByKey.get(item.key);
+    if (!baseline || !item.scored) continue;
+    const diff = item.reciprocalRank - baseline.reciprocalRank;
+    const changed = direction === "regression" ? diff < -0.0001 : diff > 0.0001;
+    if (!changed) continue;
+    out.push({
+      query: item.query,
+      mode: item.mode,
+      scope: item.scope,
+      before: baseline.reciprocalRank,
+      after: item.reciprocalRank,
+      beforeTop: topTitles(baseline),
+      afterTop: topTitles(item),
+    });
+  }
+  return out;
+}
+function buildSweepReport(
+  mode: PolicySweepReport["mode"],
+  source: string,
+  candidates: SweepCandidateReport[],
+  cwd?: string
+): PolicySweepReport {
+  if (candidates.length === 0) throw new Error("At least one ranking candidate is required.");
+  const baseline = candidates[0]!;
+  const best = [...candidates].sort(compareCandidates)[0]!;
+  return {
+    mode,
+    source,
+    cwd,
+    candidate_count: candidates.length,
+    baseline,
+    candidates,
+    best,
+    recommendation: recommendation(baseline, best, candidates),
+  };
+}
+function compareCandidates(a: SweepCandidateReport, b: SweepCandidateReport): number {
+  return (
+    b.metrics.recall_at_3 - a.metrics.recall_at_3 ||
+    b.metrics.mrr - a.metrics.mrr ||
+    a.failures.length - b.failures.length ||
+    a.regressions.length - b.regressions.length
+  );
+}
+function recommendation(baseline: SweepCandidateReport, best: SweepCandidateReport, candidates: SweepCandidateReport[]): string {
+  if (baseline.scored_cases === 0) return "No scored live cases yet; add gold targets before using this as an optimization signal.";
+  const allTie = candidates.every((item) =>
+    item.metrics.recall_at_3 === baseline.metrics.recall_at_3 &&
+    item.metrics.recall_at_5 === baseline.metrics.recall_at_5 &&
+    item.metrics.mrr === baseline.metrics.mrr
+  );
+  if (allTie) return "All candidates tie; add harder miss/noisy_hit eval cases before changing ranking config.";
+  if (best.name === baseline.name) return "The default config is still best on this fixture.";
+  if (best.regressions.length > 0) return `${best.name} improves aggregate metrics but has regressions; inspect before adopting.`;
+  return `${best.name} is the strongest candidate on these cases; inspect changed rankings before editing defaults.`;
+}
+function normalizeLiveEvalCase(value: unknown): LiveEvalCase {
+  if (!isRecord(value)) throw new Error("Each live query case must be an object.");
+  const query = stringField(value, "query");
+  const mode = optionalEnum(value, "mode", SEARCH_MODES);
+  const scope = optionalEnum(value, "scope", SCOPES);
+  return {
+    query,
+    mode,
+    scope,
+    limit: optionalPositiveInt(value, "limit"),
+    gold: normalizeGoldTargets(value.gold),
+    minRecallAt3: optionalScore(value, "minRecallAt3"),
+    minRecallAt5: optionalScore(value, "minRecallAt5"),
+  };
+}
+function normalizeGoldTargets(value: unknown): GoldTarget[] | undefined {
+  if (value === undefined) return undefined;
+  if (!Array.isArray(value)) throw new Error("gold must be an array.");
+  return value.map((item) => {
+    if (!isRecord(item)) throw new Error("gold entries must be objects.");
+    const id = item.id === undefined ? undefined : Number(item.id);
+    if (id !== undefined && (!Number.isInteger(id) || id <= 0)) throw new Error("gold.id must be a positive integer.");
+    const scope = optionalEnum(item, "scope", SCOPES);
+    const title = typeof item.title === "string" && item.title.trim() ? item.title.trim() : undefined;
+    if (id === undefined && !title) throw new Error("gold entries require id or title.");
+    return { id, title, scope };
+  });
+}
+function caseKey(query: string, mode: SearchMode, scope?: Scope): string {
+  return `${scope ?? "fixture"}\0${mode}\0${query}`;
+}
+function topTitles(item: ComparableSearchCase): string[] {
+  return item.topResults.slice(0, 3).map((result) => `#${result.id} ${result.title}`);
+}
+function summaryLine(item: SweepCandidateReport): string {
+  return `${item.name} R@3=${item.metrics.recall_at_3} R@5=${item.metrics.recall_at_5} MRR=${item.metrics.mrr} failures=${item.failures.length} regressions=${item.regressions.length}`;
+}
+function formatDeltas(items: CaseDelta[]): string {
+  return items
+    .slice(0, 3)
+    .map((item) => `${item.query} ${round(item.before)}->${round(item.after)}`)
+    .join("; ");
+}
+function optionalEnum<T extends readonly string[]>(value: Record<string, unknown>, key: string, choices: T): T[number] | undefined {
+  const field = value[key];
+  if (field === undefined) return undefined;
+  if (typeof field !== "string" || !(choices as readonly string[]).includes(field)) {
+    throw new Error(`${key} must be one of: ${choices.join(", ")}`);
+  }
+  return field as T[number];
+}
+function stringField(value: Record<string, unknown>, key: string): string {
+  const field = value[key];
+  if (typeof field !== "string" || field.trim() === "") throw new Error(`${key} is required.`);
+  return field.trim();
+}
+function optionalPositiveInt(value: Record<string, unknown>, key: string): number | undefined {
+  const field = value[key];
+  if (field === undefined) return undefined;
+  const number = Number(field);
+  if (!Number.isInteger(number) || number <= 0) throw new Error(`${key} must be a positive integer.`);
+  return number;
+}
+function optionalScore(value: Record<string, unknown>, key: string): number | undefined {
+  const field = value[key];
+  if (field === undefined) return undefined;
+  const number = Number(field);
+  if (!Number.isFinite(number) || number < 0 || number > 1) throw new Error(`${key} must be between 0 and 1.`);
+  return number;
+}
+function round(value: number): number {
+  return Math.round(value * 10000) / 10000;
+}