npm - @aperdomoll90/ledger-ai - Versions diffs - 1.3.0 → 1.4.2 - Mend

@aperdomoll90/ledger-ai 1.3.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)

package/dist/cli.js +177 -221
package/dist/commands/add.js +51 -100
package/dist/commands/backfill.js +55 -0
package/dist/commands/backup.js +10 -10
package/dist/commands/check.js +21 -29
package/dist/commands/config.js +13 -12
package/dist/commands/delete.js +22 -17
package/dist/commands/eval-judge.js +11 -0
package/dist/commands/eval.js +321 -0
package/dist/commands/export.js +8 -10
package/dist/commands/get.js +9 -0
package/dist/commands/hunt.js +206 -0
package/dist/commands/ingest.js +15 -14
package/dist/commands/init.js +18 -20
package/dist/commands/list.js +21 -7
package/dist/commands/migrate.js +11 -11
package/dist/commands/onboard.js +2 -2
package/dist/commands/pull.js +3 -2
package/dist/commands/push.js +8 -8
package/dist/commands/restore.js +38 -38
package/dist/commands/show.js +13 -16
package/dist/commands/sync.js +58 -19
package/dist/commands/tag.js +20 -14
package/dist/commands/update.js +50 -18
package/dist/commands/wizard.js +3 -3
package/dist/lib/ai-search.js +163 -0
package/dist/lib/audit.js +19 -0
package/dist/lib/backfill.js +60 -0
package/dist/lib/config.js +19 -2
package/dist/lib/document-classification.js +5 -0
package/dist/lib/document-fetching.js +77 -0
package/dist/lib/document-operations.js +150 -0
package/dist/lib/documents/classification.js +5 -0
package/dist/lib/documents/fetching.js +89 -0
package/dist/lib/documents/operations.js +304 -0
package/dist/lib/domains.js +116 -0
package/dist/lib/embeddings.js +190 -0
package/dist/lib/errors.js +3 -1
package/dist/lib/eval/eval-advanced.js +289 -0
package/dist/lib/eval/eval-judge-session.js +233 -0
package/dist/lib/eval/eval-store.js +105 -0
package/dist/lib/eval/eval.js +303 -0
package/dist/lib/file-writer.js +23 -0
package/dist/lib/generators.js +44 -45
package/dist/lib/hunter-db.js +235 -0
package/dist/lib/hunter-rss.js +30 -0
package/dist/lib/hunter-scoring.js +55 -0
package/dist/lib/hunter-types.js +36 -0
package/dist/lib/lint-configs.js +20 -0
package/dist/lib/migrate.js +2 -2
package/dist/lib/notes.js +173 -59
package/dist/lib/observability.js +296 -0
package/dist/lib/op-add-note-types.test.js +7 -6
package/dist/lib/prompt.js +8 -8
package/dist/lib/rate-limiter.js +103 -0
package/dist/lib/search/ai-search.js +396 -0
package/dist/lib/search/chunk-context-enrichment.js +155 -0
package/dist/lib/search/embeddings.js +293 -0
package/dist/lib/search/reranker.js +120 -0
package/dist/lib/search/semantic-cache.js +53 -0
package/dist/lib/type-registry.test.js +6 -6
package/dist/mcp-server.js +553 -66
package/dist/migrations/migrations/005-audit-log.sql +22 -0
package/dist/migrations/migrations/005_opportunities.sql +48 -0
package/dist/migrations/migrations/006-audited-operations.sql +235 -0
package/dist/migrations/migrations/006_hunt_analytics.sql +38 -0
package/dist/migrations/migrations/007-eval-golden-judgments.sql +119 -0
package/dist/migrations/migrations/008-drop-expected-doc-ids.sql +9 -0
package/dist/migrations/migrations/008-judge-helpers.sql +21 -0
package/dist/migrations/migrations/009-semantic-cache.sql +216 -0
package/dist/scripts/batch-grade.js +344 -0
package/dist/scripts/benchmark-ingestion.js +376 -0
package/dist/scripts/convert-judgments-to-graded.js +88 -0
package/dist/scripts/diagnose-first-result.js +333 -0
package/dist/scripts/drop-golden-query.js +53 -0
package/dist/scripts/eval-search.js +115 -0
package/dist/scripts/grade-unjudged-top1.js +138 -0
package/dist/scripts/hunter-analytics.js +38 -0
package/dist/scripts/hunter-cron.js +63 -0
package/dist/scripts/hunter-purge.js +25 -0
package/dist/scripts/migrate-v2.js +140 -0
package/dist/scripts/reindex.js +74 -0
package/dist/scripts/sync-local-docs.js +153 -0
package/package.json +7 -1

package/dist/commands/backfill.js ADDED Viewed

@@ -0,0 +1,55 @@
+import { backfillMetadata } from '../lib/backfill.js';
+export async function backfill(config, options) {
+    const { dryRun } = options;
+    console.error('Fetching all notes...');
+    const { data: notes, error } = await config.supabase
+        .from('notes')
+        .select('id, metadata')
+        .order('id', { ascending: true });
+    if (error) {
+        console.error(`Error fetching notes: ${error.message}`);
+        process.exit(1);
+    }
+    if (!notes || notes.length === 0) {
+        console.error('No notes found.');
+        return;
+    }
+    console.error(`Found ${notes.length} notes. Running v2 backfill...`);
+    let migrated = 0;
+    let skipped = 0;
+    let errors = 0;
+    for (const note of notes) {
+        const oldMeta = note.metadata;
+        const newMeta = backfillMetadata(oldMeta);
+        // Check if anything changed (idempotent skip)
+        if (JSON.stringify(oldMeta) === JSON.stringify(newMeta)) {
+            skipped++;
+            continue;
+        }
+        if (dryRun) {
+            const oldType = oldMeta.type ?? '?';
+            const newType = newMeta.type ?? '?';
+            const domain = newMeta.domain ?? '?';
+            const key = oldMeta.upsert_key ?? `id-${note.id}`;
+            console.error(`  [${note.id}] ${key}: ${oldType} → ${domain}/${newType}`);
+            migrated++;
+            continue;
+        }
+        const { error: updateError } = await config.supabase
+            .from('notes')
+            .update({ metadata: newMeta })
+            .eq('id', note.id);
+        if (updateError) {
+            console.error(`  [${note.id}] ERROR: ${updateError.message}`);
+            errors++;
+        }
+        else {
+            migrated++;
+        }
+    }
+    console.error(`\nBackfill ${dryRun ? '(dry run) ' : ''}complete:`);
+    console.error(`  ${migrated} migrated, ${skipped} already up-to-date, ${errors} errors`);
+    if (dryRun && migrated > 0) {
+        console.error('\nRun without --dry-run to apply changes.');
+    }
+}

package/dist/commands/backup.js CHANGED Viewed

@@ -6,10 +6,10 @@ export async function backup(config, options) {
     const { quiet } = options;
     const backupDir = resolve(getLedgerDir(), 'backups');
     mkdirSync(backupDir, { recursive: true });
-    // Fetch all notes (not just cached)
+    // Fetch all documents (not just cached)
     const { data, error } = await config.supabase
-        .from('notes')
-        .select('id, content, metadata, created_at, updated_at')
+        .from('documents')
+        .select('id, name, domain, document_type, project, protection, content, description, status, created_at, updated_at')
         .order('id', { ascending: true });
     if (error) {
         console.error(`Backup failed: ${error.message}`);
@@ -17,7 +17,7 @@ export async function backup(config, options) {
     }
     if (!data || data.length === 0) {
         if (!quiet)
-            console.error('No notes to backup.');
+            console.error('No documents to backup.');
         return;
     }
     const date = new Date().toISOString().split('T')[0];
@@ -25,7 +25,7 @@ export async function backup(config, options) {
     writeFileSync(filePath, JSON.stringify(data, null, 2) + '\n');
     // Keep last 5 backups, delete older
     const backups = readdirSync(backupDir)
-        .filter(f => f.endsWith('.json'))
+        .filter(file => file.endsWith('.json'))
         .sort()
         .reverse();
     for (const old of backups.slice(5)) {
@@ -34,7 +34,7 @@ export async function backup(config, options) {
             console.error(`  deleted old backup: ${old}`);
     }
     if (!quiet) {
-        console.error(`Backed up ${data.length} notes to ${filePath}`);
+        console.error(`Backed up ${data.length} documents to ${filePath}`);
     }
     console.log(filePath);
 }
@@ -59,8 +59,8 @@ export function enableBackupCron() {
             throw new Error(result.stderr?.toString() || 'crontab failed');
         console.error('Daily backup enabled (1am). View with `crontab -l`.');
     }
-    catch (e) {
-        console.error(`Failed to set cron: ${e.message}`);
+    catch (cronError) {
+        console.error(`Failed to set cron: ${cronError.message}`);
         console.error(`Add manually: ${cronLine}`);
     }
 }
@@ -83,7 +83,7 @@ export function disableBackupCron() {
             throw new Error(result.stderr?.toString() || 'crontab failed');
         console.error('Backup cron disabled.');
     }
-    catch (e) {
-        console.error(`Failed to update cron: ${e.message}`);
+    catch (cronError) {
+        console.error(`Failed to update cron: ${cronError.message}`);
     }
 }

package/dist/commands/check.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { readFileSync, readdirSync, existsSync } from 'fs';
-import { resolve } from 'path';
-import { fetchNoteHashes, checkChunkIntegrity } from '../lib/notes.js';
+import { resolve, basename } from 'path';
+import { fetchSyncableDocuments } from '../lib/documents/fetching.js';
 import { contentHash } from '../lib/hash.js';
 export async function check(config) {
     const result = {
@@ -16,51 +16,55 @@ export async function check(config) {
         console.error('Memory directory not found. Run `ledger pull` first.');
         return result;
     }
-    const noteHashes = await fetchNoteHashes(config.supabase);
-    const notesByFile = new Map(noteHashes.map(n => [n.localFile, n]));
+    // Fetch all auto-load documents — these are the ones that sync locally
+    const syncableDocuments = await fetchSyncableDocuments(config.supabase);
+    // Build a map of local filename → document for comparison
+    const documentsByFile = new Map(syncableDocuments
+        .filter(document => document.file_path)
+        .map(document => [basename(document.file_path), document]));
     const localFiles = readdirSync(config.memoryDir)
-        .filter(f => f.endsWith('.md') && f !== 'MEMORY.md');
+        .filter(file => file.endsWith('.md') && file !== 'MEMORY.md');
     for (const file of localFiles) {
         const filePath = resolve(config.memoryDir, file);
         const localContent = readFileSync(filePath, 'utf-8').trim();
         const localHash = contentHash(localContent);
-        const note = notesByFile.get(file);
-        if (!note) {
+        const document = documentsByFile.get(file);
+        if (!document) {
             console.error(`  ${file} — unknown (not in Ledger)`);
             result.files.push({ file, state: 'unknown' });
             result.unknown++;
-            notesByFile.delete(file);
+            documentsByFile.delete(file);
             continue;
         }
-        const ledgerHash = contentHash(note.content);
-        const storedHash = note.contentHash;
+        const ledgerHash = contentHash(document.content);
+        const storedHash = document.content_hash;
         const localChanged = localHash !== storedHash;
         const ledgerChanged = ledgerHash !== storedHash;
         if (!localChanged && !ledgerChanged) {
             console.error(`  ${file} — in sync`);
-            result.files.push({ file, state: 'clean', noteId: note.id });
+            result.files.push({ file, state: 'clean', documentId: document.id });
             result.clean++;
         }
         else if (localChanged && !ledgerChanged) {
             console.error(`  ${file} — modified locally`);
-            result.files.push({ file, state: 'modified', noteId: note.id });
+            result.files.push({ file, state: 'modified', documentId: document.id });
             result.modified++;
         }
         else if (!localChanged && ledgerChanged) {
             console.error(`  ${file} — updated in Ledger`);
-            result.files.push({ file, state: 'upstream', noteId: note.id });
+            result.files.push({ file, state: 'upstream', documentId: document.id });
             result.upstream++;
         }
         else {
             console.error(`  ${file} — CONFLICT (both changed)`);
-            result.files.push({ file, state: 'conflict', noteId: note.id });
+            result.files.push({ file, state: 'conflict', documentId: document.id });
             result.conflicts++;
         }
-        notesByFile.delete(file);
+        documentsByFile.delete(file);
     }
-    for (const [file, note] of notesByFile) {
+    for (const [file, document] of documentsByFile) {
         console.error(`  ${file} — missing locally (exists in Ledger)`);
-        result.files.push({ file, state: 'deleted', noteId: note.id });
+        result.files.push({ file, state: 'deleted', documentId: document.id });
         result.deleted++;
     }
     const summary = [
@@ -77,15 +81,3 @@ export async function check(config) {
     }
     return result;
 }
-export async function checkChunks(config) {
-    console.error('Checking chunk integrity...');
-    const result = await checkChunkIntegrity(config.supabase);
-    if (result.incompleteGroups.length === 0) {
-        console.log('All chunk groups are complete.');
-        return;
-    }
-    console.error(`Found ${result.incompleteGroups.length} incomplete chunk group(s):`);
-    for (const group of result.incompleteGroups) {
-        console.error(`  group ${group.groupId}: expected ${group.expected} chunks, found ${group.found}`);
-    }
-}

package/dist/commands/config.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { getLedgerDir, saveConfigFile, loadConfigFile } from '../lib/config.js';
-import { BUILTIN_TYPES, getTypeRegistry, opUpdateMetadata, validateTypeName } from '../lib/notes.js';
+import { BUILTIN_TYPES, getTypeRegistry, opUpdateMetadata, validateTypeName, inferDomain } from '../lib/notes.js';
 import { choose, confirm } from '../lib/prompt.js';
 import { resolve } from 'path';
 const CONFIG_PATH = resolve(getLedgerDir(), 'config.json');
@@ -156,7 +156,7 @@ export async function configSet(key, value, clients) {
         const typeName = key.slice(6);
         const delivery = value;
         if (!['persona', 'project', 'knowledge', 'protected'].includes(delivery)) {
-            console.error(`Invalid delivery tier: "${value}". Must be: persona, project, knowledge, or protected.`);
+            console.error(`Invalid domain/delivery: "${value}". Must be: persona, project, knowledge, or protected.`);
             process.exit(1);
         }
         const nameError = validateTypeName(typeName);
@@ -172,30 +172,31 @@ export async function configSet(key, value, clients) {
         const isBuiltin = typeName in BUILTIN_TYPES;
         const action = isBuiltin ? 'overridden' : 'registered';
         console.error(`types.${typeName}: ${delivery} (${action})`);
-        // Delivery change propagation — only if we have DB access and delivery actually changed
+        // Domain change propagation — only if we have DB access and delivery actually changed
         if (clients && oldDelivery && oldDelivery !== delivery) {
+            const newDomain = inferDomain(typeName);
             const { data: notes } = await clients.supabase
                 .from('notes')
                 .select('id, metadata')
                 .eq('metadata->>type', typeName);
-            const affected = (notes ?? []).filter((n) => n.metadata.delivery !== delivery);
+            const affected = (notes ?? []).filter((n) => n.metadata.domain !== newDomain);
             if (affected.length > 0) {
-                console.error(`\n${affected.length} note(s) currently have a different delivery:`);
+                console.error(`\n${affected.length} note(s) currently have a different domain:`);
                 for (const note of affected) {
                     const meta = note.metadata;
                     const uKey = meta.upsert_key || `id-${note.id}`;
-                    console.error(`  [${note.id}] ${uKey} — delivery: ${meta.delivery}`);
+                    console.error(`  [${note.id}] ${uKey} — domain: ${meta.domain}`);
                 }
-                const action = await choose('\nUpdate delivery on these notes?', [
+                const action = await choose('\nUpdate domain on these notes?', [
                     'all — update all notes',
                     'select — choose individually',
                     'none — only affect new notes',
                 ]);
                 if (action.startsWith('all')) {
                     for (const note of affected) {
-                        await opUpdateMetadata(clients, note.id, { delivery });
+                        await opUpdateMetadata(clients, note.id, { domain: newDomain });
                     }
-                    console.error(`Updated delivery to "${delivery}" on ${affected.length} note(s).`);
+                    console.error(`Updated domain to "${newDomain}" on ${affected.length} note(s).`);
                 }
                 else if (action.startsWith('select')) {
                     let updated = 0;
@@ -204,11 +205,11 @@ export async function configSet(key, value, clients) {
                         const uKey = meta.upsert_key || `id-${note.id}`;
                         const yes = await confirm(`  Update [${note.id}] ${uKey}?`);
                         if (yes) {
-                            await opUpdateMetadata(clients, note.id, { delivery });
+                            await opUpdateMetadata(clients, note.id, { domain: newDomain });
                             updated++;
                         }
                     }
-                    console.error(`Updated delivery on ${updated} note(s).`);
+                    console.error(`Updated domain on ${updated} note(s).`);
                 }
             }
         }
@@ -238,7 +239,7 @@ export async function configUnset(key, clients) {
             .select('id')
             .eq('metadata->>type', typeName);
         if (notes && notes.length > 0) {
-            console.error(`\n${notes.length} note(s) use type "${typeName}". They will become unregistered (delivery defaults to "knowledge").`);
+            console.error(`\n${notes.length} note(s) use type "${typeName}". They will become unregistered (domain defaults to "project").`);
             const proceed = await confirm('Proceed?');
             if (!proceed) {
                 console.error('Cancelled.');

package/dist/commands/delete.js CHANGED Viewed

@@ -1,22 +1,27 @@
-import { opDeleteNote } from '../lib/notes.js';
+import { getDocumentById } from '../lib/documents/fetching.js';
+import { deleteDocument as deleteDocumentOperation } from '../lib/documents/operations.js';
 import { confirm } from '../lib/prompt.js';
-export async function deleteNote(config, id) {
-    const clients = { supabase: config.supabase, openai: config.openai };
-    // First call: show confirmation
-    const preview = await opDeleteNote(clients, id, false);
-    if (preview.status === 'error') {
-        console.error(preview.message);
+export async function removeDocument(config, id, options = {}) {
+    const document = await getDocumentById(config.supabase, id);
+    if (!document) {
+        console.error(`Document ${id} not found.`);
         process.exit(1);
     }
-    console.error(preview.message);
-    const proceed = await confirm('\nProceed with deletion?');
-    if (!proceed) {
-        console.error('Cancelled.');
-        return;
-    }
-    // Second call: execute
-    const result = await opDeleteNote(clients, id, true);
-    console.error(result.message);
-    if (result.status === 'error')
+    if (document.protection === 'immutable') {
+        console.error(`Document "${document.name}" (id: ${id}) is immutable and cannot be deleted.`);
         process.exit(1);
+    }
+    console.error(`Document: "${document.name}" (id: ${id})`);
+    console.error(`Domain: ${document.domain} | Type: ${document.document_type}`);
+    console.error(`Protection: ${document.protection}`);
+    console.error(`Content preview: ${document.content.slice(0, 200)}${document.content.length > 200 ? '...' : ''}`);
+    if (!options.yes) {
+        const proceed = await confirm('\nProceed with deletion?');
+        if (!proceed) {
+            console.error('Cancelled.');
+            return;
+        }
+    }
+    await deleteDocumentOperation({ supabase: config.supabase, openai: config.openai }, id, 'cli');
+    console.error(`Document ${id} soft-deleted. Can be restored within 30 days.`);
 }

package/dist/commands/eval-judge.js ADDED Viewed

@@ -0,0 +1,11 @@
+// eval-judge.ts
+// CLI command for the graded-relevance rejudging walkthrough.
+import { runJudgeSession } from '../lib/eval/eval-judge-session.js';
+export async function evalJudge(config, options) {
+    const clients = {
+        supabase: config.supabase,
+        openai: config.openai,
+        cohereApiKey: config.cohereApiKey,
+    };
+    await runJudgeSession(clients, options.query);
+}

package/dist/commands/eval.js ADDED Viewed

@@ -0,0 +1,321 @@
+import { randomUUID } from 'node:crypto';
+import { searchHybrid } from '../lib/search/ai-search.js';
+import { scoreTestCase, computeMetrics, formatReport, compareRuns, formatComparison } from '../lib/eval/eval.js';
+import { saveEvalRun, loadPreviousRun, loadEvalRun, CURRENT_SEARCH_CONFIG } from '../lib/eval/eval-store.js';
+import { computeConfidenceIntervals, computeScoreCalibration, computeCoverageAnalysis, formatAdvancedReport } from '../lib/eval/eval-advanced.js';
+import { runEvalTrace, runEvalQuerySpan, startSpan } from '../lib/observability.js';
+// Search config imported from eval-store.ts (single source of truth)
+// =============================================================================
+// Command
+// =============================================================================
+export async function evalSearch(config, options) {
+    // Tag every search trace from this eval run with environment=eval and a
+    // shared session ID so the Langfuse dashboard can filter eval traffic from
+    // prod and group all queries in this run under one session.
+    const evalRunUuid = randomUUID();
+    const clients = {
+        supabase: config.supabase,
+        openai: config.openai,
+        cohereApiKey: config.cohereApiKey,
+        sessionId: `eval-${evalRunUuid}`,
+        observabilityEnvironment: 'eval',
+    };
+    console.log('\n' + '='.repeat(60));
+    console.log('Ledger Search Evaluation');
+    if (options.dryRun)
+        console.log('(dry run — results will not be saved)');
+    console.log('='.repeat(60) + '\n');
+    const previousRun = await loadPreviousRun(clients.supabase);
+    if (previousRun) {
+        console.log(`Previous run: ${previousRun.run_date} (id: ${previousRun.id})\n`);
+    }
+    else {
+        console.log('No previous run found — this will be the first stored run.\n');
+    }
+    const { data: testCases, error } = await clients.supabase
+        .from('eval_golden_dataset')
+        .select('id, query, tags, judgments:eval_golden_judgments(document_id, grade)')
+        .order('id');
+    if (error || !testCases) {
+        process.stderr.write(`Failed to load golden dataset: ${error?.message ?? 'no data'}\n`);
+        process.exit(1);
+    }
+    console.log(`Loaded ${testCases.length} test cases.\n`);
+    await runEvalTrace({
+        sessionId: clients.sessionId,
+        tags: ['eval', 'run'],
+        config: CURRENT_SEARCH_CONFIG,
+        dryRun: options.dryRun,
+    }, async (evalTrace) => {
+        const results = [];
+        for (const testCase of testCases) {
+            const scored = await runEvalQuerySpan({
+                query: testCase.query,
+                goldenId: testCase.id,
+                tags: testCase.tags,
+                expectedDocs: testCase.judgments
+                    .filter(judgment => judgment.grade >= 2)
+                    .map(judgment => judgment.document_id),
+            }, async (querySpan) => {
+                const startTime = Date.now();
+                const searchResults = await searchHybrid(clients, {
+                    query: testCase.query,
+                    limit: CURRENT_SEARCH_CONFIG.limit,
+                    reranker: CURRENT_SEARCH_CONFIG.reranker,
+                });
+                const result = scoreTestCase(testCase, searchResults, Date.now() - startTime);
+                querySpan.update({
+                    output: {
+                        hit: result.hit,
+                        firstResultHit: result.firstResultHit,
+                        position: result.position,
+                        reciprocalRank: result.reciprocalRank,
+                        normalizedDiscountedCumulativeGain: result.normalizedDiscountedCumulativeGain,
+                        responseTimeMs: Date.now() - startTime,
+                    },
+                });
+                return result;
+            });
+            results.push(scored);
+            const isOutOfScope = !testCase.judgments.some(judgment => judgment.grade >= 2);
+            if (isOutOfScope) {
+                const status = scored.hit ? 'PASS' : `NOISE (${scored.returnedIds.length} results)`;
+                console.log(`  [${status}] "${testCase.query}" (out-of-scope)`);
+            }
+            else {
+                const status = scored.firstResultHit ? 'TOP' : scored.hit ? 'HIT' : 'MISS';
+                const positionInfo = scored.position !== null ? `@${scored.position + 1}` : '';
+                console.log(`  [${status}${positionInfo}] "${testCase.query}" → found ${scored.expectedFound}/${scored.expectedTotal}`);
+            }
+        }
+        const metrics = computeMetrics(results);
+        console.log('\n' + formatReport(metrics));
+        // Advanced analysis
+        const confidenceIntervals = computeConfidenceIntervals(results);
+        const scoreCalibration = computeScoreCalibration(results);
+        const coverageAnalysis = computeCoverageAnalysis(results);
+        // Eval analysis span
+        const analysisSpan = startSpan('eval-analysis');
+        analysisSpan.update({
+            input: {
+                testCaseCount: results.length,
+                normalCount: metrics.normalCases,
+                outOfScopeCount: metrics.outOfScopeCases,
+            },
+        });
+        if (!options.dryRun) {
+            const runId = await saveEvalRun(clients.supabase, {
+                metrics,
+                config: CURRENT_SEARCH_CONFIG,
+                results,
+                confidenceIntervals,
+                scoreCalibration,
+                coverageAnalysis,
+            });
+            process.stderr.write(`\nRun saved to eval_runs (id: ${runId})\n`);
+        }
+        let comparisonSeverity = 'none';
+        if (previousRun) {
+            const comparison = compareRuns({
+                hitRate: metrics.hitRate,
+                firstResultAccuracy: metrics.firstResultAccuracy,
+                recall: metrics.recall,
+                zeroResultRate: metrics.zeroResultRate,
+                meanReciprocalRank: metrics.meanReciprocalRank,
+                normalizedDiscountedCumulativeGain: metrics.normalizedDiscountedCumulativeGain,
+                avgResponseTimeMs: metrics.avgResponseTimeMs,
+            }, {
+                hitRate: previousRun.hit_rate,
+                firstResultAccuracy: previousRun.first_result_accuracy,
+                recall: previousRun.recall,
+                zeroResultRate: previousRun.zero_result_rate,
+                meanReciprocalRank: previousRun.mean_reciprocal_rank ?? 0,
+                normalizedDiscountedCumulativeGain: previousRun.normalized_discounted_cumulative_gain ?? 0,
+                avgResponseTimeMs: previousRun.avg_response_time_ms,
+            });
+            console.log('\n' + formatComparison(comparison));
+            comparisonSeverity = comparison.severity;
+        }
+        analysisSpan.update({
+            output: { metrics, comparisonSeverity },
+        });
+        analysisSpan.end();
+        evalTrace.update({
+            output: {
+                hitRate: metrics.hitRate,
+                firstResultAccuracy: metrics.firstResultAccuracy,
+                recall: metrics.recall,
+                meanReciprocalRank: metrics.meanReciprocalRank,
+                normalizedDiscountedCumulativeGain: metrics.normalizedDiscountedCumulativeGain,
+                comparisonSeverity,
+            },
+        });
+        console.log('\n' + formatAdvancedReport(confidenceIntervals, scoreCalibration, coverageAnalysis));
+    });
+}
+// =============================================================================
+// Threshold sweep — test multiple thresholds to find optimal value
+// =============================================================================
+/**
+ * Run the golden dataset at multiple similarity thresholds and compare.
+ * Prints a table showing how each metric changes with the threshold.
+ *
+ * Usage: ledger eval:sweep
+ *        ledger eval:sweep --thresholds 0.15,0.20,0.25,0.30,0.35,0.40
+ */
+export async function sweepThreshold(config, options) {
+    const sweepRunUuid = randomUUID();
+    const clients = {
+        supabase: config.supabase,
+        openai: config.openai,
+        cohereApiKey: config.cohereApiKey,
+        sessionId: `eval-sweep-${sweepRunUuid}`,
+        observabilityEnvironment: 'eval',
+    };
+    const thresholds = options.thresholds
+        .split(',')
+        .map(value => parseFloat(value.trim()))
+        .filter(value => !isNaN(value) && value > 0 && value < 1);
+    if (thresholds.length === 0) {
+        console.error('No valid thresholds provided. Use comma-separated values like: 0.15,0.20,0.25');
+        process.exit(1);
+    }
+    const { data: testCases, error } = await clients.supabase
+        .from('eval_golden_dataset')
+        .select('id, query, tags, judgments:eval_golden_judgments(document_id, grade)')
+        .order('id');
+    if (error || !testCases) {
+        console.error('Failed to load golden dataset:', error?.message ?? 'no data');
+        process.exit(1);
+    }
+    const goldenCases = testCases;
+    const normalCount = goldenCases.filter(testCase => testCase.judgments.some(judgment => judgment.grade >= 2)).length;
+    console.log(`\nLoaded ${goldenCases.length} test cases (${normalCount} normal)\n`);
+    console.log('threshold | hit_rate | first_result | recall   | MRR    | NDCG   | avg_ms');
+    console.log('----------|----------|--------------|----------|--------|--------|-------');
+    for (const threshold of thresholds) {
+        const results = [];
+        for (const testCase of goldenCases) {
+            const startTime = Date.now();
+            const searchResults = await searchHybrid(clients, {
+                query: testCase.query,
+                limit: CURRENT_SEARCH_CONFIG.limit,
+                threshold,
+                reranker: CURRENT_SEARCH_CONFIG.reranker,
+            });
+            results.push(scoreTestCase(testCase, searchResults, Date.now() - startTime));
+        }
+        const metrics = computeMetrics(results);
+        // metrics.hitRate etc are already percentages (0-100) from computeMetrics
+        console.log(`${threshold.toFixed(2).padStart(9)} | ` +
+            `${metrics.hitRate.toFixed(1).padStart(6)}% | ` +
+            `${metrics.firstResultAccuracy.toFixed(1).padStart(10)}% | ` +
+            `${metrics.recall.toFixed(1).padStart(6)}% | ` +
+            `${metrics.meanReciprocalRank.toFixed(3).padStart(6)} | ` +
+            `${metrics.normalizedDiscountedCumulativeGain.toFixed(3).padStart(6)} | ` +
+            `${metrics.avgResponseTimeMs.toFixed(0).padStart(5)}`);
+    }
+    console.log(`\nCurrent threshold: ${CURRENT_SEARCH_CONFIG.threshold}`);
+}
/**
 * Resolve a batch of document ids to display metadata in a single query.
 *
 * Reads `id, name, content` from the `documents` table and builds, for each
 * returned row, a short single-line snippet: whitespace runs collapsed to one
 * space, then cut to the first 140 characters.
 *
 * The lookup is best-effort: when the query fails or returns no data, a
 * warning is written to stderr and an empty map is returned so callers can
 * still render their output with placeholder names.
 *
 * @param {object} supabase - Client exposing `from().select().in()`.
 * @param {Array} docIds - Document ids to resolve; an empty list skips the query.
 * @returns {Promise<Map<*, {id: *, name: *, snippet: string}>>} Map keyed by document id.
 */
async function fetchDocLookup(supabase, docIds) {
    const lookup = new Map();
    if (docIds.length === 0) {
        return lookup;
    }
    const { data, error } = await supabase
        .from('documents')
        .select('id, name, content')
        .in('id', docIds);
    if (error || !data) {
        // Stay best-effort, but say why names/snippets will be missing instead
        // of failing silently.
        process.stderr.write(`Warning: could not load document details: ${error?.message ?? 'no data'}\n`);
        return lookup;
    }
    for (const { id, name, content } of data) {
        // Rows without content still get an entry, with an empty snippet.
        const snippet = (content ?? '').replace(/\s+/g, ' ').slice(0, 140);
        lookup.set(id, { id, name, snippet });
    }
    return lookup;
}
/**
 * Print the detail section for each missed query of an eval run: the query
 * text, its tags, the expected documents, the top three returned documents
 * (with scores when recorded) and a snippet of the top result.
 *
 * Fields that older stored runs may not carry (`tags`, `got`, `gotScores`)
 * are treated as empty rather than causing a crash.
 *
 * @param {object} supabase - Client handed to `fetchDocLookup` for name/snippet resolution.
 * @param {Array<object>} missedQueries - Non-empty list of missed-query records.
 * @returns {Promise<void>}
 */
async function printMissedQueryDetails(supabase, missedQueries) {
    // Support both the new graded shape (judgments) and the legacy binary shape
    // (expected) so eval:show still works against historical runs.
    const expectedDocsFor = (missedQuery) => {
        const judgments = missedQuery.judgments;
        if (judgments && judgments.length > 0) {
            return judgments
                .filter(judgment => judgment.grade >= 2)
                .map(judgment => ({ id: judgment.document_id, grade: judgment.grade }));
        }
        return (missedQuery.expected ?? []).map(id => ({ id }));
    };
    // Resolve doc ids → names + snippets in one batch.
    const allDocIds = new Set();
    for (const missedQuery of missedQueries) {
        for (const expected of expectedDocsFor(missedQuery)) {
            allDocIds.add(expected.id);
        }
        for (const returnedId of (missedQuery.got ?? []).slice(0, 3)) {
            allDocIds.add(returnedId);
        }
    }
    const lookup = await fetchDocLookup(supabase, Array.from(allDocIds));
    const formatDoc = (docId, score) => {
        const documentName = lookup.get(docId)?.name ?? '<unknown>';
        // Only numeric scores are printed; missing or null scores are omitted.
        const scoreLabel = typeof score === 'number' ? ` (${score.toFixed(3)})` : '';
        return `#${docId} ${documentName}${scoreLabel}`;
    };
    for (const [missedIndex, missedQuery] of missedQueries.entries()) {
        const tags = missedQuery.tags ?? [];
        const got = missedQuery.got ?? [];
        const gotScores = missedQuery.gotScores ?? [];
        console.log(`[${missedIndex + 1}] "${missedQuery.query}"`);
        if (tags.length > 0) {
            console.log(`    tags: ${tags.join(', ')}`);
        }
        console.log(`    expected:`);
        for (const expected of expectedDocsFor(missedQuery)) {
            const gradeLabel = expected.grade !== undefined ? ` (grade ${expected.grade})` : '';
            console.log(`      - ${formatDoc(expected.id)}${gradeLabel}`);
        }
        if (got.length === 0) {
            console.log(`    got: (none — zero results)`);
        }
        else {
            console.log(`    got (top 3):`);
            const topReturned = Math.min(3, got.length);
            for (let position = 0; position < topReturned; position++) {
                console.log(`      ${position + 1}. ${formatDoc(got[position], gotScores[position])}`);
            }
            const topDoc = lookup.get(got[0]);
            if (topDoc?.snippet) {
                console.log(`    top1 snippet: "${topDoc.snippet}…"`);
            }
        }
        console.log('');
    }
}
/**
 * Print a stored eval run: headline metrics, the missed queries with their
 * expected and returned documents, and (with `options.full`) every per-query
 * result as one JSON line each.
 *
 * If the run cannot be loaded, a message is written to stderr and the process
 * exits with code 1.
 *
 * @param {object} config - Must expose `supabase`, the client used for all reads.
 * @param {*} runId - Identifier passed to `loadEvalRun`.
 * @param {{full?: boolean}} [options] - `full` additionally dumps `per_query_results`.
 * @returns {Promise<void>}
 */
export async function showEvalRun(config, runId, options) {
    const supabase = config.supabase;
    const run = await loadEvalRun(supabase, runId);
    if (!run) {
        process.stderr.write(`Eval run ${runId} not found\n`);
        process.exit(1);
        return;
    }
    const divider = '='.repeat(60);
    console.log(`\n${divider}`);
    console.log(`Eval Run ${run.id} — ${run.run_date}`);
    console.log(divider);
    console.log(`Test cases:        ${run.test_case_count}`);
    console.log(`Hit rate:          ${run.hit_rate.toFixed(1)}%`);
    console.log(`First-result acc:  ${run.first_result_accuracy.toFixed(1)}%`);
    console.log(`Recall:            ${run.recall.toFixed(1)}%`);
    console.log(`MRR:               ${(run.mean_reciprocal_rank ?? 0).toFixed(3)}`);
    console.log(`NDCG:              ${(run.normalized_discounted_cumulative_gain ?? 0).toFixed(3)}`);
    console.log(`Zero-result rate:  ${run.zero_result_rate.toFixed(1)}%`);
    console.log(`Avg response (ms): ${run.avg_response_time_ms.toFixed(0)}`);
    const missedQueries = run.missed_queries ?? [];
    console.log(`\nMissed queries: ${missedQueries.length}\n`);
    if (missedQueries.length === 0) {
        console.log('  (none)');
    }
    else {
        await printMissedQueryDetails(supabase, missedQueries);
    }
    // Runs regardless of whether any queries were missed, so `full` still
    // produces the per-query dump for a clean run; `options` may be omitted.
    if (options?.full && run.per_query_results) {
        console.log(divider);
        console.log('Per-query results (full)');
        console.log(divider);
        for (const queryResult of run.per_query_results) {
            console.log(JSON.stringify(queryResult));
        }
    }
}