@aperdomoll90/ledger-ai 1.3.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/dist/cli.js +177 -221
  2. package/dist/commands/add.js +51 -100
  3. package/dist/commands/backfill.js +55 -0
  4. package/dist/commands/backup.js +10 -10
  5. package/dist/commands/check.js +21 -29
  6. package/dist/commands/config.js +13 -12
  7. package/dist/commands/delete.js +22 -17
  8. package/dist/commands/eval-judge.js +11 -0
  9. package/dist/commands/eval.js +321 -0
  10. package/dist/commands/export.js +8 -10
  11. package/dist/commands/get.js +9 -0
  12. package/dist/commands/hunt.js +206 -0
  13. package/dist/commands/ingest.js +15 -14
  14. package/dist/commands/init.js +18 -20
  15. package/dist/commands/list.js +21 -7
  16. package/dist/commands/migrate.js +11 -11
  17. package/dist/commands/onboard.js +2 -2
  18. package/dist/commands/pull.js +3 -2
  19. package/dist/commands/push.js +8 -8
  20. package/dist/commands/restore.js +38 -38
  21. package/dist/commands/show.js +13 -16
  22. package/dist/commands/sync.js +58 -19
  23. package/dist/commands/tag.js +20 -14
  24. package/dist/commands/update.js +50 -18
  25. package/dist/commands/wizard.js +3 -3
  26. package/dist/lib/ai-search.js +163 -0
  27. package/dist/lib/audit.js +19 -0
  28. package/dist/lib/backfill.js +60 -0
  29. package/dist/lib/config.js +19 -2
  30. package/dist/lib/document-classification.js +5 -0
  31. package/dist/lib/document-fetching.js +77 -0
  32. package/dist/lib/document-operations.js +150 -0
  33. package/dist/lib/documents/classification.js +5 -0
  34. package/dist/lib/documents/fetching.js +89 -0
  35. package/dist/lib/documents/operations.js +304 -0
  36. package/dist/lib/domains.js +116 -0
  37. package/dist/lib/embeddings.js +190 -0
  38. package/dist/lib/errors.js +3 -1
  39. package/dist/lib/eval/eval-advanced.js +289 -0
  40. package/dist/lib/eval/eval-judge-session.js +233 -0
  41. package/dist/lib/eval/eval-store.js +105 -0
  42. package/dist/lib/eval/eval.js +303 -0
  43. package/dist/lib/file-writer.js +23 -0
  44. package/dist/lib/generators.js +44 -45
  45. package/dist/lib/hunter-db.js +235 -0
  46. package/dist/lib/hunter-rss.js +30 -0
  47. package/dist/lib/hunter-scoring.js +55 -0
  48. package/dist/lib/hunter-types.js +36 -0
  49. package/dist/lib/lint-configs.js +20 -0
  50. package/dist/lib/migrate.js +2 -2
  51. package/dist/lib/notes.js +173 -59
  52. package/dist/lib/observability.js +296 -0
  53. package/dist/lib/op-add-note-types.test.js +7 -6
  54. package/dist/lib/prompt.js +8 -8
  55. package/dist/lib/rate-limiter.js +103 -0
  56. package/dist/lib/search/ai-search.js +396 -0
  57. package/dist/lib/search/chunk-context-enrichment.js +155 -0
  58. package/dist/lib/search/embeddings.js +293 -0
  59. package/dist/lib/search/reranker.js +120 -0
  60. package/dist/lib/search/semantic-cache.js +53 -0
  61. package/dist/lib/type-registry.test.js +6 -6
  62. package/dist/mcp-server.js +553 -66
  63. package/dist/migrations/migrations/005-audit-log.sql +22 -0
  64. package/dist/migrations/migrations/005_opportunities.sql +48 -0
  65. package/dist/migrations/migrations/006-audited-operations.sql +235 -0
  66. package/dist/migrations/migrations/006_hunt_analytics.sql +38 -0
  67. package/dist/migrations/migrations/007-eval-golden-judgments.sql +119 -0
  68. package/dist/migrations/migrations/008-drop-expected-doc-ids.sql +9 -0
  69. package/dist/migrations/migrations/008-judge-helpers.sql +21 -0
  70. package/dist/migrations/migrations/009-semantic-cache.sql +216 -0
  71. package/dist/scripts/batch-grade.js +344 -0
  72. package/dist/scripts/benchmark-ingestion.js +376 -0
  73. package/dist/scripts/convert-judgments-to-graded.js +88 -0
  74. package/dist/scripts/diagnose-first-result.js +333 -0
  75. package/dist/scripts/drop-golden-query.js +53 -0
  76. package/dist/scripts/eval-search.js +115 -0
  77. package/dist/scripts/grade-unjudged-top1.js +138 -0
  78. package/dist/scripts/hunter-analytics.js +38 -0
  79. package/dist/scripts/hunter-cron.js +63 -0
  80. package/dist/scripts/hunter-purge.js +25 -0
  81. package/dist/scripts/migrate-v2.js +140 -0
  82. package/dist/scripts/reindex.js +74 -0
  83. package/dist/scripts/sync-local-docs.js +153 -0
  84. package/package.json +7 -1
@@ -0,0 +1,376 @@
1
+ // benchmark-ingestion.ts
2
+ // Measures ingestion pipeline performance with different optimization modes.
3
+ // Does NOT write to the database. Only runs chunking, enrichment, and embedding.
4
+ //
5
+ // Usage:
6
+ // npx tsx src/scripts/benchmark-ingestion.ts # run all modes
7
+ // npx tsx src/scripts/benchmark-ingestion.ts --mode baseline # run one mode
8
+ // npx tsx src/scripts/benchmark-ingestion.ts --file docs/foo.md # custom test file
9
+ //
10
+ // Modes:
11
+ // baseline — current code: sequential enrichment, sequential embeddings
12
+ // batch-embed — sequential enrichment, batch embeddings (one API call)
13
+ // parallel-cr — parallel Contextual Retrieval (3 concurrent, TPM-safe), sequential embeddings
14
+ // truncated — truncated context (summary + neighbors), sequential embeddings
15
+ // all — truncated + parallel + batch embeddings combined
16
+ //
17
+ // Results are appended to docs/benchmark-results.json
18
+ import 'dotenv/config';
19
+ import OpenAI from 'openai';
20
+ import { readFileSync, existsSync, writeFileSync } from 'fs';
21
+ import { chunkText } from '../lib/search/embeddings.js';
22
+ import { openaiLimiter, createRateLimiter, updateLimitsFromHeaders } from '../lib/rate-limiter.js';
23
+ // =============================================================================
24
+ // Config
25
+ // =============================================================================
26
// ---------------------------------------------------------------------------
// Models and file locations shared by every benchmark mode.
// ---------------------------------------------------------------------------
const EMBEDDING_MODEL = 'text-embedding-3-small';
const CONTEXT_MODEL = 'gpt-4o-mini';
const RESULTS_FILE = 'docs/benchmark-results.json';
const DEFAULT_TEST_FILE = 'docs/ledger-architecture-database-schemas.md';

// Limiter reserved for the parallel enrichment mode, where every chat call
// carries the full document (~18K tokens/call per the original sizing note).
// Sizing rationale: gpt-4o-mini allows 200K TPM, and 200K / 18K = ~11 calls/min,
// so the budget below stays just under that.
// NOTE(review): the option names are passed straight to createRateLimiter;
// their exact semantics live in ../lib/rate-limiter.js — confirm there.
const CHAT_LIMITER_OPTIONS = {
  maxConcurrent: 3,
  reservoirAmount: 10,
  reservoirRefreshInterval: 60_000,
  minTime: 2000,
  retryLimit: 3,
};
const chatLimiter = createRateLimiter(CHAT_LIMITER_OPTIONS);

// Shared OpenAI client; the API key is read from the environment.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
  maxRetries: 5,
});
40
+ // =============================================================================
41
+ // Prompts
42
+ // =============================================================================
43
// Closing instruction shared by both per-chunk context prompts.
const CONTEXT_INSTRUCTION = "Write a short context (2-3 sentences) that situates this chunk within the document. Include the document's topic and what specific information this chunk covers. Be concise and factual.";

// Full-document prompt. Placeholders: {DOCUMENT_CONTENT}, {CHUNK_CONTENT}.
const CONTEXT_PROMPT = [
  'Here is the full document:',
  '<document>',
  '{DOCUMENT_CONTENT}',
  '</document>',
  '',
  'Here is the chunk:',
  '<chunk>',
  '{CHUNK_CONTENT}',
  '</chunk>',
  '',
  CONTEXT_INSTRUCTION,
].join('\n');

// Truncated-context prompt: document summary + header path + neighboring chunks.
// Placeholders: {DOCUMENT_SUMMARY}, {HEADER_PATH}, {PREV_CHUNK}, {CHUNK_CONTENT}, {NEXT_CHUNK}.
const TRUNCATED_CONTEXT_PROMPT = [
  'Here is a summary of the document:',
  '<document_summary>',
  '{DOCUMENT_SUMMARY}',
  '</document_summary>',
  '',
  'Here is the section this chunk belongs to (header path):',
  '<section>',
  '{HEADER_PATH}',
  '</section>',
  '',
  'Here are the neighboring chunks for context:',
  '<previous_chunk>',
  '{PREV_CHUNK}',
  '</previous_chunk>',
  '',
  '<chunk>',
  '{CHUNK_CONTENT}',
  '</chunk>',
  '',
  '<next_chunk>',
  '{NEXT_CHUNK}',
  '</next_chunk>',
  '',
  CONTEXT_INSTRUCTION,
].join('\n');

// Whole-document summary prompt. Placeholder: {DOCUMENT_CONTENT}.
const SUMMARY_PROMPT = [
  'Summarize this document in 150-200 words. Focus on: what the document is about, its structure, and the key topics it covers. Be factual and concise.',
  '',
  '<document>',
  '{DOCUMENT_CONTENT}',
  '</document>',
].join('\n');
83
+ // =============================================================================
84
+ // Helpers
85
+ // =============================================================================
86
/**
 * Rough token estimate: character count divided by a chars-per-token ratio,
 * rounded up. This is a heuristic, not a tokenizer.
 *
 * @param {string | null | undefined} text - Text to size; nullish counts as 0 tokens.
 * @param {number} [charsPerToken=4] - Average characters per token (must be > 0).
 * @returns {number} Estimated token count.
 * @throws {RangeError} If `charsPerToken` is not a positive number.
 */
function estimateTokens(text, charsPerToken = 4) {
  if (!(charsPerToken > 0)) {
    throw new RangeError(`charsPerToken must be a positive number, got ${charsPerToken}`);
  }
  const length = text?.length ?? 0;
  return Math.ceil(length / charsPerToken);
}
89
/**
 * Builds the markdown header breadcrumb ("H1 > H2 > ...") in effect at the
 * point where a chunk starts inside the document.
 *
 * The chunk is located by its first 50 characters. The prefix is searched in
 * the whole document rather than line by line, so a prefix that spans several
 * lines (for example a header followed by a paragraph) is still found.
 *
 * @param {string} content - Full markdown document.
 * @param {string} chunkContent - Text of the chunk to locate.
 * @returns {string} Header titles joined with " > ", or '' when the chunk is
 *   empty/whitespace-only, cannot be located, or no header precedes it.
 */
function findHeaderPath(content, chunkContent) {
  const needle = chunkContent.slice(0, 50);
  // An empty needle would "match" at offset 0 and yield a meaningless path.
  if (needle.trim() === '') {
    return '';
  }
  const matchOffset = content.indexOf(needle);
  if (matchOffset === -1) {
    return '';
  }
  const headers = [];
  let lineStart = 0;
  for (const line of content.split('\n')) {
    // Stop after the line on which the chunk begins; that line itself is still
    // processed so a chunk that starts with a header includes that header.
    if (lineStart > matchOffset) {
      break;
    }
    const headerMatch = /^(#{1,6})\s/.exec(line);
    if (headerMatch) {
      const level = headerMatch[1].length;
      // Drop headers at the same or deeper level before adding this one.
      while (headers.length >= level) {
        headers.pop();
      }
      headers.push(line.replace(/^#+\s*/, '').trim());
    }
    lineStart += line.length + 1; // +1 for the '\n' removed by split
  }
  return headers.join(' > ');
}
107
/**
 * Asks the chat model for a short summary of the whole document, scheduled
 * through `openaiLimiter`.
 *
 * @param {string} documentContent - Full document text.
 * @returns {Promise<{summary: string, inputTokens: number}>} The trimmed
 *   summary ('' when the model returns no content) and the estimated prompt
 *   size in tokens.
 */
async function generateDocSummary(documentContent) {
  // Function replacer: with a plain string replacement, `$$`, `$&`, `` $` ``
  // and `$'` inside the document would be interpreted as replacement patterns
  // and the document text sent to the model would be silently altered.
  const prompt = SUMMARY_PROMPT.replace('{DOCUMENT_CONTENT}', () => documentContent);
  const inputTokens = estimateTokens(prompt);
  const response = await openaiLimiter.schedule(() => openai.chat.completions.create({
    model: CONTEXT_MODEL,
    messages: [
      { role: 'system', content: 'You are a precise technical writer. Output only the summary, nothing else.' },
      { role: 'user', content: prompt },
    ],
    max_tokens: 300,
    temperature: 0,
  }));
  return {
    summary: (response.choices[0].message.content ?? '').trim(),
    inputTokens,
  };
}
124
+ // =============================================================================
125
+ // Enrichment strategies
126
+ // =============================================================================
127
/**
 * Baseline enrichment: one chat completion per chunk, awaited one at a time,
 * with the full document included in every prompt.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich (only `content` is read).
 * @param {string} documentContent - Full document text.
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One summary per chunk (in chunk order), wall-clock duration, and the
 *   estimated total prompt tokens.
 */
async function enrichBaseline(chunks, documentContent) {
  const start = Date.now();
  const summaries = [];
  let inputTokens = 0;
  for (const chunk of chunks) {
    const values = { DOCUMENT_CONTENT: documentContent, CHUNK_CONTENT: chunk.content };
    // Single pass with a function replacer: inserted text is never rescanned
    // (a literal "{CHUNK_CONTENT}" inside the document cannot steal the chunk
    // slot) and `$`-patterns in the inserted text are kept verbatim.
    const prompt = CONTEXT_PROMPT.replace(
      /\{([A-Z_]+)\}/g,
      (placeholder, key) => (key in values ? values[key] : placeholder),
    );
    inputTokens += estimateTokens(prompt);
    const response = await openaiLimiter.schedule(() => openai.chat.completions.create({
      model: CONTEXT_MODEL,
      messages: [
        { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: 150,
      temperature: 0,
    }));
    summaries.push((response.choices[0].message.content ?? '').trim());
  }
  return { summaries, timeMs: Date.now() - start, inputTokens };
}
149
/**
 * Parallel enrichment: builds every full-document prompt up front and submits
 * all chat completions to `chatLimiter`, which governs how many run at once.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich (only `content` is read).
 * @param {string} documentContent - Full document text.
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One summary per chunk (in chunk order), wall-clock duration, and the
 *   estimated total prompt tokens. Rejects as soon as any call fails.
 */
async function enrichParallel(chunks, documentContent) {
  const start = Date.now();
  let inputTokens = 0;
  const promises = chunks.map((chunk, index) => {
    const values = { DOCUMENT_CONTENT: documentContent, CHUNK_CONTENT: chunk.content };
    // Single pass with a function replacer: inserted text is never rescanned
    // and `$`-patterns in the document/chunk are kept verbatim.
    const prompt = CONTEXT_PROMPT.replace(
      /\{([A-Z_]+)\}/g,
      (placeholder, key) => (key in values ? values[key] : placeholder),
    );
    inputTokens += estimateTokens(prompt);
    return chatLimiter.schedule({ id: `enrich-${index}` }, async () => {
      const response = await openai.chat.completions.create({
        model: CONTEXT_MODEL,
        messages: [
          { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
          { role: 'user', content: prompt },
        ],
        max_tokens: 150,
        temperature: 0,
      });
      return (response.choices[0].message.content ?? '').trim();
    });
  });
  // Promise.all resolves in input order, so no re-sorting by index is needed.
  const summaries = await Promise.all(promises);
  return { summaries, timeMs: Date.now() - start, inputTokens };
}
174
/**
 * Truncated-context enrichment, sequential: summarizes the document once, then
 * for each chunk sends the summary, the chunk's header path, and its two
 * neighboring chunks instead of the full document.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich (only `content` is read).
 * @param {string} documentContent - Full document text (used for the summary
 *   and for header-path lookup).
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One summary per chunk (in chunk order), wall-clock duration, and the
 *   estimated prompt tokens including the document-summary call.
 */
async function enrichTruncated(chunks, documentContent) {
  const start = Date.now();
  const summaries = [];
  let inputTokens = 0;
  const { summary: docSummary, inputTokens: summaryTokens } = await generateDocSummary(documentContent);
  inputTokens += summaryTokens;
  for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
    const current = chunks[chunkIndex].content;
    const values = {
      DOCUMENT_SUMMARY: docSummary,
      HEADER_PATH: findHeaderPath(documentContent, current) || '(unknown section)',
      PREV_CHUNK: chunkIndex > 0 ? chunks[chunkIndex - 1].content : '(start of document)',
      CHUNK_CONTENT: current,
      NEXT_CHUNK: chunkIndex < chunks.length - 1 ? chunks[chunkIndex + 1].content : '(end of document)',
    };
    // Single pass with a function replacer: inserted text is never rescanned
    // (a placeholder-looking string inside a chunk cannot capture a later
    // substitution) and `$`-patterns in inserted text are kept verbatim.
    const prompt = TRUNCATED_CONTEXT_PROMPT.replace(
      /\{([A-Z_]+)\}/g,
      (placeholder, key) => (key in values ? values[key] : placeholder),
    );
    inputTokens += estimateTokens(prompt);
    const response = await openaiLimiter.schedule(() => openai.chat.completions.create({
      model: CONTEXT_MODEL,
      messages: [
        { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
        { role: 'user', content: prompt },
      ],
      max_tokens: 150,
      temperature: 0,
    }));
    summaries.push((response.choices[0].message.content ?? '').trim());
  }
  return { summaries, timeMs: Date.now() - start, inputTokens };
}
204
/**
 * Truncated-context enrichment, parallel: summarizes the document once, then
 * submits one truncated-context chat completion per chunk to `openaiLimiter`
 * without waiting between submissions.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich (only `content` is read).
 * @param {string} documentContent - Full document text (used for the summary
 *   and for header-path lookup).
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One summary per chunk (in chunk order), wall-clock duration, and the
 *   estimated prompt tokens including the document-summary call. Rejects as
 *   soon as any call fails.
 */
async function enrichTruncatedParallel(chunks, documentContent) {
  const start = Date.now();
  let inputTokens = 0;
  const { summary: docSummary, inputTokens: summaryTokens } = await generateDocSummary(documentContent);
  inputTokens += summaryTokens;
  // Truncated context = ~1K tokens per call. TPM-safe for full concurrency.
  const promises = chunks.map((chunk, chunkIndex) => {
    const values = {
      DOCUMENT_SUMMARY: docSummary,
      HEADER_PATH: findHeaderPath(documentContent, chunk.content) || '(unknown section)',
      PREV_CHUNK: chunkIndex > 0 ? chunks[chunkIndex - 1].content : '(start of document)',
      CHUNK_CONTENT: chunk.content,
      NEXT_CHUNK: chunkIndex < chunks.length - 1 ? chunks[chunkIndex + 1].content : '(end of document)',
    };
    // Single pass with a function replacer: inserted text is never rescanned
    // and `$`-patterns in inserted text are kept verbatim.
    const prompt = TRUNCATED_CONTEXT_PROMPT.replace(
      /\{([A-Z_]+)\}/g,
      (placeholder, key) => (key in values ? values[key] : placeholder),
    );
    inputTokens += estimateTokens(prompt);
    return openaiLimiter.schedule({ id: `tp-${chunkIndex}` }, async () => {
      const response = await openai.chat.completions.create({
        model: CONTEXT_MODEL,
        messages: [
          { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
          { role: 'user', content: prompt },
        ],
        max_tokens: 150,
        temperature: 0,
      });
      return (response.choices[0].message.content ?? '').trim();
    });
  });
  // Promise.all resolves in input order, so no re-sorting by index is needed.
  const summaries = await Promise.all(promises);
  return { summaries, timeMs: Date.now() - start, inputTokens };
}
238
+ // =============================================================================
239
+ // Embedding strategies
240
+ // =============================================================================
241
/**
 * Embeds texts one request at a time through `openaiLimiter`, forwarding each
 * response's headers to `updateLimitsFromHeaders`.
 *
 * @param {string[]} texts - Inputs to embed.
 * @returns {Promise<{embeddings: number[][], timeMs: number, inputTokens: number}>}
 *   One embedding per input (same order), wall-clock duration, and the
 *   estimated total input tokens.
 */
async function embedSequential(texts) {
  const startedAt = Date.now();
  const vectors = [];
  let tokenTotal = 0;

  // One API round trip for a single input.
  const embedOne = async (input) => {
    const { data, response } = await openai.embeddings
      .create({ model: EMBEDDING_MODEL, input })
      .withResponse();
    await updateLimitsFromHeaders(openaiLimiter, response.headers);
    return data.data[0].embedding;
  };

  for (let position = 0; position < texts.length; position += 1) {
    const input = texts[position];
    tokenTotal += estimateTokens(input);
    const vector = await openaiLimiter.schedule(() => embedOne(input));
    vectors.push(vector);
  }

  const elapsed = Date.now() - startedAt;
  return { embeddings: vectors, timeMs: elapsed, inputTokens: tokenTotal };
}
259
/**
 * Embeds texts in batches of up to 100 inputs per request through
 * `openaiLimiter`, forwarding each response's headers to
 * `updateLimitsFromHeaders`. Batches are sent one after another.
 *
 * @param {string[]} texts - Inputs to embed.
 * @returns {Promise<{embeddings: number[][], timeMs: number, inputTokens: number}>}
 *   Embeddings in response order, wall-clock duration, and the estimated total
 *   input tokens.
 */
async function embedBatch(texts) {
  const startedAt = Date.now();
  const BATCH_SIZE = 100;

  let tokenTotal = 0;
  for (const text of texts) {
    tokenTotal += estimateTokens(text);
  }

  // One API round trip for a slice of inputs.
  const embedSlice = async (inputs) => {
    const { data, response } = await openai.embeddings
      .create({ model: EMBEDDING_MODEL, input: inputs })
      .withResponse();
    await updateLimitsFromHeaders(openaiLimiter, response.headers);
    return data.data.map((entry) => entry.embedding);
  };

  const collected = [];
  let offset = 0;
  while (offset < texts.length) {
    const slice = texts.slice(offset, offset + BATCH_SIZE);
    const vectors = await openaiLimiter.schedule(() => embedSlice(slice));
    collected.push(...vectors);
    offset += BATCH_SIZE;
  }

  return { embeddings: collected, timeMs: Date.now() - startedAt, inputTokens: tokenTotal };
}
278
+ // =============================================================================
279
+ // Benchmark runner
280
+ // =============================================================================
281
/**
 * Runs one benchmark mode end to end (chunking, enrichment, embedding), logs
 * the phase timings, and returns a result record.
 *
 * Mode → strategy:
 *   baseline     sequential full-document enrichment, sequential embeddings
 *   batch-embed  sequential full-document enrichment, batch embeddings
 *   parallel-cr  parallel full-document enrichment, sequential embeddings
 *   truncated    sequential truncated-context enrichment, sequential embeddings
 *   all          parallel truncated-context enrichment, batch embeddings
 *
 * @param {string} mode - One of the modes listed above.
 * @param {string} content - Document text to benchmark.
 * @param {string} filePath - Path recorded in the result (not read here).
 * @returns {Promise<object>} Result record with `mode`, `file`, `fileSize`,
 *   `chunkCount`, `timestamp`, `timings` and `tokenEstimate`.
 * @throws {Error} If `mode` is not a known mode. Previously an unknown mode
 *   silently ran the baseline path and was recorded under the bogus name.
 */
async function runBenchmark(mode, content, filePath) {
  const knownModes = ['baseline', 'batch-embed', 'parallel-cr', 'truncated', 'all'];
  if (!knownModes.includes(mode)) {
    // Fail before any API call is made or any misleading result is recorded.
    throw new Error(`Unknown benchmark mode "${mode}". Expected one of: ${knownModes.join(', ')}`);
  }
  console.log(`\n--- ${mode.toUpperCase()} ---`);

  const chunkStart = Date.now();
  const chunks = chunkText(content);
  const chunkTime = Date.now() - chunkStart;
  console.log(` Chunking: ${chunkTime}ms (${chunks.length} chunks)`);

  const useParallel = mode === 'parallel-cr' || mode === 'all';
  const useTruncated = mode === 'truncated' || mode === 'all';
  let enrichResult;
  if (useTruncated && useParallel) {
    enrichResult = await enrichTruncatedParallel(chunks, content);
  } else if (useTruncated) {
    enrichResult = await enrichTruncated(chunks, content);
  } else if (useParallel) {
    enrichResult = await enrichParallel(chunks, content);
  } else {
    enrichResult = await enrichBaseline(chunks, content);
  }
  console.log(` Enrichment: ${enrichResult.timeMs}ms (~${enrichResult.inputTokens} input tokens)`);

  // Each embedding input is the generated context followed by the raw chunk.
  const embeddingInputs = chunks.map((chunk, index) => enrichResult.summaries[index] + '\n\n' + chunk.content);
  const useBatch = mode === 'batch-embed' || mode === 'all';
  const embedResult = useBatch
    ? await embedBatch(embeddingInputs)
    : await embedSequential(embeddingInputs);
  console.log(` Embedding: ${embedResult.timeMs}ms (~${embedResult.inputTokens} input tokens)`);

  const total = chunkTime + enrichResult.timeMs + embedResult.timeMs;
  console.log(` TOTAL: ${total}ms`);

  return {
    mode,
    file: filePath,
    fileSize: content.length,
    chunkCount: chunks.length,
    timestamp: new Date().toISOString(),
    timings: {
      chunking: chunkTime,
      enrichment: enrichResult.timeMs,
      embedding: embedResult.timeMs,
      total,
    },
    tokenEstimate: {
      enrichmentInput: enrichResult.inputTokens,
      embeddingInput: embedResult.inputTokens,
    },
  };
}
329
+ // =============================================================================
330
+ // Main
331
+ // =============================================================================
332
/**
 * CLI entry point: parses `--mode` / `--file`, runs the selected benchmark
 * modes against the file, prints a comparison table relative to the baseline
 * run (when one was executed), and appends the results to RESULTS_FILE.
 *
 * Exits with status 1 when the mode is unknown or the input file is missing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const allModes = ['baseline', 'batch-embed', 'parallel-cr', 'truncated', 'all'];
  // Value of a flag is the argument that follows it; undefined when absent.
  const flagValue = (flag) => {
    const flagIndex = process.argv.indexOf(flag);
    return flagIndex === -1 ? undefined : process.argv[flagIndex + 1];
  };
  const modeArg = flagValue('--mode');
  const fileArg = flagValue('--file');

  // Reject typos up front: an unknown mode would otherwise be benchmarked as
  // the baseline path and saved under the wrong name.
  if (modeArg && !allModes.includes(modeArg)) {
    console.error(`Unknown mode: ${modeArg}. Expected one of: ${allModes.join(', ')}`);
    process.exit(1);
  }
  const filePath = fileArg ?? DEFAULT_TEST_FILE;
  if (!existsSync(filePath)) {
    console.error(`File not found: ${filePath}`);
    process.exit(1);
  }
  const content = readFileSync(filePath, 'utf8');
  console.log(`File: ${filePath} (${content.length} chars)`);

  const modes = modeArg ? [modeArg] : allModes;
  const results = [];
  for (const mode of modes) {
    const result = await runBenchmark(mode, content, filePath);
    results.push(result);
  }

  console.log('\n=== SUMMARY ===');
  console.log('');
  const baseline = results.find(benchmarkResult => benchmarkResult.mode === 'baseline');
  // Percentage reduction relative to the baseline; 'n/a' when there is no
  // baseline run or its value is 0 (which would otherwise print NaN/Infinity).
  const reduction = (value, reference) => (reference > 0
    ? `${Math.round((1 - value / reference) * 100)}%`
    : 'n/a');
  for (const benchmarkResult of results) {
    const speedup = reduction(benchmarkResult.timings.total, baseline?.timings.total);
    const tokenSavings = reduction(
      benchmarkResult.tokenEstimate.enrichmentInput,
      baseline?.tokenEstimate.enrichmentInput,
    );
    console.log(`${benchmarkResult.mode.padEnd(15)} | ${String(benchmarkResult.timings.total).padStart(7)}ms | enrichment: ${String(benchmarkResult.timings.enrichment).padStart(7)}ms | embedding: ${String(benchmarkResult.timings.embedding).padStart(7)}ms | speedup: ${speedup.padStart(4)} | token savings: ${tokenSavings}`);
  }

  // Load previous results. A corrupt or non-array file must not crash the run
  // after all API calls have completed, but the reset is reported, not silent.
  let existing = [];
  if (existsSync(RESULTS_FILE)) {
    try {
      const parsed = JSON.parse(readFileSync(RESULTS_FILE, 'utf8'));
      if (Array.isArray(parsed)) {
        existing = parsed;
      } else {
        console.warn(`${RESULTS_FILE} does not contain a JSON array; starting a fresh results list.`);
      }
    } catch (error) {
      console.warn(`Could not parse ${RESULTS_FILE} (${error.message}); starting a fresh results list.`);
    }
  }
  existing.push(...results);
  writeFileSync(RESULTS_FILE, JSON.stringify(existing, null, 2) + '\n');
  console.log(`\nResults saved to ${RESULTS_FILE}`);
}

// Run the CLI; any unhandled failure is printed and reported via exit code 1.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
@@ -0,0 +1,88 @@
1
+ // convert-judgments-to-graded.ts
2
+ // Phase 4.6.2 — convert legacy eval_golden_dataset.expected_doc_ids to
3
+ // grade-3 rows in eval_golden_judgments. Idempotent.
4
+ //
5
+ // Run: npx tsx src/scripts/convert-judgments-to-graded.ts
6
+ import 'dotenv/config';
7
+ import { createClient } from '@supabase/supabase-js';
8
// Supabase connection settings come from the environment; both are required.
const {
  SUPABASE_URL: supabaseUrl,
  SUPABASE_SERVICE_ROLE_KEY: supabaseKey,
} = process.env;

// Abort before creating a client when either value is missing or empty.
if (!(supabaseUrl && supabaseKey)) {
  console.error('Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY');
  process.exit(1);
}

const supabase = createClient(supabaseUrl, supabaseKey);
15
/**
 * Converts every id in `eval_golden_dataset.expected_doc_ids` into a grade-3
 * judgment by calling the `judgment_create` RPC, then verifies the number of
 * converter-authored grade-3 rows in `eval_golden_judgments`.
 *
 * Re-runs are tolerated: an RPC failure that reports a unique-constraint
 * violation is counted as "skipped" instead of as an error.
 *
 * Exits with status 1 when the dataset cannot be loaded, any non-duplicate RPC
 * error occurs, the verification count query fails, or the count mismatches.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { data: rows, error: loadError } = await supabase
    .from('eval_golden_dataset')
    .select('id, query, expected_doc_ids')
    .order('id');
  if (loadError || !rows) {
    console.error('Failed to load golden dataset:', loadError?.message ?? 'no data');
    process.exit(1);
  }
  const goldenRows = rows;
  let totalExpected = 0;
  let inserted = 0;
  let skipped = 0;
  let errorsCount = 0;
  for (const goldenRow of goldenRows) {
    const expectedIds = goldenRow.expected_doc_ids ?? [];
    totalExpected += expectedIds.length;
    // De-duplicate within a row: a repeated id would be inserted once and
    // "skipped" once, inflating inserted + skipped above the real row count
    // and making the verification below report a false MISMATCH.
    for (const documentId of new Set(expectedIds)) {
      const { error: rpcError } = await supabase.rpc('judgment_create', {
        p_golden_id: goldenRow.id,
        p_document_id: documentId,
        p_grade: 3,
        p_judged_by: 'converter-phase-4.6.2',
        p_notes: 'Auto-converted from legacy expected_doc_ids (grade 3 = canonical answer)',
      });
      if (!rpcError) {
        inserted++;
        continue;
      }
      const messageText = rpcError.message ?? '';
      // 23505 is PostgreSQL's unique_violation SQLSTATE; the message checks are
      // kept as a fallback for errors that arrive without a code.
      const isDuplicate = rpcError.code === '23505'
        || messageText.includes('duplicate key')
        || messageText.includes('unique');
      if (isDuplicate) {
        skipped++;
      } else {
        errorsCount++;
        console.error(` [ERR] golden_id=${goldenRow.id} doc_id=${documentId}: ${messageText}`);
      }
    }
  }
  console.log('');
  console.log('='.repeat(60));
  console.log('Conversion summary');
  console.log('='.repeat(60));
  console.log(` Golden queries scanned: ${goldenRows.length}`);
  console.log(` Total expected_doc_ids: ${totalExpected}`);
  console.log(` Grade-3 judgments inserted: ${inserted}`);
  console.log(` Skipped (already existed): ${skipped}`);
  console.log(` Errors: ${errorsCount}`);
  console.log('');
  if (errorsCount > 0) {
    console.error('Conversion completed with errors. Inspect and re-run.');
    process.exit(1);
  }
  const { count, error: countError } = await supabase
    .from('eval_golden_judgments')
    .select('*', { count: 'exact', head: true })
    .eq('grade', 3)
    .eq('judged_by', 'converter-phase-4.6.2');
  if (countError) {
    console.error('Verification count failed:', countError.message);
    process.exit(1);
  }
  console.log(`Verification: ${count} grade-3 judgments with judged_by='converter-phase-4.6.2' in table.`);
  // NOTE(review): a "skipped" row may have been created by a different judge or
  // with a different grade, in which case it is not counted by the query above
  // and this check reports a mismatch — confirm against the table's unique
  // constraint before relying on it.
  if (count !== inserted + skipped) {
    console.error(`MISMATCH: expected ${inserted + skipped}, got ${count}`);
    process.exit(1);
  }
  console.log('Conversion verified.');
}

// Run the converter; any unhandled failure is printed and reported via exit code 1.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});