@aperdomoll90/ledger-ai 1.3.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +177 -221
- package/dist/commands/add.js +51 -100
- package/dist/commands/backfill.js +55 -0
- package/dist/commands/backup.js +10 -10
- package/dist/commands/check.js +21 -29
- package/dist/commands/config.js +13 -12
- package/dist/commands/delete.js +22 -17
- package/dist/commands/eval-judge.js +11 -0
- package/dist/commands/eval.js +321 -0
- package/dist/commands/export.js +8 -10
- package/dist/commands/get.js +9 -0
- package/dist/commands/hunt.js +206 -0
- package/dist/commands/ingest.js +15 -14
- package/dist/commands/init.js +18 -20
- package/dist/commands/list.js +21 -7
- package/dist/commands/migrate.js +11 -11
- package/dist/commands/onboard.js +2 -2
- package/dist/commands/pull.js +3 -2
- package/dist/commands/push.js +8 -8
- package/dist/commands/restore.js +38 -38
- package/dist/commands/show.js +13 -16
- package/dist/commands/sync.js +58 -19
- package/dist/commands/tag.js +20 -14
- package/dist/commands/update.js +50 -18
- package/dist/commands/wizard.js +3 -3
- package/dist/lib/ai-search.js +163 -0
- package/dist/lib/audit.js +19 -0
- package/dist/lib/backfill.js +60 -0
- package/dist/lib/config.js +19 -2
- package/dist/lib/document-classification.js +5 -0
- package/dist/lib/document-fetching.js +77 -0
- package/dist/lib/document-operations.js +150 -0
- package/dist/lib/documents/classification.js +5 -0
- package/dist/lib/documents/fetching.js +89 -0
- package/dist/lib/documents/operations.js +304 -0
- package/dist/lib/domains.js +116 -0
- package/dist/lib/embeddings.js +190 -0
- package/dist/lib/errors.js +3 -1
- package/dist/lib/eval/eval-advanced.js +289 -0
- package/dist/lib/eval/eval-judge-session.js +233 -0
- package/dist/lib/eval/eval-store.js +105 -0
- package/dist/lib/eval/eval.js +303 -0
- package/dist/lib/file-writer.js +23 -0
- package/dist/lib/generators.js +44 -45
- package/dist/lib/hunter-db.js +235 -0
- package/dist/lib/hunter-rss.js +30 -0
- package/dist/lib/hunter-scoring.js +55 -0
- package/dist/lib/hunter-types.js +36 -0
- package/dist/lib/lint-configs.js +20 -0
- package/dist/lib/migrate.js +2 -2
- package/dist/lib/notes.js +173 -59
- package/dist/lib/observability.js +296 -0
- package/dist/lib/op-add-note-types.test.js +7 -6
- package/dist/lib/prompt.js +8 -8
- package/dist/lib/rate-limiter.js +103 -0
- package/dist/lib/search/ai-search.js +396 -0
- package/dist/lib/search/chunk-context-enrichment.js +155 -0
- package/dist/lib/search/embeddings.js +293 -0
- package/dist/lib/search/reranker.js +120 -0
- package/dist/lib/search/semantic-cache.js +53 -0
- package/dist/lib/type-registry.test.js +6 -6
- package/dist/mcp-server.js +553 -66
- package/dist/migrations/migrations/005-audit-log.sql +22 -0
- package/dist/migrations/migrations/005_opportunities.sql +48 -0
- package/dist/migrations/migrations/006-audited-operations.sql +235 -0
- package/dist/migrations/migrations/006_hunt_analytics.sql +38 -0
- package/dist/migrations/migrations/007-eval-golden-judgments.sql +119 -0
- package/dist/migrations/migrations/008-drop-expected-doc-ids.sql +9 -0
- package/dist/migrations/migrations/008-judge-helpers.sql +21 -0
- package/dist/migrations/migrations/009-semantic-cache.sql +216 -0
- package/dist/scripts/batch-grade.js +344 -0
- package/dist/scripts/benchmark-ingestion.js +376 -0
- package/dist/scripts/convert-judgments-to-graded.js +88 -0
- package/dist/scripts/diagnose-first-result.js +333 -0
- package/dist/scripts/drop-golden-query.js +53 -0
- package/dist/scripts/eval-search.js +115 -0
- package/dist/scripts/grade-unjudged-top1.js +138 -0
- package/dist/scripts/hunter-analytics.js +38 -0
- package/dist/scripts/hunter-cron.js +63 -0
- package/dist/scripts/hunter-purge.js +25 -0
- package/dist/scripts/migrate-v2.js +140 -0
- package/dist/scripts/reindex.js +74 -0
- package/dist/scripts/sync-local-docs.js +153 -0
- package/package.json +7 -1
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
// benchmark-ingestion.ts
|
|
2
|
+
// Measures ingestion pipeline performance with different optimization modes.
|
|
3
|
+
// Does NOT write to the database. Only runs chunking, enrichment, and embedding.
|
|
4
|
+
//
|
|
5
|
+
// Usage:
|
|
6
|
+
// npx tsx src/scripts/benchmark-ingestion.ts # run all modes
|
|
7
|
+
// npx tsx src/scripts/benchmark-ingestion.ts --mode baseline # run one mode
|
|
8
|
+
// npx tsx src/scripts/benchmark-ingestion.ts --file docs/foo.md # custom test file
|
|
9
|
+
//
|
|
10
|
+
// Modes:
|
|
11
|
+
// baseline — current code: sequential enrichment, sequential embeddings
|
|
12
|
+
// batch-embed — sequential enrichment, batch embeddings (one API call)
|
|
13
|
+
// parallel-cr — parallel Contextual Retrieval (3 concurrent, TPM-safe), sequential embeddings
|
|
14
|
+
// truncated — truncated context (summary + neighbors), sequential embeddings
|
|
15
|
+
// all — truncated + parallel + batch embeddings combined
|
|
16
|
+
//
|
|
17
|
+
// Results are appended to docs/benchmark-results.json
|
|
18
|
+
import 'dotenv/config';
|
|
19
|
+
import OpenAI from 'openai';
|
|
20
|
+
import { readFileSync, existsSync, writeFileSync } from 'fs';
|
|
21
|
+
import { chunkText } from '../lib/search/embeddings.js';
|
|
22
|
+
import { openaiLimiter, createRateLimiter, updateLimitsFromHeaders } from '../lib/rate-limiter.js';
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// Config
|
|
25
|
+
// =============================================================================
|
|
26
|
+
// TPM-safe limiter for parallel chat with full document context (~18K tokens/call).
|
|
27
|
+
// gpt-4o-mini: 200K TPM. 200K / 18K = ~11 calls/min max.
|
|
28
|
+
// Models exercised by the benchmark: one for embeddings, one for the chat
// calls that generate per-chunk context and the document summary.
const EMBEDDING_MODEL = 'text-embedding-3-small';
const CONTEXT_MODEL = 'gpt-4o-mini';

// Where results are accumulated, and the document used when --file is absent.
const RESULTS_FILE = 'docs/benchmark-results.json';
const DEFAULT_TEST_FILE = 'docs/ledger-architecture-database-schemas.md';

// Dedicated limiter for the parallel full-document enrichment mode.
// NOTE(review): option semantics are defined by createRateLimiter in
// ../lib/rate-limiter.js — confirm there before tuning these numbers.
const chatLimiter = createRateLimiter({
    maxConcurrent: 3,
    reservoirAmount: 10,
    reservoirRefreshInterval: 60_000,
    minTime: 2000,
    retryLimit: 3,
});

// OpenAI client shared by every enrichment and embedding strategy.
const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY,
    maxRetries: 5,
});
|
|
40
|
+
// =============================================================================
|
|
41
|
+
// Prompts
|
|
42
|
+
// =============================================================================
|
|
43
|
+
// Prompt for full-document enrichment. Placeholders filled by the callers:
//   {DOCUMENT_CONTENT} — the entire source document
//   {CHUNK_CONTENT}    — the chunk being situated
const CONTEXT_PROMPT = `Here is the full document:
<document>
{DOCUMENT_CONTENT}
</document>

Here is the chunk:
<chunk>
{CHUNK_CONTENT}
</chunk>

Write a short context (2-3 sentences) that situates this chunk within the document. Include the document's topic and what specific information this chunk covers. Be concise and factual.`;
// Prompt for truncated enrichment: the full document is replaced by a summary,
// a header path and the two neighboring chunks. Placeholders:
//   {DOCUMENT_SUMMARY}, {HEADER_PATH}, {PREV_CHUNK}, {CHUNK_CONTENT}, {NEXT_CHUNK}
const TRUNCATED_CONTEXT_PROMPT = `Here is a summary of the document:
<document_summary>
{DOCUMENT_SUMMARY}
</document_summary>

Here is the section this chunk belongs to (header path):
<section>
{HEADER_PATH}
</section>

Here are the neighboring chunks for context:
<previous_chunk>
{PREV_CHUNK}
</previous_chunk>

<chunk>
{CHUNK_CONTENT}
</chunk>

<next_chunk>
{NEXT_CHUNK}
</next_chunk>

Write a short context (2-3 sentences) that situates this chunk within the document. Include the document's topic and what specific information this chunk covers. Be concise and factual.`;
// Prompt that produces the document summary used by the truncated modes.
// Placeholder: {DOCUMENT_CONTENT} — the entire source document.
const SUMMARY_PROMPT = `Summarize this document in 150-200 words. Focus on: what the document is about, its structure, and the key topics it covers. Be factual and concise.

<document>
{DOCUMENT_CONTENT}
</document>`;
|
|
83
|
+
// =============================================================================
|
|
84
|
+
// Helpers
|
|
85
|
+
// =============================================================================
|
|
86
|
+
/**
 * Rough token estimate for a piece of text: one token per four characters
 * (UTF-16 code units, as counted by `String.prototype.length`), rounded up.
 *
 * @param {string} text - Text whose size is being estimated.
 * @returns {number} Estimated token count; 0 for an empty string.
 */
function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    const charCount = text.length;
    return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
89
|
+
/**
 * Finds the markdown heading trail ("H1 > H2 > ...") in effect at the first
 * line of `content` that contains the beginning of `chunkContent`.
 *
 * The chunk is located by a probe: the first non-blank line of the chunk,
 * capped at 50 characters. The probe is limited to a single line because the
 * document is scanned line by line, so a probe containing a newline could
 * never match. Short probes may match an earlier unrelated line; the first
 * match wins.
 *
 * Headings are tracked with their level, so a heading replaces every open
 * heading of the same or deeper level even when the document skips levels
 * (for example an h1 followed directly by sibling h3 sections).
 *
 * @param {string} content - Full markdown document.
 * @param {string} chunkContent - Text of the chunk to locate.
 * @returns {string} Heading titles joined by ' > ', or '' when the chunk is
 *   empty or cannot be located.
 */
function findHeaderPath(content, chunkContent) {
    const probe = chunkContent.trimStart().split('\n', 1)[0].slice(0, 50).trimEnd();
    if (probe === '') {
        // An empty probe would match every line, including the very first one.
        return '';
    }
    const openHeadings = [];
    for (const line of content.split('\n')) {
        const heading = /^(#{1,6})\s/.exec(line);
        if (heading) {
            const level = heading[1].length;
            // Close every heading that is not a strict ancestor of this one.
            while (openHeadings.length > 0 && openHeadings[openHeadings.length - 1].level >= level) {
                openHeadings.pop();
            }
            openHeadings.push({ level, title: line.slice(level).trim() });
        }
        // Checked after heading handling so a chunk that starts with a heading
        // line is reported under that heading.
        if (line.includes(probe)) {
            return openHeadings.map((entry) => entry.title).join(' > ');
        }
    }
    return '';
}
|
|
107
|
+
/**
 * Asks the chat model for a summary of the whole document, using
 * SUMMARY_PROMPT, scheduled through the shared `openaiLimiter`.
 *
 * @param {string} documentContent - Full text of the document.
 * @returns {Promise<{summary: string, inputTokens: number}>} The trimmed
 *   summary ('' when the model returned no content) and the estimated token
 *   count of the prompt that was sent.
 */
async function generateDocSummary(documentContent) {
    // A replacer function inserts the document verbatim. A plain string
    // replacement would interpret `$$`, `$&`, "$`" and `$'` sequences that
    // occur in the document (for example SQL dollar-quoting) and corrupt it.
    const prompt = SUMMARY_PROMPT.replace('{DOCUMENT_CONTENT}', () => documentContent);
    const inputTokens = estimateTokens(prompt);
    const response = await openaiLimiter.schedule(() => openai.chat.completions.create({
        model: CONTEXT_MODEL,
        messages: [
            { role: 'system', content: 'You are a precise technical writer. Output only the summary, nothing else.' },
            { role: 'user', content: prompt },
        ],
        max_tokens: 300,
        temperature: 0,
    }));
    return {
        summary: (response.choices[0].message.content ?? '').trim(),
        inputTokens,
    };
}
|
|
124
|
+
// =============================================================================
|
|
125
|
+
// Enrichment strategies
|
|
126
|
+
// =============================================================================
|
|
127
|
+
/**
 * Baseline enrichment: one chat call per chunk, issued strictly one after the
 * other, each carrying the full document as context (CONTEXT_PROMPT).
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich, in order.
 * @param {string} documentContent - Full text of the document.
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One trimmed context string per chunk (same order), wall-clock duration,
 *   and the summed token estimate of all prompts.
 */
async function enrichBaseline(chunks, documentContent) {
    const start = Date.now();
    const summaries = [];
    let inputTokens = 0;
    for (const chunk of chunks) {
        // Fill both placeholders in a single pass with a replacer function:
        // inserted text is taken verbatim (no `$$` / `$&` pattern expansion)
        // and is never re-scanned for placeholders.
        const values = { DOCUMENT_CONTENT: documentContent, CHUNK_CONTENT: chunk.content };
        const prompt = CONTEXT_PROMPT.replace(
            /\{(DOCUMENT_CONTENT|CHUNK_CONTENT)\}/g,
            (_placeholder, key) => values[key],
        );
        inputTokens += estimateTokens(prompt);
        const response = await openaiLimiter.schedule(() => openai.chat.completions.create({
            model: CONTEXT_MODEL,
            messages: [
                { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
                { role: 'user', content: prompt },
            ],
            max_tokens: 150,
            temperature: 0,
        }));
        summaries.push((response.choices[0].message.content ?? '').trim());
    }
    return { summaries, timeMs: Date.now() - start, inputTokens };
}
|
|
149
|
+
/**
 * Parallel enrichment: builds one full-document prompt per chunk
 * (CONTEXT_PROMPT) and submits all chat calls at once to `chatLimiter`, which
 * decides how many actually run concurrently.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich, in order.
 * @param {string} documentContent - Full text of the document.
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One trimmed context string per chunk (same order as `chunks`), wall-clock
 *   duration, and the summed token estimate of all prompts. Rejects as soon
 *   as any single call fails.
 */
async function enrichParallel(chunks, documentContent) {
    const start = Date.now();
    let inputTokens = 0;
    const jobs = chunks.map((chunk, index) => {
        // Single-pass fill with a replacer function: inserted text is taken
        // verbatim (no `$$` / `$&` pattern expansion) and is never re-scanned
        // for placeholders.
        const values = { DOCUMENT_CONTENT: documentContent, CHUNK_CONTENT: chunk.content };
        const prompt = CONTEXT_PROMPT.replace(
            /\{(DOCUMENT_CONTENT|CHUNK_CONTENT)\}/g,
            (_placeholder, key) => values[key],
        );
        inputTokens += estimateTokens(prompt);
        return chatLimiter.schedule({ id: `enrich-${index}` }, async () => {
            const response = await openai.chat.completions.create({
                model: CONTEXT_MODEL,
                messages: [
                    { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
                    { role: 'user', content: prompt },
                ],
                max_tokens: 150,
                temperature: 0,
            });
            return (response.choices[0].message.content ?? '').trim();
        });
    });
    // Promise.all resolves in input order regardless of completion order, so
    // no re-sorting by chunk index is needed.
    const summaries = await Promise.all(jobs);
    return { summaries, timeMs: Date.now() - start, inputTokens };
}
|
|
174
|
+
/**
 * Truncated enrichment: generates one document summary up front, then makes
 * one sequential chat call per chunk whose prompt (TRUNCATED_CONTEXT_PROMPT)
 * carries the summary, the chunk's header path and its two neighbors instead
 * of the full document.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich, in order.
 * @param {string} documentContent - Full text of the document.
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One trimmed context string per chunk (same order), wall-clock duration
 *   including the summary call, and the summed token estimate of the summary
 *   prompt plus all chunk prompts.
 */
async function enrichTruncated(chunks, documentContent) {
    const start = Date.now();
    const summaries = [];
    const { summary: docSummary, inputTokens: summaryTokens } = await generateDocSummary(documentContent);
    let inputTokens = summaryTokens;
    const placeholderPattern = /\{(DOCUMENT_SUMMARY|HEADER_PATH|PREV_CHUNK|CHUNK_CONTENT|NEXT_CHUNK)\}/g;
    for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
        const chunk = chunks[chunkIndex];
        const values = {
            DOCUMENT_SUMMARY: docSummary,
            HEADER_PATH: findHeaderPath(documentContent, chunk.content) || '(unknown section)',
            PREV_CHUNK: chunkIndex > 0 ? chunks[chunkIndex - 1].content : '(start of document)',
            CHUNK_CONTENT: chunk.content,
            NEXT_CHUNK: chunkIndex < chunks.length - 1 ? chunks[chunkIndex + 1].content : '(end of document)',
        };
        // Single-pass fill with a replacer function: inserted text is taken
        // verbatim (no `$$` / `$&` pattern expansion), and a placeholder name
        // that happens to appear inside a chunk is never substituted.
        const prompt = TRUNCATED_CONTEXT_PROMPT.replace(placeholderPattern, (_placeholder, key) => values[key]);
        inputTokens += estimateTokens(prompt);
        const response = await openaiLimiter.schedule(() => openai.chat.completions.create({
            model: CONTEXT_MODEL,
            messages: [
                { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
                { role: 'user', content: prompt },
            ],
            max_tokens: 150,
            temperature: 0,
        }));
        summaries.push((response.choices[0].message.content ?? '').trim());
    }
    return { summaries, timeMs: Date.now() - start, inputTokens };
}
|
|
204
|
+
/**
 * Truncated + parallel enrichment: generates one document summary up front,
 * then submits one chat call per chunk to `openaiLimiter` all at once. Each
 * prompt (TRUNCATED_CONTEXT_PROMPT) carries the summary, the chunk's header
 * path and its two neighbors instead of the full document.
 *
 * @param {Array<{content: string}>} chunks - Chunks to enrich, in order.
 * @param {string} documentContent - Full text of the document.
 * @returns {Promise<{summaries: string[], timeMs: number, inputTokens: number}>}
 *   One trimmed context string per chunk (same order as `chunks`), wall-clock
 *   duration including the summary call, and the summed token estimate of the
 *   summary prompt plus all chunk prompts. Rejects as soon as any call fails.
 */
async function enrichTruncatedParallel(chunks, documentContent) {
    const start = Date.now();
    const { summary: docSummary, inputTokens: summaryTokens } = await generateDocSummary(documentContent);
    let inputTokens = summaryTokens;
    const placeholderPattern = /\{(DOCUMENT_SUMMARY|HEADER_PATH|PREV_CHUNK|CHUNK_CONTENT|NEXT_CHUNK)\}/g;
    // Truncated context = ~1K tokens per call. TPM-safe for full concurrency.
    const jobs = chunks.map((chunk, chunkIndex) => {
        const values = {
            DOCUMENT_SUMMARY: docSummary,
            HEADER_PATH: findHeaderPath(documentContent, chunk.content) || '(unknown section)',
            PREV_CHUNK: chunkIndex > 0 ? chunks[chunkIndex - 1].content : '(start of document)',
            CHUNK_CONTENT: chunk.content,
            NEXT_CHUNK: chunkIndex < chunks.length - 1 ? chunks[chunkIndex + 1].content : '(end of document)',
        };
        // Single-pass fill with a replacer function: inserted text is taken
        // verbatim (no `$$` / `$&` pattern expansion), and a placeholder name
        // that happens to appear inside a chunk is never substituted.
        const prompt = TRUNCATED_CONTEXT_PROMPT.replace(placeholderPattern, (_placeholder, key) => values[key]);
        inputTokens += estimateTokens(prompt);
        return openaiLimiter.schedule({ id: `tp-${chunkIndex}` }, async () => {
            const response = await openai.chat.completions.create({
                model: CONTEXT_MODEL,
                messages: [
                    { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
                    { role: 'user', content: prompt },
                ],
                max_tokens: 150,
                temperature: 0,
            });
            return (response.choices[0].message.content ?? '').trim();
        });
    });
    // Promise.all resolves in input order regardless of completion order, so
    // no re-sorting by chunk index is needed.
    const summaries = await Promise.all(jobs);
    return { summaries, timeMs: Date.now() - start, inputTokens };
}
|
|
238
|
+
// =============================================================================
|
|
239
|
+
// Embedding strategies
|
|
240
|
+
// =============================================================================
|
|
241
|
+
/**
 * Embeds the texts one request at a time with EMBEDDING_MODEL. Each request
 * runs through `openaiLimiter`, and the HTTP response headers of every call
 * are handed to `updateLimitsFromHeaders` before the next one starts.
 *
 * @param {string[]} texts - Inputs to embed, in order.
 * @returns {Promise<{embeddings: Array, timeMs: number, inputTokens: number}>}
 *   One embedding per input (same order), wall-clock duration, and the summed
 *   token estimate of the inputs.
 */
async function embedSequential(texts) {
    const startedAt = Date.now();
    const vectors = [];
    let tokenTotal = 0;
    for (let position = 0; position < texts.length; position += 1) {
        const input = texts[position];
        tokenTotal += estimateTokens(input);
        const embedOne = async () => {
            const call = await openai.embeddings
                .create({ model: EMBEDDING_MODEL, input })
                .withResponse();
            await updateLimitsFromHeaders(openaiLimiter, call.response.headers);
            return call.data.data[0].embedding;
        };
        vectors.push(await openaiLimiter.schedule(embedOne));
    }
    const elapsed = Date.now() - startedAt;
    return { embeddings: vectors, timeMs: elapsed, inputTokens: tokenTotal };
}
|
|
259
|
+
/**
 * Embeds the texts with EMBEDDING_MODEL in batches of up to 100 inputs per
 * request. Batches are sent one after another through `openaiLimiter`, and
 * each response's headers are handed to `updateLimitsFromHeaders`.
 *
 * @param {string[]} texts - Inputs to embed, in order.
 * @returns {Promise<{embeddings: Array, timeMs: number, inputTokens: number}>}
 *   The embeddings of every batch concatenated in request order, wall-clock
 *   duration, and the summed token estimate of the inputs.
 */
async function embedBatch(texts) {
    const startedAt = Date.now();
    const MAX_INPUTS_PER_REQUEST = 100;

    let tokenTotal = 0;
    for (const text of texts) {
        tokenTotal += estimateTokens(text);
    }

    const vectors = [];
    let offset = 0;
    while (offset < texts.length) {
        const inputs = texts.slice(offset, offset + MAX_INPUTS_PER_REQUEST);
        const embedInputs = async () => {
            const call = await openai.embeddings
                .create({ model: EMBEDDING_MODEL, input: inputs })
                .withResponse();
            await updateLimitsFromHeaders(openaiLimiter, call.response.headers);
            return call.data.data.map((item) => item.embedding);
        };
        const batchVectors = await openaiLimiter.schedule(embedInputs);
        for (const vector of batchVectors) {
            vectors.push(vector);
        }
        offset += MAX_INPUTS_PER_REQUEST;
    }

    return { embeddings: vectors, timeMs: Date.now() - startedAt, inputTokens: tokenTotal };
}
|
|
278
|
+
// =============================================================================
|
|
279
|
+
// Benchmark runner
|
|
280
|
+
// =============================================================================
|
|
281
|
+
/**
 * Runs one benchmark pass (chunk → enrich → embed) for the given mode, logs
 * the timing of each stage, and returns a result record.
 *
 * Strategy selection by mode:
 *   - enrichment: 'all' → truncated + parallel, 'truncated' → truncated,
 *     'parallel-cr' → parallel, anything else → baseline
 *   - embedding: 'batch-embed' or 'all' → batched, anything else → sequential
 *
 * @param {string} mode - Benchmark mode name.
 * @param {string} content - Text of the document under test.
 * @param {string} filePath - Path of the document, recorded in the result.
 * @returns {Promise<object>} Record with mode, file, fileSize, chunkCount,
 *   timestamp, per-stage timings (ms) and token estimates.
 */
async function runBenchmark(mode, content, filePath) {
    console.log(`\n--- ${mode.toUpperCase()} ---`);

    // Stage 1: chunking.
    const chunkingStartedAt = Date.now();
    const chunks = chunkText(content);
    const chunkTime = Date.now() - chunkingStartedAt;
    console.log(` Chunking: ${chunkTime}ms (${chunks.length} chunks)`);

    // Stage 2: enrichment, strategy picked from the mode flags.
    const wantsParallel = ['parallel-cr', 'all'].includes(mode);
    const wantsTruncated = ['truncated', 'all'].includes(mode);
    let enrich = enrichBaseline;
    if (wantsTruncated) {
        enrich = wantsParallel ? enrichTruncatedParallel : enrichTruncated;
    }
    else if (wantsParallel) {
        enrich = enrichParallel;
    }
    const enrichResult = await enrich(chunks, content);
    console.log(` Enrichment: ${enrichResult.timeMs}ms (~${enrichResult.inputTokens} input tokens)`);

    // Stage 3: embedding of "<context>\n\n<chunk>" for every chunk.
    const embeddingInputs = chunks.map((chunk, index) => `${enrichResult.summaries[index]}\n\n${chunk.content}`);
    const embed = ['batch-embed', 'all'].includes(mode) ? embedBatch : embedSequential;
    const embedResult = await embed(embeddingInputs);
    console.log(` Embedding: ${embedResult.timeMs}ms (~${embedResult.inputTokens} input tokens)`);

    const total = chunkTime + enrichResult.timeMs + embedResult.timeMs;
    console.log(` TOTAL: ${total}ms`);

    const timings = {
        chunking: chunkTime,
        enrichment: enrichResult.timeMs,
        embedding: embedResult.timeMs,
        total,
    };
    const tokenEstimate = {
        enrichmentInput: enrichResult.inputTokens,
        embeddingInput: embedResult.inputTokens,
    };
    return {
        mode,
        file: filePath,
        fileSize: content.length,
        chunkCount: chunks.length,
        timestamp: new Date().toISOString(),
        timings,
        tokenEstimate,
    };
}
|
|
329
|
+
// =============================================================================
|
|
330
|
+
// Main
|
|
331
|
+
// =============================================================================
|
|
332
|
+
/**
 * CLI entry point of the ingestion benchmark.
 *
 * Reads `--mode <name>` and `--file <path>` from the command line, runs the
 * requested mode (or every mode when none is given) against the file, prints
 * a comparison table relative to the baseline run, and appends the results to
 * RESULTS_FILE.
 *
 * Exits the process with code 1 when the mode is unknown or the input file
 * does not exist. Both checks happen before any API call is made.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const ALL_MODES = ['baseline', 'batch-embed', 'parallel-cr', 'truncated', 'all'];
    // Value of a flag = the argument that directly follows it.
    const valueAfter = (flag) => process.argv.find((_, argIndex, argv) => argv[argIndex - 1] === flag);
    const modeArg = valueAfter('--mode');
    const fileArg = valueAfter('--file');
    // An unrecognized mode would otherwise silently run the baseline pipeline
    // and be recorded under the wrong label.
    if (modeArg && !ALL_MODES.includes(modeArg)) {
        console.error(`Unknown mode: ${modeArg}. Valid modes: ${ALL_MODES.join(', ')}`);
        process.exit(1);
    }
    const filePath = fileArg ?? DEFAULT_TEST_FILE;
    if (!existsSync(filePath)) {
        console.error(`File not found: ${filePath}`);
        process.exit(1);
    }
    const content = readFileSync(filePath, 'utf8');
    console.log(`File: ${filePath} (${content.length} chars)`);
    const modes = modeArg ? [modeArg] : ALL_MODES;
    const results = [];
    for (const mode of modes) {
        // Modes run one after another so their timings do not interfere.
        const result = await runBenchmark(mode, content, filePath);
        results.push(result);
    }
    console.log('\n=== SUMMARY ===');
    console.log('');
    const baseline = results.find(benchmarkResult => benchmarkResult.mode === 'baseline');
    // Percentage saved relative to the baseline; 'n/a' when there is no
    // usable baseline figure (avoids printing NaN% / -Infinity%).
    const percentSaved = (value, reference) => (reference > 0
        ? `${Math.round((1 - value / reference) * 100)}%`
        : 'n/a');
    for (const benchmarkResult of results) {
        const speedup = baseline
            ? percentSaved(benchmarkResult.timings.total, baseline.timings.total)
            : 'n/a';
        const tokenSavings = baseline
            ? percentSaved(benchmarkResult.tokenEstimate.enrichmentInput, baseline.tokenEstimate.enrichmentInput)
            : 'n/a';
        console.log(`${benchmarkResult.mode.padEnd(15)} | ${String(benchmarkResult.timings.total).padStart(7)}ms | enrichment: ${String(benchmarkResult.timings.enrichment).padStart(7)}ms | embedding: ${String(benchmarkResult.timings.embedding).padStart(7)}ms | speedup: ${speedup.padStart(4)} | token savings: ${tokenSavings}`);
    }
    let existing = [];
    if (existsSync(RESULTS_FILE)) {
        try {
            const parsed = JSON.parse(readFileSync(RESULTS_FILE, 'utf8'));
            if (Array.isArray(parsed)) {
                existing = parsed;
            }
            else {
                // A non-array would make the push below throw after all the
                // API work has already been paid for.
                console.warn(`${RESULTS_FILE} does not contain a JSON array; starting a fresh results list.`);
            }
        }
        catch (error) {
            // Best effort: an unreadable history must not lose the new run,
            // but the overwrite should not be silent.
            console.warn(`Could not parse ${RESULTS_FILE} (${error.message}); starting a fresh results list.`);
        }
    }
    existing.push(...results);
    writeFileSync(RESULTS_FILE, JSON.stringify(existing, null, 2) + '\n');
    console.log(`\nResults saved to ${RESULTS_FILE}`);
}
|
|
373
|
+
// Start the benchmark; any failure that escapes main() is printed and turned
// into a non-zero exit code.
main().catch((failure) => {
    console.error(failure);
    process.exit(1);
});
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// convert-judgments-to-graded.ts
|
|
2
|
+
// Phase 4.6.2 — convert legacy eval_golden_dataset.expected_doc_ids to
|
|
3
|
+
// grade-3 rows in eval_golden_judgments. Idempotent.
|
|
4
|
+
//
|
|
5
|
+
// Run: npx tsx src/scripts/convert-judgments-to-graded.ts
|
|
6
|
+
import 'dotenv/config';
|
|
7
|
+
import { createClient } from '@supabase/supabase-js';
|
|
8
|
+
// Supabase connection settings come from the environment (loaded by
// dotenv/config); the script cannot do anything without both of them.
const {
    SUPABASE_URL: supabaseUrl,
    SUPABASE_SERVICE_ROLE_KEY: supabaseKey,
} = process.env;
if (!(supabaseUrl && supabaseKey)) {
    console.error('Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY');
    process.exit(1);
}
// Client used by every query and RPC call in this script.
const supabase = createClient(supabaseUrl, supabaseKey);
|
|
15
|
+
/**
 * Converts every id in `eval_golden_dataset.expected_doc_ids` into a grade-3
 * judgment by calling the `judgment_create` RPC, then verifies the number of
 * converter-created rows in `eval_golden_judgments`.
 *
 * A judgment that already exists (unique violation) is counted as skipped,
 * so the script can be re-run. Ids repeated within one golden row are
 * submitted once: a repeat would otherwise be counted as "inserted" plus
 * "skipped" for a single stored row and trip the verification below.
 *
 * Exits the process with code 1 when the dataset cannot be loaded, when any
 * RPC call fails for a reason other than a duplicate, or when verification
 * fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const { data: goldenRows, error: loadError } = await supabase
        .from('eval_golden_dataset')
        .select('id, query, expected_doc_ids')
        .order('id');
    if (loadError || !goldenRows) {
        console.error('Failed to load golden dataset:', loadError?.message ?? 'no data');
        process.exit(1);
    }
    let totalExpected = 0;
    let inserted = 0;
    let skipped = 0;
    let errorsCount = 0;
    for (const goldenRow of goldenRows) {
        const listedIds = goldenRow.expected_doc_ids ?? [];
        totalExpected += listedIds.length;
        // Submit each document once per golden query.
        const expectedIds = [...new Set(listedIds)];
        for (const documentId of expectedIds) {
            const { error: rpcError } = await supabase.rpc('judgment_create', {
                p_golden_id: goldenRow.id,
                p_document_id: documentId,
                p_grade: 3,
                p_judged_by: 'converter-phase-4.6.2',
                p_notes: 'Auto-converted from legacy expected_doc_ids (grade 3 = canonical answer)',
            });
            if (!rpcError) {
                inserted++;
                continue;
            }
            const messageText = rpcError.message ?? '';
            // 23505 is the PostgreSQL SQLSTATE for unique_violation; the
            // message checks remain as a fallback when no code is reported.
            const alreadyExists = rpcError.code === '23505'
                || messageText.includes('duplicate key')
                || messageText.includes('unique');
            if (alreadyExists) {
                skipped++;
            }
            else {
                errorsCount++;
                console.error(` [ERR] golden_id=${goldenRow.id} doc_id=${documentId}: ${messageText}`);
            }
        }
    }
    console.log('');
    console.log('='.repeat(60));
    console.log('Conversion summary');
    console.log('='.repeat(60));
    console.log(` Golden queries scanned: ${goldenRows.length}`);
    console.log(` Total expected_doc_ids: ${totalExpected}`);
    console.log(` Grade-3 judgments inserted: ${inserted}`);
    console.log(` Skipped (already existed): ${skipped}`);
    console.log(` Errors: ${errorsCount}`);
    console.log('');
    if (errorsCount > 0) {
        console.error('Conversion completed with errors. Inspect and re-run.');
        process.exit(1);
    }
    // Verification: count only (head: true) the grade-3 rows this converter
    // is credited with, and compare against what this run saw.
    const { count, error: countError } = await supabase
        .from('eval_golden_judgments')
        .select('*', { count: 'exact', head: true })
        .eq('grade', 3)
        .eq('judged_by', 'converter-phase-4.6.2');
    if (countError) {
        console.error('Verification count failed:', countError.message);
        process.exit(1);
    }
    console.log(`Verification: ${count} grade-3 judgments with judged_by='converter-phase-4.6.2' in table.`);
    if (count !== inserted + skipped) {
        console.error(`MISMATCH: expected ${inserted + skipped}, got ${count}`);
        process.exit(1);
    }
    console.log('Conversion verified.');
}
|
|
85
|
+
// Run the conversion; any failure that escapes main() is printed and turned
// into a non-zero exit code.
main().catch((failure) => {
    console.error(failure);
    process.exit(1);
});
|