@aperdomoll90/ledger-ai 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +177 -221
- package/dist/commands/add.js +51 -100
- package/dist/commands/backfill.js +55 -0
- package/dist/commands/backup.js +10 -10
- package/dist/commands/check.js +21 -29
- package/dist/commands/config.js +13 -12
- package/dist/commands/delete.js +22 -17
- package/dist/commands/eval-judge.js +11 -0
- package/dist/commands/eval.js +321 -0
- package/dist/commands/export.js +8 -10
- package/dist/commands/get.js +9 -0
- package/dist/commands/hunt.js +206 -0
- package/dist/commands/ingest.js +15 -14
- package/dist/commands/init.js +18 -20
- package/dist/commands/list.js +21 -7
- package/dist/commands/migrate.js +11 -11
- package/dist/commands/onboard.js +2 -2
- package/dist/commands/pull.js +3 -2
- package/dist/commands/push.js +8 -8
- package/dist/commands/restore.js +38 -38
- package/dist/commands/show.js +13 -16
- package/dist/commands/sync.js +58 -19
- package/dist/commands/tag.js +20 -14
- package/dist/commands/update.js +50 -18
- package/dist/commands/wizard.js +3 -3
- package/dist/lib/ai-search.js +163 -0
- package/dist/lib/audit.js +19 -0
- package/dist/lib/backfill.js +60 -0
- package/dist/lib/config.js +19 -2
- package/dist/lib/document-classification.js +5 -0
- package/dist/lib/document-fetching.js +77 -0
- package/dist/lib/document-operations.js +150 -0
- package/dist/lib/documents/classification.js +5 -0
- package/dist/lib/documents/fetching.js +89 -0
- package/dist/lib/documents/operations.js +304 -0
- package/dist/lib/domains.js +116 -0
- package/dist/lib/embeddings.js +190 -0
- package/dist/lib/errors.js +3 -1
- package/dist/lib/eval/eval-advanced.js +289 -0
- package/dist/lib/eval/eval-judge-session.js +233 -0
- package/dist/lib/eval/eval-store.js +105 -0
- package/dist/lib/eval/eval.js +303 -0
- package/dist/lib/file-writer.js +23 -0
- package/dist/lib/generators.js +44 -45
- package/dist/lib/hunter-db.js +235 -0
- package/dist/lib/hunter-rss.js +30 -0
- package/dist/lib/hunter-scoring.js +55 -0
- package/dist/lib/hunter-types.js +36 -0
- package/dist/lib/lint-configs.js +20 -0
- package/dist/lib/migrate.js +2 -2
- package/dist/lib/notes.js +173 -59
- package/dist/lib/observability.js +296 -0
- package/dist/lib/op-add-note-types.test.js +7 -6
- package/dist/lib/prompt.js +8 -8
- package/dist/lib/rate-limiter.js +103 -0
- package/dist/lib/search/ai-search.js +396 -0
- package/dist/lib/search/chunk-context-enrichment.js +155 -0
- package/dist/lib/search/embeddings.js +293 -0
- package/dist/lib/search/reranker.js +120 -0
- package/dist/lib/search/semantic-cache.js +53 -0
- package/dist/lib/type-registry.test.js +6 -6
- package/dist/mcp-server.js +553 -66
- package/dist/migrations/migrations/005-audit-log.sql +22 -0
- package/dist/migrations/migrations/005_opportunities.sql +48 -0
- package/dist/migrations/migrations/006-audited-operations.sql +235 -0
- package/dist/migrations/migrations/006_hunt_analytics.sql +38 -0
- package/dist/migrations/migrations/007-eval-golden-judgments.sql +119 -0
- package/dist/migrations/migrations/008-drop-expected-doc-ids.sql +9 -0
- package/dist/migrations/migrations/008-judge-helpers.sql +21 -0
- package/dist/migrations/migrations/009-semantic-cache.sql +216 -0
- package/dist/scripts/batch-grade.js +344 -0
- package/dist/scripts/benchmark-ingestion.js +376 -0
- package/dist/scripts/convert-judgments-to-graded.js +88 -0
- package/dist/scripts/diagnose-first-result.js +333 -0
- package/dist/scripts/drop-golden-query.js +53 -0
- package/dist/scripts/eval-search.js +115 -0
- package/dist/scripts/grade-unjudged-top1.js +138 -0
- package/dist/scripts/hunter-analytics.js +38 -0
- package/dist/scripts/hunter-cron.js +63 -0
- package/dist/scripts/hunter-purge.js +25 -0
- package/dist/scripts/migrate-v2.js +140 -0
- package/dist/scripts/reindex.js +74 -0
- package/dist/scripts/sync-local-docs.js +153 -0
- package/package.json +7 -1
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
// ai-search.ts
|
|
2
|
+
// AI-powered search — vector (meaning), keyword (exact words), hybrid (both combined).
|
|
3
|
+
// Each function calls a Postgres RPC function that does the actual search.
|
|
4
|
+
// TypeScript's job: generate the query embedding, then call the right function.
|
|
5
|
+
import { getOrCacheQueryEmbedding, toVectorString } from './embeddings.js';
|
|
6
|
+
import { rerankResults } from './reranker.js';
|
|
7
|
+
import { buildSearchParams, extractSourceDocIds, SEMANTIC_CACHE_MODEL_ID, SEMANTIC_CACHE_THRESHOLD, } from './semantic-cache.js';
|
|
8
|
+
import { runSearchTrace, startSpan, recordChildSpan, withActiveSpan } from '../observability.js';
|
|
9
|
+
// =============================================================================
|
|
10
|
+
// Search evaluation logging
|
|
11
|
+
// =============================================================================
|
|
12
|
+
/**
 * Record a completed search in the `search_evaluations` table.
 *
 * Stores the query text, the search mode, the number of results, a compact
 * per-result summary (id, score, document_type), the distinct document types
 * that came back, and the response time.
 *
 * Fire-and-forget: the insert is never awaited and this function never throws.
 * Every failure path is reported on stderr and otherwise ignored, so a logging
 * problem cannot fail the search that triggered it:
 *   - a synchronous throw while building or issuing the insert,
 *   - a rejected promise,
 *   - an `error` field on the resolved response (the client calls in this file
 *     report failures that way rather than by rejecting).
 *
 * @param {object} supabase - Client exposing `from(table).insert(row)`.
 * @param {object} params
 * @param {string} params.query - Query text that was searched.
 * @param {string} params.searchMode - Mode label stored in `search_mode`.
 * @param {Array<object>} params.results - Rows that were returned to the caller.
 * @param {number} params.responseTimeMs - Elapsed time in milliseconds.
 * @returns {void}
 */
function logSearchEvaluation(supabase, params) {
    const reportFailure = (failure) => {
        process.stderr.write(`[ledger] search evaluation logging failed: ${failure?.message ?? 'unknown error'}\n`);
    };
    try {
        // Distinct document types present in this result set.
        const documentTypes = [...new Set(params.results.map(result => result.document_type))];
        // Only ids and scores are stored, not full content. The score field
        // name differs per result shape, so take the first one present.
        const resultsSummary = params.results.map(result => ({
            id: result.id,
            score: result.similarity ?? result.rank ?? result.score ?? null,
            document_type: result.document_type,
        }));
        // Promise.resolve() normalises a thenable query builder into a real
        // promise so .catch() is always available.
        Promise.resolve(supabase
            .from('search_evaluations')
            .insert({
                query_text: params.query,
                search_mode: params.searchMode,
                result_count: params.results.length,
                results: resultsSummary,
                document_types: documentTypes,
                response_time_ms: params.responseTimeMs,
            }))
            .then((response) => {
                // A resolved response can still carry a failure.
                if (response?.error) {
                    reportFailure(response.error);
                }
            })
            .catch(reportFailure);
    }
    catch (syncError) {
        reportFailure(syncError);
    }
}
|
|
47
|
+
// =============================================================================
|
|
48
|
+
// Search functions
|
|
49
|
+
// =============================================================================
|
|
50
|
+
/**
 * Search by meaning — e.g. "how does auth work?" can find documents about OAuth.
 *
 * Flow:
 *   1. Get the query embedding via `getOrCacheQueryEmbedding` and serialise it
 *      with `toVectorString`.
 *   2. Look the embedding up in the semantic cache (`semantic_cache_lookup` RPC).
 *      A non-empty hit is returned immediately.
 *   3. On a miss, call the `match_documents` RPC with the threshold, limit and
 *      optional domain / document_type / project filters.
 *   4. Store non-empty results in the semantic cache without waiting for it.
 *
 * The semantic cache is best-effort in both directions: a failed lookup is
 * treated as a miss and a failed store is only reported on stderr.
 *
 * @param {object} clients - Carries `supabase`, plus `observabilityEnvironment`
 *   and `sessionId` for tracing.
 * @param {object} props
 * @param {string} props.query - Query text.
 * @param {number} [props.threshold=0.38] - Passed to the RPC as `p_threshold`.
 * @param {number} [props.limit=10] - Passed to the RPC as `p_max_results`.
 * @param {string} [props.domain]
 * @param {string} [props.document_type]
 * @param {string} [props.project]
 * @returns {Promise<Array<object>>} Rows from the cache or from `match_documents`.
 * @throws {Error} When `match_documents` reports an error; the original error
 *   object is attached as `cause`.
 */
export async function searchByVector(clients, props) {
    const startTime = Date.now();
    // Resolve defaults once so trace metadata, cache key and RPC always agree.
    const threshold = props.threshold ?? 0.38;
    const limit = props.limit ?? 10;
    return runSearchTrace({
        mode: 'vector',
        query: props.query,
        environment: clients.observabilityEnvironment,
        sessionId: clients.sessionId,
        input: {
            query: props.query,
            filters: { domain: props.domain, project: props.project, document_type: props.document_type },
        },
        metadata: { threshold, limit },
    }, async (trace) => {
        const queryEmbedding = await getOrCacheQueryEmbedding(clients, props.query);
        const embeddingString = toVectorString(queryEmbedding);
        // Semantic cache lookup (layer 2)
        const searchParams = buildSearchParams({
            threshold,
            limit,
            domain: props.domain,
            document_type: props.document_type,
            project: props.project,
        });
        const cacheSpan = startSpan('semantic-cache-lookup');
        let cachedResults = null;
        let lookupFailure = null;
        try {
            ({ data: cachedResults } = await clients.supabase.rpc('semantic_cache_lookup', {
                p_query_embedding: embeddingString,
                p_search_mode: 'vector',
                p_search_params: searchParams,
                p_embedding_model_id: SEMANTIC_CACHE_MODEL_ID,
                p_similarity_threshold: SEMANTIC_CACHE_THRESHOLD,
            }));
        }
        catch (lookupError) {
            // The cache is an optimisation: a failed lookup is just a miss.
            lookupFailure = lookupError?.message ?? 'unknown';
        }
        const cacheHit = !!(cachedResults && cachedResults.length > 0);
        cacheSpan.update({
            output: lookupFailure === null ? { hit: cacheHit } : { hit: false, error: lookupFailure },
        });
        cacheSpan.end();
        if (lookupFailure !== null) {
            process.stderr.write(`[ledger] semantic cache lookup failed: ${lookupFailure}\n`);
        }
        if (cacheHit) {
            const results = cachedResults;
            trace.update({
                output: {
                    resultCount: results.length,
                    topResultIds: results.slice(0, 3).map(result => result.id),
                    cacheHit: true,
                },
            });
            logSearchEvaluation(clients.supabase, {
                query: props.query,
                searchMode: 'vector',
                results,
                responseTimeMs: Date.now() - startTime,
            });
            return results;
        }
        // Cache miss: run full search pipeline
        const retrieveSpan = startSpan('retrieve');
        const { data, error } = await clients.supabase.rpc('match_documents', {
            q_emb: embeddingString,
            p_threshold: threshold,
            p_max_results: limit,
            p_domain: props.domain ?? null,
            p_document_type: props.document_type ?? null,
            p_project: props.project ?? null,
        });
        if (error) {
            retrieveSpan.update({ output: { error: error.message } });
            retrieveSpan.end();
            trace.update({ output: { error: error.message } });
            // Keep the original error (code, details, ...) reachable via `cause`.
            throw new Error(`Vector search failed for "${props.query}": ${error.message}`, { cause: error });
        }
        const results = (data ?? []);
        retrieveSpan.update({ output: { rowCount: results.length } });
        retrieveSpan.end();
        // Store in semantic cache (non-blocking)
        if (results.length > 0) {
            const sourceDocIds = extractSourceDocIds(results);
            const storeSpan = startSpan('semantic-cache-store');
            const reportStoreFailure = (failure) => {
                const reason = failure?.message ?? 'unknown';
                storeSpan.update({ output: { error: reason } });
                process.stderr.write(`[ledger] semantic cache store failed: ${reason}\n`);
            };
            Promise.resolve(clients.supabase.rpc('semantic_cache_store', {
                p_query_text: props.query,
                p_query_embedding: embeddingString,
                p_search_mode: 'vector',
                p_search_params: searchParams,
                p_cached_results: results,
                p_source_doc_ids: sourceDocIds,
                p_embedding_model_id: SEMANTIC_CACHE_MODEL_ID,
            })).then((response) => {
                // A resolved response can still carry an `error`.
                if (response?.error) {
                    reportStoreFailure(response.error);
                }
            }).catch(reportStoreFailure).finally(() => {
                storeSpan.end();
            });
        }
        trace.update({
            output: {
                resultCount: results.length,
                topResultIds: results.slice(0, 3).map(result => result.id),
                cacheHit: false,
            },
        });
        logSearchEvaluation(clients.supabase, {
            query: props.query,
            searchMode: 'vector',
            results,
            responseTimeMs: Date.now() - startTime,
        });
        return results;
    });
}
|
|
166
|
+
/**
 * Search by exact words — e.g. "pgvector HNSW" finds documents containing
 * those words. Useful for code identifiers, proper nouns and error messages.
 *
 * No embedding is generated: the query text is passed straight to the
 * `match_documents_keyword` RPC together with the optional filters.
 *
 * @param {object} clients - Carries `supabase`, plus `observabilityEnvironment`
 *   and `sessionId` for tracing.
 * @param {object} props
 * @param {string} props.query - Query text.
 * @param {number} [props.limit=10] - Passed to the RPC as `p_max_results`.
 * @param {string} [props.domain]
 * @param {string} [props.document_type]
 * @param {string} [props.project]
 * @returns {Promise<Array<object>>} Rows returned by the RPC (empty array when
 *   the RPC returns no data).
 * @throws {Error} When the RPC reports an error; the original error object is
 *   attached as `cause`.
 */
export async function searchByKeyword(clients, props) {
    const startTime = Date.now();
    // Resolve the default once so trace metadata and the RPC always agree.
    const limit = props.limit ?? 10;
    return runSearchTrace({
        mode: 'keyword',
        query: props.query,
        environment: clients.observabilityEnvironment,
        sessionId: clients.sessionId,
        input: {
            query: props.query,
            filters: { domain: props.domain, project: props.project, document_type: props.document_type },
        },
        metadata: { limit },
    }, async (trace) => {
        const { data, error } = await clients.supabase.rpc('match_documents_keyword', {
            p_query: props.query,
            p_max_results: limit,
            p_domain: props.domain ?? null,
            p_document_type: props.document_type ?? null,
            p_project: props.project ?? null,
        });
        if (error) {
            trace.update({ output: { error: error.message } });
            // Keep the original error (code, details, ...) reachable via `cause`.
            throw new Error(`Keyword search failed for "${props.query}": ${error.message}`, { cause: error });
        }
        const results = (data ?? []);
        trace.update({
            output: {
                resultCount: results.length,
                topResultIds: results.slice(0, 3).map(result => result.id),
                cacheHit: false,
            },
        });
        logSearchEvaluation(clients.supabase, {
            query: props.query,
            searchMode: 'keyword',
            results,
            responseTimeMs: Date.now() - startTime,
        });
        return results;
    });
}
|
|
213
|
+
/**
 * Combined search — one `match_documents_hybrid` RPC call that receives both
 * the query embedding and the query text, optionally followed by reranking.
 *
 * RRF (Reciprocal Rank Fusion), as described for this mode:
 *   score = 1/(k + vector_rank) + 1/(k + keyword_rank)
 * `k` is sent to the RPC as `p_rrf_k` (default 60); the fusion itself happens
 * inside the RPC, not in this function.
 *
 * Reranking: when `props.reranker === 'cohere'` and `clients.cohereApiKey` is
 * set, twice as many candidates are requested and `rerankResults` picks the
 * top `limit`. The semantic cache is bypassed entirely in that mode. Whatever
 * `rerankResults` returns is capped at `limit`, so a fallback that hands back
 * the unchanged candidate list cannot return more rows than were asked for.
 *
 * Without reranking, the semantic cache is consulted first and non-empty
 * results are stored afterwards. The cache is best-effort: a failed lookup is
 * treated as a miss and a failed store is only reported on stderr.
 *
 * @param {object} clients - Carries `supabase`, optional `cohereApiKey`, plus
 *   `observabilityEnvironment` and `sessionId` for tracing.
 * @param {object} props
 * @param {string} props.query - Query text.
 * @param {number} [props.threshold=0.38] - Passed to the RPC as `p_threshold`.
 * @param {number} [props.limit=10] - Maximum number of rows returned.
 * @param {string} [props.reranker] - `'cohere'` enables reranking.
 * @param {number} [props.reciprocalRankFusionK=60] - Passed as `p_rrf_k`.
 * @param {string} [props.domain]
 * @param {string} [props.document_type]
 * @param {string} [props.project]
 * @returns {Promise<Array<object>>} Result rows; rows coming from the RPC have
 *   their `timing` field removed.
 * @throws {Error} When `match_documents_hybrid` reports an error; the original
 *   error object is attached as `cause`.
 */
export async function searchHybrid(clients, props) {
    const startTime = Date.now();
    // Strict boolean: the API key itself must never end up in this flag.
    const useReranker = props.reranker === 'cohere' && Boolean(clients.cohereApiKey);
    const desiredLimit = props.limit ?? 10;
    // When reranking, fetch more candidates so the reranker has a bigger pool.
    const requestLimit = useReranker ? desiredLimit * 2 : desiredLimit;
    const threshold = props.threshold ?? 0.38;
    const rrfK = props.reciprocalRankFusionK ?? 60;
    const searchMode = useReranker ? 'hybrid+rerank' : 'hybrid';
    return runSearchTrace({
        mode: searchMode,
        query: props.query,
        environment: clients.observabilityEnvironment,
        sessionId: clients.sessionId,
        input: {
            query: props.query,
            filters: { domain: props.domain, project: props.project, document_type: props.document_type },
        },
        metadata: {
            threshold,
            limit: desiredLimit,
            rerankerEnabled: useReranker,
            reciprocalRankFusionK: rrfK,
        },
    }, async (trace) => {
        const queryEmbedding = await getOrCacheQueryEmbedding(clients, props.query);
        const embeddingString = toVectorString(queryEmbedding);
        // Semantic cache lookup (layer 2)
        // Skip cache when reranker is enabled (reranker produces different ordering)
        const searchParams = buildSearchParams({
            threshold,
            limit: requestLimit,
            domain: props.domain,
            document_type: props.document_type,
            project: props.project,
        });
        if (!useReranker) {
            const cacheSpan = startSpan('semantic-cache-lookup');
            let cachedResults = null;
            let lookupFailure = null;
            try {
                ({ data: cachedResults } = await clients.supabase.rpc('semantic_cache_lookup', {
                    p_query_embedding: embeddingString,
                    p_search_mode: 'hybrid',
                    p_search_params: searchParams,
                    p_embedding_model_id: SEMANTIC_CACHE_MODEL_ID,
                    p_similarity_threshold: SEMANTIC_CACHE_THRESHOLD,
                }));
            }
            catch (lookupError) {
                // The cache is an optimisation: a failed lookup is just a miss.
                lookupFailure = lookupError?.message ?? 'unknown';
            }
            const cacheHit = !!(cachedResults && cachedResults.length > 0);
            cacheSpan.update({
                output: lookupFailure === null ? { hit: cacheHit } : { hit: false, error: lookupFailure },
            });
            cacheSpan.end();
            if (lookupFailure !== null) {
                process.stderr.write(`[ledger] semantic cache lookup failed: ${lookupFailure}\n`);
            }
            if (cacheHit) {
                const cachedRows = cachedResults;
                trace.update({
                    output: {
                        resultCount: cachedRows.length,
                        topResultIds: cachedRows.slice(0, 3).map(result => result.id),
                        cacheHit: true,
                    },
                });
                logSearchEvaluation(clients.supabase, {
                    query: props.query,
                    searchMode: 'hybrid',
                    results: cachedRows,
                    responseTimeMs: Date.now() - startTime,
                });
                return cachedRows;
            }
        }
        // Cache miss: run full search pipeline
        const retrieveSpan = startSpan('retrieve');
        const retrieveStart = Date.now();
        const { data, error } = await clients.supabase.rpc('match_documents_hybrid', {
            q_emb: embeddingString,
            q_text: props.query,
            p_threshold: threshold,
            p_max_results: requestLimit,
            p_domain: props.domain ?? null,
            p_document_type: props.document_type ?? null,
            p_project: props.project ?? null,
            p_rrf_k: rrfK,
        });
        if (error) {
            retrieveSpan.update({ output: { error: error.message } });
            retrieveSpan.end();
            trace.update({ output: { error: error.message } });
            // Keep the original error (code, details, ...) reachable via `cause`.
            throw new Error(`Hybrid search failed for "${props.query}": ${error.message}`, { cause: error });
        }
        const rows = (data ?? []);
        // The timing sidecar is read from the first row only.
        const timing = rows[0]?.timing;
        retrieveSpan.update({ output: { rowCount: rows.length, timing } });
        retrieveSpan.end();
        // Emit three child spans from the timing sidecar, laid end to end
        // starting at retrieveStart using the reported ms durations.
        if (timing) {
            let cursor = retrieveStart;
            recordChildSpan('retrieve.vector', cursor, cursor + timing.vector_ms, { durationMs: timing.vector_ms });
            cursor += timing.vector_ms;
            recordChildSpan('retrieve.keyword', cursor, cursor + timing.keyword_ms, { durationMs: timing.keyword_ms });
            cursor += timing.keyword_ms;
            recordChildSpan('retrieve.fusion', cursor, cursor + timing.fusion_ms, { durationMs: timing.fusion_ms });
        }
        // Strip timing from rows before exposing to callers (internal sidecar only).
        let results = rows.map(({ timing: _timing, ...rest }) => rest);
        // Rerank: hand the candidates to rerankResults for re-scoring.
        if (useReranker && results.length > 0) {
            const rerankSpan = startSpan('rerank');
            const candidates = results;
            try {
                const reranked = await withActiveSpan(rerankSpan, async () => {
                    return rerankResults(props.query, candidates, {
                        apiKey: clients.cohereApiKey,
                        topN: desiredLimit,
                    });
                });
                // Cap at the requested limit: if the reranker hands back the
                // unchanged candidate pool, it holds up to 2x desiredLimit rows.
                results = reranked.slice(0, desiredLimit);
                rerankSpan.update({ output: { inputCount: candidates.length, outputCount: results.length } });
            }
            finally {
                // Close the span even when reranking throws.
                rerankSpan.end();
            }
        }
        // Store in semantic cache (non-blocking, skip if reranker was used)
        if (results.length > 0 && !useReranker) {
            const sourceDocIds = extractSourceDocIds(results);
            const storeSpan = startSpan('semantic-cache-store');
            const reportStoreFailure = (failure) => {
                const reason = failure?.message ?? 'unknown';
                storeSpan.update({ output: { error: reason } });
                process.stderr.write(`[ledger] semantic cache store failed: ${reason}\n`);
            };
            Promise.resolve(clients.supabase.rpc('semantic_cache_store', {
                p_query_text: props.query,
                p_query_embedding: embeddingString,
                p_search_mode: 'hybrid',
                p_search_params: searchParams,
                p_cached_results: results,
                p_source_doc_ids: sourceDocIds,
                p_embedding_model_id: SEMANTIC_CACHE_MODEL_ID,
            })).then((response) => {
                // A resolved response can still carry an `error`.
                if (response?.error) {
                    reportStoreFailure(response.error);
                }
            }).catch(reportStoreFailure).finally(() => {
                storeSpan.end();
            });
        }
        trace.update({
            output: {
                resultCount: results.length,
                topResultIds: results.slice(0, 3).map(result => result.id),
                cacheHit: false,
            },
        });
        logSearchEvaluation(clients.supabase, {
            query: props.query,
            searchMode,
            results,
            responseTimeMs: Date.now() - startTime,
        });
        return results;
    });
}
|
|
373
|
+
/**
 * Smart retrieval — ask the `retrieve_context` RPC how much of a matched
 * document to hand to the LLM.
 *
 * The sizing decision (full document versus matched chunk plus neighbours) is
 * made inside the RPC; this function only forwards the parameters and
 * normalises the response to a single row.
 *
 * @param {object} supabase - Client exposing `rpc(name, args)`.
 * @param {object} props
 * @param {*} props.document_id - Sent as `p_document_id`.
 * @param {number} props.matched_chunk_index - Sent as `p_matched_chunk_index`.
 * @param {number} [props.context_window=4000] - Sent as `p_context_window`.
 * @param {number} [props.neighbor_count=1] - Sent as `p_neighbor_count`.
 * @returns {Promise<object|null>} The first row when the RPC returns an array,
 *   the value itself when it returns a single object, or `null` when it
 *   returns nothing.
 * @throws {Error} When the RPC reports an error; the original error object is
 *   attached as `cause`.
 */
export async function retrieveContext(supabase, props) {
    const { data, error } = await supabase.rpc('retrieve_context', {
        p_document_id: props.document_id,
        p_matched_chunk_index: props.matched_chunk_index,
        p_context_window: props.context_window ?? 4000,
        p_neighbor_count: props.neighbor_count ?? 1,
    });
    if (error) {
        // Keep the original error (code, details, ...) reachable via `cause`.
        throw new Error(`Context retrieval failed for document #${props.document_id}, chunk ${props.matched_chunk_index}: ${error.message}`, { cause: error });
    }
    if (!data || (Array.isArray(data) && data.length === 0)) {
        return null;
    }
    return (Array.isArray(data) ? data[0] : data);
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
// chunk-context-enrichment.ts
|
|
2
|
+
// Pre-embedding enrichment — generates context summaries per chunk using an LLM.
|
|
3
|
+
//
|
|
4
|
+
// Implements the "Contextual Retrieval" technique (Anthropic, 2024).
|
|
5
|
+
// We call it "chunk context enrichment" because the code operates at ingestion
|
|
6
|
+
// time, enriching chunks with document context before embedding — not at
|
|
7
|
+
// retrieval time. The industry name describes the goal (better retrieval),
|
|
8
|
+
// not the action.
|
|
9
|
+
//
|
|
10
|
+
// Optimized pipeline (S38):
|
|
11
|
+
// 1. Generate a document summary (one LLM call, processes full document once)
|
|
12
|
+
// 2. For each chunk, build context from: summary + header path + neighbor chunks
|
|
13
|
+
// 3. Fire all LLM calls in parallel (rate limiter controls concurrency)
|
|
14
|
+
// 4. Each call processes ~1K tokens instead of ~18K (95% token reduction)
|
|
15
|
+
//
|
|
16
|
+
// This reduces ingestion of a 73K document from ~12 minutes to ~30 seconds.
|
|
17
|
+
// The key insight: truncated context (summary + neighbors instead of full doc)
|
|
18
|
+
// reduces per-call tokens enough to unblock parallelism without hitting
|
|
19
|
+
// the TPM (Tokens Per Minute) limit.
|
|
20
|
+
import { openaiLimiter } from '../rate-limiter.js';
|
|
21
|
+
// Model name sent as `model` on every chat-completion request in this module
// (the document summary call and each per-chunk context call).
const CONTEXT_ENRICHMENT_MODEL = 'gpt-4o-mini';
// Prompt for the single document-level summary call.
// Placeholder: {DOCUMENT_CONTENT} — replaced with the full document text.
// The text below is sent to the model as-is; blank lines are part of the prompt.
const SUMMARY_PROMPT = `Summarize this document in 150-200 words. Focus on: what the document is about, its structure, and the key topics it covers. Be factual and concise.

<document>
{DOCUMENT_CONTENT}
</document>`;
// Prompt for each per-chunk context call.
// Placeholders: {DOCUMENT_SUMMARY}, {HEADER_PATH}, {PREV_CHUNK},
// {CHUNK_CONTENT}, {NEXT_CHUNK} — each appears exactly once.
// The text below is sent to the model as-is; blank lines are part of the prompt.
const CONTEXT_PROMPT = `Here is a summary of the document:
<document_summary>
{DOCUMENT_SUMMARY}
</document_summary>

Here is the section this chunk belongs to (header path):
<section>
{HEADER_PATH}
</section>

Here are the neighboring chunks for context:
<previous_chunk>
{PREV_CHUNK}
</previous_chunk>

<chunk>
{CHUNK_CONTENT}
</chunk>

<next_chunk>
{NEXT_CHUNK}
</next_chunk>

Write a short context (2-3 sentences) that situates this chunk within the document. Include the document's topic and what specific information this chunk covers. Be concise and factual.`;
|
|
51
|
+
// =============================================================================
|
|
52
|
+
// Pure functions
|
|
53
|
+
// =============================================================================
|
|
54
|
+
/**
 * Estimate a token count from character length.
 *
 * Uses a fixed characters-per-token ratio and rounds up. The default of 4 is
 * the rule of thumb this module uses for English text; pass a different ratio
 * for content that tokenises differently.
 *
 * @param {string|null|undefined} text - Text to measure. `null`/`undefined`
 *   count as zero tokens.
 * @param {number} [charsPerToken=4] - Average characters per token; must be a
 *   positive finite number.
 * @returns {number} `ceil(text.length / charsPerToken)`.
 * @throws {RangeError} When `charsPerToken` is not a positive finite number.
 */
export function estimateTokenCount(text, charsPerToken = 4) {
    if (!Number.isFinite(charsPerToken) || charsPerToken <= 0) {
        throw new RangeError(`charsPerToken must be a positive finite number, got ${charsPerToken}`);
    }
    if (text == null) {
        return 0;
    }
    return Math.ceil(text.length / charsPerToken);
}
|
|
62
|
+
/**
 * Find the markdown header hierarchy that applies at a chunk's position in the
 * document, e.g. "Database > Caching > semantic_cache".
 *
 * The chunk is located by searching the document for the first 50 characters
 * of the chunk. Headers are then collected from the start of the document up
 * to and including the line on which that match begins, so a chunk that opens
 * with a header line includes that header in its own path.
 *
 * Each header replaces every previously seen header of the same or deeper
 * level. This holds even when levels are skipped: after `#` then `###`, a
 * second `###` replaces the first rather than nesting under it.
 *
 * Pure string matching — no LLM call.
 *
 * @param {string} documentContent - Full markdown document.
 * @param {string} chunkContent - Text of the chunk to locate.
 * @returns {string} Header titles joined by " > ", or '' when the chunk is
 *   empty, cannot be found, or has no header above it.
 */
export function findHeaderPath(documentContent, chunkContent) {
    const needle = chunkContent.slice(0, 50);
    if (needle.length === 0) {
        // An empty needle would match everywhere; there is nothing to locate.
        return '';
    }
    // Search the whole document rather than line by line, so a chunk prefix
    // that spans a line break can still be found.
    const matchOffset = documentContent.indexOf(needle);
    if (matchOffset === -1) {
        return '';
    }
    const headerStack = [];
    let lineStart = 0;
    for (const line of documentContent.split('\n')) {
        if (lineStart > matchOffset) {
            break;
        }
        const headerMatch = /^(#{1,6})\s/.exec(line);
        if (headerMatch) {
            const level = headerMatch[1].length;
            // Compare recorded levels, not stack depth, so skipped levels
            // do not leave stale siblings behind.
            while (headerStack.length > 0 && headerStack[headerStack.length - 1].level >= level) {
                headerStack.pop();
            }
            headerStack.push({ level, title: line.replace(/^#+\s*/, '').trim() });
        }
        // +1 accounts for the '\n' removed by split().
        lineStart += line.length + 1;
    }
    return headerStack.map((header) => header.title).join(' > ');
}
|
|
85
|
+
// =============================================================================
|
|
86
|
+
// LLM functions
|
|
87
|
+
// =============================================================================
|
|
88
|
+
/**
 * Fill `{PLACEHOLDER}` markers in a prompt template in a single pass.
 *
 * A replacer function is used so that `$&`, `$'`, `$$` and similar sequences
 * inside the inserted text are kept literally instead of being interpreted as
 * replacement patterns. Because the template is scanned only once, text
 * inserted for one placeholder is never re-scanned for another. Markers with
 * no entry in `values` are left untouched.
 */
function fillPromptTemplate(template, values) {
    return template.replace(/\{([A-Z_]+)\}/g, (marker, key) => (Object.prototype.hasOwnProperty.call(values, key) ? values[key] : marker));
}
/**
 * Generate a context summary for each chunk using an LLM.
 *
 * Pipeline:
 *   1. One call summarises the whole document.
 *   2. For each chunk, a prompt is built from that summary, the chunk's header
 *      path, the chunk itself and its previous/next neighbours (not the full
 *      document).
 *   3. All per-chunk calls are handed to `openaiLimiter.schedule` up front and
 *      awaited together.
 *
 * @param {object} openai - Client exposing `chat.completions.create(request)`.
 * @param {Array<{content: string, chunk_index: *}>} chunks - Chunks in
 *   document order.
 * @param {string} documentContent - Full text of the document.
 * @returns {Promise<Array<{summary: string, tokenCount: number}>>} One entry
 *   per chunk, in the same order as `chunks`. `tokenCount` is the estimate for
 *   the chunk's own content, not for the summary.
 * @throws {Error} When a per-chunk call fails; the message names the chunk and
 *   the original error is attached as `cause`. Errors from the document
 *   summary call propagate unchanged.
 */
export async function generateContextSummaries(openai, chunks, documentContent) {
    if (chunks.length === 0) {
        return [];
    }
    // Step 1: Generate document summary (one LLM call, full document)
    const summaryPrompt = fillPromptTemplate(SUMMARY_PROMPT, { DOCUMENT_CONTENT: documentContent });
    const summaryResponse = await openaiLimiter.schedule(() => openai.chat.completions.create({
        model: CONTEXT_ENRICHMENT_MODEL,
        messages: [
            { role: 'system', content: 'You are a precise technical writer. Output only the summary, nothing else.' },
            { role: 'user', content: summaryPrompt },
        ],
        max_tokens: 300,
        temperature: 0,
    }));
    const docSummary = (summaryResponse.choices[0].message.content ?? '').trim();
    // Step 2: Parallel enrichment with truncated context
    const enrichmentJobs = chunks.map((chunk, chunkIndex) => {
        const isFirst = chunkIndex === 0;
        const isLast = chunkIndex === chunks.length - 1;
        const prompt = fillPromptTemplate(CONTEXT_PROMPT, {
            DOCUMENT_SUMMARY: docSummary,
            HEADER_PATH: findHeaderPath(documentContent, chunk.content) || '(unknown section)',
            PREV_CHUNK: isFirst ? '(start of document)' : chunks[chunkIndex - 1].content,
            CHUNK_CONTENT: chunk.content,
            NEXT_CHUNK: isLast ? '(end of document)' : chunks[chunkIndex + 1].content,
        });
        return openaiLimiter.schedule({ id: `enrich-${chunkIndex}` }, async () => {
            try {
                const response = await openai.chat.completions.create({
                    model: CONTEXT_ENRICHMENT_MODEL,
                    messages: [
                        { role: 'system', content: 'You are a precise technical writer. Output only the context summary, nothing else.' },
                        { role: 'user', content: prompt },
                    ],
                    max_tokens: 150,
                    temperature: 0,
                });
                return {
                    summary: (response.choices[0].message.content ?? '').trim(),
                    tokenCount: estimateTokenCount(chunk.content),
                };
            }
            catch (error) {
                const preview = chunk.content.slice(0, 60).replace(/\n/g, ' ');
                throw new Error(`Context summary failed for chunk ${chunk.chunk_index} ("${preview}..."): ${error instanceof Error ? error.message : String(error)}`, { cause: error });
            }
        });
    });
    // Promise.all resolves in input order regardless of completion order,
    // so no re-sorting is needed.
    return Promise.all(enrichmentJobs);
}
|