@aperdomoll90/ledger-ai 1.3.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +177 -221
- package/dist/commands/add.js +51 -100
- package/dist/commands/backfill.js +55 -0
- package/dist/commands/backup.js +10 -10
- package/dist/commands/check.js +21 -29
- package/dist/commands/config.js +13 -12
- package/dist/commands/delete.js +22 -17
- package/dist/commands/eval-judge.js +11 -0
- package/dist/commands/eval.js +321 -0
- package/dist/commands/export.js +8 -10
- package/dist/commands/get.js +9 -0
- package/dist/commands/hunt.js +206 -0
- package/dist/commands/ingest.js +15 -14
- package/dist/commands/init.js +18 -20
- package/dist/commands/list.js +21 -7
- package/dist/commands/migrate.js +11 -11
- package/dist/commands/onboard.js +2 -2
- package/dist/commands/pull.js +3 -2
- package/dist/commands/push.js +8 -8
- package/dist/commands/restore.js +38 -38
- package/dist/commands/show.js +13 -16
- package/dist/commands/sync.js +58 -19
- package/dist/commands/tag.js +20 -14
- package/dist/commands/update.js +50 -18
- package/dist/commands/wizard.js +3 -3
- package/dist/lib/ai-search.js +163 -0
- package/dist/lib/audit.js +19 -0
- package/dist/lib/backfill.js +60 -0
- package/dist/lib/config.js +19 -2
- package/dist/lib/document-classification.js +5 -0
- package/dist/lib/document-fetching.js +77 -0
- package/dist/lib/document-operations.js +150 -0
- package/dist/lib/documents/classification.js +5 -0
- package/dist/lib/documents/fetching.js +89 -0
- package/dist/lib/documents/operations.js +304 -0
- package/dist/lib/domains.js +116 -0
- package/dist/lib/embeddings.js +190 -0
- package/dist/lib/errors.js +3 -1
- package/dist/lib/eval/eval-advanced.js +289 -0
- package/dist/lib/eval/eval-judge-session.js +233 -0
- package/dist/lib/eval/eval-store.js +105 -0
- package/dist/lib/eval/eval.js +303 -0
- package/dist/lib/file-writer.js +23 -0
- package/dist/lib/generators.js +44 -45
- package/dist/lib/hunter-db.js +235 -0
- package/dist/lib/hunter-rss.js +30 -0
- package/dist/lib/hunter-scoring.js +55 -0
- package/dist/lib/hunter-types.js +36 -0
- package/dist/lib/lint-configs.js +20 -0
- package/dist/lib/migrate.js +2 -2
- package/dist/lib/notes.js +173 -59
- package/dist/lib/observability.js +296 -0
- package/dist/lib/op-add-note-types.test.js +7 -6
- package/dist/lib/prompt.js +8 -8
- package/dist/lib/rate-limiter.js +103 -0
- package/dist/lib/search/ai-search.js +396 -0
- package/dist/lib/search/chunk-context-enrichment.js +155 -0
- package/dist/lib/search/embeddings.js +293 -0
- package/dist/lib/search/reranker.js +120 -0
- package/dist/lib/search/semantic-cache.js +53 -0
- package/dist/lib/type-registry.test.js +6 -6
- package/dist/mcp-server.js +553 -66
- package/dist/migrations/migrations/005-audit-log.sql +22 -0
- package/dist/migrations/migrations/005_opportunities.sql +48 -0
- package/dist/migrations/migrations/006-audited-operations.sql +235 -0
- package/dist/migrations/migrations/006_hunt_analytics.sql +38 -0
- package/dist/migrations/migrations/007-eval-golden-judgments.sql +119 -0
- package/dist/migrations/migrations/008-drop-expected-doc-ids.sql +9 -0
- package/dist/migrations/migrations/008-judge-helpers.sql +21 -0
- package/dist/migrations/migrations/009-semantic-cache.sql +216 -0
- package/dist/scripts/batch-grade.js +344 -0
- package/dist/scripts/benchmark-ingestion.js +376 -0
- package/dist/scripts/convert-judgments-to-graded.js +88 -0
- package/dist/scripts/diagnose-first-result.js +333 -0
- package/dist/scripts/drop-golden-query.js +53 -0
- package/dist/scripts/eval-search.js +115 -0
- package/dist/scripts/grade-unjudged-top1.js +138 -0
- package/dist/scripts/hunter-analytics.js +38 -0
- package/dist/scripts/hunter-cron.js +63 -0
- package/dist/scripts/hunter-purge.js +25 -0
- package/dist/scripts/migrate-v2.js +140 -0
- package/dist/scripts/reindex.js +74 -0
- package/dist/scripts/sync-local-docs.js +153 -0
- package/package.json +7 -1
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
// embeddings.ts
|
|
2
|
+
// Prepares data for the database: generate embeddings, chunk text, format vectors.
|
|
3
|
+
// The database can't call OpenAI or split text — that's TypeScript's job.
|
|
4
|
+
import { createHash } from 'crypto';
|
|
5
|
+
import { openaiLimiter } from '../rate-limiter.js';
|
|
6
|
+
// =============================================================================
|
|
7
|
+
// Constants
|
|
8
|
+
// =============================================================================
|
|
9
|
+
// OpenAI model used for every embedding request made by this module.
const EMBEDDING_MODEL = 'text-embedding-3-small';

// Chunking defaults; callers may override any field via chunkText's `config`.
const DEFAULT_CHUNK_CONFIG = {
    maxChunkSize: 1000,
    overlapChars: 200,
    strategy: 'recursive',
};

// Separators tried by the recursive chunker, coarsest first. The array index
// is the recursion "level"; when a level does not split the text (or a piece
// is still too large) the chunker moves on to the next entry.
const SPLIT_SEPARATORS = [
    /^#{1,6}\s/m,    // Level 0: Markdown headers
    /\n\n+/,         // Level 1: Double newlines (paragraphs)
    /\n/,            // Level 2: Single newlines
    /(?<=[.!?])\s+/, // Level 3: Sentence boundaries
];
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// Pure functions — no API calls, no database, fully testable
|
|
25
|
+
// =============================================================================
|
|
26
|
+
/**
 * Compute the SHA-256 digest of a piece of text.
 *
 * The text is hashed as UTF-8 and the digest is returned as a lowercase hex
 * string. Used for change detection (comparing a document's current content
 * against a previously stored hash).
 *
 * @param {string} text - Content to hash.
 * @returns {string} 64-character hex digest.
 */
export function contentHash(text) {
    const hasher = createHash('sha256');
    hasher.update(text, 'utf-8');
    return hasher.digest('hex');
}
|
|
34
|
+
/**
 * Serialize an embedding into the bracketed, comma-separated text form used
 * when passing a vector value to Postgres through Supabase.
 *
 * Example: [0.021, -0.007, 0.045] → "[0.021,-0.007,0.045]"
 *
 * @param {number[]} embedding - Embedding values.
 * @returns {string} Vector literal with no spaces between elements.
 */
export function toVectorString(embedding) {
    const commaSeparated = embedding.join(',');
    return `[${commaSeparated}]`;
}
|
|
42
|
+
/**
 * Parse a Postgres vector value back into a number[].
 *
 * Vector columns come back from the Supabase REST API as strings such as
 * "[0.021,-0.007,0.045]". A value that is already an array (e.g. from a
 * test mock) is returned unchanged, without inspection.
 *
 * @param {string | number[]} raw - Vector as returned by the database client.
 * @returns {number[]} The parsed vector.
 * @throws {SyntaxError} If `raw` is a string that is not valid JSON.
 * @throws {Error} If `raw` is neither a string nor an array, or if the string
 *   decodes to anything other than an array of numbers.
 */
export function parseVector(raw) {
    if (Array.isArray(raw)) {
        return raw;
    }
    if (typeof raw !== 'string') {
        throw new Error(`Cannot parse vector: expected string or number[], got ${typeof raw}`);
    }
    const parsed = JSON.parse(raw);
    // JSON.parse happily returns objects, numbers, strings or null; none of
    // those is a vector, so reject them here instead of handing a bogus value
    // to the similarity code downstream.
    if (!Array.isArray(parsed) || !parsed.every(value => typeof value === 'number')) {
        throw new Error('Cannot parse vector: string did not decode to an array of numbers');
    }
    return parsed;
}
|
|
54
|
+
/**
 * Break a document into chunks no larger than `maxChunkSize` characters.
 *
 * `config` is merged over DEFAULT_CHUNK_CONFIG. Text that already fits is
 * returned as a single chunk. Longer text is delegated to recursiveSplit,
 * starting at level 0, which walks this hierarchy from coarsest to finest:
 *   1. Markdown headers
 *   2. Double newlines (paragraphs)
 *   3. Single newlines
 *   4. Sentence boundaries (after . ! ?)
 *   5. Forced character split (fallback)
 *
 * The chunks returned by the splitter are then renumbered so `chunk_index`
 * runs 0..n-1 in output order.
 *
 * @param {string} text - Document text.
 * @param {object} [config] - Optional overrides (maxChunkSize, overlapChars, strategy).
 * @returns {Array<object>} Chunks with content, chunk_index, content_type,
 *   strategy and overlap_chars.
 */
export function chunkText(text, config) {
    const settings = { ...DEFAULT_CHUNK_CONFIG, ...config };

    if (text.length > settings.maxChunkSize) {
        const pieces = recursiveSplit(text, 0, settings);
        // The splitter leaves chunk_index as a placeholder; number them here.
        return pieces.map((piece, position) => ({
            ...piece,
            chunk_index: position,
        }));
    }

    // Fits as-is: a single chunk with no overlap.
    const onlyChunk = {
        content: text,
        chunk_index: 0,
        content_type: 'text',
        strategy: settings.strategy,
        overlap_chars: 0,
    };
    return [onlyChunk];
}
|
|
96
|
+
/**
 * Core recursive splitting logic.
 *
 * Splits `text` with the separator at `level`, then greedily packs the
 * resulting sections into chunks of at most `maxChunkSize` characters. Any
 * packed chunk that is still too large is handed to `level + 1`; past the
 * last separator the text is force-split at character positions.
 *
 * Non-header separators are consumed by the split, so when sections are
 * packed back together a representative separator is re-inserted between
 * them (blank line, newline or space, depending on the level). Without this,
 * neighbouring paragraphs/lines/sentences would be fused ("end.Start").
 *
 * @param {string} text - Text to split.
 * @param {number} level - Index into SPLIT_SEPARATORS to try first.
 * @param {{maxChunkSize: number, overlapChars: number, strategy: string}} config
 * @returns {Array<object>} Chunks; `chunk_index` is a placeholder (0) that the
 *   caller is expected to reassign.
 */
function recursiveSplit(text, level, config) {
    const { maxChunkSize, overlapChars, strategy } = config;
    // Base case: text fits
    if (text.length <= maxChunkSize) {
        return [{
                content: text,
                chunk_index: 0, // reassigned by caller
                content_type: 'text',
                strategy,
                overlap_chars: 0,
            }];
    }
    // Bottom level: force-split at character boundaries
    if (level >= SPLIT_SEPARATORS.length) {
        return forceCharSplit(text, maxChunkSize, overlapChars);
    }
    const isHeaderLevel = level === 0;
    const sections = splitKeepingSeparator(text, SPLIT_SEPARATORS[level], isHeaderLevel);
    // If splitting produced only 1 section (separator not found), try next level
    if (sections.length <= 1) {
        return recursiveSplit(text, level + 1, config);
    }
    // Text re-inserted between packed sections, indexed by level. Header
    // sections keep their own text (including trailing newlines), so level 0
    // needs nothing.
    const joinerByLevel = ['', '\n\n', '\n', ' '];
    const joiner = joinerByLevel[level] ?? ' ';
    const chunks = [];
    let currentContent = '';
    // Emit the accumulated content, recursing to a finer level if it is
    // still oversized. Whitespace-only content produces no chunk.
    const flush = () => {
        const trimmed = currentContent.trim();
        if (trimmed.length > 0) {
            chunks.push(...recursiveSplit(trimmed, level + 1, config));
        }
    };
    for (const section of sections) {
        const candidate = currentContent.length > 0
            ? currentContent + joiner + section
            : section;
        if (candidate.length > maxChunkSize && currentContent.length > 0) {
            flush();
            // Carry the tail of the flushed content into the next chunk,
            // except across header boundaries (sections are distinct there).
            // The `> 0` guard matters: slice(-0) would copy the whole string.
            if (!isHeaderLevel && overlapChars > 0) {
                currentContent = currentContent.slice(-overlapChars) + joiner + section;
            }
            else {
                currentContent = section;
            }
        }
        else {
            currentContent = candidate;
        }
    }
    // Flush remaining content
    flush();
    // Mark overlap on chunks (first chunk has 0). This overwrites any
    // overlap_chars set by a deeper level for the same chunk.
    return chunks.map((chunk, index) => ({
        ...chunk,
        overlap_chars: index > 0 && !isHeaderLevel ? Math.min(overlapChars, chunk.content.length) : 0,
    }));
}
|
|
155
|
+
/**
 * Cut text into sections at every match of `separator`.
 *
 * With `keepSeparator` set (header-level splits), the cut is made just
 * before each match using a multiline lookahead built from the separator's
 * source, so the matched text (e.g. "# Title") stays at the start of its
 * section; only zero-length parts are dropped.
 *
 * Otherwise the matched separator text is discarded and parts that are empty
 * or whitespace-only are dropped.
 *
 * @param {string} text - Text to split.
 * @param {RegExp} separator - Pattern marking section boundaries.
 * @param {boolean} keepSeparator - Keep the matched text with its section.
 * @returns {string[]} Sections in document order.
 */
function splitKeepingSeparator(text, separator, keepSeparator) {
    if (!keepSeparator) {
        const pieces = text.split(separator);
        return pieces.filter(piece => piece.trim().length > 0);
    }
    // Zero-width lookahead: split positions fall immediately before a match.
    const beforeEachMatch = new RegExp(`(?=${separator.source})`, 'm');
    const pieces = text.split(beforeEachMatch);
    return pieces.filter(piece => piece.length > 0);
}
|
|
169
|
+
/**
 * Force-split at character positions as a last resort.
 *
 * Used for text with no structural separators (JSON blobs, base64, walls of
 * text). Windows of `maxChunkSize` characters are taken every
 * `maxChunkSize - overlapChars` characters (at least 1), so consecutive
 * chunks share `overlapChars` characters.
 *
 * Splitting stops as soon as a window reaches the end of the text. Stepping
 * further would emit a trailing chunk made up entirely of characters already
 * present in the previous chunk.
 *
 * @param {string} text - Text to split.
 * @param {number} maxChunkSize - Maximum characters per chunk.
 * @param {number} overlapChars - Requested overlap between consecutive chunks.
 * @returns {Array<object>} Chunks with strategy 'forced'; `chunk_index` is a
 *   placeholder (0) that the caller is expected to reassign.
 */
function forceCharSplit(text, maxChunkSize, overlapChars) {
    const chunks = [];
    const step = Math.max(1, maxChunkSize - overlapChars);
    // Overlap actually produced by the window/step pair. Differs from the
    // requested value when overlapChars >= maxChunkSize (step clamps to 1)
    // or when overlapChars is negative (no overlap at all).
    const effectiveOverlap = Math.max(0, maxChunkSize - step);
    for (let offset = 0; offset < text.length; offset += step) {
        chunks.push({
            content: text.slice(offset, offset + maxChunkSize),
            chunk_index: 0, // reassigned by caller
            content_type: 'text',
            strategy: 'forced',
            overlap_chars: offset > 0 ? effectiveOverlap : 0,
        });
        // This window already covers the tail of the text; anything after it
        // would be pure duplication.
        if (offset + maxChunkSize >= text.length) {
            break;
        }
    }
    return chunks;
}
|
|
187
|
+
// =============================================================================
|
|
188
|
+
// API functions — call OpenAI and/or database
|
|
189
|
+
// =============================================================================
|
|
190
|
+
/**
 * Call OpenAI to convert text into an embedding vector (an array of numbers
 * representing the "meaning" of the text; similar texts produce similar
 * vectors, which is how search works).
 *
 * The request is scheduled through `openaiLimiter` and uses EMBEDDING_MODEL.
 *
 * @param {object} openai - OpenAI client exposing `embeddings.create`.
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding of `text`.
 * @throws {Error} If the request fails or the response carries no embedding.
 *   The message includes an 80-character preview of the text, and the
 *   underlying error is attached as `cause` so its stack and fields (status
 *   code, etc.) are not lost.
 */
export async function generateEmbedding(openai, text) {
    try {
        return await openaiLimiter.schedule(async () => {
            const result = await openai.embeddings.create({
                model: EMBEDDING_MODEL,
                input: text,
            });
            const embedding = result?.data?.[0]?.embedding;
            // Fail with a meaningful message rather than an opaque
            // "cannot read properties of undefined" on an empty response.
            if (embedding == null) {
                throw new Error('OpenAI response contained no embedding');
            }
            return embedding;
        });
    }
    catch (error) {
        const preview = text.slice(0, 80).replace(/\n/g, ' ');
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Embedding generation failed for text "${preview}...": ${reason}`, { cause: error });
    }
}
|
|
210
|
+
/**
 * Batch-generate embeddings for multiple texts.
 *
 * Texts are sent to OpenAI in groups of up to 100 per request, one request
 * at a time, each scheduled through `openaiLimiter`. For 151 chunks this is
 * 2 API calls instead of 151.
 *
 * The result is positional: element i is the embedding of texts[i]. To keep
 * that guarantee, every response is checked to contain exactly one entry per
 * input, and entries are ordered by their `index` field when the API
 * supplies one (otherwise response order is trusted).
 *
 * @param {object} openai - OpenAI client exposing `embeddings.create`.
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<number[][]>} Embeddings in the same order as `texts`.
 * @throws {Error} If any batch fails; the message names the batch number and
 *   size, and the underlying error is attached as `cause`. Embeddings from
 *   earlier batches are discarded.
 */
export async function generateEmbeddingsBatch(openai, texts) {
    if (texts.length === 0)
        return [];
    const BATCH_SIZE = 100;
    const allEmbeddings = [];
    for (let batchStart = 0; batchStart < texts.length; batchStart += BATCH_SIZE) {
        const batch = texts.slice(batchStart, batchStart + BATCH_SIZE);
        try {
            const embeddings = await openaiLimiter.schedule(async () => {
                const result = await openai.embeddings.create({
                    model: EMBEDDING_MODEL,
                    input: batch,
                });
                const entries = result.data;
                // A short or long response would silently shift every later
                // embedding onto the wrong text.
                if (entries.length !== batch.length) {
                    throw new Error(`expected ${batch.length} embeddings, received ${entries.length}`);
                }
                const hasIndexes = entries.every(entry => Number.isInteger(entry.index));
                const ordered = hasIndexes
                    ? [...entries].sort((left, right) => left.index - right.index)
                    : entries;
                return ordered.map(entry => entry.embedding);
            });
            allEmbeddings.push(...embeddings);
        }
        catch (error) {
            const batchNumber = Math.floor(batchStart / BATCH_SIZE) + 1;
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Batch embedding failed (batch ${batchNumber}, ${batch.length} texts): ${reason}`, { cause: error });
        }
    }
    return allEmbeddings;
}
|
|
239
|
+
/**
 * Get an embedding for a search query, using the `query_cache` table to
 * avoid repeat OpenAI calls.
 *
 * Flow:
 * 1. Look up the normalized (lower-cased, trimmed) query text in query_cache.
 * 2. Hit: bump hit_count / last_used_at and return the cached embedding.
 * 3. Miss: call OpenAI with the ORIGINAL query text, store the result under
 *    the normalized key, and return the new embedding.
 *
 * Cache reads and writes are awaited but non-fatal: failures are reported on
 * stderr and the search continues. Only the embedding request itself (and a
 * cached value that cannot be parsed) can make this function throw.
 *
 * @param {{supabase: object, openai: object}} clients - Supabase and OpenAI clients.
 * @param {string} query - Search query as typed by the user.
 * @returns {Promise<number[]>} Embedding for the query.
 */
export async function getOrCacheQueryEmbedding(clients, query) {
    // Normalize so capitalization/whitespace differences share one cache entry.
    const normalizedQuery = query.toLowerCase().trim();
    // Check cache
    const { data: cached, error: cacheError } = await clients.supabase
        .from('query_cache')
        .select('embedding, hit_count')
        .eq('query_text', normalizedQuery)
        .single();
    if (cacheError && cacheError.code !== 'PGRST116') {
        // PGRST116 = "not found" (expected for cache miss). Any other error is real.
        process.stderr.write(`[ledger] query cache lookup failed: ${cacheError.message}\n`);
    }
    if (cached?.embedding) {
        // Update cache stats (non-fatal). hit_count may be null on rows that
        // never had it set; null + 1 would serialize as NaN, so treat null as 0.
        // NOTE(review): this is a read-modify-write, so concurrent hits can
        // under-count; acceptable for a statistic.
        const { error: updateError } = await clients.supabase
            .from('query_cache')
            .update({
            hit_count: (cached.hit_count ?? 0) + 1,
            last_used_at: new Date().toISOString(),
        })
            .eq('query_text', normalizedQuery);
        if (updateError) {
            process.stderr.write(`[ledger] query cache hit_count update failed: ${updateError.message}\n`);
        }
        return parseVector(cached.embedding);
    }
    // Generate and cache. Send original query to OpenAI (preserves meaning),
    // but store under normalized key (so "Auth" and "auth" share one cache entry)
    const embedding = await generateEmbedding(clients.openai, query);
    // Cache insert is non-fatal. A failed insert means the next identical
    // query will hit OpenAI again, but search still works.
    const { error: insertError } = await clients.supabase
        .from('query_cache')
        .insert({
        query_text: normalizedQuery,
        embedding: toVectorString(embedding),
        // Derived from the model constant so the recorded id cannot drift
        // from the model that actually produced the vector.
        embedding_model_id: `openai/${EMBEDDING_MODEL}`,
    });
    if (insertError) {
        process.stderr.write(`[ledger] query cache insert failed for "${normalizedQuery}": ${insertError.message}\n`);
    }
    return embedding;
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// reranker.ts
|
|
2
|
+
// Cross-encoder reranking via Cohere Rerank API.
|
|
3
|
+
//
|
|
4
|
+
// After hybrid search returns candidates, this module re-scores each one
|
|
5
|
+
// by sending (query, document) pairs to a cross-encoder model. The model
|
|
6
|
+
// reads query and document together — much more accurate than embedding
|
|
7
|
+
// similarity, which encodes them separately.
|
|
8
|
+
//
|
|
9
|
+
// Uses native fetch — no Cohere SDK dependency. The API is one endpoint,
|
|
10
|
+
// one request shape, one response shape.
|
|
11
|
+
//
|
|
12
|
+
// Graceful degradation: if the API fails, returns original results unchanged.
|
|
13
|
+
// Search should never break because reranking failed.
|
|
14
|
+
import { cohereLimiter } from '../rate-limiter.js';
|
|
15
|
+
import { startSpan } from '../observability.js';
|
|
16
|
+
// Endpoint that rerank requests are POSTed to.
const COHERE_RERANK_URL = 'https://api.cohere.com/v2/rerank';

// Model used when the caller does not pass `options.model`.
const COHERE_RERANK_MODEL = 'rerank-v3.5';
|
|
18
|
+
// =============================================================================
|
|
19
|
+
// rerankResults
|
|
20
|
+
// =============================================================================
|
|
21
|
+
/**
 * Re-rank search results using Cohere's cross-encoder model.
 *
 * Sends the query plus each result's `content` to the rerank endpoint and
 * returns the results in the order Cohere reports, with each result's
 * `score` REPLACED by Cohere's relevance score. This is intentional: the
 * original RRF score is a ranking position, while the reranker score is a
 * quality signal.
 *
 * Graceful degradation: on any failure (limiter or network error, non-2xx
 * status, unparseable body, or a body whose results do not point back at
 * valid input positions) the ORIGINAL `searchResults` array is returned
 * unchanged. This function does not throw for reranking problems.
 *
 * Security: the API key is sent only in the Authorization header, never in
 * the request body. Document content IS sent to Cohere for scoring.
 *
 * @param {string} query - The user's search query.
 * @param {Array<object>} searchResults - Candidates; `content` is the text scored.
 * @param {{apiKey: string, topN?: number, model?: string}} options - API key
 *   plus optional result cap (defaults to all results) and model override.
 * @returns {Promise<Array<object>>} Re-ranked results, or the originals on failure.
 */
export async function rerankResults(query, searchResults, options) {
    if (searchResults.length === 0)
        return [];
    const topN = options.topN ?? searchResults.length;
    const model = options.model ?? COHERE_RERANK_MODEL;
    // --- rerank.prepare ---
    const prepareSpan = startSpan('rerank.prepare');
    const documents = searchResults.map(searchResult => ({
        // A result without content must not crash the length accounting
        // below; search should never break because reranking failed.
        text: searchResult.content ?? '',
    }));
    const totalContentLength = documents.reduce((sum, document) => sum + document.text.length, 0);
    prepareSpan.update({ output: { documentCount: documents.length, totalContentLength } });
    prepareSpan.end();
    // --- rerank.queue-wait + rerank.api-call ---
    const queueSpan = startSpan('rerank.queue-wait');
    let queueSpanOpen = true;
    // End the queue span exactly once, whether the limiter runs the task or
    // rejects before ever starting it.
    const closeQueueSpan = () => {
        if (queueSpanOpen) {
            queueSpanOpen = false;
            queueSpan.end();
        }
    };
    let response;
    try {
        response = await cohereLimiter.schedule(async () => {
            closeQueueSpan();
            const apiSpan = startSpan('rerank.api-call');
            apiSpan.update({ input: { model, topN, documentCount: documents.length } });
            const apiStartMs = Date.now();
            try {
                const fetchResponse = await fetch(COHERE_RERANK_URL, {
                    method: 'POST',
                    headers: {
                        'Authorization': `Bearer ${options.apiKey}`,
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        model,
                        query,
                        documents,
                        top_n: topN,
                    }),
                });
                apiSpan.update({
                    output: {
                        statusCode: fetchResponse.status,
                        latencyMs: Date.now() - apiStartMs,
                    },
                });
                return fetchResponse;
            }
            catch (fetchError) {
                apiSpan.update({
                    output: {
                        error: fetchError instanceof Error ? fetchError.message : String(fetchError),
                        errorType: 'network',
                        latencyMs: Date.now() - apiStartMs,
                    },
                });
                throw fetchError;
            }
            finally {
                // Always close the span, including when the request throws
                // before a promise is even created.
                apiSpan.end();
            }
        });
    }
    catch (_networkError) {
        // Network failure or limiter error — return originals unchanged
        closeQueueSpan();
        return searchResults;
    }
    if (!response.ok) {
        // API error (rate limit, bad key, server error) — return originals unchanged
        return searchResults;
    }
    let cohereResponse;
    try {
        cohereResponse = (await response.json());
    }
    catch (_parseError) {
        // Malformed JSON from Cohere — return originals unchanged
        return searchResults;
    }
    // `?.` because a body of literal `null` is valid JSON.
    const cohereResults = cohereResponse?.results;
    if (!Array.isArray(cohereResults)) {
        return searchResults;
    }
    // Each entry must point at a real input position and carry a numeric
    // score; otherwise the mapping below would fabricate score-only objects.
    const isUsable = (entry) => Number.isInteger(entry?.index)
        && entry.index >= 0
        && entry.index < searchResults.length
        && typeof entry.relevance_score === 'number';
    if (!cohereResults.every(isUsable)) {
        return searchResults;
    }
    // Map Cohere results back to our search results, in Cohere's order.
    // Each result has an 'index' pointing to the original position in our input array.
    return cohereResults.map((cohereResult) => ({
        ...searchResults[cohereResult.index],
        score: cohereResult.relevance_score,
    }));
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// semantic-cache.ts
|
|
2
|
+
// Helpers for the semantic cache (layer 2).
|
|
3
|
+
// Handles serialization, deserialization, and parameter normalization
|
|
4
|
+
// for cache lookup and store operations.
|
|
5
|
+
//
|
|
6
|
+
// The actual cache logic (HNSW lookup, store, invalidation) lives in Postgres
|
|
7
|
+
// RPC functions. This module prepares data for those calls.
|
|
8
|
+
// Identifier of the embedding model, exported as SEMANTIC_CACHE_MODEL_ID.
const EMBEDDING_MODEL_ID = 'openai/text-embedding-3-small';

// Similarity threshold, exported as SEMANTIC_CACHE_THRESHOLD.
const SIMILARITY_THRESHOLD = 0.90;

export {
    EMBEDDING_MODEL_ID as SEMANTIC_CACHE_MODEL_ID,
    SIMILARITY_THRESHOLD as SEMANTIC_CACHE_THRESHOLD,
};
|
|
12
|
+
/**
 * Build the normalized search_params object used for cache key matching.
 *
 * Only the five recognised filters are copied, always in alphabetical key
 * order, so the result does not depend on the order in which the caller set
 * them. A filter whose value is `undefined` is left out entirely (rather
 * than stored as null) so that {domain: undefined} and {} produce the same
 * object. Any other value, including null, 0 and '', is copied as-is.
 *
 * @param {object} input - Search options; unrecognised keys are ignored.
 * @returns {object} New object containing only the defined, recognised filters.
 */
export function buildSearchParams(input) {
    // Alphabetical order for consistent JSONB serialization
    const recognisedKeys = ['document_type', 'domain', 'limit', 'project', 'threshold'];
    const params = {};
    for (const key of recognisedKeys) {
        const value = input[key];
        if (value !== undefined) {
            params[key] = value;
        }
    }
    return params;
}
|
|
34
|
+
// =============================================================================
|
|
35
|
+
// Result serialization
|
|
36
|
+
// =============================================================================
|
|
37
|
+
/**
 * Normalize the cached_results value read from the cache.
 *
 * A missing (falsy) or zero-length value becomes a fresh empty array; any
 * other value is handed back untouched, with no copying or validation.
 *
 * @param {Array<object> | null | undefined} jsonb - cached_results as returned
 *   by the database client.
 * @returns {Array<object>} The cached results, or [] when there are none.
 */
export function parseCachedResults(jsonb) {
    const hasEntries = Boolean(jsonb) && jsonb.length !== 0;
    return hasEntries ? jsonb : [];
}
|
|
46
|
+
/**
 * Collect the distinct `id` values of a list of search results, keeping the
 * order in which each id first appears.
 *
 * Intended for the cache's reverse index (source_doc_ids), so that cache
 * entries built from a document can be found when that document changes.
 *
 * @param {Array<{id: *}>} results - Search results.
 * @returns {Array<*>} Unique ids in first-seen order.
 */
export function extractSourceDocIds(results) {
    const seenIds = new Set();
    for (const result of results) {
        seenIds.add(result.id);
    }
    return Array.from(seenIds);
}
|
|
@@ -6,7 +6,7 @@ vi.mock('./config.js', () => ({
|
|
|
6
6
|
saveConfigFile: (config) => { mockConfigState.current = config; },
|
|
7
7
|
}));
|
|
8
8
|
// Import AFTER mock setup so modules pick up the mocked config
|
|
9
|
-
const { BUILTIN_TYPES, getTypeRegistry, inferDelivery, getRegisteredTypes, isRegisteredType, registerType, validateTypeName, checkMetadataCompleteness, } = await import('./notes.js');
|
|
9
|
+
const { BUILTIN_TYPES, getTypeRegistry, inferDelivery, inferDomain, getRegisteredTypes, isRegisteredType, registerType, validateTypeName, checkMetadataCompleteness, } = await import('./notes.js');
|
|
10
10
|
// --- Helpers ---
|
|
11
11
|
function setUserTypes(types) {
|
|
12
12
|
mockConfigState.current = { types };
|
|
@@ -178,7 +178,7 @@ describe('validateTypeName', () => {
|
|
|
178
178
|
});
|
|
179
179
|
});
|
|
180
180
|
// ============================================================
|
|
181
|
-
// 7. checkMetadataCompleteness (dynamic
|
|
181
|
+
// 7. checkMetadataCompleteness (dynamic domain check)
|
|
182
182
|
// ============================================================
|
|
183
183
|
describe('checkMetadataCompleteness', () => {
|
|
184
184
|
it('returns null when all fields are present for project type', () => {
|
|
@@ -189,20 +189,20 @@ describe('checkMetadataCompleteness', () => {
|
|
|
189
189
|
const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'code-craft');
|
|
190
190
|
expect(result).toBeNull();
|
|
191
191
|
});
|
|
192
|
-
it('prompts for status on project-
|
|
192
|
+
it('prompts for status on project-domain types', () => {
|
|
193
193
|
const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'architecture-decision');
|
|
194
194
|
expect(result).toContain('status');
|
|
195
195
|
});
|
|
196
|
-
it('does NOT prompt for status on persona-
|
|
196
|
+
it('does NOT prompt for status on persona-domain types', () => {
|
|
197
197
|
const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'persona-rule');
|
|
198
198
|
expect(result).toBeNull();
|
|
199
199
|
});
|
|
200
|
-
it('uses
|
|
200
|
+
it('uses type registry for custom types — project custom type requires status', () => {
|
|
201
201
|
setUserTypes({ 'wine-log': 'project' });
|
|
202
202
|
const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'wine-log');
|
|
203
203
|
expect(result).toContain('status');
|
|
204
204
|
});
|
|
205
|
-
it('uses
|
|
205
|
+
it('uses type registry for custom types — knowledge custom type skips status', () => {
|
|
206
206
|
setUserTypes({ 'recipe': 'knowledge' });
|
|
207
207
|
const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'recipe');
|
|
208
208
|
expect(result).toBeNull();
|