@aperdomoll90/ledger-ai 1.3.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/dist/cli.js +177 -221
  2. package/dist/commands/add.js +51 -100
  3. package/dist/commands/backfill.js +55 -0
  4. package/dist/commands/backup.js +10 -10
  5. package/dist/commands/check.js +21 -29
  6. package/dist/commands/config.js +13 -12
  7. package/dist/commands/delete.js +22 -17
  8. package/dist/commands/eval-judge.js +11 -0
  9. package/dist/commands/eval.js +321 -0
  10. package/dist/commands/export.js +8 -10
  11. package/dist/commands/get.js +9 -0
  12. package/dist/commands/hunt.js +206 -0
  13. package/dist/commands/ingest.js +15 -14
  14. package/dist/commands/init.js +18 -20
  15. package/dist/commands/list.js +21 -7
  16. package/dist/commands/migrate.js +11 -11
  17. package/dist/commands/onboard.js +2 -2
  18. package/dist/commands/pull.js +3 -2
  19. package/dist/commands/push.js +8 -8
  20. package/dist/commands/restore.js +38 -38
  21. package/dist/commands/show.js +13 -16
  22. package/dist/commands/sync.js +58 -19
  23. package/dist/commands/tag.js +20 -14
  24. package/dist/commands/update.js +50 -18
  25. package/dist/commands/wizard.js +3 -3
  26. package/dist/lib/ai-search.js +163 -0
  27. package/dist/lib/audit.js +19 -0
  28. package/dist/lib/backfill.js +60 -0
  29. package/dist/lib/config.js +19 -2
  30. package/dist/lib/document-classification.js +5 -0
  31. package/dist/lib/document-fetching.js +77 -0
  32. package/dist/lib/document-operations.js +150 -0
  33. package/dist/lib/documents/classification.js +5 -0
  34. package/dist/lib/documents/fetching.js +89 -0
  35. package/dist/lib/documents/operations.js +304 -0
  36. package/dist/lib/domains.js +116 -0
  37. package/dist/lib/embeddings.js +190 -0
  38. package/dist/lib/errors.js +3 -1
  39. package/dist/lib/eval/eval-advanced.js +289 -0
  40. package/dist/lib/eval/eval-judge-session.js +233 -0
  41. package/dist/lib/eval/eval-store.js +105 -0
  42. package/dist/lib/eval/eval.js +303 -0
  43. package/dist/lib/file-writer.js +23 -0
  44. package/dist/lib/generators.js +44 -45
  45. package/dist/lib/hunter-db.js +235 -0
  46. package/dist/lib/hunter-rss.js +30 -0
  47. package/dist/lib/hunter-scoring.js +55 -0
  48. package/dist/lib/hunter-types.js +36 -0
  49. package/dist/lib/lint-configs.js +20 -0
  50. package/dist/lib/migrate.js +2 -2
  51. package/dist/lib/notes.js +173 -59
  52. package/dist/lib/observability.js +296 -0
  53. package/dist/lib/op-add-note-types.test.js +7 -6
  54. package/dist/lib/prompt.js +8 -8
  55. package/dist/lib/rate-limiter.js +103 -0
  56. package/dist/lib/search/ai-search.js +396 -0
  57. package/dist/lib/search/chunk-context-enrichment.js +155 -0
  58. package/dist/lib/search/embeddings.js +293 -0
  59. package/dist/lib/search/reranker.js +120 -0
  60. package/dist/lib/search/semantic-cache.js +53 -0
  61. package/dist/lib/type-registry.test.js +6 -6
  62. package/dist/mcp-server.js +553 -66
  63. package/dist/migrations/migrations/005-audit-log.sql +22 -0
  64. package/dist/migrations/migrations/005_opportunities.sql +48 -0
  65. package/dist/migrations/migrations/006-audited-operations.sql +235 -0
  66. package/dist/migrations/migrations/006_hunt_analytics.sql +38 -0
  67. package/dist/migrations/migrations/007-eval-golden-judgments.sql +119 -0
  68. package/dist/migrations/migrations/008-drop-expected-doc-ids.sql +9 -0
  69. package/dist/migrations/migrations/008-judge-helpers.sql +21 -0
  70. package/dist/migrations/migrations/009-semantic-cache.sql +216 -0
  71. package/dist/scripts/batch-grade.js +344 -0
  72. package/dist/scripts/benchmark-ingestion.js +376 -0
  73. package/dist/scripts/convert-judgments-to-graded.js +88 -0
  74. package/dist/scripts/diagnose-first-result.js +333 -0
  75. package/dist/scripts/drop-golden-query.js +53 -0
  76. package/dist/scripts/eval-search.js +115 -0
  77. package/dist/scripts/grade-unjudged-top1.js +138 -0
  78. package/dist/scripts/hunter-analytics.js +38 -0
  79. package/dist/scripts/hunter-cron.js +63 -0
  80. package/dist/scripts/hunter-purge.js +25 -0
  81. package/dist/scripts/migrate-v2.js +140 -0
  82. package/dist/scripts/reindex.js +74 -0
  83. package/dist/scripts/sync-local-docs.js +153 -0
  84. package/package.json +7 -1
@@ -0,0 +1,293 @@
1
+ // embeddings.ts
2
+ // Prepares data for the database: generate embeddings, chunk text, format vectors.
3
+ // The database can't call OpenAI or split text — that's TypeScript's job.
4
+ import { createHash } from 'crypto';
5
+ import { openaiLimiter } from '../rate-limiter.js';
6
+ // =============================================================================
7
+ // Constants
8
+ // =============================================================================
// Model name passed to the OpenAI embeddings endpoint by this module.
const EMBEDDING_MODEL = 'text-embedding-3-small';

// Chunking defaults. chunkText() merges caller overrides on top of these.
const DEFAULT_CHUNK_CONFIG = {
    maxChunkSize: 1000,
    overlapChars: 200,
    strategy: 'recursive',
};

// Separators for the recursive chunker, coarsest first. The array index is
// the recursion "level"; once every level is exhausted the chunker falls back
// to a fixed-width character split.
const SPLIT_SEPARATORS = [
    /^#{1,6}\s/m,    // level 0: a Markdown header at the start of a line
    /\n\n+/,         // level 1: blank-line paragraph break
    /\n/,            // level 2: single line break
    /(?<=[.!?])\s+/, // level 3: whitespace that follows sentence punctuation
];
23
+ // =============================================================================
24
+ // Pure functions — no API calls, no database, fully testable
25
+ // =============================================================================
/**
 * Compute the SHA-256 digest of a string.
 *
 * The text is hashed as UTF-8 and the digest is returned as lowercase hex.
 * Intended for change detection: equal content yields an equal hash.
 *
 * @param {string} text - Content to fingerprint.
 * @returns {string} 64-character hexadecimal digest.
 */
export function contentHash(text) {
    const hasher = createHash('sha256');
    hasher.update(text, 'utf-8');
    return hasher.digest('hex');
}
/**
 * Serialize an embedding as a bracketed, comma-separated string, the textual
 * vector form this module sends to the database.
 *
 * Example: [0.021, -0.007, 0.045] → "[0.021,-0.007,0.045]"
 *
 * @param {number[]} embedding - Vector components.
 * @returns {string} The components joined by "," and wrapped in "[" "]".
 */
export function toVectorString(embedding) {
    const components = embedding.join(',');
    return '[' + components + ']';
}
/**
 * Parse a vector value back into an array.
 *
 * Accepts either an array (returned unchanged, e.g. from a test mock) or a
 * JSON array string such as "[0.021,-0.007,0.045]".
 *
 * @param {unknown} raw - Array or JSON array string.
 * @returns {number[]} The vector components.
 * @throws {SyntaxError} If `raw` is a string that is not valid JSON.
 * @throws {Error} If `raw` is neither a string nor an array, or if the string
 *   parses to something other than an array.
 */
export function parseVector(raw) {
    if (Array.isArray(raw)) {
        return raw;
    }
    if (typeof raw !== 'string') {
        throw new Error(`Cannot parse vector: expected string or number[], got ${typeof raw}`);
    }
    const parsed = JSON.parse(raw);
    // Valid JSON is not necessarily a vector: "null", "{}", "3" and "\"x\"" all
    // parse successfully. Reject them here rather than handing a non-array to
    // callers that will treat it as an embedding.
    if (!Array.isArray(parsed)) {
        const actual = parsed === null ? 'null' : typeof parsed;
        throw new Error(`Cannot parse vector: expected a JSON array, got ${actual}`);
    }
    return parsed;
}
/**
 * Split text into chunk records.
 *
 * Caller-supplied `config` fields override DEFAULT_CHUNK_CONFIG. Text no
 * longer than `maxChunkSize` is returned as a single chunk. Longer text is
 * handed to recursiveSplit() starting at level 0, and the resulting chunks
 * are renumbered so `chunk_index` runs 0..n-1 in output order.
 *
 * @param {string} text - Text to chunk.
 * @param {object} [config] - Optional overrides (maxChunkSize, overlapChars, strategy).
 * @returns {Array<object>} Chunk records with content, chunk_index,
 *   content_type, strategy and overlap_chars.
 */
export function chunkText(text, config) {
    const settings = Object.assign({}, DEFAULT_CHUNK_CONFIG, config);
    if (text.length > settings.maxChunkSize) {
        const numbered = [];
        for (const piece of recursiveSplit(text, 0, settings)) {
            // The splitter leaves chunk_index as a placeholder; assign the real position.
            numbered.push({ ...piece, chunk_index: numbered.length });
        }
        return numbered;
    }
    // Short text: a single chunk with nothing to overlap.
    return [{
        content: text,
        chunk_index: 0,
        content_type: 'text',
        strategy: settings.strategy,
        overlap_chars: 0,
    }];
}
/**
 * Core recursive splitting logic.
 *
 * Splits `text` with the separator for `level`, packs consecutive sections
 * into chunks of at most `maxChunkSize` characters, and recurses to
 * `level + 1` for any packed content that is still too large. Once every
 * separator level is exhausted, falls back to forceCharSplit().
 *
 * Non-header separators are consumed by the split, so packed sections are
 * re-joined with a representative separator for that level (blank line,
 * newline, or space). This keeps paragraphs, lines and sentences from being
 * glued together inside a chunk.
 *
 * Overlap: below header level, the tail of the flushed content (up to
 * `overlapChars` characters) is prepended to the next chunk. `overlap_chars`
 * on the first chunk of each flush records the length of that carried tail;
 * trimming may make the real overlap slightly smaller. Values set by deeper
 * levels on the remaining chunks are kept as-is. Header-level boundaries
 * never carry overlap.
 *
 * @param {string} text - Text to split.
 * @param {number} level - Index into SPLIT_SEPARATORS to try first.
 * @param {object} config - Resolved config (maxChunkSize, overlapChars, strategy).
 * @returns {Array<object>} Chunk records; chunk_index is a placeholder that
 *   the caller reassigns.
 */
function recursiveSplit(text, level, config) {
    const { maxChunkSize, overlapChars, strategy } = config;
    // Base case: text fits
    if (text.length <= maxChunkSize) {
        return [{
            content: text,
            chunk_index: 0, // reassigned by caller
            content_type: 'text',
            strategy,
            overlap_chars: 0,
        }];
    }
    // Bottom level: force-split at character boundaries
    if (level >= SPLIT_SEPARATORS.length) {
        return forceCharSplit(text, maxChunkSize, overlapChars);
    }
    const isHeaderLevel = level === 0;
    const sections = splitKeepingSeparator(text, SPLIT_SEPARATORS[level], isHeaderLevel);
    // If splitting produced only 1 section (separator not found), try next level
    if (sections.length <= 1) {
        return recursiveSplit(text, level + 1, config);
    }
    // Text re-inserted between sections when packing, indexed by level. Header
    // sections keep their own text (the split is a lookahead), so level 0 needs
    // no joiner.
    const SECTION_JOINERS = ['', '\n\n', '\n', ' '];
    const joiner = SECTION_JOINERS[level] ?? '';
    const chunks = [];
    // Emit accumulated content as one or more chunks, recursing when oversized.
    const flush = (content, carriedOverlap) => {
        const trimmed = content.trim();
        if (trimmed.length === 0) {
            return; // never emit an empty chunk
        }
        recursiveSplit(trimmed, level + 1, config).forEach((piece, pieceIndex) => {
            chunks.push(pieceIndex === 0
                ? { ...piece, overlap_chars: Math.min(carriedOverlap, piece.content.length) }
                : piece);
        });
    };
    let currentContent = '';
    let carriedOverlap = 0;
    for (const section of sections) {
        const candidate = currentContent.length > 0
            ? currentContent + joiner + section
            : section;
        if (candidate.length > maxChunkSize && currentContent.length > 0) {
            flush(currentContent, carriedOverlap);
            // Apply overlap (except at header boundaries)
            if (!isHeaderLevel && overlapChars > 0) {
                const overlap = currentContent.slice(-overlapChars);
                carriedOverlap = overlap.length;
                currentContent = overlap + joiner + section;
            }
            else {
                carriedOverlap = 0;
                currentContent = section;
            }
        }
        else {
            currentContent = candidate;
        }
    }
    // Flush remaining content
    flush(currentContent, carriedOverlap);
    return chunks;
}
/**
 * Split text on a regex separator and drop unusable parts.
 *
 * With `keepSeparator` true, the text is cut just before each match via a
 * lookahead built from the separator's source, so the matched text (e.g. a
 * "# " header marker) stays at the start of its own part. Only zero-length
 * parts are dropped in this mode.
 * With `keepSeparator` false, the separator is consumed and parts that are
 * empty or whitespace-only are dropped.
 *
 * @param {string} text - Text to split.
 * @param {RegExp} separator - Boundary pattern.
 * @param {boolean} keepSeparator - Whether matches stay attached to the following part.
 * @returns {string[]} The resulting parts, in order.
 */
function splitKeepingSeparator(text, separator, keepSeparator) {
    if (!keepSeparator) {
        const pieces = text.split(separator);
        return pieces.filter(piece => piece.trim().length > 0);
    }
    // Zero-width boundary: split *before* each match instead of on it.
    const boundary = new RegExp(`(?=${separator.source})`, 'm');
    return text.split(boundary).filter(piece => piece.length > 0);
}
/**
 * Force-split at fixed character offsets as a last resort.
 * Handles text with no structural separators (JSON blobs, base64, walls of text).
 *
 * Consecutive windows of `maxChunkSize` characters start `step` characters
 * apart, where `step = maxChunkSize - overlapChars`, clamped to at least 1 so
 * the loop always advances. A negative `overlapChars` is treated as 0 so that
 * windows never leave gaps. Splitting stops as soon as a window reaches the
 * end of the text, so no trailing chunk is emitted that consists only of
 * characters already present in the previous chunk.
 *
 * `overlap_chars` reports the overlap actually produced. This can be smaller
 * than the requested `overlapChars` when the step had to be clamped.
 *
 * @param {string} text - Text to split.
 * @param {number} maxChunkSize - Maximum characters per chunk.
 * @param {number} overlapChars - Requested characters shared with the previous chunk.
 * @returns {Array<object>} Chunk records with strategy 'forced'; chunk_index
 *   is a placeholder that the caller reassigns.
 */
function forceCharSplit(text, maxChunkSize, overlapChars) {
    const chunks = [];
    const step = Math.max(1, maxChunkSize - Math.max(0, overlapChars));
    // Overlap that windows of this size and step really share.
    const effectiveOverlap = Math.max(0, maxChunkSize - step);
    for (let offset = 0; offset < text.length; offset += step) {
        const content = text.slice(offset, offset + maxChunkSize);
        chunks.push({
            content,
            chunk_index: 0, // reassigned by caller
            content_type: 'text',
            strategy: 'forced',
            overlap_chars: offset > 0 ? Math.min(effectiveOverlap, content.length) : 0,
        });
        // This window already covers the tail; a further window would only
        // repeat characters from it.
        if (offset + maxChunkSize >= text.length) {
            break;
        }
    }
    return chunks;
}
187
+ // =============================================================================
188
+ // API functions — call OpenAI and/or database
189
+ // =============================================================================
/**
 * Request an embedding vector for a single text from OpenAI.
 *
 * The call is routed through `openaiLimiter.schedule` and uses the
 * EMBEDDING_MODEL model. The first embedding in the response is returned.
 *
 * @param {object} openai - OpenAI client exposing `embeddings.create`.
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding vector.
 * @throws {Error} Wraps any failure with a short preview of the text. The
 *   underlying error is preserved on `error.cause`.
 */
export async function generateEmbedding(openai, text) {
    try {
        return await openaiLimiter.schedule(async () => {
            const result = await openai.embeddings.create({
                model: EMBEDDING_MODEL,
                input: text,
            });
            return result.data[0].embedding;
        });
    }
    catch (error) {
        // String() keeps the error path from throwing a second, unrelated error
        // when `text` was not a string — that would hide the real failure.
        const preview = String(text).slice(0, 80).replace(/\n/g, ' ');
        throw new Error(`Embedding generation failed for text "${preview}...": ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
}
/**
 * Batch-generate embeddings for multiple texts.
 *
 * Texts are sent in slices of up to 100 per request, each routed through
 * `openaiLimiter.schedule`, so 151 texts take 2 API calls instead of 151.
 * Embeddings are returned in the same order as `texts`.
 *
 * @param {object} openai - OpenAI client exposing `embeddings.create`.
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<number[][]>} One embedding per input text.
 * @throws {Error} If a request fails or returns a different number of
 *   embeddings than texts sent. The message names the batch and the
 *   underlying error is preserved on `error.cause`.
 */
export async function generateEmbeddingsBatch(openai, texts) {
    if (texts.length === 0)
        return [];
    const BATCH_SIZE = 100;
    const allEmbeddings = [];
    for (let batchStart = 0; batchStart < texts.length; batchStart += BATCH_SIZE) {
        const batch = texts.slice(batchStart, batchStart + BATCH_SIZE);
        try {
            const embeddings = await openaiLimiter.schedule(async () => {
                const result = await openai.embeddings.create({
                    model: EMBEDDING_MODEL,
                    input: batch,
                });
                return result.data.map(entry => entry.embedding);
            });
            // Callers pair embeddings with texts by position. A short or long
            // response would silently shift every later pairing, so fail loudly.
            if (embeddings.length !== batch.length) {
                throw new Error(`expected ${batch.length} embeddings, received ${embeddings.length}`);
            }
            allEmbeddings.push(...embeddings);
        }
        catch (error) {
            const batchNumber = Math.floor(batchStart / BATCH_SIZE) + 1;
            throw new Error(`Batch embedding failed (batch ${batchNumber}, ${batch.length} texts): ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }
    return allEmbeddings;
}
/**
 * Get an embedding for a search query, using the `query_cache` table to avoid
 * repeat OpenAI calls.
 *
 * Flow:
 *   1. Look up the normalized (lower-cased, trimmed) query text in query_cache.
 *   2. Cache hit with a readable vector: bump hit_count / last_used_at and
 *      return the cached embedding.
 *   3. Otherwise: generate an embedding from the ORIGINAL query text, store it
 *      under the normalized key, and return it.
 *
 * Every cache read or write failure is logged to stderr and is non-fatal. The
 * only error this function propagates is a failure to generate the embedding.
 * Cache writes are awaited, so they add latency but cannot outlive the call.
 *
 * @param {object} clients - Holder of `supabase` and `openai` clients.
 * @param {string} query - The user's search query.
 * @returns {Promise<number[]>} Embedding vector for the query.
 */
export async function getOrCacheQueryEmbedding(clients, query) {
    // Normalize query to avoid cache misses from capitalization/whitespace differences
    const normalizedQuery = query.toLowerCase().trim();
    // Check cache
    const { data: cached, error: cacheError } = await clients.supabase
        .from('query_cache')
        .select('embedding, hit_count')
        .eq('query_text', normalizedQuery)
        .single();
    if (cacheError && cacheError.code !== 'PGRST116') {
        // PGRST116 = "not found" (expected for cache miss). Any other error is real.
        process.stderr.write(`[ledger] query cache lookup failed: ${cacheError.message}\n`);
    }
    // True when a row exists but its vector cannot be parsed. The row is then
    // repaired in place instead of inserting a second row for the same key.
    let hasUnreadableEntry = false;
    if (cached?.embedding) {
        let cachedEmbedding = null;
        try {
            cachedEmbedding = parseVector(cached.embedding);
        }
        catch (parseError) {
            // A corrupt cache row must not break search: fall through and regenerate.
            hasUnreadableEntry = true;
            process.stderr.write(`[ledger] query cache entry unreadable for "${normalizedQuery}": ${parseError instanceof Error ? parseError.message : String(parseError)}\n`);
        }
        if (cachedEmbedding !== null) {
            // Update cache stats (awaited, non-fatal). This is a read-modify-write,
            // so concurrent hits on the same query may under-count.
            const { error: updateError } = await clients.supabase
                .from('query_cache')
                .update({
                    hit_count: (cached.hit_count ?? 0) + 1,
                    last_used_at: new Date().toISOString(),
                })
                .eq('query_text', normalizedQuery);
            if (updateError) {
                process.stderr.write(`[ledger] query cache hit_count update failed: ${updateError.message}\n`);
            }
            return cachedEmbedding;
        }
    }
    // Generate and cache. Send original query to OpenAI (preserves meaning),
    // but store under normalized key (so "Auth" and "auth" share one cache entry)
    const embedding = await generateEmbedding(clients.openai, query);
    if (hasUnreadableEntry) {
        // Overwrite the corrupt vector on the existing row.
        const { error: repairError } = await clients.supabase
            .from('query_cache')
            .update({
                embedding: toVectorString(embedding),
                embedding_model_id: 'openai/text-embedding-3-small',
                last_used_at: new Date().toISOString(),
            })
            .eq('query_text', normalizedQuery);
        if (repairError) {
            process.stderr.write(`[ledger] query cache repair failed for "${normalizedQuery}": ${repairError.message}\n`);
        }
        return embedding;
    }
    // Cache insert is awaited but non-fatal. A failed insert means the next
    // identical query will hit OpenAI again, but search still works.
    const { error: insertError } = await clients.supabase
        .from('query_cache')
        .insert({
            query_text: normalizedQuery,
            embedding: toVectorString(embedding),
            embedding_model_id: 'openai/text-embedding-3-small',
        });
    if (insertError) {
        process.stderr.write(`[ledger] query cache insert failed for "${normalizedQuery}": ${insertError.message}\n`);
    }
    return embedding;
}
@@ -0,0 +1,120 @@
1
+ // reranker.ts
2
+ // Cross-encoder reranking via Cohere Rerank API.
3
+ //
4
+ // After hybrid search returns candidates, this module re-scores each one
5
+ // by sending (query, document) pairs to a cross-encoder model. The model
6
+ // reads query and document together — much more accurate than embedding
7
+ // similarity, which encodes them separately.
8
+ //
9
+ // Uses native fetch — no Cohere SDK dependency. The API is one endpoint,
10
+ // one request shape, one response shape.
11
+ //
12
+ // Graceful degradation: if the API fails, returns original results unchanged.
13
+ // Search should never break because reranking failed.
14
+ import { cohereLimiter } from '../rate-limiter.js';
15
+ import { startSpan } from '../observability.js';
// Default rerank model; callers may override it through options.model.
const COHERE_RERANK_MODEL = 'rerank-v3.5';
// Endpoint that receives the rerank POST request.
const COHERE_RERANK_URL = 'https://api.cohere.com/v2/rerank';
18
+ // =============================================================================
19
+ // rerankResults
20
+ // =============================================================================
/**
 * Re-rank search results using Cohere's rerank endpoint.
 *
 * Sends the query plus each result's content to Cohere and returns the
 * results in the order Cohere reports, with each result's `score` replaced by
 * Cohere's `relevance_score`. The original score is discarded on purpose: the
 * reranker score is the relevance signal downstream code should use.
 *
 * Graceful degradation: if the limiter or network call fails, the API answers
 * with a non-2xx status, the body is not JSON, or the response refers to
 * results that do not exist, the ORIGINAL `searchResults` array is returned
 * unchanged. Reranking problems never throw.
 *
 * Security: the API key is sent only in the Authorization header. Document
 * content IS sent to Cohere for scoring.
 *
 * @param {string} query - The user's search query.
 * @param {Array<object>} searchResults - Candidates; each should carry `content`.
 * @param {object} options - `apiKey` (required), optional `topN` and `model`.
 * @returns {Promise<Array<object>>} Reranked results, or the originals on any failure.
 */
export async function rerankResults(query, searchResults, options) {
    if (searchResults.length === 0)
        return [];
    const topN = options.topN ?? searchResults.length;
    const model = options.model ?? COHERE_RERANK_MODEL;
    // --- rerank.prepare ---
    const prepareSpan = startSpan('rerank.prepare');
    // The v2 rerank endpoint documents `documents` as a list of strings, so plain
    // text is sent rather than {text} objects. Missing content becomes an empty
    // string so one malformed result cannot make reranking throw.
    const documents = searchResults.map(searchResult => String(searchResult.content ?? ''));
    const totalContentLength = documents.reduce((sum, text) => sum + text.length, 0);
    prepareSpan.update({ output: { documentCount: documents.length, totalContentLength } });
    prepareSpan.end();
    // --- rerank.queue-wait + rerank.api-call ---
    const queueSpan = startSpan('rerank.queue-wait');
    let queueSpanEnded = false;
    // End the queue span exactly once, whether the task runs or the limiter rejects.
    const endQueueSpan = () => {
        if (!queueSpanEnded) {
            queueSpanEnded = true;
            queueSpan.end();
        }
    };
    let response;
    try {
        response = await cohereLimiter.schedule(() => {
            endQueueSpan();
            const apiSpan = startSpan('rerank.api-call');
            apiSpan.update({ input: { model, topN, documentCount: documents.length } });
            const apiStartMs = Date.now();
            return fetch(COHERE_RERANK_URL, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${options.apiKey}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    model,
                    query,
                    documents,
                    top_n: topN,
                }),
            }).then(fetchResponse => {
                apiSpan.update({
                    output: {
                        statusCode: fetchResponse.status,
                        latencyMs: Date.now() - apiStartMs,
                    },
                });
                apiSpan.end();
                return fetchResponse;
            }).catch(fetchError => {
                apiSpan.update({
                    output: {
                        error: fetchError.message,
                        errorType: 'network',
                        latencyMs: Date.now() - apiStartMs,
                    },
                });
                apiSpan.end();
                throw fetchError;
            });
        });
    }
    catch (_networkError) {
        // Network failure or limiter error — return originals unchanged.
        // If the limiter rejected before running the task, the queue span is still open.
        endQueueSpan();
        return searchResults;
    }
    if (!response.ok) {
        // API error (rate limit, bad key, server error) — return originals unchanged
        return searchResults;
    }
    let cohereResponse;
    try {
        cohereResponse = (await response.json());
    }
    catch (_parseError) {
        // Malformed JSON from Cohere — return originals unchanged
        return searchResults;
    }
    if (!cohereResponse || !Array.isArray(cohereResponse.results)) {
        return searchResults;
    }
    // Map Cohere results back to our search results, in the order Cohere returned
    // them. Each entry's `index` points at the original position in our input.
    const rerankedResults = [];
    for (const cohereResult of cohereResponse.results) {
        const original = searchResults[cohereResult?.index];
        // An unknown index or non-numeric score means the response cannot be
        // trusted; keep the original ranking rather than emit broken results.
        if (original === undefined || typeof cohereResult.relevance_score !== 'number') {
            return searchResults;
        }
        rerankedResults.push({ ...original, score: cohereResult.relevance_score });
    }
    return rerankedResults;
}
@@ -0,0 +1,53 @@
1
+ // semantic-cache.ts
2
+ // Helpers for the semantic cache (layer 2).
3
+ // Handles serialization, deserialization, and parameter normalization
4
+ // for cache lookup and store operations.
5
+ //
6
+ // The actual cache logic (HNSW lookup, store, invalidation) lives in Postgres
7
+ // RPC functions. This module prepares data for those calls.
// Identifier of the embedding model associated with semantic-cache entries.
const EMBEDDING_MODEL_ID = 'openai/text-embedding-3-small';
// Similarity cut-off exported for cache lookups.
// NOTE(review): presumably a cosine-similarity threshold — confirm against the Postgres RPC.
const SIMILARITY_THRESHOLD = 0.90;
// Public names under which the two constants are exported.
export {
    EMBEDDING_MODEL_ID as SEMANTIC_CACHE_MODEL_ID,
    SIMILARITY_THRESHOLD as SEMANTIC_CACHE_THRESHOLD,
};
/**
 * Build the normalized search_params object used as part of the cache key.
 *
 * Only the five recognized fields are copied, always in alphabetical order,
 * so the serialized form does not depend on how the caller ordered its
 * properties. Fields whose value is `undefined` are left out entirely; `null`
 * and other falsy values are kept. Unrecognized fields are ignored.
 *
 * @param {object} input - Search options (document_type, domain, limit, project, threshold).
 * @returns {object} New object holding only the defined, recognized fields.
 */
export function buildSearchParams(input) {
    // Alphabetical, which fixes the insertion order of the result.
    const recognizedKeys = ['document_type', 'domain', 'limit', 'project', 'threshold'];
    return recognizedKeys.reduce((params, key) => {
        if (input[key] !== undefined) {
            params[key] = input[key];
        }
        return params;
    }, {});
}
34
+ // =============================================================================
35
+ // Result serialization
36
+ // =============================================================================
/**
 * Normalize the cached_results value read from the cache.
 *
 * A missing value (null/undefined) or an empty list yields a fresh empty
 * array. Anything else is returned as-is, with no copying or validation.
 *
 * @param {Array<object>|null|undefined} jsonb - Cached results as returned by the database client.
 * @returns {Array<object>} The cached results, or [] when there are none.
 */
export function parseCachedResults(jsonb) {
    const hasEntries = Boolean(jsonb) && jsonb.length !== 0;
    return hasEntries ? jsonb : [];
}
/**
 * Collect the distinct `id` values from a list of search results, in order of
 * first appearance.
 *
 * @param {Array<{id: unknown}>} results - Search results.
 * @returns {Array<unknown>} Unique ids.
 */
export function extractSourceDocIds(results) {
    const seenIds = new Set();
    for (const result of results) {
        seenIds.add(result.id);
    }
    return Array.from(seenIds);
}
@@ -6,7 +6,7 @@ vi.mock('./config.js', () => ({
6
6
  saveConfigFile: (config) => { mockConfigState.current = config; },
7
7
  }));
8
8
  // Import AFTER mock setup so modules pick up the mocked config
9
- const { BUILTIN_TYPES, getTypeRegistry, inferDelivery, getRegisteredTypes, isRegisteredType, registerType, validateTypeName, checkMetadataCompleteness, } = await import('./notes.js');
9
+ const { BUILTIN_TYPES, getTypeRegistry, inferDelivery, inferDomain, getRegisteredTypes, isRegisteredType, registerType, validateTypeName, checkMetadataCompleteness, } = await import('./notes.js');
10
10
  // --- Helpers ---
11
11
  function setUserTypes(types) {
12
12
  mockConfigState.current = { types };
@@ -178,7 +178,7 @@ describe('validateTypeName', () => {
178
178
  });
179
179
  });
180
180
  // ============================================================
181
- // 7. checkMetadataCompleteness (dynamic delivery check)
181
+ // 7. checkMetadataCompleteness (dynamic domain check)
182
182
  // ============================================================
183
183
  describe('checkMetadataCompleteness', () => {
184
184
  it('returns null when all fields are present for project type', () => {
@@ -189,20 +189,20 @@ describe('checkMetadataCompleteness', () => {
189
189
  const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'code-craft');
190
190
  expect(result).toBeNull();
191
191
  });
192
- it('prompts for status on project-delivery types', () => {
192
+ it('prompts for status on project-domain types', () => {
193
193
  const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'architecture-decision');
194
194
  expect(result).toContain('status');
195
195
  });
196
- it('does NOT prompt for status on persona-delivery types', () => {
196
+ it('does NOT prompt for status on persona-domain types', () => {
197
197
  const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'persona-rule');
198
198
  expect(result).toBeNull();
199
199
  });
200
- it('uses inferDelivery for custom types — project custom type requires status', () => {
200
+ it('uses type registry for custom types — project custom type requires status', () => {
201
201
  setUserTypes({ 'wine-log': 'project' });
202
202
  const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'wine-log');
203
203
  expect(result).toContain('status');
204
204
  });
205
- it('uses inferDelivery for custom types — knowledge custom type skips status', () => {
205
+ it('uses type registry for custom types — knowledge custom type skips status', () => {
206
206
  setUserTypes({ 'recipe': 'knowledge' });
207
207
  const result = checkMetadataCompleteness({ description: 'test', upsert_key: 'test-key' }, 'recipe');
208
208
  expect(result).toBeNull();