@getplumb/core 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +2 -2
  2. package/dist/embedder.d.ts +16 -2
  3. package/dist/embedder.d.ts.map +1 -1
  4. package/dist/embedder.js +23 -4
  5. package/dist/embedder.js.map +1 -1
  6. package/dist/extraction-queue.d.ts +13 -3
  7. package/dist/extraction-queue.d.ts.map +1 -1
  8. package/dist/extraction-queue.js +21 -4
  9. package/dist/extraction-queue.js.map +1 -1
  10. package/dist/extractor.d.ts +2 -1
  11. package/dist/extractor.d.ts.map +1 -1
  12. package/dist/extractor.js +106 -7
  13. package/dist/extractor.js.map +1 -1
  14. package/dist/extractor.test.d.ts +2 -0
  15. package/dist/extractor.test.d.ts.map +1 -0
  16. package/dist/extractor.test.js +158 -0
  17. package/dist/extractor.test.js.map +1 -0
  18. package/dist/fact-search.d.ts +9 -5
  19. package/dist/fact-search.d.ts.map +1 -1
  20. package/dist/fact-search.js +25 -16
  21. package/dist/fact-search.js.map +1 -1
  22. package/dist/fact-search.test.d.ts +12 -0
  23. package/dist/fact-search.test.d.ts.map +1 -0
  24. package/dist/fact-search.test.js +117 -0
  25. package/dist/fact-search.test.js.map +1 -0
  26. package/dist/index.d.ts +2 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +1 -0
  29. package/dist/index.js.map +1 -1
  30. package/dist/llm-client.d.ts +11 -2
  31. package/dist/llm-client.d.ts.map +1 -1
  32. package/dist/llm-client.js +47 -3
  33. package/dist/llm-client.js.map +1 -1
  34. package/dist/local-store.d.ts +32 -1
  35. package/dist/local-store.d.ts.map +1 -1
  36. package/dist/local-store.js +510 -35
  37. package/dist/local-store.js.map +1 -1
  38. package/dist/local-store.test.d.ts +2 -0
  39. package/dist/local-store.test.d.ts.map +1 -0
  40. package/dist/local-store.test.js +146 -0
  41. package/dist/local-store.test.js.map +1 -0
  42. package/dist/raw-log-search.d.ts +9 -5
  43. package/dist/raw-log-search.d.ts.map +1 -1
  44. package/dist/raw-log-search.js +107 -29
  45. package/dist/raw-log-search.js.map +1 -1
  46. package/dist/raw-log-search.test.d.ts +12 -0
  47. package/dist/raw-log-search.test.d.ts.map +1 -0
  48. package/dist/raw-log-search.test.js +124 -0
  49. package/dist/raw-log-search.test.js.map +1 -0
  50. package/dist/read-path.test.d.ts +15 -0
  51. package/dist/read-path.test.d.ts.map +1 -0
  52. package/dist/read-path.test.js +393 -0
  53. package/dist/read-path.test.js.map +1 -0
  54. package/dist/schema.d.ts +2 -2
  55. package/dist/schema.d.ts.map +1 -1
  56. package/dist/schema.js +58 -1
  57. package/dist/schema.js.map +1 -1
  58. package/dist/scorer.test.d.ts +10 -0
  59. package/dist/scorer.test.d.ts.map +1 -0
  60. package/dist/scorer.test.js +169 -0
  61. package/dist/scorer.test.js.map +1 -0
  62. package/dist/store.d.ts +3 -1
  63. package/dist/store.d.ts.map +1 -1
  64. package/dist/wasm-db.d.ts +63 -8
  65. package/dist/wasm-db.d.ts.map +1 -1
  66. package/dist/wasm-db.js +124 -31
  67. package/dist/wasm-db.js.map +1 -1
  68. package/package.json +14 -2
@@ -6,17 +6,110 @@ import { openDb } from './wasm-db.js';
6
6
  import { applySchema } from './schema.js';
7
7
  import { extractFacts } from './extractor.js';
8
8
  import { callLLMWithConfig } from './llm-client.js';
9
- import { embed } from './embedder.js';
9
+ import { embed, warmEmbedder, warmReranker } from './embedder.js';
10
10
  import { formatExchange } from './chunker.js';
11
11
  import { searchRawLog } from './raw-log-search.js';
12
12
  import { searchFacts } from './fact-search.js';
13
13
  import { ExtractionQueue } from './extraction-queue.js';
14
- import { serializeEmbedding } from './vector-search.js';
14
+ import { serializeEmbedding, deserializeEmbedding, cosineDistance } from './vector-search.js';
15
/**
 * Split text into overlapping child chunks for parent-child chunking (T-108).
 * Target: ~250 chars per chunk with ~50 char overlap.
 * Prefers sentence boundaries, falls back to word boundaries, hard-cuts at 300 chars max.
 *
 * @param {string} text - The parent chunk text to split.
 * @returns {string[]} Non-empty child chunks, in document order.
 */
function splitIntoChildren(text) {
    const TARGET_SIZE = 250;
    const OVERLAP = 50;
    const MAX_SIZE = 300;
    const SENTENCE_ENDINGS = /[.!?]\s+/g;
    if (text.length <= TARGET_SIZE) {
        // Text is already small enough — return as single child
        return [text];
    }
    const chunks = [];
    let pos = 0;
    while (pos < text.length) {
        let endPos = Math.min(pos + TARGET_SIZE, text.length);
        // If we're at the end of the text, take the rest
        if (endPos >= text.length) {
            chunks.push(text.slice(pos));
            break;
        }
        // Try to find a sentence boundary within the target range
        const segment = text.slice(pos, Math.min(pos + MAX_SIZE, text.length));
        const sentenceMatches = Array.from(segment.matchAll(SENTENCE_ENDINGS));
        if (sentenceMatches.length > 0) {
            // Find the last sentence boundary at or before TARGET_SIZE
            let bestMatch = sentenceMatches[0]; // Safe: array is non-empty
            for (const match of sentenceMatches) {
                if (match.index !== undefined && match.index <= TARGET_SIZE) {
                    bestMatch = match;
                }
                else {
                    break;
                }
            }
            if (bestMatch.index !== undefined && bestMatch[0] !== undefined) {
                endPos = pos + bestMatch.index + bestMatch[0].length;
            }
            else {
                // Fall back to word boundary
                endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
            }
        }
        else {
            // No sentence boundary found — fall back to word boundary
            endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
        }
        chunks.push(text.slice(pos, endPos).trim());
        // Move position forward, with overlap.
        // BUG FIX: the overlap step must make strict forward progress. When a
        // sentence boundary lands within OVERLAP chars of the chunk start,
        // `endPos - OVERLAP <= pos` and the old `pos < 0` guard did not fire,
        // so the loop re-scanned the same position forever (infinite loop).
        // `endPos > prevPos` always holds (every boundary path advances at
        // least 1 char), so skipping the overlap guarantees termination.
        const prevPos = pos;
        pos = endPos - OVERLAP;
        if (pos <= prevPos) {
            pos = endPos; // Also subsumes the old "don't go negative" safety check
        }
    }
    return chunks.filter(chunk => chunk.length > 0);
}
/**
 * Find a word boundary near the target position.
 * Prefers breaking at TARGET_SIZE, but will extend up to MAX_SIZE if needed;
 * hard-cuts at MAX_SIZE when no whitespace exists in the window.
 *
 * @param {string} text - Full text being chunked.
 * @param {number} start - Absolute start index of the current chunk.
 * @param {number} targetSize - Preferred chunk length.
 * @param {number} maxSize - Hard upper bound on chunk length.
 * @returns {number} Absolute end index (exclusive) for the chunk.
 */
function findWordBoundary(text, start, targetSize, maxSize) {
    const targetPos = start + targetSize;
    const maxPos = Math.min(start + maxSize, text.length);
    // Look for whitespace near the target position
    let endPos = targetPos;
    // First try: find whitespace after targetPos
    for (let i = targetPos; i < maxPos; i++) {
        if (/\s/.test(text[i] ?? '')) {
            endPos = i + 1; // Include the whitespace
            break;
        }
    }
    // If we hit maxPos without finding whitespace, hard cut at maxPos
    if (endPos === targetPos && targetPos < maxPos) {
        endPos = maxPos;
    }
    return endPos;
}
15
94
  export class LocalStore {
16
95
  #db;
17
96
  #userId;
18
97
  #llmConfig;
19
98
  #extractionQueue;
99
+ // Backlog processor state (T-095: drain loops)
100
+ #embedDrainStopped = false;
101
+ #extractDrainStopped = false;
102
+ #embedDrainPromise = null;
103
+ #extractDrainPromise = null;
104
+ #embedIdleMs;
105
+ #extractIdleMs;
106
+ #extractConcurrency;
107
+ #retryBackoffMs;
108
+ #extractFn;
109
+ // T-096: In-memory embedding cache for vec_facts (eliminates 292ms SQLite load on each query)
110
+ #embeddingCache = [];
111
+ // T-103: In-memory embedding cache for vec_raw_log (eliminates ~3,700ms SQLite load on each query)
112
+ #rawLogEmbeddingCache = [];
20
113
  /** Expose database for plugin use (e.g., NudgeManager) */
21
114
  get db() {
22
115
  return this.#db;
@@ -29,11 +122,17 @@ export class LocalStore {
29
122
  get extractionQueue() {
30
123
  return this.#extractionQueue;
31
124
  }
32
- constructor(db, userId, llmConfig, extractionQueue) {
125
+ constructor(db, userId, llmConfig, extractionQueue, extractFn, backlog) {
33
126
  this.#db = db;
34
127
  this.#userId = userId;
35
128
  this.#llmConfig = llmConfig;
36
129
  this.#extractionQueue = extractionQueue;
130
+ this.#extractFn = extractFn;
131
+ // Initialize backlog processor config — defaults run as fast as possible with concurrency.
132
+ this.#embedIdleMs = backlog?.embedIdleMs ?? 5000;
133
+ this.#extractIdleMs = backlog?.extractIdleMs ?? 5000;
134
+ this.#extractConcurrency = backlog?.concurrency ?? 5;
135
+ this.#retryBackoffMs = backlog?.retryBackoffMs ?? 2000;
37
136
  }
38
137
  /**
39
138
  * Create a new LocalStore instance (async factory).
@@ -52,36 +151,135 @@ export class LocalStore {
52
151
  // Use a mutable cell to hold the store reference (needed for circular dependency)
53
152
  let storeRef = null;
54
153
  // Initialize extraction queue with deferred store lookup
55
- const extractFn = (exchange, userId) => {
154
+ // T-079: Wrapper handles extract_status updates on success/failure.
155
+ const extractFn = async (exchange, userId, sourceChunkId) => {
56
156
  if (!storeRef)
57
157
  throw new Error('Store not initialized');
58
158
  const llmFn = llmConfig
59
159
  ? (prompt) => callLLMWithConfig(prompt, llmConfig)
60
160
  : undefined;
61
- return extractFacts(exchange, userId, storeRef, llmFn);
161
+ try {
162
+ const facts = await extractFacts(exchange, userId, storeRef, llmFn, sourceChunkId);
163
+ // T-079: Update extract_status='done' on success.
164
+ const updateStmt = db.prepare(`
165
+ UPDATE raw_log SET extract_status = 'done' WHERE id = ?
166
+ `);
167
+ updateStmt.bind([sourceChunkId]);
168
+ updateStmt.step();
169
+ updateStmt.finalize();
170
+ return facts;
171
+ }
172
+ catch (err) {
173
+ // T-079: Update extract_status='failed' with error message.
174
+ const errorMsg = err instanceof Error ? err.message : String(err);
175
+ const updateStmt = db.prepare(`
176
+ UPDATE raw_log SET extract_status = 'failed', extract_error = ? WHERE id = ?
177
+ `);
178
+ updateStmt.bind([errorMsg, sourceChunkId]);
179
+ updateStmt.step();
180
+ updateStmt.finalize();
181
+ // Re-throw so Promise.allSettled() in flush() sees the rejection.
182
+ throw err;
183
+ }
62
184
  };
63
185
  const extractionQueue = options.extractionQueue ?? new ExtractionQueue(extractFn);
64
186
  // Create store and assign to ref
65
- const store = new LocalStore(db, userId, llmConfig, extractionQueue);
187
+ const store = new LocalStore(db, userId, llmConfig, extractionQueue, extractFn, options.backlog);
66
188
  storeRef = store;
189
+ // T-096: Warm embedder pipeline to eliminate 365ms cold-start on first query
190
+ await warmEmbedder();
191
+ // T-101: Warm reranker pipeline to eliminate ~200ms cold-start on first query
192
+ // (intentionally loads ~80MB model at init for consistent <250ms query performance)
193
+ await warmReranker();
194
+ // T-096: Load all vec_facts embeddings into in-memory cache (eliminates 292ms SQLite load per query)
195
+ const vecStmt = db.prepare(`SELECT rowid, embedding FROM vec_facts`);
196
+ while (vecStmt.step()) {
197
+ const row = vecStmt.get({});
198
+ store.#embeddingCache.push({
199
+ rowid: row.rowid,
200
+ embedding: deserializeEmbedding(row.embedding),
201
+ });
202
+ }
203
+ vecStmt.finalize();
204
+ // T-103/T-108: Load vec_raw_log embeddings for child rows only (eliminates ~3,700ms SQLite load per query)
205
+ // Child rows have parent_id IS NOT NULL. Parent rows are not embedded (embed_status='no_embed').
206
+ const rawLogVecStmt = db.prepare(`
207
+ SELECT v.rowid, v.embedding
208
+ FROM vec_raw_log v
209
+ JOIN raw_log r ON r.vec_rowid = v.rowid
210
+ WHERE r.parent_id IS NOT NULL
211
+ `);
212
+ while (rawLogVecStmt.step()) {
213
+ const row = rawLogVecStmt.get({});
214
+ store.#rawLogEmbeddingCache.push({
215
+ rowid: row.rowid,
216
+ embedding: deserializeEmbedding(row.embedding),
217
+ });
218
+ }
219
+ rawLogVecStmt.finalize();
67
220
  return store;
68
221
  }
69
- async store(fact) {
70
- const id = crypto.randomUUID();
71
- // Embed concatenated fact text for vector search.
222
+ async store(fact, sourceChunkId) {
223
+ // T-097: Cross-chunk fact deduplication — prevent storing duplicate facts across different chunks.
224
+ // A fact is considered a duplicate if it has the same subject+predicate and the object is either:
225
+ // 1. Identical (case-insensitive, normalized whitespace), OR
226
+ // 2. Semantically similar (cosine similarity >= 0.92 on embeddings)
227
+ //
228
+ // Pre-filter by subject+predicate via SQL (uses index, avoids full corpus scan).
229
+ const candidateStmt = this.#db.prepare(`
230
+ SELECT id, object, vec_rowid
231
+ FROM facts
232
+ WHERE user_id = ? AND subject = ? AND predicate = ? AND deleted_at IS NULL
233
+ `);
234
+ candidateStmt.bind([this.#userId, fact.subject, fact.predicate]);
235
+ const candidates = [];
236
+ while (candidateStmt.step()) {
237
+ candidates.push(candidateStmt.get({}));
238
+ }
239
+ candidateStmt.finalize();
240
+ // Helper: Normalize text for exact-match check (lowercase, trim, collapse multiple spaces)
241
+ const normalizeText = (text) => text.toLowerCase().trim().replace(/\s+/g, ' ');
242
+ const normalizedNewObject = normalizeText(fact.object);
243
+ // Check for exact object match first (avoids embedding call in the common case)
244
+ for (const candidate of candidates) {
245
+ if (normalizeText(candidate.object) === normalizedNewObject) {
246
+ // Exact duplicate found — return existing fact ID without inserting
247
+ return candidate.id;
248
+ }
249
+ }
250
+ // No exact match found. Now embed the new fact for semantic similarity check and insertion.
72
251
  const text = `${fact.subject} ${fact.predicate} ${fact.object} ${fact.context ?? ''}`.trim();
73
252
  const embedding = await embed(text);
74
253
  const embeddingJson = serializeEmbedding(embedding);
254
+ // Check semantic similarity against candidates (only if we have candidates with embeddings)
255
+ if (candidates.length > 0) {
256
+ for (const candidate of candidates) {
257
+ if (candidate.vec_rowid === null)
258
+ continue;
259
+ // Find candidate embedding in in-memory cache (T-096)
260
+ const cachedEntry = this.#embeddingCache.find(entry => entry.rowid === candidate.vec_rowid);
261
+ if (!cachedEntry)
262
+ continue;
263
+ // Compute cosine similarity. Distance = 1 - similarity, so similarity >= 0.92 means distance <= 0.08.
264
+ const distance = cosineDistance(embedding, cachedEntry.embedding);
265
+ if (distance <= 0.08) {
266
+ // Semantically equivalent fact found — return existing ID without inserting
267
+ return candidate.id;
268
+ }
269
+ }
270
+ }
271
+ // No duplicate found (neither exact nor semantic) — proceed with normal insertion
272
+ const id = crypto.randomUUID();
75
273
  // Begin transaction
76
274
  this.#db.exec('BEGIN');
77
275
  try {
78
- // Insert fact
276
+ // Insert fact (T-079: include source_chunk_id)
79
277
  const factStmt = this.#db.prepare(`
80
278
  INSERT INTO facts
81
279
  (id, user_id, subject, predicate, object,
82
280
  confidence, decay_rate, timestamp, source_session_id,
83
- source_session_label, context)
84
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
281
+ source_session_label, context, source_chunk_id)
282
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
85
283
  `);
86
284
  factStmt.bind([
87
285
  id,
@@ -95,6 +293,7 @@ export class LocalStore {
95
293
  fact.sourceSessionId,
96
294
  fact.sourceSessionLabel ?? null,
97
295
  fact.context ?? null,
296
+ sourceChunkId ?? null,
98
297
  ]);
99
298
  factStmt.step();
100
299
  factStmt.finalize();
@@ -110,6 +309,8 @@ export class LocalStore {
110
309
  updateStmt.step();
111
310
  updateStmt.finalize();
112
311
  this.#db.exec('COMMIT');
312
+ // T-096: Append new embedding to in-memory cache
313
+ this.#embeddingCache.push({ rowid: vecRowid, embedding });
113
314
  }
114
315
  catch (err) {
115
316
  this.#db.exec('ROLLBACK');
@@ -118,9 +319,18 @@ export class LocalStore {
118
319
  return id;
119
320
  }
120
321
  async search(query, limit = 20) {
121
- return searchFacts(this.#db, this.#userId, query, limit);
322
+ // T-096: Pass in-memory embedding cache to searchFacts (eliminates 292ms SQLite load per query)
323
+ return searchFacts(this.#db, this.#userId, query, limit, this.#embeddingCache);
122
324
  }
123
325
  async delete(id) {
326
+ // T-096: Get vec_rowid before soft-deleting so we can remove from cache
327
+ const vecRowidStmt = this.#db.prepare(`
328
+ SELECT vec_rowid FROM facts WHERE id = ? AND user_id = ?
329
+ `);
330
+ vecRowidStmt.bind([id, this.#userId]);
331
+ vecRowidStmt.step();
332
+ const vecRowid = vecRowidStmt.get(0);
333
+ vecRowidStmt.finalize();
124
334
  // Soft delete only — never hard delete.
125
335
  const stmt = this.#db.prepare(`
126
336
  UPDATE facts SET deleted_at = ? WHERE id = ? AND user_id = ?
@@ -128,6 +338,13 @@ export class LocalStore {
128
338
  stmt.bind([new Date().toISOString(), id, this.#userId]);
129
339
  stmt.step();
130
340
  stmt.finalize();
341
+ // T-096: Remove from in-memory embedding cache
342
+ if (vecRowid !== null) {
343
+ const cacheIdx = this.#embeddingCache.findIndex(entry => entry.rowid === vecRowid);
344
+ if (cacheIdx !== -1) {
345
+ this.#embeddingCache.splice(cacheIdx, 1);
346
+ }
347
+ }
131
348
  }
132
349
  async status() {
133
350
  const factStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM facts WHERE user_id = ? AND deleted_at IS NULL`);
@@ -159,18 +376,19 @@ export class LocalStore {
159
376
  const chunkText = formatExchange(exchange);
160
377
  // Compute content hash for deduplication (scoped per userId).
161
378
  const contentHash = createHash('sha256').update(chunkText).digest('hex');
162
- // Embed before opening the DB transaction.
163
- const embedding = await embed(chunkText);
164
- const embeddingJson = serializeEmbedding(embedding);
379
+ // T-108: Parent-child chunking — don't embed the parent, only the children.
380
+ // Parent extract_status: 'no_llm' if no config, otherwise 'pending' (extraction runs on parent only).
381
+ const extractStatus = this.#llmConfig ? 'pending' : 'no_llm';
165
382
  // Attempt insert — catch UNIQUE constraint violations (duplicate content_hash).
166
383
  try {
167
384
  this.#db.exec('BEGIN');
168
- // Insert into raw_log
385
+ // T-108: Insert parent row (no embedding, no vec_rowid).
169
386
  const rawLogStmt = this.#db.prepare(`
170
387
  INSERT INTO raw_log
171
388
  (id, user_id, session_id, session_label,
172
- user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash)
173
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
389
+ user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
390
+ embed_status, embed_error, embed_model, extract_status, parent_id)
391
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
174
392
  `);
175
393
  rawLogStmt.bind([
176
394
  rawLogId,
@@ -184,20 +402,81 @@ export class LocalStore {
184
402
  chunkText,
185
403
  0,
186
404
  contentHash,
405
+ 'no_embed', // Parent is not embedded (T-108)
406
+ null,
407
+ null,
408
+ extractStatus,
409
+ null, // parent_id=NULL for parent rows
187
410
  ]);
188
411
  rawLogStmt.step();
189
412
  rawLogStmt.finalize();
190
- // Insert embedding into vec_raw_log (auto-assigned id).
191
- const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
192
- vecStmt.bind([embeddingJson]);
193
- vecStmt.step();
194
- vecStmt.finalize();
195
- const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
196
- // Back-fill vec_rowid so raw-log-search can join without a mapping table.
197
- const updateStmt = this.#db.prepare(`UPDATE raw_log SET vec_rowid = ? WHERE id = ?`);
198
- updateStmt.bind([vecRowid, rawLogId]);
199
- updateStmt.step();
200
- updateStmt.finalize();
413
+ // T-108: Split parent into child chunks and embed each child.
414
+ const childChunks = splitIntoChildren(chunkText);
415
+ for (let i = 0; i < childChunks.length; i++) {
416
+ const childText = childChunks[i];
417
+ if (!childText)
418
+ continue;
419
+ const childId = crypto.randomUUID();
420
+ let childEmbedding = null;
421
+ let childEmbeddingJson = null;
422
+ let childEmbedStatus = 'pending';
423
+ let childEmbedError = null;
424
+ let childEmbedModel = null;
425
+ // Embed the child chunk
426
+ try {
427
+ childEmbedding = await embed(childText);
428
+ childEmbeddingJson = serializeEmbedding(childEmbedding);
429
+ childEmbedStatus = 'done';
430
+ childEmbedModel = 'Xenova/bge-small-en-v1.5';
431
+ }
432
+ catch (err) {
433
+ childEmbedStatus = 'failed';
434
+ childEmbedError = err instanceof Error ? err.message : String(err);
435
+ }
436
+ // Insert child row
437
+ const childStmt = this.#db.prepare(`
438
+ INSERT INTO raw_log
439
+ (id, user_id, session_id, session_label,
440
+ user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
441
+ embed_status, embed_error, embed_model, extract_status, parent_id)
442
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
443
+ `);
444
+ childStmt.bind([
445
+ childId,
446
+ this.#userId,
447
+ exchange.sessionId,
448
+ exchange.sessionLabel ?? null,
449
+ exchange.userMessage,
450
+ exchange.agentResponse,
451
+ exchange.timestamp.toISOString(),
452
+ exchange.source,
453
+ childText,
454
+ i, // chunk_index for ordering
455
+ null, // No content_hash for children (they don't participate in dedup)
456
+ childEmbedStatus,
457
+ childEmbedError,
458
+ childEmbedModel,
459
+ 'child', // T-108: Mark as 'child' to prevent extraction
460
+ rawLogId, // parent_id points to parent
461
+ ]);
462
+ childStmt.step();
463
+ childStmt.finalize();
464
+ // Insert child embedding into vec_raw_log if embedding succeeded
465
+ if (childEmbeddingJson !== null) {
466
+ const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
467
+ vecStmt.bind([childEmbeddingJson]);
468
+ vecStmt.step();
469
+ vecStmt.finalize();
470
+ const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
471
+ // Back-fill vec_rowid on child row
472
+ const updateStmt = this.#db.prepare(`UPDATE raw_log SET vec_rowid = ? WHERE id = ?`);
473
+ updateStmt.bind([vecRowid, childId]);
474
+ updateStmt.step();
475
+ updateStmt.finalize();
476
+ // T-103: Append child embedding to in-memory cache
477
+ this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding: childEmbedding });
478
+ }
479
+ }
201
480
  this.#db.exec('COMMIT');
202
481
  }
203
482
  catch (err) {
@@ -215,9 +494,11 @@ export class LocalStore {
215
494
  // Re-throw other errors (e.g., real DB issues).
216
495
  throw err;
217
496
  }
218
- // Layer 2: enqueue exchange for batched fact extraction (T-071).
219
- // ExtractionQueue handles draining on interval or batch size threshold.
220
- this.#extractionQueue.enqueue(exchange, this.#userId);
497
+ // Layer 2: enqueue exchange for batched fact extraction (T-071) only if LLM config is present.
498
+ // If no LLM config, extract_status is already set to 'no_llm', so skip enqueue.
499
+ if (this.#llmConfig) {
500
+ this.#extractionQueue.enqueue(exchange, this.#userId, rawLogId);
501
+ }
221
502
  return {
222
503
  rawLogId,
223
504
  factsExtracted: 0,
@@ -229,7 +510,8 @@ export class LocalStore {
229
510
  * See raw-log-search.ts for the full pipeline description.
230
511
  */
231
512
  async searchRawLog(query, limit = 10) {
232
- return searchRawLog(this.#db, this.#userId, query, limit);
513
+ // T-103: Pass in-memory embedding cache to searchRawLog (eliminates ~3,700ms SQLite load per query)
514
+ return searchRawLog(this.#db, this.#userId, query, limit, this.#rawLogEmbeddingCache);
233
515
  }
234
516
  /**
235
517
  * Wait for all queued fact extractions to complete.
@@ -384,7 +666,12 @@ export class LocalStore {
384
666
  source,
385
667
  chunk_text AS chunkText,
386
668
  chunk_index AS chunkIndex,
387
- content_hash AS contentHash
669
+ content_hash AS contentHash,
670
+ embed_status AS embedStatus,
671
+ embed_error AS embedError,
672
+ embed_model AS embedModel,
673
+ extract_status AS extractStatus,
674
+ extract_error AS extractError
388
675
  FROM raw_log
389
676
  WHERE user_id = ?
390
677
  ORDER BY timestamp DESC
@@ -397,6 +684,194 @@ export class LocalStore {
397
684
  rawLogStmt.finalize();
398
685
  return { facts, rawLog };
399
686
  }
687
+ /**
688
+ * Start background backlog processor drain loops (T-095).
689
+ * Launches continuous async loops for embed and extract backlogs.
690
+ * Call this after store.extractionQueue.start() in plugin-module.ts.
691
+ */
692
+ startBacklogProcessor() {
693
+ // Start embed drain loop
694
+ if (this.#embedDrainPromise === null) {
695
+ this.#embedDrainStopped = false;
696
+ this.#embedDrainPromise = this.#embedDrainLoop();
697
+ }
698
+ // Start extract drain loop (only if LLM config is present)
699
+ if (this.#llmConfig && this.#extractDrainPromise === null) {
700
+ this.#extractDrainStopped = false;
701
+ this.#extractDrainPromise = this.#extractDrainLoop();
702
+ }
703
+ }
704
+ /**
705
+ * Stop background backlog processor drain loops (T-095).
706
+ * Signals both loops to stop and awaits in-flight work.
707
+ * Call this alongside store.extractionQueue.stop() in session_end and process exit handlers.
708
+ */
709
+ async stopBacklogProcessor() {
710
+ // Signal loops to stop
711
+ this.#embedDrainStopped = true;
712
+ this.#extractDrainStopped = true;
713
+ // Await drain loop Promises (waits for in-flight work to complete)
714
+ const promises = [];
715
+ if (this.#embedDrainPromise !== null) {
716
+ promises.push(this.#embedDrainPromise);
717
+ this.#embedDrainPromise = null;
718
+ }
719
+ if (this.#extractDrainPromise !== null) {
720
+ promises.push(this.#extractDrainPromise);
721
+ this.#extractDrainPromise = null;
722
+ }
723
+ await Promise.all(promises);
724
+ }
725
+ /**
726
+ * Continuous drain loop for embed backlog (T-095).
727
+ * Runs as fast as the Worker thread allows, with no artificial throttling.
728
+ * Only sleeps when the queue is empty.
729
+ */
730
+ async #embedDrainLoop() {
731
+ while (!this.#embedDrainStopped) {
732
+ const processed = await this.#processEmbedBatch();
733
+ if (processed === 0) {
734
+ // Queue is empty — sleep before checking again
735
+ await new Promise(resolve => setTimeout(resolve, this.#embedIdleMs));
736
+ }
737
+ // If processed > 0: immediately loop to grab the next batch
738
+ }
739
+ }
740
+ /**
741
+ * Process one batch of embed backlog rows (T-095).
742
+ * Uses Promise.all for parallelism across the batch (embed runs in Worker, no API limits).
743
+ * Returns count of rows processed.
744
+ */
745
+ async #processEmbedBatch() {
746
+ const BATCH_SIZE = 50; // Large batch — embed is CPU-bound, no rate limit
747
+ // T-108: Fetch pending child rows only (parent_id IS NOT NULL).
748
+ // Old parent rows (parent_id IS NULL, embed_status='pending') are left as-is for fallback search.
749
+ const stmt = this.#db.prepare(`
750
+ SELECT id, chunk_text FROM raw_log
751
+ WHERE user_id = ? AND embed_status = 'pending' AND parent_id IS NOT NULL
752
+ ORDER BY rowid ASC
753
+ LIMIT ?
754
+ `);
755
+ stmt.bind([this.#userId, BATCH_SIZE]);
756
+ const pendingRows = [];
757
+ while (stmt.step()) {
758
+ pendingRows.push(stmt.get({}));
759
+ }
760
+ stmt.finalize();
761
+ if (pendingRows.length === 0)
762
+ return 0;
763
+ // Process rows concurrently with Promise.all
764
+ await Promise.all(pendingRows.map(async (row) => {
765
+ try {
766
+ const embedding = await embed(row.chunk_text);
767
+ const embeddingJson = serializeEmbedding(embedding);
768
+ const embedModel = 'Xenova/bge-small-en-v1.5';
769
+ // Insert into vec_raw_log (transaction per row for isolation)
770
+ this.#db.exec('BEGIN');
771
+ const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
772
+ vecStmt.bind([embeddingJson]);
773
+ vecStmt.step();
774
+ vecStmt.finalize();
775
+ const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
776
+ // Update raw_log: embed_status='done', vec_rowid, embed_model
777
+ const updateStmt = this.#db.prepare(`
778
+ UPDATE raw_log
779
+ SET embed_status = 'done', embed_error = NULL, embed_model = ?, vec_rowid = ?
780
+ WHERE id = ?
781
+ `);
782
+ updateStmt.bind([embedModel, vecRowid, row.id]);
783
+ updateStmt.step();
784
+ updateStmt.finalize();
785
+ this.#db.exec('COMMIT');
786
+ // T-103: Append new embedding to in-memory cache
787
+ this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding });
788
+ }
789
+ catch (err) {
790
+ // Embedding failed — update embed_status='failed' with error
791
+ const errorMsg = err instanceof Error ? err.message : String(err);
792
+ const updateStmt = this.#db.prepare(`
793
+ UPDATE raw_log
794
+ SET embed_status = 'failed', embed_error = ?
795
+ WHERE id = ?
796
+ `);
797
+ updateStmt.bind([errorMsg, row.id]);
798
+ updateStmt.step();
799
+ updateStmt.finalize();
800
+ }
801
+ }));
802
+ return pendingRows.length;
803
+ }
804
+ /**
805
+ * Continuous drain loop for extract backlog (T-095).
806
+ * Fetches up to `concurrency` rows and processes them concurrently with 429 backoff.
807
+ * Only sleeps when the queue is empty.
808
+ */
809
+ async #extractDrainLoop() {
810
+ while (!this.#extractDrainStopped) {
811
+ // Fetch pending rows (up to concurrency limit)
812
+ const stmt = this.#db.prepare(`
813
+ SELECT id, user_message, agent_response, timestamp, session_id, session_label, source
814
+ FROM raw_log
815
+ WHERE user_id = ? AND extract_status = 'pending'
816
+ ORDER BY rowid ASC
817
+ LIMIT ?
818
+ `);
819
+ stmt.bind([this.#userId, this.#extractConcurrency]);
820
+ const pendingRows = [];
821
+ while (stmt.step()) {
822
+ pendingRows.push(stmt.get({}));
823
+ }
824
+ stmt.finalize();
825
+ if (pendingRows.length === 0) {
826
+ // Queue is empty — sleep before checking again
827
+ await new Promise(resolve => setTimeout(resolve, this.#extractIdleMs));
828
+ continue;
829
+ }
830
+ // Process rows concurrently with 429 backoff
831
+ await Promise.all(pendingRows.map(async (row) => {
832
+ const exchange = {
833
+ userMessage: row.user_message,
834
+ agentResponse: row.agent_response,
835
+ timestamp: new Date(row.timestamp),
836
+ source: row.source,
837
+ sessionId: row.session_id,
838
+ ...(row.session_label !== null ? { sessionLabel: row.session_label } : {}),
839
+ };
840
+ await this.#extractRowWithBackoff(exchange, row.id);
841
+ }));
842
+ }
843
+ }
844
+ /**
845
+ * Extract facts for one row with exponential backoff on 429 errors (T-095).
846
+ * Calls extractFn directly (bypasses ExtractionQueue for backlog processing).
847
+ * extractFn already handles DB status updates (extract_status=done/failed).
848
+ */
849
+ async #extractRowWithBackoff(exchange, sourceChunkId) {
850
+ const MAX_RETRIES = 4;
851
+ let attempt = 0;
852
+ while (attempt <= MAX_RETRIES) {
853
+ try {
854
+ await this.#extractFn(exchange, this.#userId, sourceChunkId);
855
+ return; // Success
856
+ }
857
+ catch (err) {
858
+ const errorMsg = err instanceof Error ? err.message : String(err);
859
+ const is429 = errorMsg.toLowerCase().includes('429') ||
860
+ errorMsg.toLowerCase().includes('rate') ||
861
+ errorMsg.toLowerCase().includes('quota');
862
+ if (is429 && attempt < MAX_RETRIES) {
863
+ // Exponential backoff: 2s, 4s, 8s, 16s
864
+ const backoffMs = this.#retryBackoffMs * Math.pow(2, attempt);
865
+ await new Promise(resolve => setTimeout(resolve, backoffMs));
866
+ attempt++;
867
+ }
868
+ else {
869
+ // Not a 429, or max retries reached — extractFn already marked extract_status='failed'
870
+ return;
871
+ }
872
+ }
873
+ }
874
+ }
400
875
  /** Close the database connection. Call when done (e.g. in tests). */
401
876
  close() {
402
877
  this.#db.close();