@getplumb/core 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/embedder.d.ts +16 -2
- package/dist/embedder.d.ts.map +1 -1
- package/dist/embedder.js +23 -4
- package/dist/embedder.js.map +1 -1
- package/dist/extraction-queue.d.ts +13 -3
- package/dist/extraction-queue.d.ts.map +1 -1
- package/dist/extraction-queue.js +21 -4
- package/dist/extraction-queue.js.map +1 -1
- package/dist/extractor.d.ts +2 -1
- package/dist/extractor.d.ts.map +1 -1
- package/dist/extractor.js +106 -7
- package/dist/extractor.js.map +1 -1
- package/dist/extractor.test.d.ts +2 -0
- package/dist/extractor.test.d.ts.map +1 -0
- package/dist/extractor.test.js +158 -0
- package/dist/extractor.test.js.map +1 -0
- package/dist/fact-search.d.ts +9 -5
- package/dist/fact-search.d.ts.map +1 -1
- package/dist/fact-search.js +25 -16
- package/dist/fact-search.js.map +1 -1
- package/dist/fact-search.test.d.ts +12 -0
- package/dist/fact-search.test.d.ts.map +1 -0
- package/dist/fact-search.test.js +117 -0
- package/dist/fact-search.test.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/llm-client.d.ts +11 -2
- package/dist/llm-client.d.ts.map +1 -1
- package/dist/llm-client.js +47 -3
- package/dist/llm-client.js.map +1 -1
- package/dist/local-store.d.ts +32 -1
- package/dist/local-store.d.ts.map +1 -1
- package/dist/local-store.js +510 -35
- package/dist/local-store.js.map +1 -1
- package/dist/local-store.test.d.ts +2 -0
- package/dist/local-store.test.d.ts.map +1 -0
- package/dist/local-store.test.js +146 -0
- package/dist/local-store.test.js.map +1 -0
- package/dist/raw-log-search.d.ts +9 -5
- package/dist/raw-log-search.d.ts.map +1 -1
- package/dist/raw-log-search.js +107 -29
- package/dist/raw-log-search.js.map +1 -1
- package/dist/raw-log-search.test.d.ts +12 -0
- package/dist/raw-log-search.test.d.ts.map +1 -0
- package/dist/raw-log-search.test.js +124 -0
- package/dist/raw-log-search.test.js.map +1 -0
- package/dist/read-path.test.d.ts +15 -0
- package/dist/read-path.test.d.ts.map +1 -0
- package/dist/read-path.test.js +393 -0
- package/dist/read-path.test.js.map +1 -0
- package/dist/schema.d.ts +2 -2
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +58 -1
- package/dist/schema.js.map +1 -1
- package/dist/scorer.test.d.ts +10 -0
- package/dist/scorer.test.d.ts.map +1 -0
- package/dist/scorer.test.js +169 -0
- package/dist/scorer.test.js.map +1 -0
- package/dist/store.d.ts +3 -1
- package/dist/store.d.ts.map +1 -1
- package/dist/wasm-db.d.ts +63 -8
- package/dist/wasm-db.d.ts.map +1 -1
- package/dist/wasm-db.js +124 -31
- package/dist/wasm-db.js.map +1 -1
- package/package.json +14 -2
package/dist/local-store.js
CHANGED
|
@@ -6,17 +6,110 @@ import { openDb } from './wasm-db.js';
|
|
|
6
6
|
import { applySchema } from './schema.js';
|
|
7
7
|
import { extractFacts } from './extractor.js';
|
|
8
8
|
import { callLLMWithConfig } from './llm-client.js';
|
|
9
|
-
import { embed } from './embedder.js';
|
|
9
|
+
import { embed, warmEmbedder, warmReranker } from './embedder.js';
|
|
10
10
|
import { formatExchange } from './chunker.js';
|
|
11
11
|
import { searchRawLog } from './raw-log-search.js';
|
|
12
12
|
import { searchFacts } from './fact-search.js';
|
|
13
13
|
import { ExtractionQueue } from './extraction-queue.js';
|
|
14
|
-
import { serializeEmbedding } from './vector-search.js';
|
|
14
|
+
import { serializeEmbedding, deserializeEmbedding, cosineDistance } from './vector-search.js';
|
|
15
|
+
/**
 * Split text into overlapping child chunks for parent-child chunking (T-108).
 *
 * Targets ~250 characters per chunk with ~50 characters of overlap between
 * consecutive chunks. A chunk ends on a sentence boundary when one is
 * available, otherwise on a word boundary, otherwise it is hard-cut at
 * 300 characters.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty chunks in document order. Text of 250
 *   characters or fewer is returned unchanged as a single-element array.
 */
function splitIntoChildren(text) {
    const TARGET_SIZE = 250;
    const OVERLAP = 50;
    const MAX_SIZE = 300;
    const SENTENCE_ENDINGS = /[.!?]\s+/g;
    if (text.length <= TARGET_SIZE) {
        // Text is already small enough — return as single child
        return [text];
    }
    // Where the next chunk starts, given where the current one ends: step back
    // by OVERLAP, unless that would land before the start of the text.
    const nextStart = (end) => (end - OVERLAP < 0 ? end : end - OVERLAP);
    const chunks = [];
    let pos = 0;
    while (pos < text.length) {
        // Less than one target-sized chunk left: take the rest and finish.
        if (pos + TARGET_SIZE >= text.length) {
            chunks.push(text.slice(pos));
            break;
        }
        const segment = text.slice(pos, Math.min(pos + MAX_SIZE, text.length));
        // Because chunks overlap, the boundary that ended the previous chunk
        // shows up again near the start of this segment. Cutting there would
        // leave `pos` where it is and loop forever, so only boundaries that
        // actually move the cursor forward are eligible.
        const usable = Array.from(segment.matchAll(SENTENCE_ENDINGS))
            .filter((match) => nextStart(pos + match.index + match[0].length) > pos);
        let endPos;
        if (usable.length > 0) {
            // Last eligible boundary at or before TARGET_SIZE; if every
            // eligible boundary is past it, take the first one (still within
            // MAX_SIZE because the segment is capped there).
            let best = usable[0];
            for (const match of usable) {
                if (match.index > TARGET_SIZE) {
                    break;
                }
                best = match;
            }
            endPos = pos + best.index + best[0].length;
        }
        else {
            // No eligible sentence boundary — fall back to word boundary,
            // which always ends at least TARGET_SIZE past `pos`.
            endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
        }
        chunks.push(text.slice(pos, endPos).trim());
        // Move position forward, with overlap
        pos = nextStart(endPos);
    }
    return chunks.filter((chunk) => chunk.length > 0);
}
/**
 * Find a word boundary near the target position.
 *
 * Scans from `start + targetSize` up to (but not including)
 * `min(start + maxSize, text.length)` for the first whitespace character.
 *
 * @param {string} text - Full text being chunked.
 * @param {number} start - Index where the current chunk begins.
 * @param {number} targetSize - Preferred chunk length.
 * @param {number} maxSize - Maximum chunk length before a hard cut.
 * @returns {number} Exclusive end index for the chunk: just past the first
 *   whitespace found, or the scan limit when none is found, or
 *   `start + targetSize` when the scan range is empty.
 */
function findWordBoundary(text, start, targetSize, maxSize) {
    const targetPos = start + targetSize;
    const maxPos = Math.min(start + maxSize, text.length);
    for (let i = targetPos; i < maxPos; i++) {
        if (/\s/.test(text[i] ?? '')) {
            return i + 1; // Include the whitespace
        }
    }
    // No whitespace in range: hard cut at maxPos (or targetPos if the range was empty)
    return targetPos < maxPos ? maxPos : targetPos;
}
|
|
15
94
|
export class LocalStore {
|
|
16
95
|
#db;
|
|
17
96
|
#userId;
|
|
18
97
|
#llmConfig;
|
|
19
98
|
#extractionQueue;
|
|
99
|
+
// Backlog processor state (T-095: drain loops)
|
|
100
|
+
#embedDrainStopped = false;
|
|
101
|
+
#extractDrainStopped = false;
|
|
102
|
+
#embedDrainPromise = null;
|
|
103
|
+
#extractDrainPromise = null;
|
|
104
|
+
#embedIdleMs;
|
|
105
|
+
#extractIdleMs;
|
|
106
|
+
#extractConcurrency;
|
|
107
|
+
#retryBackoffMs;
|
|
108
|
+
#extractFn;
|
|
109
|
+
// T-096: In-memory embedding cache for vec_facts (eliminates 292ms SQLite load on each query)
|
|
110
|
+
#embeddingCache = [];
|
|
111
|
+
// T-103: In-memory embedding cache for vec_raw_log (eliminates ~3,700ms SQLite load on each query)
|
|
112
|
+
#rawLogEmbeddingCache = [];
|
|
20
113
|
/** Expose database for plugin use (e.g., NudgeManager) */
|
|
21
114
|
get db() {
|
|
22
115
|
return this.#db;
|
|
@@ -29,11 +122,17 @@ export class LocalStore {
|
|
|
29
122
|
get extractionQueue() {
|
|
30
123
|
return this.#extractionQueue;
|
|
31
124
|
}
|
|
32
|
-
constructor(db, userId, llmConfig, extractionQueue) {
|
|
125
|
+
constructor(db, userId, llmConfig, extractionQueue, extractFn, backlog) {
|
|
33
126
|
this.#db = db;
|
|
34
127
|
this.#userId = userId;
|
|
35
128
|
this.#llmConfig = llmConfig;
|
|
36
129
|
this.#extractionQueue = extractionQueue;
|
|
130
|
+
this.#extractFn = extractFn;
|
|
131
|
+
// Initialize backlog processor config — defaults run as fast as possible with concurrency.
|
|
132
|
+
this.#embedIdleMs = backlog?.embedIdleMs ?? 5000;
|
|
133
|
+
this.#extractIdleMs = backlog?.extractIdleMs ?? 5000;
|
|
134
|
+
this.#extractConcurrency = backlog?.concurrency ?? 5;
|
|
135
|
+
this.#retryBackoffMs = backlog?.retryBackoffMs ?? 2000;
|
|
37
136
|
}
|
|
38
137
|
/**
|
|
39
138
|
* Create a new LocalStore instance (async factory).
|
|
@@ -52,36 +151,135 @@ export class LocalStore {
|
|
|
52
151
|
// Use a mutable cell to hold the store reference (needed for circular dependency)
|
|
53
152
|
let storeRef = null;
|
|
54
153
|
// Initialize extraction queue with deferred store lookup
|
|
55
|
-
|
|
154
|
+
// T-079: Wrapper handles extract_status updates on success/failure.
|
|
155
|
+
const extractFn = async (exchange, userId, sourceChunkId) => {
|
|
56
156
|
if (!storeRef)
|
|
57
157
|
throw new Error('Store not initialized');
|
|
58
158
|
const llmFn = llmConfig
|
|
59
159
|
? (prompt) => callLLMWithConfig(prompt, llmConfig)
|
|
60
160
|
: undefined;
|
|
61
|
-
|
|
161
|
+
try {
|
|
162
|
+
const facts = await extractFacts(exchange, userId, storeRef, llmFn, sourceChunkId);
|
|
163
|
+
// T-079: Update extract_status='done' on success.
|
|
164
|
+
const updateStmt = db.prepare(`
|
|
165
|
+
UPDATE raw_log SET extract_status = 'done' WHERE id = ?
|
|
166
|
+
`);
|
|
167
|
+
updateStmt.bind([sourceChunkId]);
|
|
168
|
+
updateStmt.step();
|
|
169
|
+
updateStmt.finalize();
|
|
170
|
+
return facts;
|
|
171
|
+
}
|
|
172
|
+
catch (err) {
|
|
173
|
+
// T-079: Update extract_status='failed' with error message.
|
|
174
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
175
|
+
const updateStmt = db.prepare(`
|
|
176
|
+
UPDATE raw_log SET extract_status = 'failed', extract_error = ? WHERE id = ?
|
|
177
|
+
`);
|
|
178
|
+
updateStmt.bind([errorMsg, sourceChunkId]);
|
|
179
|
+
updateStmt.step();
|
|
180
|
+
updateStmt.finalize();
|
|
181
|
+
// Re-throw so Promise.allSettled() in flush() sees the rejection.
|
|
182
|
+
throw err;
|
|
183
|
+
}
|
|
62
184
|
};
|
|
63
185
|
const extractionQueue = options.extractionQueue ?? new ExtractionQueue(extractFn);
|
|
64
186
|
// Create store and assign to ref
|
|
65
|
-
const store = new LocalStore(db, userId, llmConfig, extractionQueue);
|
|
187
|
+
const store = new LocalStore(db, userId, llmConfig, extractionQueue, extractFn, options.backlog);
|
|
66
188
|
storeRef = store;
|
|
189
|
+
// T-096: Warm embedder pipeline to eliminate 365ms cold-start on first query
|
|
190
|
+
await warmEmbedder();
|
|
191
|
+
// T-101: Warm reranker pipeline to eliminate ~200ms cold-start on first query
|
|
192
|
+
// (intentionally loads ~80MB model at init for consistent <250ms query performance)
|
|
193
|
+
await warmReranker();
|
|
194
|
+
// T-096: Load all vec_facts embeddings into in-memory cache (eliminates 292ms SQLite load per query)
|
|
195
|
+
const vecStmt = db.prepare(`SELECT rowid, embedding FROM vec_facts`);
|
|
196
|
+
while (vecStmt.step()) {
|
|
197
|
+
const row = vecStmt.get({});
|
|
198
|
+
store.#embeddingCache.push({
|
|
199
|
+
rowid: row.rowid,
|
|
200
|
+
embedding: deserializeEmbedding(row.embedding),
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
vecStmt.finalize();
|
|
204
|
+
// T-103/T-108: Load vec_raw_log embeddings for child rows only (eliminates ~3,700ms SQLite load per query)
|
|
205
|
+
// Child rows have parent_id IS NOT NULL. Parent rows are not embedded (embed_status='no_embed').
|
|
206
|
+
const rawLogVecStmt = db.prepare(`
|
|
207
|
+
SELECT v.rowid, v.embedding
|
|
208
|
+
FROM vec_raw_log v
|
|
209
|
+
JOIN raw_log r ON r.vec_rowid = v.rowid
|
|
210
|
+
WHERE r.parent_id IS NOT NULL
|
|
211
|
+
`);
|
|
212
|
+
while (rawLogVecStmt.step()) {
|
|
213
|
+
const row = rawLogVecStmt.get({});
|
|
214
|
+
store.#rawLogEmbeddingCache.push({
|
|
215
|
+
rowid: row.rowid,
|
|
216
|
+
embedding: deserializeEmbedding(row.embedding),
|
|
217
|
+
});
|
|
218
|
+
}
|
|
219
|
+
rawLogVecStmt.finalize();
|
|
67
220
|
return store;
|
|
68
221
|
}
|
|
69
|
-
async store(fact) {
|
|
70
|
-
|
|
71
|
-
//
|
|
222
|
+
async store(fact, sourceChunkId) {
|
|
223
|
+
// T-097: Cross-chunk fact deduplication — prevent storing duplicate facts across different chunks.
|
|
224
|
+
// A fact is considered a duplicate if it has the same subject+predicate and the object is either:
|
|
225
|
+
// 1. Identical (case-insensitive, normalized whitespace), OR
|
|
226
|
+
// 2. Semantically similar (cosine similarity >= 0.92 on embeddings)
|
|
227
|
+
//
|
|
228
|
+
// Pre-filter by subject+predicate via SQL (uses index, avoids full corpus scan).
|
|
229
|
+
const candidateStmt = this.#db.prepare(`
|
|
230
|
+
SELECT id, object, vec_rowid
|
|
231
|
+
FROM facts
|
|
232
|
+
WHERE user_id = ? AND subject = ? AND predicate = ? AND deleted_at IS NULL
|
|
233
|
+
`);
|
|
234
|
+
candidateStmt.bind([this.#userId, fact.subject, fact.predicate]);
|
|
235
|
+
const candidates = [];
|
|
236
|
+
while (candidateStmt.step()) {
|
|
237
|
+
candidates.push(candidateStmt.get({}));
|
|
238
|
+
}
|
|
239
|
+
candidateStmt.finalize();
|
|
240
|
+
// Helper: Normalize text for exact-match check (lowercase, trim, collapse multiple spaces)
|
|
241
|
+
const normalizeText = (text) => text.toLowerCase().trim().replace(/\s+/g, ' ');
|
|
242
|
+
const normalizedNewObject = normalizeText(fact.object);
|
|
243
|
+
// Check for exact object match first (avoids embedding call in the common case)
|
|
244
|
+
for (const candidate of candidates) {
|
|
245
|
+
if (normalizeText(candidate.object) === normalizedNewObject) {
|
|
246
|
+
// Exact duplicate found — return existing fact ID without inserting
|
|
247
|
+
return candidate.id;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
// No exact match found. Now embed the new fact for semantic similarity check and insertion.
|
|
72
251
|
const text = `${fact.subject} ${fact.predicate} ${fact.object} ${fact.context ?? ''}`.trim();
|
|
73
252
|
const embedding = await embed(text);
|
|
74
253
|
const embeddingJson = serializeEmbedding(embedding);
|
|
254
|
+
// Check semantic similarity against candidates (only if we have candidates with embeddings)
|
|
255
|
+
if (candidates.length > 0) {
|
|
256
|
+
for (const candidate of candidates) {
|
|
257
|
+
if (candidate.vec_rowid === null)
|
|
258
|
+
continue;
|
|
259
|
+
// Find candidate embedding in in-memory cache (T-096)
|
|
260
|
+
const cachedEntry = this.#embeddingCache.find(entry => entry.rowid === candidate.vec_rowid);
|
|
261
|
+
if (!cachedEntry)
|
|
262
|
+
continue;
|
|
263
|
+
// Compute cosine similarity. Distance = 1 - similarity, so similarity >= 0.92 means distance <= 0.08.
|
|
264
|
+
const distance = cosineDistance(embedding, cachedEntry.embedding);
|
|
265
|
+
if (distance <= 0.08) {
|
|
266
|
+
// Semantically equivalent fact found — return existing ID without inserting
|
|
267
|
+
return candidate.id;
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
// No duplicate found (neither exact nor semantic) — proceed with normal insertion
|
|
272
|
+
const id = crypto.randomUUID();
|
|
75
273
|
// Begin transaction
|
|
76
274
|
this.#db.exec('BEGIN');
|
|
77
275
|
try {
|
|
78
|
-
// Insert fact
|
|
276
|
+
// Insert fact (T-079: include source_chunk_id)
|
|
79
277
|
const factStmt = this.#db.prepare(`
|
|
80
278
|
INSERT INTO facts
|
|
81
279
|
(id, user_id, subject, predicate, object,
|
|
82
280
|
confidence, decay_rate, timestamp, source_session_id,
|
|
83
|
-
source_session_label, context)
|
|
84
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
281
|
+
source_session_label, context, source_chunk_id)
|
|
282
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
85
283
|
`);
|
|
86
284
|
factStmt.bind([
|
|
87
285
|
id,
|
|
@@ -95,6 +293,7 @@ export class LocalStore {
|
|
|
95
293
|
fact.sourceSessionId,
|
|
96
294
|
fact.sourceSessionLabel ?? null,
|
|
97
295
|
fact.context ?? null,
|
|
296
|
+
sourceChunkId ?? null,
|
|
98
297
|
]);
|
|
99
298
|
factStmt.step();
|
|
100
299
|
factStmt.finalize();
|
|
@@ -110,6 +309,8 @@ export class LocalStore {
|
|
|
110
309
|
updateStmt.step();
|
|
111
310
|
updateStmt.finalize();
|
|
112
311
|
this.#db.exec('COMMIT');
|
|
312
|
+
// T-096: Append new embedding to in-memory cache
|
|
313
|
+
this.#embeddingCache.push({ rowid: vecRowid, embedding });
|
|
113
314
|
}
|
|
114
315
|
catch (err) {
|
|
115
316
|
this.#db.exec('ROLLBACK');
|
|
@@ -118,9 +319,18 @@ export class LocalStore {
|
|
|
118
319
|
return id;
|
|
119
320
|
}
|
|
120
321
|
async search(query, limit = 20) {
|
|
121
|
-
|
|
322
|
+
// T-096: Pass in-memory embedding cache to searchFacts (eliminates 292ms SQLite load per query)
|
|
323
|
+
return searchFacts(this.#db, this.#userId, query, limit, this.#embeddingCache);
|
|
122
324
|
}
|
|
123
325
|
async delete(id) {
|
|
326
|
+
// T-096: Get vec_rowid before soft-deleting so we can remove from cache
|
|
327
|
+
const vecRowidStmt = this.#db.prepare(`
|
|
328
|
+
SELECT vec_rowid FROM facts WHERE id = ? AND user_id = ?
|
|
329
|
+
`);
|
|
330
|
+
vecRowidStmt.bind([id, this.#userId]);
|
|
331
|
+
vecRowidStmt.step();
|
|
332
|
+
const vecRowid = vecRowidStmt.get(0);
|
|
333
|
+
vecRowidStmt.finalize();
|
|
124
334
|
// Soft delete only — never hard delete.
|
|
125
335
|
const stmt = this.#db.prepare(`
|
|
126
336
|
UPDATE facts SET deleted_at = ? WHERE id = ? AND user_id = ?
|
|
@@ -128,6 +338,13 @@ export class LocalStore {
|
|
|
128
338
|
stmt.bind([new Date().toISOString(), id, this.#userId]);
|
|
129
339
|
stmt.step();
|
|
130
340
|
stmt.finalize();
|
|
341
|
+
// T-096: Remove from in-memory embedding cache
|
|
342
|
+
if (vecRowid !== null) {
|
|
343
|
+
const cacheIdx = this.#embeddingCache.findIndex(entry => entry.rowid === vecRowid);
|
|
344
|
+
if (cacheIdx !== -1) {
|
|
345
|
+
this.#embeddingCache.splice(cacheIdx, 1);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
131
348
|
}
|
|
132
349
|
async status() {
|
|
133
350
|
const factStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM facts WHERE user_id = ? AND deleted_at IS NULL`);
|
|
@@ -159,18 +376,19 @@ export class LocalStore {
|
|
|
159
376
|
const chunkText = formatExchange(exchange);
|
|
160
377
|
// Compute content hash for deduplication (scoped per userId).
|
|
161
378
|
const contentHash = createHash('sha256').update(chunkText).digest('hex');
|
|
162
|
-
//
|
|
163
|
-
|
|
164
|
-
const
|
|
379
|
+
// T-108: Parent-child chunking — don't embed parent, only children.
|
|
380
|
+
// Parent extract_status: 'no_llm' if no config, otherwise 'pending' (extraction runs on parent only).
|
|
381
|
+
const extractStatus = this.#llmConfig ? 'pending' : 'no_llm';
|
|
165
382
|
// Attempt insert — catch UNIQUE constraint violations (duplicate content_hash).
|
|
166
383
|
try {
|
|
167
384
|
this.#db.exec('BEGIN');
|
|
168
|
-
// Insert
|
|
385
|
+
// T-108: Insert parent row (no embedding, no vec_rowid).
|
|
169
386
|
const rawLogStmt = this.#db.prepare(`
|
|
170
387
|
INSERT INTO raw_log
|
|
171
388
|
(id, user_id, session_id, session_label,
|
|
172
|
-
user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash
|
|
173
|
-
|
|
389
|
+
user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
|
|
390
|
+
embed_status, embed_error, embed_model, extract_status, parent_id)
|
|
391
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
174
392
|
`);
|
|
175
393
|
rawLogStmt.bind([
|
|
176
394
|
rawLogId,
|
|
@@ -184,20 +402,81 @@ export class LocalStore {
|
|
|
184
402
|
chunkText,
|
|
185
403
|
0,
|
|
186
404
|
contentHash,
|
|
405
|
+
'no_embed', // Parent is not embedded (T-108)
|
|
406
|
+
null,
|
|
407
|
+
null,
|
|
408
|
+
extractStatus,
|
|
409
|
+
null, // parent_id=NULL for parent rows
|
|
187
410
|
]);
|
|
188
411
|
rawLogStmt.step();
|
|
189
412
|
rawLogStmt.finalize();
|
|
190
|
-
//
|
|
191
|
-
const
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
413
|
+
// T-108: Split parent into child chunks and embed each child.
|
|
414
|
+
const childChunks = splitIntoChildren(chunkText);
|
|
415
|
+
for (let i = 0; i < childChunks.length; i++) {
|
|
416
|
+
const childText = childChunks[i];
|
|
417
|
+
if (!childText)
|
|
418
|
+
continue;
|
|
419
|
+
const childId = crypto.randomUUID();
|
|
420
|
+
let childEmbedding = null;
|
|
421
|
+
let childEmbeddingJson = null;
|
|
422
|
+
let childEmbedStatus = 'pending';
|
|
423
|
+
let childEmbedError = null;
|
|
424
|
+
let childEmbedModel = null;
|
|
425
|
+
// Embed the child chunk
|
|
426
|
+
try {
|
|
427
|
+
childEmbedding = await embed(childText);
|
|
428
|
+
childEmbeddingJson = serializeEmbedding(childEmbedding);
|
|
429
|
+
childEmbedStatus = 'done';
|
|
430
|
+
childEmbedModel = 'Xenova/bge-small-en-v1.5';
|
|
431
|
+
}
|
|
432
|
+
catch (err) {
|
|
433
|
+
childEmbedStatus = 'failed';
|
|
434
|
+
childEmbedError = err instanceof Error ? err.message : String(err);
|
|
435
|
+
}
|
|
436
|
+
// Insert child row
|
|
437
|
+
const childStmt = this.#db.prepare(`
|
|
438
|
+
INSERT INTO raw_log
|
|
439
|
+
(id, user_id, session_id, session_label,
|
|
440
|
+
user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
|
|
441
|
+
embed_status, embed_error, embed_model, extract_status, parent_id)
|
|
442
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
443
|
+
`);
|
|
444
|
+
childStmt.bind([
|
|
445
|
+
childId,
|
|
446
|
+
this.#userId,
|
|
447
|
+
exchange.sessionId,
|
|
448
|
+
exchange.sessionLabel ?? null,
|
|
449
|
+
exchange.userMessage,
|
|
450
|
+
exchange.agentResponse,
|
|
451
|
+
exchange.timestamp.toISOString(),
|
|
452
|
+
exchange.source,
|
|
453
|
+
childText,
|
|
454
|
+
i, // chunk_index for ordering
|
|
455
|
+
null, // No content_hash for children (they don't participate in dedup)
|
|
456
|
+
childEmbedStatus,
|
|
457
|
+
childEmbedError,
|
|
458
|
+
childEmbedModel,
|
|
459
|
+
'child', // T-108: Mark as 'child' to prevent extraction
|
|
460
|
+
rawLogId, // parent_id points to parent
|
|
461
|
+
]);
|
|
462
|
+
childStmt.step();
|
|
463
|
+
childStmt.finalize();
|
|
464
|
+
// Insert child embedding into vec_raw_log if embedding succeeded
|
|
465
|
+
if (childEmbeddingJson !== null) {
|
|
466
|
+
const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
|
|
467
|
+
vecStmt.bind([childEmbeddingJson]);
|
|
468
|
+
vecStmt.step();
|
|
469
|
+
vecStmt.finalize();
|
|
470
|
+
const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
|
|
471
|
+
// Back-fill vec_rowid on child row
|
|
472
|
+
const updateStmt = this.#db.prepare(`UPDATE raw_log SET vec_rowid = ? WHERE id = ?`);
|
|
473
|
+
updateStmt.bind([vecRowid, childId]);
|
|
474
|
+
updateStmt.step();
|
|
475
|
+
updateStmt.finalize();
|
|
476
|
+
// T-103: Append child embedding to in-memory cache
|
|
477
|
+
this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding: childEmbedding });
|
|
478
|
+
}
|
|
479
|
+
}
|
|
201
480
|
this.#db.exec('COMMIT');
|
|
202
481
|
}
|
|
203
482
|
catch (err) {
|
|
@@ -215,9 +494,11 @@ export class LocalStore {
|
|
|
215
494
|
// Re-throw other errors (e.g., real DB issues).
|
|
216
495
|
throw err;
|
|
217
496
|
}
|
|
218
|
-
// Layer 2: enqueue exchange for batched fact extraction (T-071).
|
|
219
|
-
//
|
|
220
|
-
|
|
497
|
+
// Layer 2: enqueue exchange for batched fact extraction (T-071) only if LLM config is present.
|
|
498
|
+
// If no LLM config, extract_status is already set to 'no_llm', so skip enqueue.
|
|
499
|
+
if (this.#llmConfig) {
|
|
500
|
+
this.#extractionQueue.enqueue(exchange, this.#userId, rawLogId);
|
|
501
|
+
}
|
|
221
502
|
return {
|
|
222
503
|
rawLogId,
|
|
223
504
|
factsExtracted: 0,
|
|
@@ -229,7 +510,8 @@ export class LocalStore {
|
|
|
229
510
|
* See raw-log-search.ts for the full pipeline description.
|
|
230
511
|
*/
|
|
231
512
|
async searchRawLog(query, limit = 10) {
|
|
232
|
-
|
|
513
|
+
// T-103: Pass in-memory embedding cache to searchRawLog (eliminates ~3,700ms SQLite load per query)
|
|
514
|
+
return searchRawLog(this.#db, this.#userId, query, limit, this.#rawLogEmbeddingCache);
|
|
233
515
|
}
|
|
234
516
|
/**
|
|
235
517
|
* Wait for all queued fact extractions to complete.
|
|
@@ -384,7 +666,12 @@ export class LocalStore {
|
|
|
384
666
|
source,
|
|
385
667
|
chunk_text AS chunkText,
|
|
386
668
|
chunk_index AS chunkIndex,
|
|
387
|
-
content_hash AS contentHash
|
|
669
|
+
content_hash AS contentHash,
|
|
670
|
+
embed_status AS embedStatus,
|
|
671
|
+
embed_error AS embedError,
|
|
672
|
+
embed_model AS embedModel,
|
|
673
|
+
extract_status AS extractStatus,
|
|
674
|
+
extract_error AS extractError
|
|
388
675
|
FROM raw_log
|
|
389
676
|
WHERE user_id = ?
|
|
390
677
|
ORDER BY timestamp DESC
|
|
@@ -397,6 +684,194 @@ export class LocalStore {
|
|
|
397
684
|
rawLogStmt.finalize();
|
|
398
685
|
return { facts, rawLog };
|
|
399
686
|
}
|
|
687
|
+
/**
|
|
688
|
+
* Start background backlog processor drain loops (T-095).
|
|
689
|
+
* Launches continuous async loops for embed and extract backlogs.
|
|
690
|
+
* Call this after store.extractionQueue.start() in plugin-module.ts.
|
|
691
|
+
*/
|
|
692
|
+
startBacklogProcessor() {
|
|
693
|
+
// Start embed drain loop
|
|
694
|
+
if (this.#embedDrainPromise === null) {
|
|
695
|
+
this.#embedDrainStopped = false;
|
|
696
|
+
this.#embedDrainPromise = this.#embedDrainLoop();
|
|
697
|
+
}
|
|
698
|
+
// Start extract drain loop (only if LLM config is present)
|
|
699
|
+
if (this.#llmConfig && this.#extractDrainPromise === null) {
|
|
700
|
+
this.#extractDrainStopped = false;
|
|
701
|
+
this.#extractDrainPromise = this.#extractDrainLoop();
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
/**
|
|
705
|
+
* Stop background backlog processor drain loops (T-095).
|
|
706
|
+
* Signals both loops to stop and awaits in-flight work.
|
|
707
|
+
* Call this alongside store.extractionQueue.stop() in session_end and process exit handlers.
|
|
708
|
+
*/
|
|
709
|
+
async stopBacklogProcessor() {
|
|
710
|
+
// Signal loops to stop
|
|
711
|
+
this.#embedDrainStopped = true;
|
|
712
|
+
this.#extractDrainStopped = true;
|
|
713
|
+
// Await drain loop Promises (waits for in-flight work to complete)
|
|
714
|
+
const promises = [];
|
|
715
|
+
if (this.#embedDrainPromise !== null) {
|
|
716
|
+
promises.push(this.#embedDrainPromise);
|
|
717
|
+
this.#embedDrainPromise = null;
|
|
718
|
+
}
|
|
719
|
+
if (this.#extractDrainPromise !== null) {
|
|
720
|
+
promises.push(this.#extractDrainPromise);
|
|
721
|
+
this.#extractDrainPromise = null;
|
|
722
|
+
}
|
|
723
|
+
await Promise.all(promises);
|
|
724
|
+
}
|
|
725
|
+
/**
|
|
726
|
+
* Continuous drain loop for embed backlog (T-095).
|
|
727
|
+
* Runs as fast as the Worker thread allows, with no artificial throttling.
|
|
728
|
+
* Only sleeps when the queue is empty.
|
|
729
|
+
*/
|
|
730
|
+
async #embedDrainLoop() {
|
|
731
|
+
while (!this.#embedDrainStopped) {
|
|
732
|
+
const processed = await this.#processEmbedBatch();
|
|
733
|
+
if (processed === 0) {
|
|
734
|
+
// Queue is empty — sleep before checking again
|
|
735
|
+
await new Promise(resolve => setTimeout(resolve, this.#embedIdleMs));
|
|
736
|
+
}
|
|
737
|
+
// If processed > 0: immediately loop to grab the next batch
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
/**
|
|
741
|
+
* Process one batch of embed backlog rows (T-095).
|
|
742
|
+
* Uses Promise.all for parallelism across the batch (embed runs in Worker, no API limits).
|
|
743
|
+
* Returns count of rows processed.
|
|
744
|
+
*/
|
|
745
|
+
async #processEmbedBatch() {
|
|
746
|
+
const BATCH_SIZE = 50; // Large batch — embed is CPU-bound, no rate limit
|
|
747
|
+
// T-108: Fetch pending child rows only (parent_id IS NOT NULL).
|
|
748
|
+
// Old parent rows (parent_id IS NULL, embed_status='pending') are left as-is for fallback search.
|
|
749
|
+
const stmt = this.#db.prepare(`
|
|
750
|
+
SELECT id, chunk_text FROM raw_log
|
|
751
|
+
WHERE user_id = ? AND embed_status = 'pending' AND parent_id IS NOT NULL
|
|
752
|
+
ORDER BY rowid ASC
|
|
753
|
+
LIMIT ?
|
|
754
|
+
`);
|
|
755
|
+
stmt.bind([this.#userId, BATCH_SIZE]);
|
|
756
|
+
const pendingRows = [];
|
|
757
|
+
while (stmt.step()) {
|
|
758
|
+
pendingRows.push(stmt.get({}));
|
|
759
|
+
}
|
|
760
|
+
stmt.finalize();
|
|
761
|
+
if (pendingRows.length === 0)
|
|
762
|
+
return 0;
|
|
763
|
+
// Process rows concurrently with Promise.all
|
|
764
|
+
await Promise.all(pendingRows.map(async (row) => {
|
|
765
|
+
try {
|
|
766
|
+
const embedding = await embed(row.chunk_text);
|
|
767
|
+
const embeddingJson = serializeEmbedding(embedding);
|
|
768
|
+
const embedModel = 'Xenova/bge-small-en-v1.5';
|
|
769
|
+
// Insert into vec_raw_log (transaction per row for isolation)
|
|
770
|
+
this.#db.exec('BEGIN');
|
|
771
|
+
const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
|
|
772
|
+
vecStmt.bind([embeddingJson]);
|
|
773
|
+
vecStmt.step();
|
|
774
|
+
vecStmt.finalize();
|
|
775
|
+
const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
|
|
776
|
+
// Update raw_log: embed_status='done', vec_rowid, embed_model
|
|
777
|
+
const updateStmt = this.#db.prepare(`
|
|
778
|
+
UPDATE raw_log
|
|
779
|
+
SET embed_status = 'done', embed_error = NULL, embed_model = ?, vec_rowid = ?
|
|
780
|
+
WHERE id = ?
|
|
781
|
+
`);
|
|
782
|
+
updateStmt.bind([embedModel, vecRowid, row.id]);
|
|
783
|
+
updateStmt.step();
|
|
784
|
+
updateStmt.finalize();
|
|
785
|
+
this.#db.exec('COMMIT');
|
|
786
|
+
// T-103: Append new embedding to in-memory cache
|
|
787
|
+
this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding });
|
|
788
|
+
}
|
|
789
|
+
catch (err) {
|
|
790
|
+
// Embedding failed — update embed_status='failed' with error
|
|
791
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
792
|
+
const updateStmt = this.#db.prepare(`
|
|
793
|
+
UPDATE raw_log
|
|
794
|
+
SET embed_status = 'failed', embed_error = ?
|
|
795
|
+
WHERE id = ?
|
|
796
|
+
`);
|
|
797
|
+
updateStmt.bind([errorMsg, row.id]);
|
|
798
|
+
updateStmt.step();
|
|
799
|
+
updateStmt.finalize();
|
|
800
|
+
}
|
|
801
|
+
}));
|
|
802
|
+
return pendingRows.length;
|
|
803
|
+
}
|
|
804
|
+
/**
|
|
805
|
+
* Continuous drain loop for extract backlog (T-095).
|
|
806
|
+
* Fetches up to `concurrency` rows and processes them concurrently with 429 backoff.
|
|
807
|
+
* Only sleeps when the queue is empty.
|
|
808
|
+
*/
|
|
809
|
+
async #extractDrainLoop() {
|
|
810
|
+
while (!this.#extractDrainStopped) {
|
|
811
|
+
// Fetch pending rows (up to concurrency limit)
|
|
812
|
+
const stmt = this.#db.prepare(`
|
|
813
|
+
SELECT id, user_message, agent_response, timestamp, session_id, session_label, source
|
|
814
|
+
FROM raw_log
|
|
815
|
+
WHERE user_id = ? AND extract_status = 'pending'
|
|
816
|
+
ORDER BY rowid ASC
|
|
817
|
+
LIMIT ?
|
|
818
|
+
`);
|
|
819
|
+
stmt.bind([this.#userId, this.#extractConcurrency]);
|
|
820
|
+
const pendingRows = [];
|
|
821
|
+
while (stmt.step()) {
|
|
822
|
+
pendingRows.push(stmt.get({}));
|
|
823
|
+
}
|
|
824
|
+
stmt.finalize();
|
|
825
|
+
if (pendingRows.length === 0) {
|
|
826
|
+
// Queue is empty — sleep before checking again
|
|
827
|
+
await new Promise(resolve => setTimeout(resolve, this.#extractIdleMs));
|
|
828
|
+
continue;
|
|
829
|
+
}
|
|
830
|
+
// Process rows concurrently with 429 backoff
|
|
831
|
+
await Promise.all(pendingRows.map(async (row) => {
|
|
832
|
+
const exchange = {
|
|
833
|
+
userMessage: row.user_message,
|
|
834
|
+
agentResponse: row.agent_response,
|
|
835
|
+
timestamp: new Date(row.timestamp),
|
|
836
|
+
source: row.source,
|
|
837
|
+
sessionId: row.session_id,
|
|
838
|
+
...(row.session_label !== null ? { sessionLabel: row.session_label } : {}),
|
|
839
|
+
};
|
|
840
|
+
await this.#extractRowWithBackoff(exchange, row.id);
|
|
841
|
+
}));
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
/**
|
|
845
|
+
* Extract facts for one row with exponential backoff on 429 errors (T-095).
|
|
846
|
+
* Calls extractFn directly (bypasses ExtractionQueue for backlog processing).
|
|
847
|
+
* extractFn already handles DB status updates (extract_status=done/failed).
|
|
848
|
+
*/
|
|
849
|
+
async #extractRowWithBackoff(exchange, sourceChunkId) {
|
|
850
|
+
const MAX_RETRIES = 4;
|
|
851
|
+
let attempt = 0;
|
|
852
|
+
while (attempt <= MAX_RETRIES) {
|
|
853
|
+
try {
|
|
854
|
+
await this.#extractFn(exchange, this.#userId, sourceChunkId);
|
|
855
|
+
return; // Success
|
|
856
|
+
}
|
|
857
|
+
catch (err) {
|
|
858
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
859
|
+
const is429 = errorMsg.toLowerCase().includes('429') ||
|
|
860
|
+
errorMsg.toLowerCase().includes('rate') ||
|
|
861
|
+
errorMsg.toLowerCase().includes('quota');
|
|
862
|
+
if (is429 && attempt < MAX_RETRIES) {
|
|
863
|
+
// Exponential backoff: 2s, 4s, 8s, 16s
|
|
864
|
+
const backoffMs = this.#retryBackoffMs * Math.pow(2, attempt);
|
|
865
|
+
await new Promise(resolve => setTimeout(resolve, backoffMs));
|
|
866
|
+
attempt++;
|
|
867
|
+
}
|
|
868
|
+
else {
|
|
869
|
+
// Not a 429, or max retries reached — extractFn already marked extract_status='failed'
|
|
870
|
+
return;
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
}
|
|
874
|
+
}
|
|
400
875
|
/** Close the database connection. Call when done (e.g. in tests). */
|
|
401
876
|
close() {
|
|
402
877
|
this.#db.close();
|