@getplumb/core 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context-builder.d.ts +1 -7
- package/dist/context-builder.d.ts.map +1 -1
- package/dist/context-builder.js +7 -44
- package/dist/context-builder.js.map +1 -1
- package/dist/index.d.ts +4 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -5
- package/dist/index.js.map +1 -1
- package/dist/local-store.d.ts +7 -82
- package/dist/local-store.d.ts.map +1 -1
- package/dist/local-store.js +84 -468
- package/dist/local-store.js.map +1 -1
- package/dist/read-path.d.ts +6 -23
- package/dist/read-path.d.ts.map +1 -1
- package/dist/read-path.js +9 -48
- package/dist/read-path.js.map +1 -1
- package/dist/schema.d.ts +4 -13
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +6 -73
- package/dist/schema.js.map +1 -1
- package/dist/scorer.d.ts +0 -9
- package/dist/scorer.d.ts.map +1 -1
- package/dist/scorer.js +1 -31
- package/dist/scorer.js.map +1 -1
- package/dist/store.d.ts +2 -16
- package/dist/store.d.ts.map +1 -1
- package/dist/types.d.ts +0 -25
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -6
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/dist/extraction-queue.d.ts +0 -72
- package/dist/extraction-queue.d.ts.map +0 -1
- package/dist/extraction-queue.js +0 -101
- package/dist/extraction-queue.js.map +0 -1
- package/dist/extractor.d.ts +0 -22
- package/dist/extractor.d.ts.map +0 -1
- package/dist/extractor.js +0 -188
- package/dist/extractor.js.map +0 -1
- package/dist/extractor.test.d.ts +0 -2
- package/dist/extractor.test.d.ts.map +0 -1
- package/dist/extractor.test.js +0 -158
- package/dist/extractor.test.js.map +0 -1
- package/dist/fact-search.d.ts +0 -32
- package/dist/fact-search.d.ts.map +0 -1
- package/dist/fact-search.js +0 -174
- package/dist/fact-search.js.map +0 -1
- package/dist/fact-search.test.d.ts +0 -12
- package/dist/fact-search.test.d.ts.map +0 -1
- package/dist/fact-search.test.js +0 -117
- package/dist/fact-search.test.js.map +0 -1
- package/dist/llm-client.d.ts +0 -59
- package/dist/llm-client.d.ts.map +0 -1
- package/dist/llm-client.js +0 -227
- package/dist/llm-client.js.map +0 -1
- package/dist/local-store.test.d.ts +0 -2
- package/dist/local-store.test.d.ts.map +0 -1
- package/dist/local-store.test.js +0 -146
- package/dist/local-store.test.js.map +0 -1
- package/dist/raw-log-search.test.d.ts +0 -12
- package/dist/raw-log-search.test.d.ts.map +0 -1
- package/dist/raw-log-search.test.js +0 -124
- package/dist/raw-log-search.test.js.map +0 -1
- package/dist/read-path.test.d.ts +0 -15
- package/dist/read-path.test.d.ts.map +0 -1
- package/dist/read-path.test.js +0 -393
- package/dist/read-path.test.js.map +0 -1
- package/dist/scorer.test.d.ts +0 -10
- package/dist/scorer.test.d.ts.map +0 -1
- package/dist/scorer.test.js +0 -169
- package/dist/scorer.test.js.map +0 -1
package/dist/local-store.js
CHANGED
|
@@ -4,35 +4,37 @@ import { mkdirSync } from 'node:fs';
|
|
|
4
4
|
import { join, dirname } from 'node:path';
|
|
5
5
|
import { openDb } from './wasm-db.js';
|
|
6
6
|
import { applySchema } from './schema.js';
|
|
7
|
-
import { extractFacts } from './extractor.js';
|
|
8
|
-
import { callLLMWithConfig } from './llm-client.js';
|
|
9
7
|
import { embed, warmEmbedder, warmReranker } from './embedder.js';
|
|
10
8
|
import { formatExchange } from './chunker.js';
|
|
11
9
|
import { searchRawLog } from './raw-log-search.js';
|
|
12
|
-
import {
|
|
13
|
-
import { ExtractionQueue } from './extraction-queue.js';
|
|
14
|
-
import { serializeEmbedding, deserializeEmbedding, cosineDistance } from './vector-search.js';
|
|
10
|
+
import { serializeEmbedding, deserializeEmbedding } from './vector-search.js';
|
|
15
11
|
/**
|
|
16
12
|
* Split text into overlapping child chunks for parent-child chunking (T-108).
|
|
17
13
|
* Target: ~250 chars per chunk with ~50 char overlap.
|
|
18
14
|
* Prefers sentence boundaries, falls back to word boundaries, hard-cuts at 300 chars max.
|
|
15
|
+
*
|
|
16
|
+
* Uses a generator to avoid materializing the full chunk array in memory,
|
|
17
|
+
* which prevents OOM crashes on large inputs (fix for splitIntoChildren array limit bug).
|
|
19
18
|
*/
|
|
20
|
-
function splitIntoChildren(text) {
|
|
19
|
+
function* splitIntoChildren(text) {
|
|
21
20
|
const TARGET_SIZE = 250;
|
|
22
21
|
const OVERLAP = 50;
|
|
23
22
|
const MAX_SIZE = 300;
|
|
24
23
|
const SENTENCE_ENDINGS = /[.!?]\s+/g;
|
|
25
24
|
if (text.length <= TARGET_SIZE) {
|
|
26
|
-
// Text is already small enough —
|
|
27
|
-
|
|
25
|
+
// Text is already small enough — yield as single child
|
|
26
|
+
if (text.trim().length > 0)
|
|
27
|
+
yield text;
|
|
28
|
+
return;
|
|
28
29
|
}
|
|
29
|
-
const chunks = [];
|
|
30
30
|
let pos = 0;
|
|
31
31
|
while (pos < text.length) {
|
|
32
32
|
let endPos = Math.min(pos + TARGET_SIZE, text.length);
|
|
33
33
|
// If we're at the end of the text, take the rest
|
|
34
34
|
if (endPos >= text.length) {
|
|
35
|
-
|
|
35
|
+
const last = text.slice(pos).trim();
|
|
36
|
+
if (last.length > 0)
|
|
37
|
+
yield last;
|
|
36
38
|
break;
|
|
37
39
|
}
|
|
38
40
|
// Try to find a sentence boundary within the target range
|
|
@@ -61,13 +63,14 @@ function splitIntoChildren(text) {
|
|
|
61
63
|
// No sentence boundary found — fall back to word boundary
|
|
62
64
|
endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
|
|
63
65
|
}
|
|
64
|
-
|
|
66
|
+
const chunk = text.slice(pos, endPos).trim();
|
|
67
|
+
if (chunk.length > 0)
|
|
68
|
+
yield chunk;
|
|
65
69
|
// Move position forward, with overlap
|
|
66
70
|
pos = endPos - OVERLAP;
|
|
67
71
|
if (pos < 0)
|
|
68
72
|
pos = endPos; // Safety: don't go negative
|
|
69
73
|
}
|
|
70
|
-
return chunks.filter(chunk => chunk.length > 0);
|
|
71
74
|
}
|
|
72
75
|
/**
|
|
73
76
|
* Find a word boundary near the target position.
|
|
@@ -94,22 +97,18 @@ function findWordBoundary(text, start, targetSize, maxSize) {
|
|
|
94
97
|
export class LocalStore {
|
|
95
98
|
#db;
|
|
96
99
|
#userId;
|
|
97
|
-
|
|
98
|
-
#extractionQueue;
|
|
99
|
-
// Backlog processor state (T-095: drain loops)
|
|
100
|
+
// Backlog processor state (T-095: drain loop)
|
|
100
101
|
#embedDrainStopped = false;
|
|
101
|
-
#extractDrainStopped = false;
|
|
102
102
|
#embedDrainPromise = null;
|
|
103
|
-
#extractDrainPromise = null;
|
|
104
103
|
#embedIdleMs;
|
|
105
|
-
#extractIdleMs;
|
|
106
|
-
#extractConcurrency;
|
|
107
|
-
#retryBackoffMs;
|
|
108
|
-
#extractFn;
|
|
109
|
-
// T-096: In-memory embedding cache for vec_facts (eliminates 292ms SQLite load on each query)
|
|
110
|
-
#embeddingCache = [];
|
|
111
104
|
// T-103: In-memory embedding cache for vec_raw_log (eliminates ~3,700ms SQLite load on each query)
|
|
112
105
|
#rawLogEmbeddingCache = [];
|
|
106
|
+
// FIX 3: WAL checkpoint throttling to prevent unbounded WAL growth
|
|
107
|
+
#lastCheckpoint = Date.now();
|
|
108
|
+
#checkpointIntervalMs = 60000; // Checkpoint every minute
|
|
109
|
+
// FIX 4: Health check to detect stuck drain loops
|
|
110
|
+
#lastActivityTimestamp = Date.now();
|
|
111
|
+
#healthCheckInterval = null;
|
|
113
112
|
/** Expose database for plugin use (e.g., NudgeManager) */
|
|
114
113
|
get db() {
|
|
115
114
|
return this.#db;
|
|
@@ -118,21 +117,11 @@ export class LocalStore {
|
|
|
118
117
|
get userId() {
|
|
119
118
|
return this.#userId;
|
|
120
119
|
}
|
|
121
|
-
|
|
122
|
-
get extractionQueue() {
|
|
123
|
-
return this.#extractionQueue;
|
|
124
|
-
}
|
|
125
|
-
constructor(db, userId, llmConfig, extractionQueue, extractFn, backlog) {
|
|
120
|
+
constructor(db, userId, backlog) {
|
|
126
121
|
this.#db = db;
|
|
127
122
|
this.#userId = userId;
|
|
128
|
-
|
|
129
|
-
this.#extractionQueue = extractionQueue;
|
|
130
|
-
this.#extractFn = extractFn;
|
|
131
|
-
// Initialize backlog processor config — defaults run as fast as possible with concurrency.
|
|
123
|
+
// Initialize backlog processor config
|
|
132
124
|
this.#embedIdleMs = backlog?.embedIdleMs ?? 5000;
|
|
133
|
-
this.#extractIdleMs = backlog?.extractIdleMs ?? 5000;
|
|
134
|
-
this.#extractConcurrency = backlog?.concurrency ?? 5;
|
|
135
|
-
this.#retryBackoffMs = backlog?.retryBackoffMs ?? 2000;
|
|
136
125
|
}
|
|
137
126
|
/**
|
|
138
127
|
* Create a new LocalStore instance (async factory).
|
|
@@ -141,66 +130,19 @@ export class LocalStore {
|
|
|
141
130
|
static async create(options = {}) {
|
|
142
131
|
const dbPath = options.dbPath ?? join(homedir(), '.plumb', 'memory.db');
|
|
143
132
|
const userId = options.userId ?? 'default';
|
|
144
|
-
const llmConfig = options.llmConfig;
|
|
145
133
|
mkdirSync(dirname(dbPath), { recursive: true });
|
|
146
134
|
const db = await openDb(dbPath);
|
|
147
135
|
// Enable WAL mode and foreign keys
|
|
148
136
|
db.exec('PRAGMA journal_mode = WAL');
|
|
149
137
|
db.exec('PRAGMA foreign_keys = ON');
|
|
150
138
|
applySchema(db);
|
|
151
|
-
//
|
|
152
|
-
|
|
153
|
-
// Initialize extraction queue with deferred store lookup
|
|
154
|
-
// T-079: Wrapper handles extract_status updates on success/failure.
|
|
155
|
-
const extractFn = async (exchange, userId, sourceChunkId) => {
|
|
156
|
-
if (!storeRef)
|
|
157
|
-
throw new Error('Store not initialized');
|
|
158
|
-
const llmFn = llmConfig
|
|
159
|
-
? (prompt) => callLLMWithConfig(prompt, llmConfig)
|
|
160
|
-
: undefined;
|
|
161
|
-
try {
|
|
162
|
-
const facts = await extractFacts(exchange, userId, storeRef, llmFn, sourceChunkId);
|
|
163
|
-
// T-079: Update extract_status='done' on success.
|
|
164
|
-
const updateStmt = db.prepare(`
|
|
165
|
-
UPDATE raw_log SET extract_status = 'done' WHERE id = ?
|
|
166
|
-
`);
|
|
167
|
-
updateStmt.bind([sourceChunkId]);
|
|
168
|
-
updateStmt.step();
|
|
169
|
-
updateStmt.finalize();
|
|
170
|
-
return facts;
|
|
171
|
-
}
|
|
172
|
-
catch (err) {
|
|
173
|
-
// T-079: Update extract_status='failed' with error message.
|
|
174
|
-
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
175
|
-
const updateStmt = db.prepare(`
|
|
176
|
-
UPDATE raw_log SET extract_status = 'failed', extract_error = ? WHERE id = ?
|
|
177
|
-
`);
|
|
178
|
-
updateStmt.bind([errorMsg, sourceChunkId]);
|
|
179
|
-
updateStmt.step();
|
|
180
|
-
updateStmt.finalize();
|
|
181
|
-
// Re-throw so Promise.allSettled() in flush() sees the rejection.
|
|
182
|
-
throw err;
|
|
183
|
-
}
|
|
184
|
-
};
|
|
185
|
-
const extractionQueue = options.extractionQueue ?? new ExtractionQueue(extractFn);
|
|
186
|
-
// Create store and assign to ref
|
|
187
|
-
const store = new LocalStore(db, userId, llmConfig, extractionQueue, extractFn, options.backlog);
|
|
188
|
-
storeRef = store;
|
|
139
|
+
// Create store
|
|
140
|
+
const store = new LocalStore(db, userId, options.backlog);
|
|
189
141
|
// T-096: Warm embedder pipeline to eliminate 365ms cold-start on first query
|
|
190
142
|
await warmEmbedder();
|
|
191
143
|
// T-101: Warm reranker pipeline to eliminate ~200ms cold-start on first query
|
|
192
144
|
// (intentionally loads ~80MB model at init for consistent <250ms query performance)
|
|
193
145
|
await warmReranker();
|
|
194
|
-
// T-096: Load all vec_facts embeddings into in-memory cache (eliminates 292ms SQLite load per query)
|
|
195
|
-
const vecStmt = db.prepare(`SELECT rowid, embedding FROM vec_facts`);
|
|
196
|
-
while (vecStmt.step()) {
|
|
197
|
-
const row = vecStmt.get({});
|
|
198
|
-
store.#embeddingCache.push({
|
|
199
|
-
rowid: row.rowid,
|
|
200
|
-
embedding: deserializeEmbedding(row.embedding),
|
|
201
|
-
});
|
|
202
|
-
}
|
|
203
|
-
vecStmt.finalize();
|
|
204
146
|
// T-103/T-108: Load vec_raw_log embeddings for child rows only (eliminates ~3,700ms SQLite load per query)
|
|
205
147
|
// Child rows have parent_id IS NOT NULL. Parent rows are not embedded (embed_status='no_embed').
|
|
206
148
|
const rawLogVecStmt = db.prepare(`
|
|
@@ -219,139 +161,7 @@ export class LocalStore {
|
|
|
219
161
|
rawLogVecStmt.finalize();
|
|
220
162
|
return store;
|
|
221
163
|
}
|
|
222
|
-
async store(fact, sourceChunkId) {
|
|
223
|
-
// T-097: Cross-chunk fact deduplication — prevent storing duplicate facts across different chunks.
|
|
224
|
-
// A fact is considered a duplicate if it has the same subject+predicate and the object is either:
|
|
225
|
-
// 1. Identical (case-insensitive, normalized whitespace), OR
|
|
226
|
-
// 2. Semantically similar (cosine similarity >= 0.92 on embeddings)
|
|
227
|
-
//
|
|
228
|
-
// Pre-filter by subject+predicate via SQL (uses index, avoids full corpus scan).
|
|
229
|
-
const candidateStmt = this.#db.prepare(`
|
|
230
|
-
SELECT id, object, vec_rowid
|
|
231
|
-
FROM facts
|
|
232
|
-
WHERE user_id = ? AND subject = ? AND predicate = ? AND deleted_at IS NULL
|
|
233
|
-
`);
|
|
234
|
-
candidateStmt.bind([this.#userId, fact.subject, fact.predicate]);
|
|
235
|
-
const candidates = [];
|
|
236
|
-
while (candidateStmt.step()) {
|
|
237
|
-
candidates.push(candidateStmt.get({}));
|
|
238
|
-
}
|
|
239
|
-
candidateStmt.finalize();
|
|
240
|
-
// Helper: Normalize text for exact-match check (lowercase, trim, collapse multiple spaces)
|
|
241
|
-
const normalizeText = (text) => text.toLowerCase().trim().replace(/\s+/g, ' ');
|
|
242
|
-
const normalizedNewObject = normalizeText(fact.object);
|
|
243
|
-
// Check for exact object match first (avoids embedding call in the common case)
|
|
244
|
-
for (const candidate of candidates) {
|
|
245
|
-
if (normalizeText(candidate.object) === normalizedNewObject) {
|
|
246
|
-
// Exact duplicate found — return existing fact ID without inserting
|
|
247
|
-
return candidate.id;
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
// No exact match found. Now embed the new fact for semantic similarity check and insertion.
|
|
251
|
-
const text = `${fact.subject} ${fact.predicate} ${fact.object} ${fact.context ?? ''}`.trim();
|
|
252
|
-
const embedding = await embed(text);
|
|
253
|
-
const embeddingJson = serializeEmbedding(embedding);
|
|
254
|
-
// Check semantic similarity against candidates (only if we have candidates with embeddings)
|
|
255
|
-
if (candidates.length > 0) {
|
|
256
|
-
for (const candidate of candidates) {
|
|
257
|
-
if (candidate.vec_rowid === null)
|
|
258
|
-
continue;
|
|
259
|
-
// Find candidate embedding in in-memory cache (T-096)
|
|
260
|
-
const cachedEntry = this.#embeddingCache.find(entry => entry.rowid === candidate.vec_rowid);
|
|
261
|
-
if (!cachedEntry)
|
|
262
|
-
continue;
|
|
263
|
-
// Compute cosine similarity. Distance = 1 - similarity, so similarity >= 0.92 means distance <= 0.08.
|
|
264
|
-
const distance = cosineDistance(embedding, cachedEntry.embedding);
|
|
265
|
-
if (distance <= 0.08) {
|
|
266
|
-
// Semantically equivalent fact found — return existing ID without inserting
|
|
267
|
-
return candidate.id;
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
// No duplicate found (neither exact nor semantic) — proceed with normal insertion
|
|
272
|
-
const id = crypto.randomUUID();
|
|
273
|
-
// Begin transaction
|
|
274
|
-
this.#db.exec('BEGIN');
|
|
275
|
-
try {
|
|
276
|
-
// Insert fact (T-079: include source_chunk_id)
|
|
277
|
-
const factStmt = this.#db.prepare(`
|
|
278
|
-
INSERT INTO facts
|
|
279
|
-
(id, user_id, subject, predicate, object,
|
|
280
|
-
confidence, decay_rate, timestamp, source_session_id,
|
|
281
|
-
source_session_label, context, source_chunk_id)
|
|
282
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
283
|
-
`);
|
|
284
|
-
factStmt.bind([
|
|
285
|
-
id,
|
|
286
|
-
this.#userId,
|
|
287
|
-
fact.subject,
|
|
288
|
-
fact.predicate,
|
|
289
|
-
fact.object,
|
|
290
|
-
fact.confidence,
|
|
291
|
-
fact.decayRate,
|
|
292
|
-
fact.timestamp.toISOString(),
|
|
293
|
-
fact.sourceSessionId,
|
|
294
|
-
fact.sourceSessionLabel ?? null,
|
|
295
|
-
fact.context ?? null,
|
|
296
|
-
sourceChunkId ?? null,
|
|
297
|
-
]);
|
|
298
|
-
factStmt.step();
|
|
299
|
-
factStmt.finalize();
|
|
300
|
-
// Insert embedding into vec_facts (auto-assigned id).
|
|
301
|
-
const vecStmt = this.#db.prepare(`INSERT INTO vec_facts(embedding) VALUES (?)`);
|
|
302
|
-
vecStmt.bind([embeddingJson]);
|
|
303
|
-
vecStmt.step();
|
|
304
|
-
vecStmt.finalize();
|
|
305
|
-
const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
|
|
306
|
-
// Back-fill vec_rowid so fact-search can join without a mapping table.
|
|
307
|
-
const updateStmt = this.#db.prepare(`UPDATE facts SET vec_rowid = ? WHERE id = ?`);
|
|
308
|
-
updateStmt.bind([vecRowid, id]);
|
|
309
|
-
updateStmt.step();
|
|
310
|
-
updateStmt.finalize();
|
|
311
|
-
this.#db.exec('COMMIT');
|
|
312
|
-
// T-096: Append new embedding to in-memory cache
|
|
313
|
-
this.#embeddingCache.push({ rowid: vecRowid, embedding });
|
|
314
|
-
}
|
|
315
|
-
catch (err) {
|
|
316
|
-
this.#db.exec('ROLLBACK');
|
|
317
|
-
throw err;
|
|
318
|
-
}
|
|
319
|
-
return id;
|
|
320
|
-
}
|
|
321
|
-
async search(query, limit = 20) {
|
|
322
|
-
// T-096: Pass in-memory embedding cache to searchFacts (eliminates 292ms SQLite load per query)
|
|
323
|
-
return searchFacts(this.#db, this.#userId, query, limit, this.#embeddingCache);
|
|
324
|
-
}
|
|
325
|
-
async delete(id) {
|
|
326
|
-
// T-096: Get vec_rowid before soft-deleting so we can remove from cache
|
|
327
|
-
const vecRowidStmt = this.#db.prepare(`
|
|
328
|
-
SELECT vec_rowid FROM facts WHERE id = ? AND user_id = ?
|
|
329
|
-
`);
|
|
330
|
-
vecRowidStmt.bind([id, this.#userId]);
|
|
331
|
-
vecRowidStmt.step();
|
|
332
|
-
const vecRowid = vecRowidStmt.get(0);
|
|
333
|
-
vecRowidStmt.finalize();
|
|
334
|
-
// Soft delete only — never hard delete.
|
|
335
|
-
const stmt = this.#db.prepare(`
|
|
336
|
-
UPDATE facts SET deleted_at = ? WHERE id = ? AND user_id = ?
|
|
337
|
-
`);
|
|
338
|
-
stmt.bind([new Date().toISOString(), id, this.#userId]);
|
|
339
|
-
stmt.step();
|
|
340
|
-
stmt.finalize();
|
|
341
|
-
// T-096: Remove from in-memory embedding cache
|
|
342
|
-
if (vecRowid !== null) {
|
|
343
|
-
const cacheIdx = this.#embeddingCache.findIndex(entry => entry.rowid === vecRowid);
|
|
344
|
-
if (cacheIdx !== -1) {
|
|
345
|
-
this.#embeddingCache.splice(cacheIdx, 1);
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
164
|
async status() {
|
|
350
|
-
const factStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM facts WHERE user_id = ? AND deleted_at IS NULL`);
|
|
351
|
-
factStmt.bind([this.#userId]);
|
|
352
|
-
factStmt.step();
|
|
353
|
-
const factCount = factStmt.get(0);
|
|
354
|
-
factStmt.finalize();
|
|
355
165
|
const rawLogStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM raw_log WHERE user_id = ?`);
|
|
356
166
|
rawLogStmt.bind([this.#userId]);
|
|
357
167
|
rawLogStmt.step();
|
|
@@ -365,7 +175,6 @@ export class LocalStore {
|
|
|
365
175
|
const pageCount = this.#db.selectValue('PRAGMA page_count');
|
|
366
176
|
const pageSize = this.#db.selectValue('PRAGMA page_size');
|
|
367
177
|
return {
|
|
368
|
-
factCount,
|
|
369
178
|
rawLogCount,
|
|
370
179
|
lastIngestion: lastIngestionTs !== null ? new Date(lastIngestionTs) : null,
|
|
371
180
|
storageBytes: pageCount * pageSize,
|
|
@@ -376,9 +185,6 @@ export class LocalStore {
|
|
|
376
185
|
const chunkText = formatExchange(exchange);
|
|
377
186
|
// Compute content hash for deduplication (scoped per userId).
|
|
378
187
|
const contentHash = createHash('sha256').update(chunkText).digest('hex');
|
|
379
|
-
// T-108: Parent-child chunking — don't embed parent, only children.
|
|
380
|
-
// Parent extract_status: 'no_llm' if no config, otherwise 'pending' (extraction runs on parent only).
|
|
381
|
-
const extractStatus = this.#llmConfig ? 'pending' : 'no_llm';
|
|
382
188
|
// Attempt insert — catch UNIQUE constraint violations (duplicate content_hash).
|
|
383
189
|
try {
|
|
384
190
|
this.#db.exec('BEGIN');
|
|
@@ -387,8 +193,8 @@ export class LocalStore {
|
|
|
387
193
|
INSERT INTO raw_log
|
|
388
194
|
(id, user_id, session_id, session_label,
|
|
389
195
|
user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
|
|
390
|
-
embed_status, embed_error, embed_model,
|
|
391
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
|
|
196
|
+
embed_status, embed_error, embed_model, parent_id)
|
|
197
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
392
198
|
`);
|
|
393
199
|
rawLogStmt.bind([
|
|
394
200
|
rawLogId,
|
|
@@ -405,17 +211,14 @@ export class LocalStore {
|
|
|
405
211
|
'no_embed', // Parent is not embedded (T-108)
|
|
406
212
|
null,
|
|
407
213
|
null,
|
|
408
|
-
extractStatus,
|
|
409
214
|
null, // parent_id=NULL for parent rows
|
|
410
215
|
]);
|
|
411
216
|
rawLogStmt.step();
|
|
412
217
|
rawLogStmt.finalize();
|
|
413
218
|
// T-108: Split parent into child chunks and embed each child.
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
if (!childText)
|
|
418
|
-
continue;
|
|
219
|
+
// splitIntoChildren is a generator — iterate lazily to avoid OOM on large inputs.
|
|
220
|
+
let i = 0;
|
|
221
|
+
for (const childText of splitIntoChildren(chunkText)) {
|
|
419
222
|
const childId = crypto.randomUUID();
|
|
420
223
|
let childEmbedding = null;
|
|
421
224
|
let childEmbeddingJson = null;
|
|
@@ -438,8 +241,8 @@ export class LocalStore {
|
|
|
438
241
|
INSERT INTO raw_log
|
|
439
242
|
(id, user_id, session_id, session_label,
|
|
440
243
|
user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
|
|
441
|
-
embed_status, embed_error, embed_model,
|
|
442
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
|
|
244
|
+
embed_status, embed_error, embed_model, parent_id)
|
|
245
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
443
246
|
`);
|
|
444
247
|
childStmt.bind([
|
|
445
248
|
childId,
|
|
@@ -456,7 +259,6 @@ export class LocalStore {
|
|
|
456
259
|
childEmbedStatus,
|
|
457
260
|
childEmbedError,
|
|
458
261
|
childEmbedModel,
|
|
459
|
-
'child', // T-108: Mark as 'child' to prevent extraction
|
|
460
262
|
rawLogId, // parent_id points to parent
|
|
461
263
|
]);
|
|
462
264
|
childStmt.step();
|
|
@@ -476,6 +278,7 @@ export class LocalStore {
|
|
|
476
278
|
// T-103: Append child embedding to in-memory cache
|
|
477
279
|
this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding: childEmbedding });
|
|
478
280
|
}
|
|
281
|
+
i++;
|
|
479
282
|
}
|
|
480
283
|
this.#db.exec('COMMIT');
|
|
481
284
|
}
|
|
@@ -483,26 +286,17 @@ export class LocalStore {
|
|
|
483
286
|
this.#db.exec('ROLLBACK');
|
|
484
287
|
// Check for SQLite UNIQUE constraint error on content_hash.
|
|
485
288
|
if (err instanceof Error && err.message.includes('UNIQUE constraint')) {
|
|
486
|
-
// Duplicate content — skip ingestion
|
|
289
|
+
// Duplicate content — skip ingestion.
|
|
487
290
|
return {
|
|
488
291
|
rawLogId: '',
|
|
489
|
-
factsExtracted: 0,
|
|
490
|
-
factIds: [],
|
|
491
292
|
skipped: true,
|
|
492
293
|
};
|
|
493
294
|
}
|
|
494
295
|
// Re-throw other errors (e.g., real DB issues).
|
|
495
296
|
throw err;
|
|
496
297
|
}
|
|
497
|
-
// Layer 2: enqueue exchange for batched fact extraction (T-071) only if LLM config is present.
|
|
498
|
-
// If no LLM config, extract_status is already set to 'no_llm', so skip enqueue.
|
|
499
|
-
if (this.#llmConfig) {
|
|
500
|
-
this.#extractionQueue.enqueue(exchange, this.#userId, rawLogId);
|
|
501
|
-
}
|
|
502
298
|
return {
|
|
503
299
|
rawLogId,
|
|
504
|
-
factsExtracted: 0,
|
|
505
|
-
factIds: [],
|
|
506
300
|
};
|
|
507
301
|
}
|
|
508
302
|
/**
|
|
@@ -513,146 +307,11 @@ export class LocalStore {
|
|
|
513
307
|
// T-103: Pass in-memory embedding cache to searchRawLog (eliminates ~3,700ms SQLite load per query)
|
|
514
308
|
return searchRawLog(this.#db, this.#userId, query, limit, this.#rawLogEmbeddingCache);
|
|
515
309
|
}
|
|
516
|
-
/**
|
|
517
|
-
* Wait for all queued fact extractions to complete.
|
|
518
|
-
* Call this before close() to ensure all async work is done.
|
|
519
|
-
* Delegates to ExtractionQueue.flush().
|
|
520
|
-
*/
|
|
521
|
-
async drain() {
|
|
522
|
-
await this.#extractionQueue.flush();
|
|
523
|
-
}
|
|
524
|
-
/**
|
|
525
|
-
* Re-extract facts for orphaned raw_log chunks (chunks with no corresponding facts).
|
|
526
|
-
*
|
|
527
|
-
* This is useful when fact extraction failed during initial ingest (e.g., missing API key,
|
|
528
|
-
* rate limits, crashes). Re-running the normal seeder won't help because content-hash dedup
|
|
529
|
-
* skips already-ingested chunks before reaching the extraction phase.
|
|
530
|
-
*
|
|
531
|
-
* This method directly calls extractFacts() for each orphaned chunk, bypassing the dedup gate.
|
|
532
|
-
*
|
|
533
|
-
* @param throttleMs - Delay between extractions (default 1000ms) to stay under rate limits
|
|
534
|
-
* @returns Statistics: orphansFound, factsCreated
|
|
535
|
-
*/
|
|
536
|
-
async reextractOrphans(throttleMs = 1000) {
|
|
537
|
-
// Query for raw_log entries with no corresponding facts.
|
|
538
|
-
const stmt = this.#db.prepare(`
|
|
539
|
-
SELECT
|
|
540
|
-
id,
|
|
541
|
-
user_id AS userId,
|
|
542
|
-
session_id AS sessionId,
|
|
543
|
-
session_label AS sessionLabel,
|
|
544
|
-
user_message AS userMessage,
|
|
545
|
-
agent_response AS agentResponse,
|
|
546
|
-
timestamp,
|
|
547
|
-
source
|
|
548
|
-
FROM raw_log
|
|
549
|
-
WHERE user_id = ?
|
|
550
|
-
AND NOT EXISTS (
|
|
551
|
-
SELECT 1 FROM facts
|
|
552
|
-
WHERE facts.source_session_id = raw_log.session_id
|
|
553
|
-
)
|
|
554
|
-
ORDER BY timestamp ASC
|
|
555
|
-
`);
|
|
556
|
-
stmt.bind([this.#userId]);
|
|
557
|
-
const orphanRows = [];
|
|
558
|
-
while (stmt.step()) {
|
|
559
|
-
const row = stmt.get({});
|
|
560
|
-
orphanRows.push(row);
|
|
561
|
-
}
|
|
562
|
-
stmt.finalize();
|
|
563
|
-
const orphansFound = orphanRows.length;
|
|
564
|
-
if (orphansFound === 0) {
|
|
565
|
-
return { orphansFound: 0, factsCreated: 0 };
|
|
566
|
-
}
|
|
567
|
-
let factsCreated = 0;
|
|
568
|
-
for (let i = 0; i < orphanRows.length; i++) {
|
|
569
|
-
const row = orphanRows[i];
|
|
570
|
-
if (!row)
|
|
571
|
-
continue;
|
|
572
|
-
// Reconstruct MessageExchange from raw_log data
|
|
573
|
-
const exchange = {
|
|
574
|
-
userMessage: row.userMessage,
|
|
575
|
-
agentResponse: row.agentResponse,
|
|
576
|
-
timestamp: new Date(row.timestamp),
|
|
577
|
-
source: row.source,
|
|
578
|
-
sessionId: row.sessionId,
|
|
579
|
-
...(row.sessionLabel !== null ? { sessionLabel: row.sessionLabel } : {}),
|
|
580
|
-
};
|
|
581
|
-
// Extract facts directly (bypasses ingest dedup gate)
|
|
582
|
-
try {
|
|
583
|
-
const llmFn = this.#llmConfig
|
|
584
|
-
? (prompt) => callLLMWithConfig(prompt, this.#llmConfig)
|
|
585
|
-
: undefined;
|
|
586
|
-
const facts = await extractFacts(exchange, this.#userId, this, llmFn);
|
|
587
|
-
factsCreated += facts.length;
|
|
588
|
-
console.log(` ✅ [${i + 1}/${orphansFound}] Re-extracted ${facts.length} fact(s) from session ${row.sessionId}`);
|
|
589
|
-
}
|
|
590
|
-
catch (err) {
|
|
591
|
-
console.error(` ❌ [${i + 1}/${orphansFound}] Failed to re-extract facts from session ${row.sessionId}:`, err);
|
|
592
|
-
}
|
|
593
|
-
// Throttle to stay under rate limits (skip delay after last item)
|
|
594
|
-
if (i < orphanRows.length - 1) {
|
|
595
|
-
await new Promise(resolve => setTimeout(resolve, throttleMs));
|
|
596
|
-
}
|
|
597
|
-
}
|
|
598
|
-
return { orphansFound, factsCreated };
|
|
599
|
-
}
|
|
600
|
-
/**
|
|
601
|
-
* Get top subjects by fact count (for plumb status command).
|
|
602
|
-
* Returns subjects ordered by number of facts (non-deleted only).
|
|
603
|
-
*/
|
|
604
|
-
topSubjects(userId, limit = 5) {
|
|
605
|
-
const stmt = this.#db.prepare(`
|
|
606
|
-
SELECT subject, COUNT(*) as count
|
|
607
|
-
FROM facts
|
|
608
|
-
WHERE user_id = ? AND deleted_at IS NULL
|
|
609
|
-
GROUP BY subject
|
|
610
|
-
ORDER BY count DESC
|
|
611
|
-
LIMIT ?
|
|
612
|
-
`);
|
|
613
|
-
stmt.bind([userId, limit]);
|
|
614
|
-
const results = [];
|
|
615
|
-
while (stmt.step()) {
|
|
616
|
-
results.push(stmt.get({}));
|
|
617
|
-
}
|
|
618
|
-
stmt.finalize();
|
|
619
|
-
return results;
|
|
620
|
-
}
|
|
621
310
|
/**
|
|
622
311
|
* Export all data for a user (for plumb export command).
|
|
623
312
|
* Returns raw database rows (no vector data).
|
|
624
|
-
* Includes soft-deleted facts for transparency.
|
|
625
313
|
*/
|
|
626
314
|
exportAll(userId) {
|
|
627
|
-
// Export all non-deleted facts only (soft-deleted facts are excluded).
|
|
628
|
-
const factStmt = this.#db.prepare(`
|
|
629
|
-
SELECT
|
|
630
|
-
id,
|
|
631
|
-
user_id AS userId,
|
|
632
|
-
subject,
|
|
633
|
-
predicate,
|
|
634
|
-
object,
|
|
635
|
-
confidence,
|
|
636
|
-
decay_rate AS decayRate,
|
|
637
|
-
timestamp,
|
|
638
|
-
source_session_id AS sourceSessionId,
|
|
639
|
-
source_session_label AS sourceSessionLabel,
|
|
640
|
-
context,
|
|
641
|
-
deleted_at AS deletedAt
|
|
642
|
-
FROM facts
|
|
643
|
-
WHERE user_id = ? AND deleted_at IS NULL
|
|
644
|
-
ORDER BY timestamp DESC
|
|
645
|
-
`);
|
|
646
|
-
factStmt.bind([userId]);
|
|
647
|
-
const factRows = [];
|
|
648
|
-
while (factStmt.step()) {
|
|
649
|
-
factRows.push(factStmt.get({}));
|
|
650
|
-
}
|
|
651
|
-
factStmt.finalize();
|
|
652
|
-
const facts = factRows.map((row) => ({
|
|
653
|
-
...row,
|
|
654
|
-
deleted: false, // All exported facts are non-deleted
|
|
655
|
-
}));
|
|
656
315
|
// Export all raw_log entries (no vector data).
|
|
657
316
|
const rawLogStmt = this.#db.prepare(`
|
|
658
317
|
SELECT
|
|
@@ -669,9 +328,7 @@ export class LocalStore {
|
|
|
669
328
|
content_hash AS contentHash,
|
|
670
329
|
embed_status AS embedStatus,
|
|
671
330
|
embed_error AS embedError,
|
|
672
|
-
embed_model AS embedModel
|
|
673
|
-
extract_status AS extractStatus,
|
|
674
|
-
extract_error AS extractError
|
|
331
|
+
embed_model AS embedModel
|
|
675
332
|
FROM raw_log
|
|
676
333
|
WHERE user_id = ?
|
|
677
334
|
ORDER BY timestamp DESC
|
|
@@ -682,12 +339,11 @@ export class LocalStore {
|
|
|
682
339
|
rawLog.push(rawLogStmt.get({}));
|
|
683
340
|
}
|
|
684
341
|
rawLogStmt.finalize();
|
|
685
|
-
return {
|
|
342
|
+
return { rawLog };
|
|
686
343
|
}
|
|
687
344
|
/**
|
|
688
|
-
* Start background backlog processor drain
|
|
689
|
-
* Launches continuous async
|
|
690
|
-
* Call this after store.extractionQueue.start() in plugin-module.ts.
|
|
345
|
+
* Start background backlog processor drain loop (T-095).
|
|
346
|
+
* Launches continuous async loop for embed backlog.
|
|
691
347
|
*/
|
|
692
348
|
startBacklogProcessor() {
|
|
693
349
|
// Start embed drain loop
|
|
@@ -695,32 +351,36 @@ export class LocalStore {
|
|
|
695
351
|
this.#embedDrainStopped = false;
|
|
696
352
|
this.#embedDrainPromise = this.#embedDrainLoop();
|
|
697
353
|
}
|
|
698
|
-
//
|
|
699
|
-
if (this.#
|
|
700
|
-
this.#
|
|
701
|
-
|
|
354
|
+
// FIX 4: Health check - detect runaway loop that isn't processing or stopping
|
|
355
|
+
if (this.#healthCheckInterval === null) {
|
|
356
|
+
this.#healthCheckInterval = setInterval(() => {
|
|
357
|
+
const idleTime = Date.now() - this.#lastActivityTimestamp;
|
|
358
|
+
const MAX_IDLE_TIME = 300000; // 5 minutes of no activity
|
|
359
|
+
// If loop is running but idle for too long, force stop
|
|
360
|
+
if (idleTime > MAX_IDLE_TIME && !this.#embedDrainStopped) {
|
|
361
|
+
console.warn(`[plumb] Drain loop idle for ${Math.round(idleTime / 1000)}s, forcing stop`);
|
|
362
|
+
void this.stopBacklogProcessor();
|
|
363
|
+
}
|
|
364
|
+
}, 60000); // Check every minute
|
|
702
365
|
}
|
|
703
366
|
}
|
|
704
367
|
/**
|
|
705
|
-
* Stop background backlog processor drain
|
|
706
|
-
* Signals
|
|
707
|
-
* Call this alongside store.extractionQueue.stop() in session_end and process exit handlers.
|
|
368
|
+
* Stop background backlog processor drain loop (T-095).
|
|
369
|
+
* Signals loop to stop and awaits in-flight work.
|
|
708
370
|
*/
|
|
709
371
|
async stopBacklogProcessor() {
|
|
710
|
-
//
|
|
372
|
+
// FIX 4: Clear health check interval
|
|
373
|
+
if (this.#healthCheckInterval !== null) {
|
|
374
|
+
clearInterval(this.#healthCheckInterval);
|
|
375
|
+
this.#healthCheckInterval = null;
|
|
376
|
+
}
|
|
377
|
+
// Signal loop to stop
|
|
711
378
|
this.#embedDrainStopped = true;
|
|
712
|
-
|
|
713
|
-
// Await drain loop Promises (waits for in-flight work to complete)
|
|
714
|
-
const promises = [];
|
|
379
|
+
// Await drain loop Promise (waits for in-flight work to complete)
|
|
715
380
|
if (this.#embedDrainPromise !== null) {
|
|
716
|
-
|
|
381
|
+
await this.#embedDrainPromise;
|
|
717
382
|
this.#embedDrainPromise = null;
|
|
718
383
|
}
|
|
719
|
-
if (this.#extractDrainPromise !== null) {
|
|
720
|
-
promises.push(this.#extractDrainPromise);
|
|
721
|
-
this.#extractDrainPromise = null;
|
|
722
|
-
}
|
|
723
|
-
await Promise.all(promises);
|
|
724
384
|
}
|
|
725
385
|
/**
|
|
726
386
|
* Continuous drain loop for embed backlog (T-095).
|
|
@@ -728,12 +388,28 @@ export class LocalStore {
|
|
|
728
388
|
* Only sleeps when the queue is empty.
|
|
729
389
|
*/
|
|
730
390
|
async #embedDrainLoop() {
|
|
391
|
+
// FIX 2: Safety counter to detect infinite loops
|
|
392
|
+
let consecutiveEmptyBatches = 0;
|
|
393
|
+
const MAX_EMPTY_BATCHES = 1000; // Safety limit: stop after many empty iterations
|
|
731
394
|
while (!this.#embedDrainStopped) {
|
|
732
395
|
const processed = await this.#processEmbedBatch();
|
|
733
396
|
if (processed === 0) {
|
|
397
|
+
consecutiveEmptyBatches++;
|
|
398
|
+
// FIX 2: Safety check - if idle too long, verify stop flag
|
|
399
|
+
if (consecutiveEmptyBatches >= MAX_EMPTY_BATCHES) {
|
|
400
|
+
console.warn('[plumb] Embed drain loop: hit safety limit, verifying stop flag');
|
|
401
|
+
if (this.#embedDrainStopped)
|
|
402
|
+
break;
|
|
403
|
+
consecutiveEmptyBatches = 0; // Reset and continue
|
|
404
|
+
}
|
|
734
405
|
// Queue is empty — sleep before checking again
|
|
735
406
|
await new Promise(resolve => setTimeout(resolve, this.#embedIdleMs));
|
|
736
407
|
}
|
|
408
|
+
else {
|
|
409
|
+
consecutiveEmptyBatches = 0;
|
|
410
|
+
// FIX 4: Update activity timestamp
|
|
411
|
+
this.#lastActivityTimestamp = Date.now();
|
|
412
|
+
}
|
|
737
413
|
// If processed > 0: immediately loop to grab the next batch
|
|
738
414
|
}
|
|
739
415
|
}
|
|
@@ -799,78 +475,18 @@ export class LocalStore {
|
|
|
799
475
|
updateStmt.finalize();
|
|
800
476
|
}
|
|
801
477
|
}));
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
* Continuous drain loop for extract backlog (T-095).
|
|
806
|
-
* Fetches up to `concurrency` rows and processes them concurrently with 429 backoff.
|
|
807
|
-
* Only sleeps when the queue is empty.
|
|
808
|
-
*/
|
|
809
|
-
async #extractDrainLoop() {
|
|
810
|
-
while (!this.#extractDrainStopped) {
|
|
811
|
-
// Fetch pending rows (up to concurrency limit)
|
|
812
|
-
const stmt = this.#db.prepare(`
|
|
813
|
-
SELECT id, user_message, agent_response, timestamp, session_id, session_label, source
|
|
814
|
-
FROM raw_log
|
|
815
|
-
WHERE user_id = ? AND extract_status = 'pending'
|
|
816
|
-
ORDER BY rowid ASC
|
|
817
|
-
LIMIT ?
|
|
818
|
-
`);
|
|
819
|
-
stmt.bind([this.#userId, this.#extractConcurrency]);
|
|
820
|
-
const pendingRows = [];
|
|
821
|
-
while (stmt.step()) {
|
|
822
|
-
pendingRows.push(stmt.get({}));
|
|
823
|
-
}
|
|
824
|
-
stmt.finalize();
|
|
825
|
-
if (pendingRows.length === 0) {
|
|
826
|
-
// Queue is empty — sleep before checking again
|
|
827
|
-
await new Promise(resolve => setTimeout(resolve, this.#extractIdleMs));
|
|
828
|
-
continue;
|
|
829
|
-
}
|
|
830
|
-
// Process rows concurrently with 429 backoff
|
|
831
|
-
await Promise.all(pendingRows.map(async (row) => {
|
|
832
|
-
const exchange = {
|
|
833
|
-
userMessage: row.user_message,
|
|
834
|
-
agentResponse: row.agent_response,
|
|
835
|
-
timestamp: new Date(row.timestamp),
|
|
836
|
-
source: row.source,
|
|
837
|
-
sessionId: row.session_id,
|
|
838
|
-
...(row.session_label !== null ? { sessionLabel: row.session_label } : {}),
|
|
839
|
-
};
|
|
840
|
-
await this.#extractRowWithBackoff(exchange, row.id);
|
|
841
|
-
}));
|
|
842
|
-
}
|
|
843
|
-
}
|
|
844
|
-
/**
|
|
845
|
-
* Extract facts for one row with exponential backoff on 429 errors (T-095).
|
|
846
|
-
* Calls extractFn directly (bypasses ExtractionQueue for backlog processing).
|
|
847
|
-
* extractFn already handles DB status updates (extract_status=done/failed).
|
|
848
|
-
*/
|
|
849
|
-
async #extractRowWithBackoff(exchange, sourceChunkId) {
|
|
850
|
-
const MAX_RETRIES = 4;
|
|
851
|
-
let attempt = 0;
|
|
852
|
-
while (attempt <= MAX_RETRIES) {
|
|
478
|
+
// FIX 3: Periodic WAL checkpoint to prevent unbounded growth
|
|
479
|
+
const now = Date.now();
|
|
480
|
+
if (now - this.#lastCheckpoint > this.#checkpointIntervalMs) {
|
|
853
481
|
try {
|
|
854
|
-
|
|
855
|
-
|
|
482
|
+
this.#db.exec('PRAGMA wal_checkpoint(PASSIVE)');
|
|
483
|
+
this.#lastCheckpoint = now;
|
|
856
484
|
}
|
|
857
|
-
catch (
|
|
858
|
-
|
|
859
|
-
const is429 = errorMsg.toLowerCase().includes('429') ||
|
|
860
|
-
errorMsg.toLowerCase().includes('rate') ||
|
|
861
|
-
errorMsg.toLowerCase().includes('quota');
|
|
862
|
-
if (is429 && attempt < MAX_RETRIES) {
|
|
863
|
-
// Exponential backoff: 2s, 4s, 8s, 16s
|
|
864
|
-
const backoffMs = this.#retryBackoffMs * Math.pow(2, attempt);
|
|
865
|
-
await new Promise(resolve => setTimeout(resolve, backoffMs));
|
|
866
|
-
attempt++;
|
|
867
|
-
}
|
|
868
|
-
else {
|
|
869
|
-
// Not a 429, or max retries reached — extractFn already marked extract_status='failed'
|
|
870
|
-
return;
|
|
871
|
-
}
|
|
485
|
+
catch (e) {
|
|
486
|
+
console.warn('[plumb] WAL checkpoint failed:', e);
|
|
872
487
|
}
|
|
873
488
|
}
|
|
489
|
+
return pendingRows.length;
|
|
874
490
|
}
|
|
875
491
|
/** Close the database connection. Call when done (e.g. in tests). */
|
|
876
492
|
close() {
|