@getplumb/core 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +2 -2
  2. package/dist/embedder.d.ts +16 -2
  3. package/dist/embedder.d.ts.map +1 -1
  4. package/dist/embedder.js +23 -4
  5. package/dist/embedder.js.map +1 -1
  6. package/dist/extraction-queue.d.ts +13 -3
  7. package/dist/extraction-queue.d.ts.map +1 -1
  8. package/dist/extraction-queue.js +21 -4
  9. package/dist/extraction-queue.js.map +1 -1
  10. package/dist/extractor.d.ts +2 -1
  11. package/dist/extractor.d.ts.map +1 -1
  12. package/dist/extractor.js +106 -7
  13. package/dist/extractor.js.map +1 -1
  14. package/dist/extractor.test.d.ts +2 -0
  15. package/dist/extractor.test.d.ts.map +1 -0
  16. package/dist/extractor.test.js +158 -0
  17. package/dist/extractor.test.js.map +1 -0
  18. package/dist/fact-search.d.ts +9 -5
  19. package/dist/fact-search.d.ts.map +1 -1
  20. package/dist/fact-search.js +25 -16
  21. package/dist/fact-search.js.map +1 -1
  22. package/dist/fact-search.test.d.ts +12 -0
  23. package/dist/fact-search.test.d.ts.map +1 -0
  24. package/dist/fact-search.test.js +117 -0
  25. package/dist/fact-search.test.js.map +1 -0
  26. package/dist/index.d.ts +2 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +1 -0
  29. package/dist/index.js.map +1 -1
  30. package/dist/llm-client.d.ts +11 -2
  31. package/dist/llm-client.d.ts.map +1 -1
  32. package/dist/llm-client.js +47 -3
  33. package/dist/llm-client.js.map +1 -1
  34. package/dist/local-store.d.ts +32 -1
  35. package/dist/local-store.d.ts.map +1 -1
  36. package/dist/local-store.js +510 -35
  37. package/dist/local-store.js.map +1 -1
  38. package/dist/local-store.test.d.ts +2 -0
  39. package/dist/local-store.test.d.ts.map +1 -0
  40. package/dist/local-store.test.js +146 -0
  41. package/dist/local-store.test.js.map +1 -0
  42. package/dist/raw-log-search.d.ts +9 -5
  43. package/dist/raw-log-search.d.ts.map +1 -1
  44. package/dist/raw-log-search.js +107 -29
  45. package/dist/raw-log-search.js.map +1 -1
  46. package/dist/raw-log-search.test.d.ts +12 -0
  47. package/dist/raw-log-search.test.d.ts.map +1 -0
  48. package/dist/raw-log-search.test.js +124 -0
  49. package/dist/raw-log-search.test.js.map +1 -0
  50. package/dist/read-path.test.d.ts +15 -0
  51. package/dist/read-path.test.d.ts.map +1 -0
  52. package/dist/read-path.test.js +393 -0
  53. package/dist/read-path.test.js.map +1 -0
  54. package/dist/schema.d.ts +2 -2
  55. package/dist/schema.d.ts.map +1 -1
  56. package/dist/schema.js +58 -1
  57. package/dist/schema.js.map +1 -1
  58. package/dist/scorer.test.d.ts +10 -0
  59. package/dist/scorer.test.d.ts.map +1 -0
  60. package/dist/scorer.test.js +169 -0
  61. package/dist/scorer.test.js.map +1 -0
  62. package/dist/store.d.ts +3 -1
  63. package/dist/store.d.ts.map +1 -1
  64. package/dist/wasm-db.d.ts +63 -8
  65. package/dist/wasm-db.d.ts.map +1 -1
  66. package/dist/wasm-db.js +124 -31
  67. package/dist/wasm-db.js.map +1 -1
  68. package/package.json +14 -2
@@ -6,17 +6,110 @@ import { openDb } from './wasm-db.js';
6
6
  import { applySchema } from './schema.js';
7
7
  import { extractFacts } from './extractor.js';
8
8
  import { callLLMWithConfig } from './llm-client.js';
9
- import { embed } from './embedder.js';
9
+ import { embed, warmEmbedder, warmReranker } from './embedder.js';
10
10
  import { formatExchange } from './chunker.js';
11
11
  import { searchRawLog } from './raw-log-search.js';
12
12
  import { searchFacts } from './fact-search.js';
13
13
  import { ExtractionQueue } from './extraction-queue.js';
14
- import { serializeEmbedding } from './vector-search.js';
14
+ import { serializeEmbedding, deserializeEmbedding, cosineDistance } from './vector-search.js';
15
/**
 * Split text into overlapping child chunks for parent-child chunking (T-108).
 * Target: ~250 chars per chunk with ~50 char overlap.
 * Prefers sentence boundaries, falls back to word boundaries, hard-cuts at 300 chars max.
 *
 * @param {string} text - The parent chunk text to split.
 * @returns {string[]} Non-empty child chunks, in document order.
 */
function splitIntoChildren(text) {
    const TARGET_SIZE = 250;
    const OVERLAP = 50;
    const MAX_SIZE = 300;
    const SENTENCE_ENDINGS = /[.!?]\s+/g;
    if (text.length <= TARGET_SIZE) {
        // Text is already small enough — return as single child
        return [text];
    }
    const chunks = [];
    let pos = 0;
    while (pos < text.length) {
        let endPos = Math.min(pos + TARGET_SIZE, text.length);
        // If we're at the end of the text, take the rest
        if (endPos >= text.length) {
            chunks.push(text.slice(pos));
            break;
        }
        // Try to find a sentence boundary within the target range
        const segment = text.slice(pos, Math.min(pos + MAX_SIZE, text.length));
        const sentenceMatches = Array.from(segment.matchAll(SENTENCE_ENDINGS));
        if (sentenceMatches.length > 0) {
            // Find the last sentence boundary at or before TARGET_SIZE
            let bestMatch = sentenceMatches[0]; // Safe: array is non-empty
            for (const match of sentenceMatches) {
                if (match.index !== undefined && match.index <= TARGET_SIZE) {
                    bestMatch = match;
                }
                else {
                    break;
                }
            }
            if (bestMatch.index !== undefined && bestMatch[0] !== undefined) {
                endPos = pos + bestMatch.index + bestMatch[0].length;
            }
            else {
                // Fall back to word boundary
                endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
            }
        }
        else {
            // No sentence boundary found — fall back to word boundary
            endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
        }
        chunks.push(text.slice(pos, endPos).trim());
        // Move position forward, with overlap.
        // BUG FIX: the overlap step must make strict forward progress. When a
        // sentence boundary lands within OVERLAP chars of the chunk start,
        // `endPos - OVERLAP <= pos` and the old `pos < 0` guard did not fire,
        // so the loop re-scanned the same position forever (infinite loop).
        // `endPos > prevPos` always holds (every boundary path advances at
        // least 1 char), so skipping the overlap guarantees termination.
        const prevPos = pos;
        pos = endPos - OVERLAP;
        if (pos <= prevPos) {
            pos = endPos; // Also subsumes the old "don't go negative" safety check
        }
    }
    return chunks.filter(chunk => chunk.length > 0);
}
/**
 * Find a word boundary near the target position.
 * Prefers breaking at TARGET_SIZE, but will extend up to MAX_SIZE if needed;
 * hard-cuts at MAX_SIZE when no whitespace exists in the window.
 *
 * @param {string} text - Full text being chunked.
 * @param {number} start - Absolute start index of the current chunk.
 * @param {number} targetSize - Preferred chunk length.
 * @param {number} maxSize - Hard upper bound on chunk length.
 * @returns {number} Absolute end index (exclusive) for the chunk.
 */
function findWordBoundary(text, start, targetSize, maxSize) {
    const targetPos = start + targetSize;
    const maxPos = Math.min(start + maxSize, text.length);
    // Look for whitespace near the target position
    let endPos = targetPos;
    // First try: find whitespace after targetPos
    for (let i = targetPos; i < maxPos; i++) {
        if (/\s/.test(text[i] ?? '')) {
            endPos = i + 1; // Include the whitespace
            break;
        }
    }
    // If we hit maxPos without finding whitespace, hard cut at maxPos
    if (endPos === targetPos && targetPos < maxPos) {
        endPos = maxPos;
    }
    return endPos;
}
15
94
  export class LocalStore {
16
95
  #db;
17
96
  #userId;
18
97
  #llmConfig;
19
98
  #extractionQueue;
99
+ // Backlog processor state (T-095: drain loops)
100
+ #embedDrainStopped = false;
101
+ #extractDrainStopped = false;
102
+ #embedDrainPromise = null;
103
+ #extractDrainPromise = null;
104
+ #embedIdleMs;
105
+ #extractIdleMs;
106
+ #extractConcurrency;
107
+ #retryBackoffMs;
108
+ #extractFn;
109
+ // T-096: In-memory embedding cache for vec_facts (eliminates 292ms SQLite load on each query)
110
+ #embeddingCache = [];
111
+ // T-103: In-memory embedding cache for vec_raw_log (eliminates ~3,700ms SQLite load on each query)
112
+ #rawLogEmbeddingCache = [];
20
113
  /** Expose database for plugin use (e.g., NudgeManager) */
21
114
  get db() {
22
115
  return this.#db;
@@ -29,11 +122,17 @@ export class LocalStore {
29
122
  get extractionQueue() {
30
123
  return this.#extractionQueue;
31
124
  }
32
- constructor(db, userId, llmConfig, extractionQueue) {
125
+ constructor(db, userId, llmConfig, extractionQueue, extractFn, backlog) {
33
126
  this.#db = db;
34
127
  this.#userId = userId;
35
128
  this.#llmConfig = llmConfig;
36
129
  this.#extractionQueue = extractionQueue;
130
+ this.#extractFn = extractFn;
131
+ // Initialize backlog processor config — defaults run as fast as possible with concurrency.
132
+ this.#embedIdleMs = backlog?.embedIdleMs ?? 5000;
133
+ this.#extractIdleMs = backlog?.extractIdleMs ?? 5000;
134
+ this.#extractConcurrency = backlog?.concurrency ?? 5;
135
+ this.#retryBackoffMs = backlog?.retryBackoffMs ?? 2000;
37
136
  }
38
137
  /**
39
138
  * Create a new LocalStore instance (async factory).
@@ -52,36 +151,135 @@ export class LocalStore {
52
151
  // Use a mutable cell to hold the store reference (needed for circular dependency)
53
152
  let storeRef = null;
54
153
  // Initialize extraction queue with deferred store lookup
55
- const extractFn = (exchange, userId) => {
154
+ // T-079: Wrapper handles extract_status updates on success/failure.
155
+ const extractFn = async (exchange, userId, sourceChunkId) => {
56
156
  if (!storeRef)
57
157
  throw new Error('Store not initialized');
58
158
  const llmFn = llmConfig
59
159
  ? (prompt) => callLLMWithConfig(prompt, llmConfig)
60
160
  : undefined;
61
- return extractFacts(exchange, userId, storeRef, llmFn);
161
+ try {
162
+ const facts = await extractFacts(exchange, userId, storeRef, llmFn, sourceChunkId);
163
+ // T-079: Update extract_status='done' on success.
164
+ const updateStmt = db.prepare(`
165
+ UPDATE raw_log SET extract_status = 'done' WHERE id = ?
166
+ `);
167
+ updateStmt.bind([sourceChunkId]);
168
+ updateStmt.step();
169
+ updateStmt.finalize();
170
+ return facts;
171
+ }
172
+ catch (err) {
173
+ // T-079: Update extract_status='failed' with error message.
174
+ const errorMsg = err instanceof Error ? err.message : String(err);
175
+ const updateStmt = db.prepare(`
176
+ UPDATE raw_log SET extract_status = 'failed', extract_error = ? WHERE id = ?
177
+ `);
178
+ updateStmt.bind([errorMsg, sourceChunkId]);
179
+ updateStmt.step();
180
+ updateStmt.finalize();
181
+ // Re-throw so Promise.allSettled() in flush() sees the rejection.
182
+ throw err;
183
+ }
62
184
  };
63
185
  const extractionQueue = options.extractionQueue ?? new ExtractionQueue(extractFn);
64
186
  // Create store and assign to ref
65
- const store = new LocalStore(db, userId, llmConfig, extractionQueue);
187
+ const store = new LocalStore(db, userId, llmConfig, extractionQueue, extractFn, options.backlog);
66
188
  storeRef = store;
189
+ // T-096: Warm embedder pipeline to eliminate 365ms cold-start on first query
190
+ await warmEmbedder();
191
+ // T-101: Warm reranker pipeline to eliminate ~200ms cold-start on first query
192
+ // (intentionally loads ~80MB model at init for consistent <250ms query performance)
193
+ await warmReranker();
194
+ // T-096: Load all vec_facts embeddings into in-memory cache (eliminates 292ms SQLite load per query)
195
+ const vecStmt = db.prepare(`SELECT rowid, embedding FROM vec_facts`);
196
+ while (vecStmt.step()) {
197
+ const row = vecStmt.get({});
198
+ store.#embeddingCache.push({
199
+ rowid: row.rowid,
200
+ embedding: deserializeEmbedding(row.embedding),
201
+ });
202
+ }
203
+ vecStmt.finalize();
204
+ // T-103/T-108: Load vec_raw_log embeddings for child rows only (eliminates ~3,700ms SQLite load per query)
205
+ // Child rows have parent_id IS NOT NULL. Parent rows are not embedded (embed_status='no_embed').
206
+ const rawLogVecStmt = db.prepare(`
207
+ SELECT v.rowid, v.embedding
208
+ FROM vec_raw_log v
209
+ JOIN raw_log r ON r.vec_rowid = v.rowid
210
+ WHERE r.parent_id IS NOT NULL
211
+ `);
212
+ while (rawLogVecStmt.step()) {
213
+ const row = rawLogVecStmt.get({});
214
+ store.#rawLogEmbeddingCache.push({
215
+ rowid: row.rowid,
216
+ embedding: deserializeEmbedding(row.embedding),
217
+ });
218
+ }
219
+ rawLogVecStmt.finalize();
67
220
  return store;
68
221
  }
69
- async store(fact) {
70
- const id = crypto.randomUUID();
71
- // Embed concatenated fact text for vector search.
222
+ async store(fact, sourceChunkId) {
223
+ // T-097: Cross-chunk fact deduplication — prevent storing duplicate facts across different chunks.
224
+ // A fact is considered a duplicate if it has the same subject+predicate and the object is either:
225
+ // 1. Identical (case-insensitive, normalized whitespace), OR
226
+ // 2. Semantically similar (cosine similarity >= 0.92 on embeddings)
227
+ //
228
+ // Pre-filter by subject+predicate via SQL (uses index, avoids full corpus scan).
229
+ const candidateStmt = this.#db.prepare(`
230
+ SELECT id, object, vec_rowid
231
+ FROM facts
232
+ WHERE user_id = ? AND subject = ? AND predicate = ? AND deleted_at IS NULL
233
+ `);
234
+ candidateStmt.bind([this.#userId, fact.subject, fact.predicate]);
235
+ const candidates = [];
236
+ while (candidateStmt.step()) {
237
+ candidates.push(candidateStmt.get({}));
238
+ }
239
+ candidateStmt.finalize();
240
+ // Helper: Normalize text for exact-match check (lowercase, trim, collapse multiple spaces)
241
+ const normalizeText = (text) => text.toLowerCase().trim().replace(/\s+/g, ' ');
242
+ const normalizedNewObject = normalizeText(fact.object);
243
+ // Check for exact object match first (avoids embedding call in the common case)
244
+ for (const candidate of candidates) {
245
+ if (normalizeText(candidate.object) === normalizedNewObject) {
246
+ // Exact duplicate found — return existing fact ID without inserting
247
+ return candidate.id;
248
+ }
249
+ }
250
+ // No exact match found. Now embed the new fact for semantic similarity check and insertion.
72
251
  const text = `${fact.subject} ${fact.predicate} ${fact.object} ${fact.context ?? ''}`.trim();
73
252
  const embedding = await embed(text);
74
253
  const embeddingJson = serializeEmbedding(embedding);
254
+ // Check semantic similarity against candidates (only if we have candidates with embeddings)
255
+ if (candidates.length > 0) {
256
+ for (const candidate of candidates) {
257
+ if (candidate.vec_rowid === null)
258
+ continue;
259
+ // Find candidate embedding in in-memory cache (T-096)
260
+ const cachedEntry = this.#embeddingCache.find(entry => entry.rowid === candidate.vec_rowid);
261
+ if (!cachedEntry)
262
+ continue;
263
+ // Compute cosine similarity. Distance = 1 - similarity, so similarity >= 0.92 means distance <= 0.08.
264
+ const distance = cosineDistance(embedding, cachedEntry.embedding);
265
+ if (distance <= 0.08) {
266
+ // Semantically equivalent fact found — return existing ID without inserting
267
+ return candidate.id;
268
+ }
269
+ }
270
+ }
271
+ // No duplicate found (neither exact nor semantic) — proceed with normal insertion
272
+ const id = crypto.randomUUID();
75
273
  // Begin transaction
76
274
  this.#db.exec('BEGIN');
77
275
  try {
78
- // Insert fact
276
+ // Insert fact (T-079: include source_chunk_id)
79
277
  const factStmt = this.#db.prepare(`
80
278
  INSERT INTO facts
81
279
  (id, user_id, subject, predicate, object,
82
280
  confidence, decay_rate, timestamp, source_session_id,
83
- source_session_label, context)
84
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
281
+ source_session_label, context, source_chunk_id)
282
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
85
283
  `);
86
284
  factStmt.bind([
87
285
  id,
@@ -95,6 +293,7 @@ export class LocalStore {
95
293
  fact.sourceSessionId,
96
294
  fact.sourceSessionLabel ?? null,
97
295
  fact.context ?? null,
296
+ sourceChunkId ?? null,
98
297
  ]);
99
298
  factStmt.step();
100
299
  factStmt.finalize();
@@ -110,6 +309,8 @@ export class LocalStore {
110
309
  updateStmt.step();
111
310
  updateStmt.finalize();
112
311
  this.#db.exec('COMMIT');
312
+ // T-096: Append new embedding to in-memory cache
313
+ this.#embeddingCache.push({ rowid: vecRowid, embedding });
113
314
  }
114
315
  catch (err) {
115
316
  this.#db.exec('ROLLBACK');
@@ -118,9 +319,18 @@ export class LocalStore {
118
319
  return id;
119
320
  }
120
321
  async search(query, limit = 20) {
121
- return searchFacts(this.#db, this.#userId, query, limit);
322
+ // T-096: Pass in-memory embedding cache to searchFacts (eliminates 292ms SQLite load per query)
323
+ return searchFacts(this.#db, this.#userId, query, limit, this.#embeddingCache);
122
324
  }
123
325
  async delete(id) {
326
+ // T-096: Get vec_rowid before soft-deleting so we can remove from cache
327
+ const vecRowidStmt = this.#db.prepare(`
328
+ SELECT vec_rowid FROM facts WHERE id = ? AND user_id = ?
329
+ `);
330
+ vecRowidStmt.bind([id, this.#userId]);
331
+ vecRowidStmt.step();
332
+ const vecRowid = vecRowidStmt.get(0);
333
+ vecRowidStmt.finalize();
124
334
  // Soft delete only — never hard delete.
125
335
  const stmt = this.#db.prepare(`
126
336
  UPDATE facts SET deleted_at = ? WHERE id = ? AND user_id = ?
@@ -128,6 +338,13 @@ export class LocalStore {
128
338
  stmt.bind([new Date().toISOString(), id, this.#userId]);
129
339
  stmt.step();
130
340
  stmt.finalize();
341
+ // T-096: Remove from in-memory embedding cache
342
+ if (vecRowid !== null) {
343
+ const cacheIdx = this.#embeddingCache.findIndex(entry => entry.rowid === vecRowid);
344
+ if (cacheIdx !== -1) {
345
+ this.#embeddingCache.splice(cacheIdx, 1);
346
+ }
347
+ }
131
348
  }
132
349
  async status() {
133
350
  const factStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM facts WHERE user_id = ? AND deleted_at IS NULL`);
@@ -159,18 +376,19 @@ export class LocalStore {
159
376
  const chunkText = formatExchange(exchange);
160
377
  // Compute content hash for deduplication (scoped per userId).
161
378
  const contentHash = createHash('sha256').update(chunkText).digest('hex');
162
- // Embed before opening the DB transaction.
163
- const embedding = await embed(chunkText);
164
- const embeddingJson = serializeEmbedding(embedding);
379
+ // T-108: Parent-child chunking — don't embed the parent, only the children.
380
+ // Parent extract_status: 'no_llm' if no config, otherwise 'pending' (extraction runs on parent only).
381
+ const extractStatus = this.#llmConfig ? 'pending' : 'no_llm';
165
382
  // Attempt insert — catch UNIQUE constraint violations (duplicate content_hash).
166
383
  try {
167
384
  this.#db.exec('BEGIN');
168
- // Insert into raw_log
385
+ // T-108: Insert parent row (no embedding, no vec_rowid).
169
386
  const rawLogStmt = this.#db.prepare(`
170
387
  INSERT INTO raw_log
171
388
  (id, user_id, session_id, session_label,
172
- user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash)
173
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
389
+ user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
390
+ embed_status, embed_error, embed_model, extract_status, parent_id)
391
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
174
392
  `);
175
393
  rawLogStmt.bind([
176
394
  rawLogId,
@@ -184,20 +402,81 @@ export class LocalStore {
184
402
  chunkText,
185
403
  0,
186
404
  contentHash,
405
+ 'no_embed', // Parent is not embedded (T-108)
406
+ null,
407
+ null,
408
+ extractStatus,
409
+ null, // parent_id=NULL for parent rows
187
410
  ]);
188
411
  rawLogStmt.step();
189
412
  rawLogStmt.finalize();
190
- // Insert embedding into vec_raw_log (auto-assigned id).
191
- const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
192
- vecStmt.bind([embeddingJson]);
193
- vecStmt.step();
194
- vecStmt.finalize();
195
- const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
196
- // Back-fill vec_rowid so raw-log-search can join without a mapping table.
197
- const updateStmt = this.#db.prepare(`UPDATE raw_log SET vec_rowid = ? WHERE id = ?`);
198
- updateStmt.bind([vecRowid, rawLogId]);
199
- updateStmt.step();
200
- updateStmt.finalize();
413
+ // T-108: Split parent into child chunks and embed each child.
414
+ const childChunks = splitIntoChildren(chunkText);
415
+ for (let i = 0; i < childChunks.length; i++) {
416
+ const childText = childChunks[i];
417
+ if (!childText)
418
+ continue;
419
+ const childId = crypto.randomUUID();
420
+ let childEmbedding = null;
421
+ let childEmbeddingJson = null;
422
+ let childEmbedStatus = 'pending';
423
+ let childEmbedError = null;
424
+ let childEmbedModel = null;
425
+ // Embed the child chunk
426
+ try {
427
+ childEmbedding = await embed(childText);
428
+ childEmbeddingJson = serializeEmbedding(childEmbedding);
429
+ childEmbedStatus = 'done';
430
+ childEmbedModel = 'Xenova/bge-small-en-v1.5';
431
+ }
432
+ catch (err) {
433
+ childEmbedStatus = 'failed';
434
+ childEmbedError = err instanceof Error ? err.message : String(err);
435
+ }
436
+ // Insert child row
437
+ const childStmt = this.#db.prepare(`
438
+ INSERT INTO raw_log
439
+ (id, user_id, session_id, session_label,
440
+ user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
441
+ embed_status, embed_error, embed_model, extract_status, parent_id)
442
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
443
+ `);
444
+ childStmt.bind([
445
+ childId,
446
+ this.#userId,
447
+ exchange.sessionId,
448
+ exchange.sessionLabel ?? null,
449
+ exchange.userMessage,
450
+ exchange.agentResponse,
451
+ exchange.timestamp.toISOString(),
452
+ exchange.source,
453
+ childText,
454
+ i, // chunk_index for ordering
455
+ null, // No content_hash for children (they don't participate in dedup)
456
+ childEmbedStatus,
457
+ childEmbedError,
458
+ childEmbedModel,
459
+ 'child', // T-108: Mark as 'child' to prevent extraction
460
+ rawLogId, // parent_id points to parent
461
+ ]);
462
+ childStmt.step();
463
+ childStmt.finalize();
464
+ // Insert child embedding into vec_raw_log if embedding succeeded
465
+ if (childEmbeddingJson !== null) {
466
+ const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
467
+ vecStmt.bind([childEmbeddingJson]);
468
+ vecStmt.step();
469
+ vecStmt.finalize();
470
+ const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
471
+ // Back-fill vec_rowid on child row
472
+ const updateStmt = this.#db.prepare(`UPDATE raw_log SET vec_rowid = ? WHERE id = ?`);
473
+ updateStmt.bind([vecRowid, childId]);
474
+ updateStmt.step();
475
+ updateStmt.finalize();
476
+ // T-103: Append child embedding to in-memory cache
477
+ this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding: childEmbedding });
478
+ }
479
+ }
201
480
  this.#db.exec('COMMIT');
202
481
  }
203
482
  catch (err) {
@@ -215,9 +494,11 @@ export class LocalStore {
215
494
  // Re-throw other errors (e.g., real DB issues).
216
495
  throw err;
217
496
  }
218
- // Layer 2: enqueue exchange for batched fact extraction (T-071).
219
- // ExtractionQueue handles draining on interval or batch size threshold.
220
- this.#extractionQueue.enqueue(exchange, this.#userId);
497
+ // Layer 2: enqueue exchange for batched fact extraction (T-071) only if LLM config is present.
498
+ // If no LLM config, extract_status is already set to 'no_llm', so skip enqueue.
499
+ if (this.#llmConfig) {
500
+ this.#extractionQueue.enqueue(exchange, this.#userId, rawLogId);
501
+ }
221
502
  return {
222
503
  rawLogId,
223
504
  factsExtracted: 0,
@@ -229,7 +510,8 @@ export class LocalStore {
229
510
  * See raw-log-search.ts for the full pipeline description.
230
511
  */
231
512
  async searchRawLog(query, limit = 10) {
232
- return searchRawLog(this.#db, this.#userId, query, limit);
513
+ // T-103: Pass in-memory embedding cache to searchRawLog (eliminates ~3,700ms SQLite load per query)
514
+ return searchRawLog(this.#db, this.#userId, query, limit, this.#rawLogEmbeddingCache);
233
515
  }
234
516
  /**
235
517
  * Wait for all queued fact extractions to complete.
@@ -384,7 +666,12 @@ export class LocalStore {
384
666
  source,
385
667
  chunk_text AS chunkText,
386
668
  chunk_index AS chunkIndex,
387
- content_hash AS contentHash
669
+ content_hash AS contentHash,
670
+ embed_status AS embedStatus,
671
+ embed_error AS embedError,
672
+ embed_model AS embedModel,
673
+ extract_status AS extractStatus,
674
+ extract_error AS extractError
388
675
  FROM raw_log
389
676
  WHERE user_id = ?
390
677
  ORDER BY timestamp DESC
@@ -397,6 +684,194 @@ export class LocalStore {
397
684
  rawLogStmt.finalize();
398
685
  return { facts, rawLog };
399
686
  }
687
+ /**
688
+ * Start background backlog processor drain loops (T-095).
689
+ * Launches continuous async loops for embed and extract backlogs.
690
+ * Call this after store.extractionQueue.start() in plugin-module.ts.
691
+ */
692
+ startBacklogProcessor() {
693
+ // Start embed drain loop
694
+ if (this.#embedDrainPromise === null) {
695
+ this.#embedDrainStopped = false;
696
+ this.#embedDrainPromise = this.#embedDrainLoop();
697
+ }
698
+ // Start extract drain loop (only if LLM config is present)
699
+ if (this.#llmConfig && this.#extractDrainPromise === null) {
700
+ this.#extractDrainStopped = false;
701
+ this.#extractDrainPromise = this.#extractDrainLoop();
702
+ }
703
+ }
704
+ /**
705
+ * Stop background backlog processor drain loops (T-095).
706
+ * Signals both loops to stop and awaits in-flight work.
707
+ * Call this alongside store.extractionQueue.stop() in session_end and process exit handlers.
708
+ */
709
+ async stopBacklogProcessor() {
710
+ // Signal loops to stop
711
+ this.#embedDrainStopped = true;
712
+ this.#extractDrainStopped = true;
713
+ // Await drain loop Promises (waits for in-flight work to complete)
714
+ const promises = [];
715
+ if (this.#embedDrainPromise !== null) {
716
+ promises.push(this.#embedDrainPromise);
717
+ this.#embedDrainPromise = null;
718
+ }
719
+ if (this.#extractDrainPromise !== null) {
720
+ promises.push(this.#extractDrainPromise);
721
+ this.#extractDrainPromise = null;
722
+ }
723
+ await Promise.all(promises);
724
+ }
725
+ /**
726
+ * Continuous drain loop for embed backlog (T-095).
727
+ * Runs as fast as the Worker thread allows, with no artificial throttling.
728
+ * Only sleeps when the queue is empty.
729
+ */
730
+ async #embedDrainLoop() {
731
+ while (!this.#embedDrainStopped) {
732
+ const processed = await this.#processEmbedBatch();
733
+ if (processed === 0) {
734
+ // Queue is empty — sleep before checking again
735
+ await new Promise(resolve => setTimeout(resolve, this.#embedIdleMs));
736
+ }
737
+ // If processed > 0: immediately loop to grab the next batch
738
+ }
739
+ }
740
+ /**
741
+ * Process one batch of embed backlog rows (T-095).
742
+ * Uses Promise.all for parallelism across the batch (embed runs in Worker, no API limits).
743
+ * Returns count of rows processed.
744
+ */
745
+ async #processEmbedBatch() {
746
+ const BATCH_SIZE = 50; // Large batch — embed is CPU-bound, no rate limit
747
+ // T-108: Fetch pending child rows only (parent_id IS NOT NULL).
748
+ // Old parent rows (parent_id IS NULL, embed_status='pending') are left as-is for fallback search.
749
+ const stmt = this.#db.prepare(`
750
+ SELECT id, chunk_text FROM raw_log
751
+ WHERE user_id = ? AND embed_status = 'pending' AND parent_id IS NOT NULL
752
+ ORDER BY rowid ASC
753
+ LIMIT ?
754
+ `);
755
+ stmt.bind([this.#userId, BATCH_SIZE]);
756
+ const pendingRows = [];
757
+ while (stmt.step()) {
758
+ pendingRows.push(stmt.get({}));
759
+ }
760
+ stmt.finalize();
761
+ if (pendingRows.length === 0)
762
+ return 0;
763
+ // Process rows concurrently with Promise.all
764
+ await Promise.all(pendingRows.map(async (row) => {
765
+ try {
766
+ const embedding = await embed(row.chunk_text);
767
+ const embeddingJson = serializeEmbedding(embedding);
768
+ const embedModel = 'Xenova/bge-small-en-v1.5';
769
+ // Insert into vec_raw_log (transaction per row for isolation)
770
+ this.#db.exec('BEGIN');
771
+ const vecStmt = this.#db.prepare(`INSERT INTO vec_raw_log(embedding) VALUES (?)`);
772
+ vecStmt.bind([embeddingJson]);
773
+ vecStmt.step();
774
+ vecStmt.finalize();
775
+ const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
776
+ // Update raw_log: embed_status='done', vec_rowid, embed_model
777
+ const updateStmt = this.#db.prepare(`
778
+ UPDATE raw_log
779
+ SET embed_status = 'done', embed_error = NULL, embed_model = ?, vec_rowid = ?
780
+ WHERE id = ?
781
+ `);
782
+ updateStmt.bind([embedModel, vecRowid, row.id]);
783
+ updateStmt.step();
784
+ updateStmt.finalize();
785
+ this.#db.exec('COMMIT');
786
+ // T-103: Append new embedding to in-memory cache
787
+ this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding });
788
+ }
789
+ catch (err) {
790
+ // Embedding failed — update embed_status='failed' with error
791
+ const errorMsg = err instanceof Error ? err.message : String(err);
792
+ const updateStmt = this.#db.prepare(`
793
+ UPDATE raw_log
794
+ SET embed_status = 'failed', embed_error = ?
795
+ WHERE id = ?
796
+ `);
797
+ updateStmt.bind([errorMsg, row.id]);
798
+ updateStmt.step();
799
+ updateStmt.finalize();
800
+ }
801
+ }));
802
+ return pendingRows.length;
803
+ }
804
+ /**
805
+ * Continuous drain loop for extract backlog (T-095).
806
+ * Fetches up to `concurrency` rows and processes them concurrently with 429 backoff.
807
+ * Only sleeps when the queue is empty.
808
+ */
809
+ async #extractDrainLoop() {
810
+ while (!this.#extractDrainStopped) {
811
+ // Fetch pending rows (up to concurrency limit)
812
+ const stmt = this.#db.prepare(`
813
+ SELECT id, user_message, agent_response, timestamp, session_id, session_label, source
814
+ FROM raw_log
815
+ WHERE user_id = ? AND extract_status = 'pending'
816
+ ORDER BY rowid ASC
817
+ LIMIT ?
818
+ `);
819
+ stmt.bind([this.#userId, this.#extractConcurrency]);
820
+ const pendingRows = [];
821
+ while (stmt.step()) {
822
+ pendingRows.push(stmt.get({}));
823
+ }
824
+ stmt.finalize();
825
+ if (pendingRows.length === 0) {
826
+ // Queue is empty — sleep before checking again
827
+ await new Promise(resolve => setTimeout(resolve, this.#extractIdleMs));
828
+ continue;
829
+ }
830
+ // Process rows concurrently with 429 backoff
831
+ await Promise.all(pendingRows.map(async (row) => {
832
+ const exchange = {
833
+ userMessage: row.user_message,
834
+ agentResponse: row.agent_response,
835
+ timestamp: new Date(row.timestamp),
836
+ source: row.source,
837
+ sessionId: row.session_id,
838
+ ...(row.session_label !== null ? { sessionLabel: row.session_label } : {}),
839
+ };
840
+ await this.#extractRowWithBackoff(exchange, row.id);
841
+ }));
842
+ }
843
+ }
844
+ /**
845
+ * Extract facts for one row with exponential backoff on 429 errors (T-095).
846
+ * Calls extractFn directly (bypasses ExtractionQueue for backlog processing).
847
+ * extractFn already handles DB status updates (extract_status=done/failed).
848
+ */
849
+ async #extractRowWithBackoff(exchange, sourceChunkId) {
850
+ const MAX_RETRIES = 4;
851
+ let attempt = 0;
852
+ while (attempt <= MAX_RETRIES) {
853
+ try {
854
+ await this.#extractFn(exchange, this.#userId, sourceChunkId);
855
+ return; // Success
856
+ }
857
+ catch (err) {
858
+ const errorMsg = err instanceof Error ? err.message : String(err);
859
+ const is429 = errorMsg.toLowerCase().includes('429') ||
860
+ errorMsg.toLowerCase().includes('rate') ||
861
+ errorMsg.toLowerCase().includes('quota');
862
+ if (is429 && attempt < MAX_RETRIES) {
863
+ // Exponential backoff: 2s, 4s, 8s, 16s
864
+ const backoffMs = this.#retryBackoffMs * Math.pow(2, attempt);
865
+ await new Promise(resolve => setTimeout(resolve, backoffMs));
866
+ attempt++;
867
+ }
868
+ else {
869
+ // Not a 429, or max retries reached — extractFn already marked extract_status='failed'
870
+ return;
871
+ }
872
+ }
873
+ }
874
+ }
400
875
  /** Close the database connection. Call when done (e.g. in tests). */
401
876
  close() {
402
877
  this.#db.close();