npm - @getplumb/core - Versions diffs - 0.3.0 → 0.4.1 - Mend

@getplumb/core 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

package/dist/context-builder.d.ts +1 -7
package/dist/context-builder.d.ts.map +1 -1
package/dist/context-builder.js +7 -44
package/dist/context-builder.js.map +1 -1
package/dist/index.d.ts +4 -10
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -5
package/dist/index.js.map +1 -1
package/dist/local-store.d.ts +7 -82
package/dist/local-store.d.ts.map +1 -1
package/dist/local-store.js +84 -468
package/dist/local-store.js.map +1 -1
package/dist/read-path.d.ts +6 -23
package/dist/read-path.d.ts.map +1 -1
package/dist/read-path.js +9 -48
package/dist/read-path.js.map +1 -1
package/dist/schema.d.ts +4 -13
package/dist/schema.d.ts.map +1 -1
package/dist/schema.js +6 -73
package/dist/schema.js.map +1 -1
package/dist/scorer.d.ts +0 -9
package/dist/scorer.d.ts.map +1 -1
package/dist/scorer.js +1 -31
package/dist/scorer.js.map +1 -1
package/dist/store.d.ts +2 -16
package/dist/store.d.ts.map +1 -1
package/dist/types.d.ts +0 -25
package/dist/types.d.ts.map +1 -1
package/dist/types.js +1 -6
package/dist/types.js.map +1 -1
package/package.json +1 -1
package/dist/extraction-queue.d.ts +0 -72
package/dist/extraction-queue.d.ts.map +0 -1
package/dist/extraction-queue.js +0 -101
package/dist/extraction-queue.js.map +0 -1
package/dist/extractor.d.ts +0 -22
package/dist/extractor.d.ts.map +0 -1
package/dist/extractor.js +0 -188
package/dist/extractor.js.map +0 -1
package/dist/extractor.test.d.ts +0 -2
package/dist/extractor.test.d.ts.map +0 -1
package/dist/extractor.test.js +0 -158
package/dist/extractor.test.js.map +0 -1
package/dist/fact-search.d.ts +0 -32
package/dist/fact-search.d.ts.map +0 -1
package/dist/fact-search.js +0 -174
package/dist/fact-search.js.map +0 -1
package/dist/fact-search.test.d.ts +0 -12
package/dist/fact-search.test.d.ts.map +0 -1
package/dist/fact-search.test.js +0 -117
package/dist/fact-search.test.js.map +0 -1
package/dist/llm-client.d.ts +0 -59
package/dist/llm-client.d.ts.map +0 -1
package/dist/llm-client.js +0 -227
package/dist/llm-client.js.map +0 -1
package/dist/local-store.test.d.ts +0 -2
package/dist/local-store.test.d.ts.map +0 -1
package/dist/local-store.test.js +0 -146
package/dist/local-store.test.js.map +0 -1
package/dist/raw-log-search.test.d.ts +0 -12
package/dist/raw-log-search.test.d.ts.map +0 -1
package/dist/raw-log-search.test.js +0 -124
package/dist/raw-log-search.test.js.map +0 -1
package/dist/read-path.test.d.ts +0 -15
package/dist/read-path.test.d.ts.map +0 -1
package/dist/read-path.test.js +0 -393
package/dist/read-path.test.js.map +0 -1
package/dist/scorer.test.d.ts +0 -10
package/dist/scorer.test.d.ts.map +0 -1
package/dist/scorer.test.js +0 -169
package/dist/scorer.test.js.map +0 -1

package/dist/local-store.js CHANGED

@@ -4,35 +4,37 @@ import { mkdirSync } from 'node:fs';
 import { join, dirname } from 'node:path';
 import { openDb } from './wasm-db.js';
 import { applySchema } from './schema.js';
-import { extractFacts } from './extractor.js';
-import { callLLMWithConfig } from './llm-client.js';
 import { embed, warmEmbedder, warmReranker } from './embedder.js';
 import { formatExchange } from './chunker.js';
 import { searchRawLog } from './raw-log-search.js';
-import { searchFacts } from './fact-search.js';
-import { ExtractionQueue } from './extraction-queue.js';
-import { serializeEmbedding, deserializeEmbedding, cosineDistance } from './vector-search.js';
+import { serializeEmbedding, deserializeEmbedding } from './vector-search.js';
 /**
  * Split text into overlapping child chunks for parent-child chunking (T-108).
  * Target: ~250 chars per chunk with ~50 char overlap.
  * Prefers sentence boundaries, falls back to word boundaries, hard-cuts at 300 chars max.
+ *
+ * Uses a generator to avoid materializing the full chunk array in memory,
+ * which prevents OOM crashes on large inputs (fix for splitIntoChildren array limit bug).
  */
-function splitIntoChildren(text) {
+function* splitIntoChildren(text) {
     const TARGET_SIZE = 250;
     const OVERLAP = 50;
     const MAX_SIZE = 300;
     const SENTENCE_ENDINGS = /[.!?]\s+/g;
     if (text.length <= TARGET_SIZE) {
-        // Text is already small enough — return as single child
-        return [text];
+        // Text is already small enough — yield as single child
+        if (text.trim().length > 0)
+            yield text;
+        return;
     }
-    const chunks = [];
     let pos = 0;
     while (pos < text.length) {
         let endPos = Math.min(pos + TARGET_SIZE, text.length);
         // If we're at the end of the text, take the rest
         if (endPos >= text.length) {
-            chunks.push(text.slice(pos));
+            const last = text.slice(pos).trim();
+            if (last.length > 0)
+                yield last;
             break;
         }
         // Try to find a sentence boundary within the target range
@@ -61,13 +63,14 @@ function splitIntoChildren(text) {
             // No sentence boundary found — fall back to word boundary
             endPos = findWordBoundary(text, pos, TARGET_SIZE, MAX_SIZE);
         }
-        chunks.push(text.slice(pos, endPos).trim());
+        const chunk = text.slice(pos, endPos).trim();
+        if (chunk.length > 0)
+            yield chunk;
         // Move position forward, with overlap
         pos = endPos - OVERLAP;
         if (pos < 0)
             pos = endPos; // Safety: don't go negative
     }
-    return chunks.filter(chunk => chunk.length > 0);
 }
 /**
  * Find a word boundary near the target position.
@@ -94,22 +97,18 @@ function findWordBoundary(text, start, targetSize, maxSize) {
 export class LocalStore {
     #db;
     #userId;
-    #llmConfig;
-    #extractionQueue;
-    // Backlog processor state (T-095: drain loops)
+    // Backlog processor state (T-095: drain loop)
     #embedDrainStopped = false;
-    #extractDrainStopped = false;
     #embedDrainPromise = null;
-    #extractDrainPromise = null;
     #embedIdleMs;
-    #extractIdleMs;
-    #extractConcurrency;
-    #retryBackoffMs;
-    #extractFn;
-    // T-096: In-memory embedding cache for vec_facts (eliminates 292ms SQLite load on each query)
-    #embeddingCache = [];
     // T-103: In-memory embedding cache for vec_raw_log (eliminates ~3,700ms SQLite load on each query)
     #rawLogEmbeddingCache = [];
+    // FIX 3: WAL checkpoint throttling to prevent unbounded WAL growth
+    #lastCheckpoint = Date.now();
+    #checkpointIntervalMs = 60000; // Checkpoint every minute
+    // FIX 4: Health check to detect stuck drain loops
+    #lastActivityTimestamp = Date.now();
+    #healthCheckInterval = null;
     /** Expose database for plugin use (e.g., NudgeManager) */
     get db() {
         return this.#db;
@@ -118,21 +117,11 @@ export class LocalStore {
     get userId() {
         return this.#userId;
     }
-    /** Expose extraction queue for lifecycle management (start/stop) */
-    get extractionQueue() {
-        return this.#extractionQueue;
-    }
-    constructor(db, userId, llmConfig, extractionQueue, extractFn, backlog) {
+    constructor(db, userId, backlog) {
         this.#db = db;
         this.#userId = userId;
-        this.#llmConfig = llmConfig;
-        this.#extractionQueue = extractionQueue;
-        this.#extractFn = extractFn;
-        // Initialize backlog processor config — defaults run as fast as possible with concurrency.
+        // Initialize backlog processor config
         this.#embedIdleMs = backlog?.embedIdleMs ?? 5000;
-        this.#extractIdleMs = backlog?.extractIdleMs ?? 5000;
-        this.#extractConcurrency = backlog?.concurrency ?? 5;
-        this.#retryBackoffMs = backlog?.retryBackoffMs ?? 2000;
     }
     /**
      * Create a new LocalStore instance (async factory).
@@ -141,66 +130,19 @@ export class LocalStore {
     static async create(options = {}) {
         const dbPath = options.dbPath ?? join(homedir(), '.plumb', 'memory.db');
         const userId = options.userId ?? 'default';
-        const llmConfig = options.llmConfig;
         mkdirSync(dirname(dbPath), { recursive: true });
         const db = await openDb(dbPath);
         // Enable WAL mode and foreign keys
         db.exec('PRAGMA journal_mode = WAL');
         db.exec('PRAGMA foreign_keys = ON');
         applySchema(db);
-        // Use a mutable cell to hold the store reference (needed for circular dependency)
-        let storeRef = null;
-        // Initialize extraction queue with deferred store lookup
-        // T-079: Wrapper handles extract_status updates on success/failure.
-        const extractFn = async (exchange, userId, sourceChunkId) => {
-            if (!storeRef)
-                throw new Error('Store not initialized');
-            const llmFn = llmConfig
-                ? (prompt) => callLLMWithConfig(prompt, llmConfig)
-                : undefined;
-            try {
-                const facts = await extractFacts(exchange, userId, storeRef, llmFn, sourceChunkId);
-                // T-079: Update extract_status='done' on success.
-                const updateStmt = db.prepare(`
-          UPDATE raw_log SET extract_status = 'done' WHERE id = ?
-        `);
-                updateStmt.bind([sourceChunkId]);
-                updateStmt.step();
-                updateStmt.finalize();
-                return facts;
-            }
-            catch (err) {
-                // T-079: Update extract_status='failed' with error message.
-                const errorMsg = err instanceof Error ? err.message : String(err);
-                const updateStmt = db.prepare(`
-          UPDATE raw_log SET extract_status = 'failed', extract_error = ? WHERE id = ?
-        `);
-                updateStmt.bind([errorMsg, sourceChunkId]);
-                updateStmt.step();
-                updateStmt.finalize();
-                // Re-throw so Promise.allSettled() in flush() sees the rejection.
-                throw err;
-            }
-        };
-        const extractionQueue = options.extractionQueue ?? new ExtractionQueue(extractFn);
-        // Create store and assign to ref
-        const store = new LocalStore(db, userId, llmConfig, extractionQueue, extractFn, options.backlog);
-        storeRef = store;
+        // Create store
+        const store = new LocalStore(db, userId, options.backlog);
         // T-096: Warm embedder pipeline to eliminate 365ms cold-start on first query
         await warmEmbedder();
         // T-101: Warm reranker pipeline to eliminate ~200ms cold-start on first query
         // (intentionally loads ~80MB model at init for consistent <250ms query performance)
         await warmReranker();
-        // T-096: Load all vec_facts embeddings into in-memory cache (eliminates 292ms SQLite load per query)
-        const vecStmt = db.prepare(`SELECT rowid, embedding FROM vec_facts`);
-        while (vecStmt.step()) {
-            const row = vecStmt.get({});
-            store.#embeddingCache.push({
-                rowid: row.rowid,
-                embedding: deserializeEmbedding(row.embedding),
-            });
-        }
-        vecStmt.finalize();
         // T-103/T-108: Load vec_raw_log embeddings for child rows only (eliminates ~3,700ms SQLite load per query)
         // Child rows have parent_id IS NOT NULL. Parent rows are not embedded (embed_status='no_embed').
         const rawLogVecStmt = db.prepare(`
@@ -219,139 +161,7 @@ export class LocalStore {
         rawLogVecStmt.finalize();
         return store;
     }
-    async store(fact, sourceChunkId) {
-        // T-097: Cross-chunk fact deduplication — prevent storing duplicate facts across different chunks.
-        // A fact is considered a duplicate if it has the same subject+predicate and the object is either:
-        // 1. Identical (case-insensitive, normalized whitespace), OR
-        // 2. Semantically similar (cosine similarity >= 0.92 on embeddings)
-        //
-        // Pre-filter by subject+predicate via SQL (uses index, avoids full corpus scan).
-        const candidateStmt = this.#db.prepare(`
-      SELECT id, object, vec_rowid
-      FROM facts
-      WHERE user_id = ? AND subject = ? AND predicate = ? AND deleted_at IS NULL
-    `);
-        candidateStmt.bind([this.#userId, fact.subject, fact.predicate]);
-        const candidates = [];
-        while (candidateStmt.step()) {
-            candidates.push(candidateStmt.get({}));
-        }
-        candidateStmt.finalize();
-        // Helper: Normalize text for exact-match check (lowercase, trim, collapse multiple spaces)
-        const normalizeText = (text) => text.toLowerCase().trim().replace(/\s+/g, ' ');
-        const normalizedNewObject = normalizeText(fact.object);
-        // Check for exact object match first (avoids embedding call in the common case)
-        for (const candidate of candidates) {
-            if (normalizeText(candidate.object) === normalizedNewObject) {
-                // Exact duplicate found — return existing fact ID without inserting
-                return candidate.id;
-            }
-        }
-        // No exact match found. Now embed the new fact for semantic similarity check and insertion.
-        const text = `${fact.subject} ${fact.predicate} ${fact.object} ${fact.context ?? ''}`.trim();
-        const embedding = await embed(text);
-        const embeddingJson = serializeEmbedding(embedding);
-        // Check semantic similarity against candidates (only if we have candidates with embeddings)
-        if (candidates.length > 0) {
-            for (const candidate of candidates) {
-                if (candidate.vec_rowid === null)
-                    continue;
-                // Find candidate embedding in in-memory cache (T-096)
-                const cachedEntry = this.#embeddingCache.find(entry => entry.rowid === candidate.vec_rowid);
-                if (!cachedEntry)
-                    continue;
-                // Compute cosine similarity. Distance = 1 - similarity, so similarity >= 0.92 means distance <= 0.08.
-                const distance = cosineDistance(embedding, cachedEntry.embedding);
-                if (distance <= 0.08) {
-                    // Semantically equivalent fact found — return existing ID without inserting
-                    return candidate.id;
-                }
-            }
-        }
-        // No duplicate found (neither exact nor semantic) — proceed with normal insertion
-        const id = crypto.randomUUID();
-        // Begin transaction
-        this.#db.exec('BEGIN');
-        try {
-            // Insert fact (T-079: include source_chunk_id)
-            const factStmt = this.#db.prepare(`
-        INSERT INTO facts
-          (id, user_id, subject, predicate, object,
-           confidence, decay_rate, timestamp, source_session_id,
-           source_session_label, context, source_chunk_id)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-      `);
-            factStmt.bind([
-                id,
-                this.#userId,
-                fact.subject,
-                fact.predicate,
-                fact.object,
-                fact.confidence,
-                fact.decayRate,
-                fact.timestamp.toISOString(),
-                fact.sourceSessionId,
-                fact.sourceSessionLabel ?? null,
-                fact.context ?? null,
-                sourceChunkId ?? null,
-            ]);
-            factStmt.step();
-            factStmt.finalize();
-            // Insert embedding into vec_facts (auto-assigned id).
-            const vecStmt = this.#db.prepare(`INSERT INTO vec_facts(embedding) VALUES (?)`);
-            vecStmt.bind([embeddingJson]);
-            vecStmt.step();
-            vecStmt.finalize();
-            const vecRowid = this.#db.selectValue('SELECT last_insert_rowid()');
-            // Back-fill vec_rowid so fact-search can join without a mapping table.
-            const updateStmt = this.#db.prepare(`UPDATE facts SET vec_rowid = ? WHERE id = ?`);
-            updateStmt.bind([vecRowid, id]);
-            updateStmt.step();
-            updateStmt.finalize();
-            this.#db.exec('COMMIT');
-            // T-096: Append new embedding to in-memory cache
-            this.#embeddingCache.push({ rowid: vecRowid, embedding });
-        }
-        catch (err) {
-            this.#db.exec('ROLLBACK');
-            throw err;
-        }
-        return id;
-    }
-    async search(query, limit = 20) {
-        // T-096: Pass in-memory embedding cache to searchFacts (eliminates 292ms SQLite load per query)
-        return searchFacts(this.#db, this.#userId, query, limit, this.#embeddingCache);
-    }
-    async delete(id) {
-        // T-096: Get vec_rowid before soft-deleting so we can remove from cache
-        const vecRowidStmt = this.#db.prepare(`
-      SELECT vec_rowid FROM facts WHERE id = ? AND user_id = ?
-    `);
-        vecRowidStmt.bind([id, this.#userId]);
-        vecRowidStmt.step();
-        const vecRowid = vecRowidStmt.get(0);
-        vecRowidStmt.finalize();
-        // Soft delete only — never hard delete.
-        const stmt = this.#db.prepare(`
-      UPDATE facts SET deleted_at = ? WHERE id = ? AND user_id = ?
-    `);
-        stmt.bind([new Date().toISOString(), id, this.#userId]);
-        stmt.step();
-        stmt.finalize();
-        // T-096: Remove from in-memory embedding cache
-        if (vecRowid !== null) {
-            const cacheIdx = this.#embeddingCache.findIndex(entry => entry.rowid === vecRowid);
-            if (cacheIdx !== -1) {
-                this.#embeddingCache.splice(cacheIdx, 1);
-            }
-        }
-    }
     async status() {
-        const factStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM facts WHERE user_id = ? AND deleted_at IS NULL`);
-        factStmt.bind([this.#userId]);
-        factStmt.step();
-        const factCount = factStmt.get(0);
-        factStmt.finalize();
         const rawLogStmt = this.#db.prepare(`SELECT COUNT(*) AS c FROM raw_log WHERE user_id = ?`);
         rawLogStmt.bind([this.#userId]);
         rawLogStmt.step();
@@ -365,7 +175,6 @@ export class LocalStore {
         const pageCount = this.#db.selectValue('PRAGMA page_count');
         const pageSize = this.#db.selectValue('PRAGMA page_size');
         return {
-            factCount,
             rawLogCount,
             lastIngestion: lastIngestionTs !== null ? new Date(lastIngestionTs) : null,
             storageBytes: pageCount * pageSize,
@@ -376,9 +185,6 @@ export class LocalStore {
         const chunkText = formatExchange(exchange);
         // Compute content hash for deduplication (scoped per userId).
         const contentHash = createHash('sha256').update(chunkText).digest('hex');
-        // T-108: Parent-child chunking — don't embed parent, only children.
-        // Parent extract_status: 'no_llm' if no config, otherwise 'pending' (extraction runs on parent only).
-        const extractStatus = this.#llmConfig ? 'pending' : 'no_llm';
         // Attempt insert — catch UNIQUE constraint violations (duplicate content_hash).
         try {
             this.#db.exec('BEGIN');
@@ -387,8 +193,8 @@ export class LocalStore {
         INSERT INTO raw_log
           (id, user_id, session_id, session_label,
            user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
-           embed_status, embed_error, embed_model, extract_status, parent_id)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+           embed_status, embed_error, embed_model, parent_id)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
       `);
             rawLogStmt.bind([
                 rawLogId,
@@ -405,17 +211,14 @@ export class LocalStore {
                 'no_embed', // Parent is not embedded (T-108)
                 null,
                 null,
-                extractStatus,
                 null, // parent_id=NULL for parent rows
             ]);
             rawLogStmt.step();
             rawLogStmt.finalize();
             // T-108: Split parent into child chunks and embed each child.
-            const childChunks = splitIntoChildren(chunkText);
-            for (let i = 0; i < childChunks.length; i++) {
-                const childText = childChunks[i];
-                if (!childText)
-                    continue;
+            // splitIntoChildren is a generator — iterate lazily to avoid OOM on large inputs.
+            let i = 0;
+            for (const childText of splitIntoChildren(chunkText)) {
                 const childId = crypto.randomUUID();
                 let childEmbedding = null;
                 let childEmbeddingJson = null;
@@ -438,8 +241,8 @@ export class LocalStore {
           INSERT INTO raw_log
             (id, user_id, session_id, session_label,
              user_message, agent_response, timestamp, source, chunk_text, chunk_index, content_hash,
-             embed_status, embed_error, embed_model, extract_status, parent_id)
-          VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+             embed_status, embed_error, embed_model, parent_id)
+          VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         `);
                 childStmt.bind([
                     childId,
@@ -456,7 +259,6 @@ export class LocalStore {
                     childEmbedStatus,
                     childEmbedError,
                     childEmbedModel,
-                    'child', // T-108: Mark as 'child' to prevent extraction
                     rawLogId, // parent_id points to parent
                 ]);
                 childStmt.step();
@@ -476,6 +278,7 @@ export class LocalStore {
                     // T-103: Append child embedding to in-memory cache
                     this.#rawLogEmbeddingCache.push({ rowid: vecRowid, embedding: childEmbedding });
                 }
+                i++;
             }
             this.#db.exec('COMMIT');
         }
@@ -483,26 +286,17 @@ export class LocalStore {
             this.#db.exec('ROLLBACK');
             // Check for SQLite UNIQUE constraint error on content_hash.
             if (err instanceof Error && err.message.includes('UNIQUE constraint')) {
-                // Duplicate content — skip ingestion and fact extraction.
+                // Duplicate content — skip ingestion.
                 return {
                     rawLogId: '',
-                    factsExtracted: 0,
-                    factIds: [],
                     skipped: true,
                 };
             }
             // Re-throw other errors (e.g., real DB issues).
             throw err;
         }
-        // Layer 2: enqueue exchange for batched fact extraction (T-071) only if LLM config is present.
-        // If no LLM config, extract_status is already set to 'no_llm', so skip enqueue.
-        if (this.#llmConfig) {
-            this.#extractionQueue.enqueue(exchange, this.#userId, rawLogId);
-        }
         return {
             rawLogId,
-            factsExtracted: 0,
-            factIds: [],
         };
     }
     /**
@@ -513,146 +307,11 @@ export class LocalStore {
         // T-103: Pass in-memory embedding cache to searchRawLog (eliminates ~3,700ms SQLite load per query)
         return searchRawLog(this.#db, this.#userId, query, limit, this.#rawLogEmbeddingCache);
     }
-    /**
-     * Wait for all queued fact extractions to complete.
-     * Call this before close() to ensure all async work is done.
-     * Delegates to ExtractionQueue.flush().
-     */
-    async drain() {
-        await this.#extractionQueue.flush();
-    }
-    /**
-     * Re-extract facts for orphaned raw_log chunks (chunks with no corresponding facts).
-     *
-     * This is useful when fact extraction failed during initial ingest (e.g., missing API key,
-     * rate limits, crashes). Re-running the normal seeder won't help because content-hash dedup
-     * skips already-ingested chunks before reaching the extraction phase.
-     *
-     * This method directly calls extractFacts() for each orphaned chunk, bypassing the dedup gate.
-     *
-     * @param throttleMs - Delay between extractions (default 1000ms) to stay under rate limits
-     * @returns Statistics: orphansFound, factsCreated
-     */
-    async reextractOrphans(throttleMs = 1000) {
-        // Query for raw_log entries with no corresponding facts.
-        const stmt = this.#db.prepare(`
-      SELECT
-        id,
-        user_id AS userId,
-        session_id AS sessionId,
-        session_label AS sessionLabel,
-        user_message AS userMessage,
-        agent_response AS agentResponse,
-        timestamp,
-        source
-      FROM raw_log
-      WHERE user_id = ?
-        AND NOT EXISTS (
-          SELECT 1 FROM facts
-          WHERE facts.source_session_id = raw_log.session_id
-        )
-      ORDER BY timestamp ASC
-    `);
-        stmt.bind([this.#userId]);
-        const orphanRows = [];
-        while (stmt.step()) {
-            const row = stmt.get({});
-            orphanRows.push(row);
-        }
-        stmt.finalize();
-        const orphansFound = orphanRows.length;
-        if (orphansFound === 0) {
-            return { orphansFound: 0, factsCreated: 0 };
-        }
-        let factsCreated = 0;
-        for (let i = 0; i < orphanRows.length; i++) {
-            const row = orphanRows[i];
-            if (!row)
-                continue;
-            // Reconstruct MessageExchange from raw_log data
-            const exchange = {
-                userMessage: row.userMessage,
-                agentResponse: row.agentResponse,
-                timestamp: new Date(row.timestamp),
-                source: row.source,
-                sessionId: row.sessionId,
-                ...(row.sessionLabel !== null ? { sessionLabel: row.sessionLabel } : {}),
-            };
-            // Extract facts directly (bypasses ingest dedup gate)
-            try {
-                const llmFn = this.#llmConfig
-                    ? (prompt) => callLLMWithConfig(prompt, this.#llmConfig)
-                    : undefined;
-                const facts = await extractFacts(exchange, this.#userId, this, llmFn);
-                factsCreated += facts.length;
-                console.log(`  ✅ [${i + 1}/${orphansFound}] Re-extracted ${facts.length} fact(s) from session ${row.sessionId}`);
-            }
-            catch (err) {
-                console.error(`  ❌ [${i + 1}/${orphansFound}] Failed to re-extract facts from session ${row.sessionId}:`, err);
-            }
-            // Throttle to stay under rate limits (skip delay after last item)
-            if (i < orphanRows.length - 1) {
-                await new Promise(resolve => setTimeout(resolve, throttleMs));
-            }
-        }
-        return { orphansFound, factsCreated };
-    }
-    /**
-     * Get top subjects by fact count (for plumb status command).
-     * Returns subjects ordered by number of facts (non-deleted only).
-     */
-    topSubjects(userId, limit = 5) {
-        const stmt = this.#db.prepare(`
-      SELECT subject, COUNT(*) as count
-      FROM facts
-      WHERE user_id = ? AND deleted_at IS NULL
-      GROUP BY subject
-      ORDER BY count DESC
-      LIMIT ?
-    `);
-        stmt.bind([userId, limit]);
-        const results = [];
-        while (stmt.step()) {
-            results.push(stmt.get({}));
-        }
-        stmt.finalize();
-        return results;
-    }
     /**
      * Export all data for a user (for plumb export command).
      * Returns raw database rows (no vector data).
-     * Includes soft-deleted facts for transparency.
      */
     exportAll(userId) {
-        // Export all non-deleted facts only (soft-deleted facts are excluded).
-        const factStmt = this.#db.prepare(`
-      SELECT
-        id,
-        user_id AS userId,
-        subject,
-        predicate,
-        object,
-        confidence,
-        decay_rate AS decayRate,
-        timestamp,
-        source_session_id AS sourceSessionId,
-        source_session_label AS sourceSessionLabel,
-        context,
-        deleted_at AS deletedAt
-      FROM facts
-      WHERE user_id = ? AND deleted_at IS NULL
-      ORDER BY timestamp DESC
-    `);
-        factStmt.bind([userId]);
-        const factRows = [];
-        while (factStmt.step()) {
-            factRows.push(factStmt.get({}));
-        }
-        factStmt.finalize();
-        const facts = factRows.map((row) => ({
-            ...row,
-            deleted: false, // All exported facts are non-deleted
-        }));
         // Export all raw_log entries (no vector data).
         const rawLogStmt = this.#db.prepare(`
       SELECT
@@ -669,9 +328,7 @@ export class LocalStore {
         content_hash AS contentHash,
         embed_status AS embedStatus,
         embed_error AS embedError,
-        embed_model AS embedModel,
-        extract_status AS extractStatus,
-        extract_error AS extractError
+        embed_model AS embedModel
       FROM raw_log
       WHERE user_id = ?
       ORDER BY timestamp DESC
@@ -682,12 +339,11 @@ export class LocalStore {
             rawLog.push(rawLogStmt.get({}));
         }
         rawLogStmt.finalize();
-        return { facts, rawLog };
+        return { rawLog };
     }
     /**
-     * Start background backlog processor drain loops (T-095).
-     * Launches continuous async loops for embed and extract backlogs.
-     * Call this after store.extractionQueue.start() in plugin-module.ts.
+     * Start background backlog processor drain loop (T-095).
+     * Launches continuous async loop for embed backlog.
      */
     startBacklogProcessor() {
         // Start embed drain loop
@@ -695,32 +351,36 @@ export class LocalStore {
             this.#embedDrainStopped = false;
             this.#embedDrainPromise = this.#embedDrainLoop();
         }
-        // Start extract drain loop (only if LLM config is present)
-        if (this.#llmConfig && this.#extractDrainPromise === null) {
-            this.#extractDrainStopped = false;
-            this.#extractDrainPromise = this.#extractDrainLoop();
+        // FIX 4: Health check - detect runaway loop that isn't processing or stopping
+        if (this.#healthCheckInterval === null) {
+            this.#healthCheckInterval = setInterval(() => {
+                const idleTime = Date.now() - this.#lastActivityTimestamp;
+                const MAX_IDLE_TIME = 300000; // 5 minutes of no activity
+                // If loop is running but idle for too long, force stop
+                if (idleTime > MAX_IDLE_TIME && !this.#embedDrainStopped) {
+                    console.warn(`[plumb] Drain loop idle for ${Math.round(idleTime / 1000)}s, forcing stop`);
+                    void this.stopBacklogProcessor();
+                }
+            }, 60000); // Check every minute
         }
     }
     /**
-     * Stop background backlog processor drain loops (T-095).
-     * Signals both loops to stop and awaits in-flight work.
-     * Call this alongside store.extractionQueue.stop() in session_end and process exit handlers.
+     * Stop background backlog processor drain loop (T-095).
+     * Signals loop to stop and awaits in-flight work.
      */
     async stopBacklogProcessor() {
-        // Signal loops to stop
+        // FIX 4: Clear health check interval
+        if (this.#healthCheckInterval !== null) {
+            clearInterval(this.#healthCheckInterval);
+            this.#healthCheckInterval = null;
+        }
+        // Signal loop to stop
         this.#embedDrainStopped = true;
-        this.#extractDrainStopped = true;
-        // Await drain loop Promises (waits for in-flight work to complete)
-        const promises = [];
+        // Await drain loop Promise (waits for in-flight work to complete)
         if (this.#embedDrainPromise !== null) {
-            promises.push(this.#embedDrainPromise);
+            await this.#embedDrainPromise;
             this.#embedDrainPromise = null;
         }
-        if (this.#extractDrainPromise !== null) {
-            promises.push(this.#extractDrainPromise);
-            this.#extractDrainPromise = null;
-        }
-        await Promise.all(promises);
     }
     /**
      * Continuous drain loop for embed backlog (T-095).
@@ -728,12 +388,28 @@ export class LocalStore {
      * Only sleeps when the queue is empty.
      */
     async #embedDrainLoop() {
+        // FIX 2: Safety counter of consecutive empty batches, used to warn when the loop keeps polling an empty queue
+        let consecutiveEmptyBatches = 0;
+        const MAX_EMPTY_BATCHES = 1000; // Safety limit: after this many consecutive empty batches, warn and re-check the stop flag (the counter then resets)
         while (!this.#embedDrainStopped) {
             const processed = await this.#processEmbedBatch();
             if (processed === 0) {
+                consecutiveEmptyBatches++;
+                // FIX 2: Safety check - if idle too long, verify stop flag
+                if (consecutiveEmptyBatches >= MAX_EMPTY_BATCHES) {
+                    console.warn('[plumb] Embed drain loop: hit safety limit, verifying stop flag');
+                    if (this.#embedDrainStopped)
+                        break;
+                    consecutiveEmptyBatches = 0; // Reset and continue
+                }
                 // Queue is empty — sleep before checking again
                 await new Promise(resolve => setTimeout(resolve, this.#embedIdleMs));
             }
+            else {
+                consecutiveEmptyBatches = 0;
+                // FIX 4: Update activity timestamp
+                this.#lastActivityTimestamp = Date.now();
+            }
             // If processed > 0: immediately loop to grab the next batch
         }
     }
@@ -799,78 +475,18 @@ export class LocalStore {
                 updateStmt.finalize();
             }
         }));
-        return pendingRows.length;
-    }
-    /**
-     * Continuous drain loop for extract backlog (T-095).
-     * Fetches up to `concurrency` rows and processes them concurrently with 429 backoff.
-     * Only sleeps when the queue is empty.
-     */
-    async #extractDrainLoop() {
-        while (!this.#extractDrainStopped) {
-            // Fetch pending rows (up to concurrency limit)
-            const stmt = this.#db.prepare(`
-        SELECT id, user_message, agent_response, timestamp, session_id, session_label, source
-        FROM raw_log
-        WHERE user_id = ? AND extract_status = 'pending'
-        ORDER BY rowid ASC
-        LIMIT ?
-      `);
-            stmt.bind([this.#userId, this.#extractConcurrency]);
-            const pendingRows = [];
-            while (stmt.step()) {
-                pendingRows.push(stmt.get({}));
-            }
-            stmt.finalize();
-            if (pendingRows.length === 0) {
-                // Queue is empty — sleep before checking again
-                await new Promise(resolve => setTimeout(resolve, this.#extractIdleMs));
-                continue;
-            }
-            // Process rows concurrently with 429 backoff
-            await Promise.all(pendingRows.map(async (row) => {
-                const exchange = {
-                    userMessage: row.user_message,
-                    agentResponse: row.agent_response,
-                    timestamp: new Date(row.timestamp),
-                    source: row.source,
-                    sessionId: row.session_id,
-                    ...(row.session_label !== null ? { sessionLabel: row.session_label } : {}),
-                };
-                await this.#extractRowWithBackoff(exchange, row.id);
-            }));
-        }
-    }
-    /**
-     * Extract facts for one row with exponential backoff on 429 errors (T-095).
-     * Calls extractFn directly (bypasses ExtractionQueue for backlog processing).
-     * extractFn already handles DB status updates (extract_status=done/failed).
-     */
-    async #extractRowWithBackoff(exchange, sourceChunkId) {
-        const MAX_RETRIES = 4;
-        let attempt = 0;
-        while (attempt <= MAX_RETRIES) {
+        // FIX 3: Periodic WAL checkpoint to prevent unbounded growth
+        const now = Date.now();
+        if (now - this.#lastCheckpoint > this.#checkpointIntervalMs) {
             try {
-                await this.#extractFn(exchange, this.#userId, sourceChunkId);
-                return; // Success
+                this.#db.exec('PRAGMA wal_checkpoint(PASSIVE)');
+                this.#lastCheckpoint = now;
             }
-            catch (err) {
-                const errorMsg = err instanceof Error ? err.message : String(err);
-                const is429 = errorMsg.toLowerCase().includes('429') ||
-                    errorMsg.toLowerCase().includes('rate') ||
-                    errorMsg.toLowerCase().includes('quota');
-                if (is429 && attempt < MAX_RETRIES) {
-                    // Exponential backoff: 2s, 4s, 8s, 16s
-                    const backoffMs = this.#retryBackoffMs * Math.pow(2, attempt);
-                    await new Promise(resolve => setTimeout(resolve, backoffMs));
-                    attempt++;
-                }
-                else {
-                    // Not a 429, or max retries reached — extractFn already marked extract_status='failed'
-                    return;
-                }
+            catch (e) {
+                console.warn('[plumb] WAL checkpoint failed:', e);
             }
         }
+        return pendingRows.length;
     }
     /** Close the database connection. Call when done (e.g. in tests). */
     close() {