@comfanion/usethis_search 4.3.1 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,8 @@ import { GraphDB } from "./graph-db.ts";
19
19
  import { GraphBuilder, isStructuralPredicate } from "./graph-builder.ts";
20
20
  import { UsageTracker } from "./usage-tracker.ts";
21
21
  import { ChunkStore } from "./chunk-store.ts";
22
+ import { decomposeQuery, rrfMerge, DEFAULT_DECOMPOSER_CONFIG } from "./query-decomposer.ts";
23
+ import type { DecomposerConfig } from "./query-decomposer.ts";
22
24
 
23
25
  // Suppress transformers.js logs unless DEBUG is set
24
26
  const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
@@ -86,6 +88,9 @@ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
86
88
  let METRICS_ENABLED = false;
87
89
  let CACHE_ENABLED = true;
88
90
 
91
+ // ── Query decomposition config ───────────────────────────────────────────────
92
+ let DECOMPOSER_CONFIG: DecomposerConfig = { ...DEFAULT_DECOMPOSER_CONFIG };
93
+
89
94
  // ── Search defaults (exposed to tool layer) ──────────────────────────────────
90
95
  const DEFAULT_SEARCH_CONFIG = {
91
96
  freshen: false, // Don't freshen on every search — auto_index handles it
@@ -101,8 +106,8 @@ const DEFAULT_WORKSPACE_CONFIG = {
101
106
  maxFiles: 30, // Max number of files in workspace
102
107
  attachTopN: 5, // Top N search results to attach with full content
103
108
  attachRelatedPerFile: 3, // Max graph relations per main file
104
- minScoreMain: 0.65, // Min score for main files
105
- minScoreRelated: 0.5, // Min score for graph relations
109
+ minScoreMain: 0.65, // Min score for main files (override in vectorizer.yaml)
110
+ minScoreRelated: 0.5, // Min score for graph relations (override in vectorizer.yaml)
106
111
  persistContent: false, // Save full content in snapshots (debug mode)
107
112
  autoPruneSearch: true, // Replace old search outputs with compact summaries
108
113
  substituteToolOutputs: true, // Replace tool outputs when files in workspace
@@ -188,6 +193,13 @@ function defaultVectorizerYaml() {
188
193
  ` auto_prune_search: true # Replace old search outputs with compact summaries\n` +
189
194
  ` substitute_tool_outputs: true # Replace tool outputs when files in workspace\n` +
190
195
  `\n` +
196
+ ` # Query decomposition (v4 — improves long query relevance)\n` +
197
+ ` decomposition:\n` +
198
+ ` enabled: true # Split complex queries into focused sub-queries\n` +
199
+ ` min_words: 5 # Min significant words to trigger decomposition\n` +
200
+ ` max_sub_queries: 4 # Max sub-queries (including keyword core)\n` +
201
+ ` min_sub_query_words: 2 # Min words per sub-query\n` +
202
+ `\n` +
191
203
  ` # Quality monitoring\n` +
192
204
  ` quality:\n` +
193
205
  ` enable_metrics: false\n` +
@@ -370,6 +382,17 @@ async function loadConfig(projectRoot) {
370
382
  CACHE_ENABLED = parseBool(qs, "enable_cache", true);
371
383
  }
372
384
 
385
+ // ── Parse query decomposition config ────────────────────────────────────
386
+ const decomposerMatch = section.match(/^\s{2}decomposition:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
387
+ if (decomposerMatch) {
388
+ const ds = decomposerMatch[1];
389
+ DECOMPOSER_CONFIG.enabled = parseBool(ds, "enabled", DEFAULT_DECOMPOSER_CONFIG.enabled);
390
+ DECOMPOSER_CONFIG.minWords = parseNumber(ds, "min_words", DEFAULT_DECOMPOSER_CONFIG.minWords);
391
+ DECOMPOSER_CONFIG.maxSubQueries = parseNumber(ds, "max_sub_queries", DEFAULT_DECOMPOSER_CONFIG.maxSubQueries);
392
+ DECOMPOSER_CONFIG.minSubQueryWords = parseNumber(ds, "min_sub_query_words", DEFAULT_DECOMPOSER_CONFIG.minSubQueryWords);
393
+ if (DEBUG) console.log("[vectorizer] Decomposer config:", DECOMPOSER_CONFIG);
394
+ }
395
+
373
396
  // ── Parse graph config (v3) ──────────────────────────────────────────────
374
397
  const graphMatch = section.match(/^\s{2}graph:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|(?![\s\S]))/m);
375
398
  if (graphMatch) {
@@ -475,6 +498,54 @@ function clearQueryCache() {
475
498
  }
476
499
  }
477
500
 
501
+ // ── Shared ONNX model singleton ─────────────────────────────────────────────
502
+ // Model lives for the entire session — not tied to indexer pool TTL.
503
+ // Indexer eviction clears caches/DB but model stays loaded for fast search.
504
+ let _sharedModel: any = null
505
+ let _sharedModelPromise: Promise<any> | null = null
506
+
507
+ async function getSharedModel(retries = 3): Promise<any> {
508
+ if (_sharedModel) return _sharedModel
509
+
510
+ // Prevent concurrent loads — reuse in-flight promise
511
+ if (_sharedModelPromise) return _sharedModelPromise
512
+
513
+ _sharedModelPromise = (async () => {
514
+ let lastError: Error | null = null
515
+ for (let attempt = 1; attempt <= retries; attempt++) {
516
+ try {
517
+ if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}... (attempt ${attempt}/${retries})`)
518
+ _sharedModel = await pipeline("feature-extraction", EMBEDDING_MODEL, {
519
+ progress_callback: DEBUG ? undefined : null,
520
+ })
521
+ if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`)
522
+ _sharedModelPromise = null
523
+ return _sharedModel
524
+ } catch (error) {
525
+ lastError = error as Error
526
+ if (attempt < retries) {
527
+ const delay = attempt * 2000
528
+ if (DEBUG) console.log(`[vectorizer] Model load attempt ${attempt} failed: ${lastError.message}. Retrying in ${delay}ms...`)
529
+ await new Promise(r => setTimeout(r, delay))
530
+ }
531
+ }
532
+ }
533
+ _sharedModelPromise = null
534
+ throw new Error(`Model loading failed after ${retries} attempts: ${lastError?.message || "unknown"}`)
535
+ })()
536
+
537
+ return _sharedModelPromise
538
+ }
539
+
540
+ /** Dispose shared model — call only on process exit or explicit cleanup. */
541
+ async function disposeSharedModel(): Promise<void> {
542
+ if (_sharedModel) {
543
+ try { await _sharedModel.dispose() } catch { /* best effort */ }
544
+ _sharedModel = null
545
+ }
546
+ _sharedModelPromise = null
547
+ }
548
+
478
549
  class CodebaseIndexer {
479
550
  constructor(projectRoot, indexName = "code") {
480
551
  this.root = projectRoot;
@@ -543,21 +614,14 @@ class CodebaseIndexer {
543
614
 
544
615
  async loadModel() {
545
616
  if (!this.model) {
546
- try {
547
- if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
548
- this.model = await pipeline("feature-extraction", EMBEDDING_MODEL, {
549
- progress_callback: DEBUG ? undefined : null,
550
- });
551
- if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
552
- } catch (error) {
553
- this.model = null;
554
- throw new Error(`Model loading failed: ${error.message || error}`);
555
- }
617
+ this.model = await getSharedModel();
556
618
  }
557
619
  return this.model;
558
620
  }
559
621
 
560
622
  async unloadModel() {
623
+ // Drop reference to shared model — do NOT dispose it.
624
+ // Model singleton lives for the entire session.
561
625
  this.model = null;
562
626
  // Release BM25 data held in memory
563
627
  if (this.bm25) {
@@ -586,6 +650,14 @@ class CodebaseIndexer {
586
650
  try { await this.usageTracker.save(); } catch { /* best effort */ }
587
651
  this.usageTracker = null;
588
652
  }
653
+ // Close LanceDB connection (releases file handles)
654
+ if (this.db) {
655
+ try {
656
+ // LanceDB v0.x doesn't expose close() — drop reference to release
657
+ // Arrow/IPC file handles are released when Connection is GC'd
658
+ this.db = null;
659
+ } catch { /* best effort */ }
660
+ }
589
661
  this._chunkCache = null;
590
662
  clearQueryCache();
591
663
  if (global.gc) global.gc();
@@ -693,6 +765,8 @@ class CodebaseIndexer {
693
765
  return null; // unchanged
694
766
  }
695
767
 
768
+ if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [read]`);
769
+
696
770
  // Extract metadata
697
771
  const fileMeta = await extractFileMetadata(filePath, content);
698
772
  const archived = this.isArchived(relPath, content);
@@ -700,9 +774,13 @@ class CodebaseIndexer {
700
774
  // Clean content before chunking
701
775
  const cleaned = cleanContent(content, fileMeta.file_type, CLEANING_CONFIG);
702
776
 
777
+ if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [chunking]`);
778
+
703
779
  // Semantic chunking (async for LSP-based chunking)
704
780
  const chunks = await chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG, filePath, this.root);
705
781
 
782
+ if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [${chunks.length} chunks, building graph]`);
783
+
706
784
  // Assign chunk IDs
707
785
  const chunksWithIds = this.graphBuilder
708
786
  ? this.graphBuilder.assignChunkIds(relPath, chunks)
@@ -730,23 +808,39 @@ class CodebaseIndexer {
730
808
  }
731
809
 
732
810
  // Return prepared rows (without vector — Phase 2 fills it)
733
- const rows = chunksWithIds.map((chunk, i) => ({
734
- chunk_id: chunk.chunk_id,
735
- file: relPath,
736
- chunk_index: i,
737
- content: chunk.content,
738
- archived,
739
- file_type: fileMeta.file_type,
740
- language: fileMeta.language,
741
- last_modified: fileMeta.last_modified,
742
- file_size: fileMeta.file_size,
743
- heading_context: chunk.heading_context || "",
744
- function_name: chunk.function_name || "",
745
- class_name: chunk.class_name || "",
746
- tags: (fileMeta.tags || []).join(","),
747
- start_line: chunk.start_line ?? -1,
748
- end_line: chunk.end_line ?? -1,
749
- }));
811
+ const rows = chunksWithIds.map((chunk, i) => {
812
+ // Build metadata prefix for embedding enrichment.
813
+ // This helps the embedding model associate function/class names with the code,
814
+ // improving search relevance for queries like "ensureBudget" or "WorkspaceCache".
815
+ // The prefix is stored in a separate field — original content stays clean.
816
+ const metaParts: string[] = []
817
+ if (relPath) metaParts.push(`File: ${relPath}`)
818
+ if (chunk.class_name) metaParts.push(`Class: ${chunk.class_name}`)
819
+ if (chunk.function_name) metaParts.push(`Method: ${chunk.function_name}`)
820
+ if (chunk.heading_context) metaParts.push(`Section: ${chunk.heading_context}`)
821
+ const metaPrefix = metaParts.length > 0 ? `// ${metaParts.join(" | ")}\n` : ""
822
+
823
+ return {
824
+ chunk_id: chunk.chunk_id,
825
+ file: relPath,
826
+ chunk_index: i,
827
+ content: chunk.content,
828
+ // Enriched content for embedding — metadata prefix + original content.
829
+ // Phase 2 embeds this instead of raw content.
830
+ content_for_embedding: metaPrefix + chunk.content,
831
+ archived,
832
+ file_type: fileMeta.file_type,
833
+ language: fileMeta.language,
834
+ last_modified: fileMeta.last_modified,
835
+ file_size: fileMeta.file_size,
836
+ heading_context: chunk.heading_context || "",
837
+ function_name: chunk.function_name || "",
838
+ class_name: chunk.class_name || "",
839
+ tags: (fileMeta.tags || []).join(","),
840
+ start_line: chunk.start_line ?? -1,
841
+ end_line: chunk.end_line ?? -1,
842
+ }
843
+ });
750
844
 
751
845
  // Store chunks in ChunkStore (Phase 1 — BM25 available immediately)
752
846
  if (this.chunkStore) {
@@ -758,6 +852,7 @@ class CodebaseIndexer {
758
852
  }
759
853
  }
760
854
 
855
+ if (DEBUG) console.log(`[vectorizer] prepareFile: ${relPath} [done, ${rows.length} rows]`);
761
856
  return { relPath, hash, rows };
762
857
  }
763
858
 
@@ -782,19 +877,24 @@ class CodebaseIndexer {
782
877
 
783
878
  // Batch embed
784
879
  const allData = [];
880
+ let embedErrors = 0;
785
881
  for (let i = 0; i < allRows.length; i += batchSize) {
786
882
  const batch = allRows.slice(i, i + batchSize);
787
- const texts = batch.map(r => r.content);
788
883
 
789
- // Embed batch @xenova/transformers processes array inputs efficiently
790
- const embeddings = [];
791
- for (const text of texts) {
792
- const result = await model(text, { pooling: "mean", normalize: true });
793
- embeddings.push(Array.from(result.data));
794
- }
795
-
796
- for (let j = 0; j < batch.length; j++) {
797
- allData.push({ ...batch[j], vector: embeddings[j] });
884
+ for (const row of batch) {
885
+ try {
886
+ // Use enriched content (with metadata prefix) for embedding,
887
+ // but store original content in LanceDB for display.
888
+ const textToEmbed = row.content_for_embedding || row.content;
889
+ const result = await model(textToEmbed, { pooling: "mean", normalize: true });
890
+ // Don't store content_for_embedding in LanceDB — it's only for embedding
891
+ const { content_for_embedding, ...rowWithoutEmbeddingText } = row;
892
+ allData.push({ ...rowWithoutEmbeddingText, vector: Array.from(result.data) });
893
+ } catch (e) {
894
+ embedErrors++;
895
+ if (DEBUG) console.log(`[vectorizer] Embed failed for ${row.chunk_id || row.file}: ${(e as Error).message}`);
896
+ // Skip this chunk — don't let one bad chunk kill the entire index
897
+ }
798
898
  }
799
899
 
800
900
  if (onProgress) {
@@ -802,14 +902,25 @@ class CodebaseIndexer {
802
902
  }
803
903
  }
804
904
 
905
+ if (embedErrors > 0 && DEBUG) {
906
+ console.log(`[vectorizer] ${embedErrors} chunks failed to embed (skipped)`);
907
+ }
908
+
805
909
  // Bulk store in LanceDB
910
+ if (allData.length === 0) return 0;
911
+
806
912
  const tableName = "chunks";
807
- const tables = await this.db.tableNames();
808
- if (tables.includes(tableName)) {
809
- const table = await this.db.openTable(tableName);
810
- await table.add(allData);
811
- } else {
812
- await this.db.createTable(tableName, allData);
913
+ try {
914
+ const tables = await this.db.tableNames();
915
+ if (tables.includes(tableName)) {
916
+ const table = await this.db.openTable(tableName);
917
+ await table.add(allData);
918
+ } else {
919
+ await this.db.createTable(tableName, allData);
920
+ }
921
+ } catch (e) {
922
+ if (DEBUG) console.log(`[vectorizer] LanceDB store failed: ${(e as Error).message}`);
923
+ throw e; // Re-throw — caller (indexAll) will catch and log
813
924
  }
814
925
 
815
926
  // Update hashes + mark vectorized in ChunkStore
@@ -853,8 +964,8 @@ class CodebaseIndexer {
853
964
  // Clean content before chunking
854
965
  const cleaned = cleanContent(content, fileMeta.file_type, CLEANING_CONFIG);
855
966
 
856
- // Semantic chunking
857
- const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
967
+ // Semantic chunking (await for LSP-based chunking when filePath is provided)
968
+ const chunks = await chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG, filePath, this.root);
858
969
 
859
970
  // v3: Assign chunk IDs for graph tracking (works without graph — just adds IDs)
860
971
  const chunksWithIds = this.graphBuilder
@@ -893,12 +1004,22 @@ class CodebaseIndexer {
893
1004
 
894
1005
  const data = [];
895
1006
  for (let i = 0; i < chunksWithIds.length; i++) {
896
- const embedding = await this.embed(chunksWithIds[i].content);
1007
+ const chunk = chunksWithIds[i];
1008
+ // Build metadata prefix for embedding enrichment (same as prepareFile)
1009
+ const metaParts: string[] = [];
1010
+ if (relPath) metaParts.push(`File: ${relPath}`);
1011
+ if (chunk.class_name) metaParts.push(`Class: ${chunk.class_name}`);
1012
+ if (chunk.function_name) metaParts.push(`Method: ${chunk.function_name}`);
1013
+ if (chunk.heading_context) metaParts.push(`Section: ${chunk.heading_context}`);
1014
+ const metaPrefix = metaParts.length > 0 ? `// ${metaParts.join(" | ")}\n` : "";
1015
+ const textToEmbed = metaPrefix + chunk.content;
1016
+
1017
+ const embedding = await this.embed(textToEmbed);
897
1018
  data.push({
898
- chunk_id: chunksWithIds[i].chunk_id,
1019
+ chunk_id: chunk.chunk_id,
899
1020
  file: relPath,
900
1021
  chunk_index: i,
901
- content: chunksWithIds[i].content,
1022
+ content: chunk.content,
902
1023
  vector: embedding,
903
1024
  archived: archived,
904
1025
  // v2 metadata
@@ -906,13 +1027,13 @@ class CodebaseIndexer {
906
1027
  language: fileMeta.language,
907
1028
  last_modified: fileMeta.last_modified,
908
1029
  file_size: fileMeta.file_size,
909
- heading_context: chunksWithIds[i].heading_context || "",
910
- function_name: chunksWithIds[i].function_name || "",
911
- class_name: chunksWithIds[i].class_name || "",
1030
+ heading_context: chunk.heading_context || "",
1031
+ function_name: chunk.function_name || "",
1032
+ class_name: chunk.class_name || "",
912
1033
  tags: (fileMeta.tags || []).join(","),
913
1034
  // Line numbers for "from-to" extraction (default to -1 when unknown)
914
- start_line: chunksWithIds[i].start_line ?? -1,
915
- end_line: chunksWithIds[i].end_line ?? -1,
1035
+ start_line: chunk.start_line ?? -1,
1036
+ end_line: chunk.end_line ?? -1,
916
1037
  });
917
1038
  }
918
1039
 
@@ -1087,7 +1208,13 @@ class CodebaseIndexer {
1087
1208
  content: neighborChunk.content,
1088
1209
  relation: edge.predicate,
1089
1210
  score,
1090
- via: edge.source
1211
+ via: edge.source,
1212
+ start_line: neighborChunk.start_line,
1213
+ end_line: neighborChunk.end_line,
1214
+ chunk_index: neighborChunk.chunk_index,
1215
+ language: neighborChunk.language,
1216
+ function_name: neighborChunk.function_name,
1217
+ class_name: neighborChunk.class_name,
1091
1218
  });
1092
1219
  }
1093
1220
 
@@ -1121,9 +1248,9 @@ class CodebaseIndexer {
1121
1248
  }
1122
1249
  }
1123
1250
 
1124
- // ── Search (v3: hybrid + BM25-only fallback + metadata filters + metrics) ──
1251
+ // ── Single-query search (internal used by search() for each sub-query) ──
1125
1252
 
1126
- async search(query, limit = 5, includeArchived = false, options = {}) {
1253
+ async _searchSingle(query, limit = 5, includeArchived = false, options = {}) {
1127
1254
  const tableName = "chunks";
1128
1255
  const tables = await this.db.tableNames();
1129
1256
 
@@ -1178,14 +1305,9 @@ class CodebaseIndexer {
1178
1305
  }
1179
1306
  }
1180
1307
 
1181
- // Apply metadata filters then return (graph context added below)
1308
+ // Apply metadata filters then return
1182
1309
  results = this._applyMetadataFilters(results, includeArchived, options);
1183
- const finalResults = results.slice(0, limit);
1184
-
1185
- // Graph context expansion (same as vector path)
1186
- await this._expandGraphContext(finalResults, null, query);
1187
-
1188
- return finalResults;
1310
+ return results.slice(0, limit);
1189
1311
  }
1190
1312
 
1191
1313
  // ── Vector search (Phase 2 complete) ─────────────────────────────────────
@@ -1280,7 +1402,51 @@ class CodebaseIndexer {
1280
1402
 
1281
1403
  // ── Metadata filters ──────────────────────────────────────────────────
1282
1404
  results = this._applyMetadataFilters(results, includeArchived, options);
1283
- const finalResults = results.slice(0, limit);
1405
+ return results.slice(0, limit);
1406
+ }
1407
+
1408
+ // ── Search (v4: query decomposition + RRF merge + hybrid + metrics) ────────
1409
+
1410
+ async search(query, limit = 5, includeArchived = false, options = {}) {
1411
+ // ── Query decomposition ──────────────────────────────────────────────────
1412
+ const decomposition = decomposeQuery(query, DECOMPOSER_CONFIG);
1413
+
1414
+ let finalResults;
1415
+
1416
+ if (decomposition.decomposed && decomposition.subQueries.length > 1) {
1417
+ if (DEBUG) {
1418
+ console.log(`[vectorizer] Query decomposed (${decomposition.strategy}): ${decomposition.subQueries.length} sub-queries`);
1419
+ for (const sq of decomposition.subQueries) {
1420
+ console.log(` → "${sq}"`);
1421
+ }
1422
+ }
1423
+
1424
+ // Run each sub-query independently, over-fetch to give RRF more signal
1425
+ const perQueryLimit = Math.max(limit * 2, 20);
1426
+ const resultSets = [];
1427
+
1428
+ for (const subQuery of decomposition.subQueries) {
1429
+ const results = await this._searchSingle(subQuery, perQueryLimit, includeArchived, options);
1430
+ if (results.length > 0) {
1431
+ resultSets.push(results);
1432
+ }
1433
+ }
1434
+
1435
+ if (resultSets.length === 0) {
1436
+ finalResults = [];
1437
+ } else if (resultSets.length === 1) {
1438
+ finalResults = resultSets[0].slice(0, limit);
1439
+ } else {
1440
+ // RRF merge across sub-query result sets
1441
+ finalResults = rrfMerge(resultSets, 60, limit);
1442
+ if (DEBUG) {
1443
+ console.log(`[vectorizer] RRF merged ${resultSets.length} result sets → ${finalResults.length} results`);
1444
+ }
1445
+ }
1446
+ } else {
1447
+ // Short/simple query — single search (no decomposition overhead)
1448
+ finalResults = await this._searchSingle(query, limit, includeArchived, options);
1449
+ }
1284
1450
 
1285
1451
  // ── Metrics tracking ────────────────────────────────────────────────────
1286
1452
  if (METRICS_ENABLED) {
@@ -1304,75 +1470,107 @@ class CodebaseIndexer {
1304
1470
  }
1305
1471
 
1306
1472
  // ── Graph context expansion (v3) ───────────────────────────────────────
1473
+ // Use original query for graph expansion (most complete context)
1474
+ const queryEmbedding = finalResults.length > 0 ? await this.embedQuery(query).catch(() => null) : null;
1307
1475
  await this._expandGraphContext(finalResults, queryEmbedding, query);
1308
1476
 
1309
1477
  return finalResults;
1310
1478
  }
1311
1479
 
1312
1480
  async findChunkById(chunkId) {
1313
- // Lazy-build an in-memory Map keyed by chunk_id on first call.
1314
- // The cache lives until unloadModel() clears it.
1481
+ // LRU cache with bounded size avoids loading all 100K rows into memory.
1482
+ // Point lookups: ChunkStore (SQLite) first, LanceDB fallback for vectors.
1315
1483
  if (!this._chunkCache) {
1316
1484
  this._chunkCache = new Map();
1485
+ }
1317
1486
 
1318
- // Primary: LanceDB (has vectors for cosine similarity in graph expansion)
1319
- const tableName = "chunks";
1320
- const tables = await this.db.tableNames();
1321
- if (tables.includes(tableName)) {
1322
- try {
1487
+ // Check LRU cache first
1488
+ const cached = this._chunkCache.get(chunkId);
1489
+ if (cached) {
1490
+ // Move to end (most recently used) — delete & re-insert
1491
+ this._chunkCache.delete(chunkId);
1492
+ this._chunkCache.set(chunkId, cached);
1493
+ return cached;
1494
+ }
1495
+
1496
+ // Point lookup: ChunkStore (SQLite — fast, no vectors)
1497
+ let chunk = null;
1498
+ if (this.chunkStore) {
1499
+ try {
1500
+ chunk = this.chunkStore.getChunkById(chunkId);
1501
+ } catch {
1502
+ // best effort
1503
+ }
1504
+ }
1505
+
1506
+ // If we need vectors (for cosine similarity in graph expansion),
1507
+ // try LanceDB point lookup. Only if ChunkStore had no result or we need vectors.
1508
+ if (!chunk) {
1509
+ try {
1510
+ const tableName = "chunks";
1511
+ const tables = await this.db.tableNames();
1512
+ if (tables.includes(tableName)) {
1323
1513
  const table = await this.db.openTable(tableName);
1324
- const rows = await table.filter("true").limit(100000).execute();
1325
- for (const row of rows) {
1326
- if (row.chunk_id) {
1327
- this._chunkCache.set(row.chunk_id, row);
1328
- }
1514
+ const rows = await table.filter(`chunk_id = '${chunkId.replace(/'/g, "''")}'`).limit(1).execute();
1515
+ if (rows.length > 0) {
1516
+ chunk = rows[0];
1329
1517
  }
1330
- } catch (e) {
1331
- if (DEBUG) console.log("[vectorizer] Chunk cache from LanceDB failed:", e.message);
1332
1518
  }
1519
+ } catch (e) {
1520
+ if (DEBUG) console.log("[vectorizer] LanceDB point lookup failed:", e.message);
1333
1521
  }
1522
+ }
1334
1523
 
1335
- // Fallback: ChunkStore (no vectors, but has content for BM25-only mode)
1336
- if (this._chunkCache.size === 0 && this.chunkStore) {
1337
- try {
1338
- const allChunks = this.chunkStore.getAllChunks();
1339
- for (const chunk of allChunks) {
1340
- if (chunk.chunk_id) {
1341
- this._chunkCache.set(chunk.chunk_id, chunk);
1342
- }
1343
- }
1344
- if (DEBUG && allChunks.length > 0) {
1345
- console.log(`[vectorizer] Chunk cache from ChunkStore (${allChunks.length} chunks, no vectors)`);
1346
- }
1347
- } catch (e) {
1348
- if (DEBUG) console.log("[vectorizer] Chunk cache from ChunkStore failed:", e.message);
1524
+ if (chunk) {
1525
+ // LRU eviction: cap at 500 entries (each ~2KB without vector, ~4KB with)
1526
+ const MAX_CHUNK_CACHE = 500;
1527
+ if (this._chunkCache.size >= MAX_CHUNK_CACHE) {
1528
+ const oldest = this._chunkCache.keys().next().value;
1529
+ if (oldest !== undefined) {
1530
+ this._chunkCache.delete(oldest);
1349
1531
  }
1350
1532
  }
1533
+ this._chunkCache.set(chunkId, chunk);
1351
1534
  }
1352
- return this._chunkCache.get(chunkId) || null;
1535
+
1536
+ return chunk || null;
1353
1537
  }
1354
1538
 
1355
1539
  /**
1356
1540
  * Find all chunks belonging to a specific file path.
1541
+ * Uses ChunkStore (SQLite) for efficient file-level queries — no full cache load.
1357
1542
  * @param {string} filePath - Relative file path (e.g. "src/auth.ts")
1358
1543
  * @returns {Promise<Array>} Array of chunks from this file
1359
1544
  */
1360
1545
  async findChunksByPath(filePath) {
1361
- // Ensure chunk cache is loaded
1362
- await this.findChunkById("__force_cache_load__");
1363
-
1364
- if (!this._chunkCache) return [];
1365
-
1366
- const chunks = [];
1367
- for (const chunk of this._chunkCache.values()) {
1368
- if (chunk.file === filePath) {
1369
- chunks.push(chunk);
1546
+ // Primary: ChunkStore has an index on file column — fast
1547
+ if (this.chunkStore) {
1548
+ try {
1549
+ const chunks = this.chunkStore.getChunksByFile(filePath);
1550
+ if (chunks.length > 0) {
1551
+ chunks.sort((a, b) => (a.chunk_index || 0) - (b.chunk_index || 0));
1552
+ return chunks;
1553
+ }
1554
+ } catch {
1555
+ // fallback below
1370
1556
  }
1371
1557
  }
1372
-
1373
- // Sort by chunk_index
1374
- chunks.sort((a, b) => (a.chunk_index || 0) - (b.chunk_index || 0));
1375
- return chunks;
1558
+
1559
+ // Fallback: LanceDB filter by file
1560
+ try {
1561
+ const tableName = "chunks";
1562
+ const tables = await this.db.tableNames();
1563
+ if (tables.includes(tableName)) {
1564
+ const table = await this.db.openTable(tableName);
1565
+ const rows = await table.filter(`file = '${filePath.replace(/'/g, "''")}'`).limit(1000).execute();
1566
+ rows.sort((a, b) => (a.chunk_index || 0) - (b.chunk_index || 0));
1567
+ return rows;
1568
+ }
1569
+ } catch (e) {
1570
+ if (DEBUG) console.log("[vectorizer] findChunksByPath LanceDB failed:", e.message);
1571
+ }
1572
+
1573
+ return [];
1376
1574
  }
1377
1575
 
1378
1576
  cosineSimilarity(vecA, vecB) {
@@ -1384,7 +1582,8 @@ class CodebaseIndexer {
1384
1582
  normA += vecA[i] * vecA[i];
1385
1583
  normB += vecB[i] * vecB[i];
1386
1584
  }
1387
- return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
1585
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
1586
+ return denom === 0 ? 0 : dotProduct / denom;
1388
1587
  }
1389
1588
 
1390
1589
  async checkHealth(extraIgnore = []) {
@@ -1547,22 +1746,33 @@ class CodebaseIndexer {
1547
1746
  const total = files.length;
1548
1747
  const CONCURRENCY = 5;
1549
1748
 
1749
+ // Helper: write to indexer.log (always, not just DEBUG)
1750
+ const logPath = path.join(this.root, ".opencode", "indexer.log");
1751
+ const fsSync = await import("fs");
1752
+ const logToFile = (msg: string) => {
1753
+ const ts = new Date().toISOString().slice(11, 19);
1754
+ try { fsSync.appendFileSync(logPath, `${ts} ${msg}\n`); } catch { /* non-fatal */ }
1755
+ };
1756
+
1550
1757
  // ══════════════════════════════════════════════════════════════════════════
1551
- // Phase 1: Prepare files in parallel (chunk + graph, no embedding)
1758
+ // Phase 1: Prepare files sequentially (chunk + graph, no embedding)
1552
1759
  // ══════════════════════════════════════════════════════════════════════════
1760
+ logToFile(`Phase 1: preparing ${total} files (concurrency=${CONCURRENCY})`);
1761
+
1553
1762
  const preparedFiles = [];
1554
1763
  let prepared = 0;
1555
1764
  let skipped = 0;
1765
+ let errors = 0;
1556
1766
 
1557
- // Process in batches of CONCURRENCY
1558
1767
  for (let i = 0; i < files.length; i += CONCURRENCY) {
1559
1768
  const batch = files.slice(i, i + CONCURRENCY);
1560
1769
  const promises = batch.map(async (relPath) => {
1561
1770
  const filePath = path.join(this.root, relPath);
1562
1771
  try {
1563
- const result = await this.prepareFile(filePath);
1564
- return result;
1565
- } catch {
1772
+ return await this.prepareFile(filePath);
1773
+ } catch (e) {
1774
+ logToFile(`ERROR prepare ${relPath}: ${(e as Error).message}`);
1775
+ errors++;
1566
1776
  return null;
1567
1777
  }
1568
1778
  });
@@ -1579,7 +1789,7 @@ class CodebaseIndexer {
1579
1789
  }
1580
1790
  }
1581
1791
 
1582
- if (DEBUG) console.log(`[vectorizer] Phase 1 done: ${prepared} files prepared, ${skipped} skipped`);
1792
+ logToFile(`Phase 1 done: ${prepared} prepared, ${skipped} skipped, ${errors} errors`);
1583
1793
 
1584
1794
  // ══════════════════════════════════════════════════════════════════════════
1585
1795
  // Phase 2: Batch embed + store (sequential, batch forward pass)
@@ -1587,19 +1797,24 @@ class CodebaseIndexer {
1587
1797
  let chunksEmbedded = 0;
1588
1798
  if (preparedFiles.length > 0) {
1589
1799
  const totalChunks = preparedFiles.reduce((sum, pf) => sum + pf.rows.length, 0);
1590
- if (DEBUG) console.log(`[vectorizer] Phase 2: embedding ${totalChunks} chunks from ${preparedFiles.length} files`);
1800
+ logToFile(`Phase 2: embedding ${totalChunks} chunks from ${preparedFiles.length} files`);
1591
1801
 
1592
- chunksEmbedded = await this.embedAndStore(preparedFiles, 32, (done, embedTotal, phase) => {
1593
- if (onProgress) onProgress(done, embedTotal, `embedding`, done, "embed");
1594
- });
1595
-
1596
- if (DEBUG) console.log(`[vectorizer] Phase 2 done: ${chunksEmbedded} chunks embedded and stored`);
1802
+ try {
1803
+ chunksEmbedded = await this.embedAndStore(preparedFiles, 32, (done, embedTotal, phase) => {
1804
+ if (onProgress) onProgress(done, embedTotal, `embedding`, done, "embed");
1805
+ });
1806
+ logToFile(`Phase 2 done: ${chunksEmbedded} chunks embedded and stored`);
1807
+ } catch (e) {
1808
+ logToFile(`Phase 2 FAILED: ${(e as Error).message}\n${(e as Error).stack || ""}`);
1809
+ throw e;
1810
+ }
1811
+ } else {
1812
+ logToFile(`Phase 2 skipped: no prepared files`);
1597
1813
  }
1598
1814
 
1599
- const indexed = prepared; // file count for backward compat
1815
+ const indexed = prepared;
1600
1816
 
1601
1817
  // FR-005: Build semantic similarity edges as post-pass
1602
- // Disabled by default (O(n²) — slow on large repos). Enable via graph.semantic_edges: true
1603
1818
  let semanticEdges = 0;
1604
1819
  if (chunksEmbedded > 0 && this.graphBuilder && this.graphDB && GRAPH_CONFIG.semantic_edges) {
1605
1820
  try {
@@ -1612,30 +1827,25 @@ class CodebaseIndexer {
1612
1827
  .filter(r => r.chunk_id && r.vector)
1613
1828
  .map(r => ({ chunk_id: r.chunk_id, vector: Array.from(r.vector), file: r.file }));
1614
1829
 
1615
- // Skip if too many chunks — O(n²) becomes prohibitive
1616
1830
  const maxChunks = GRAPH_CONFIG.semantic_edges_max_chunks ?? 500;
1617
1831
  if (chunkData.length > maxChunks) {
1618
- if (DEBUG) console.log(`[vectorizer] Skipping semantic edges: ${chunkData.length} chunks > max ${maxChunks}`);
1832
+ logToFile(`Semantic edges skipped: ${chunkData.length} chunks > max ${maxChunks}`);
1619
1833
  } else {
1620
1834
  semanticEdges = await this.graphBuilder.buildSemanticEdges(chunkData, 0.8, 3);
1621
- if (DEBUG) console.log(`[vectorizer] Built ${semanticEdges} semantic similarity edges`);
1835
+ logToFile(`Semantic edges: ${semanticEdges} built`);
1622
1836
  }
1623
1837
  }
1624
1838
  } catch (e) {
1625
- if (DEBUG) console.log(`[vectorizer] Semantic edge building failed:`, e.message);
1626
- // non-fatal — explicit edges still work
1839
+ logToFile(`Semantic edges FAILED: ${(e as Error).message}`);
1627
1840
  }
1628
1841
  }
1629
1842
 
1630
- // Cleanup: shutdown LSP to prevent memory leaks after bulk indexing
1843
+ // Cleanup LSP
1631
1844
  if (this.graphBuilder) {
1632
- try {
1633
- await this.graphBuilder.cleanup();
1634
- } catch {
1635
- // Best effort — continue even if cleanup fails
1636
- }
1845
+ try { await this.graphBuilder.cleanup(); } catch { /* best effort */ }
1637
1846
  }
1638
1847
 
1848
+ logToFile(`indexAll complete: ${indexed} indexed, ${chunksEmbedded} embedded, ${semanticEdges} semantic edges`);
1639
1849
  return { indexed, skipped, total, semanticEdges };
1640
1850
  }
1641
1851
 
@@ -1699,9 +1909,13 @@ class CodebaseIndexer {
1699
1909
  if (entry.isDirectory() && entry.name !== "lancedb") {
1700
1910
  try {
1701
1911
  const indexer = await new CodebaseIndexer(this.root, entry.name).init();
1702
- const stat = await indexer.getStats();
1703
- if (stat.fileCount > 0 || stat.chunkCount > 0) {
1704
- stats.push(stat);
1912
+ try {
1913
+ const stat = await indexer.getStats();
1914
+ if (stat.fileCount > 0 || stat.chunkCount > 0) {
1915
+ stats.push(stat);
1916
+ }
1917
+ } finally {
1918
+ await indexer.unloadModel();
1705
1919
  }
1706
1920
  } catch {}
1707
1921
  }
@@ -1711,20 +1925,38 @@ class CodebaseIndexer {
1711
1925
  }
1712
1926
 
1713
1927
  async clear() {
1928
+ // Close open DB handles before deleting their files
1929
+ if (this.chunkStore) { try { this.chunkStore.close(); } catch { /* best effort */ } this.chunkStore = null; }
1930
+ if (this.graphBuilder) { try { await this.graphBuilder.cleanup(); } catch { /* best effort */ } this.graphBuilder = null; }
1931
+ if (this.graphDB) { try { await this.graphDB.close(); } catch { /* best effort */ } this.graphDB = null; }
1932
+ if (this.usageTracker) { try { await this.usageTracker.save(); } catch { /* best effort */ } this.usageTracker = null; }
1933
+ this.db = null;
1934
+
1714
1935
  await fs.rm(this.cacheDir, { recursive: true, force: true });
1715
1936
  this.hashes = {};
1716
1937
  if (this.bm25) { this.bm25.clear(); this.bm25 = null; }
1717
1938
  this._bm25Rows = null;
1939
+ this._chunkCache = null;
1718
1940
  this.metrics = null;
1941
+ this.model = null;
1719
1942
  await this.init();
1720
1943
  }
1721
1944
 
1722
1945
  async clearAll() {
1946
+ // Close open DB handles before deleting their files
1947
+ if (this.chunkStore) { try { this.chunkStore.close(); } catch { /* best effort */ } this.chunkStore = null; }
1948
+ if (this.graphBuilder) { try { await this.graphBuilder.cleanup(); } catch { /* best effort */ } this.graphBuilder = null; }
1949
+ if (this.graphDB) { try { await this.graphDB.close(); } catch { /* best effort */ } this.graphDB = null; }
1950
+ if (this.usageTracker) { try { await this.usageTracker.save(); } catch { /* best effort */ } this.usageTracker = null; }
1951
+ this.db = null;
1952
+
1723
1953
  await fs.rm(this.baseDir, { recursive: true, force: true });
1724
1954
  this.hashes = {};
1725
1955
  if (this.bm25) { this.bm25.clear(); this.bm25 = null; }
1726
1956
  this._bm25Rows = null;
1957
+ this._chunkCache = null;
1727
1958
  this.metrics = null;
1959
+ this.model = null;
1728
1960
  clearQueryCache();
1729
1961
  await this.init();
1730
1962
  }
@@ -1768,7 +2000,9 @@ function getWorkspaceConfig() {
1768
2000
// ── Singleton indexer pool ──────────────────────────────────────────────────
// Prevents LevelDB lock conflicts when parallel searches hit the same index.
// Each unique (projectRoot, indexName) gets one shared CodebaseIndexer.
// TTL eviction: idle indexers are unloaded after POOL_IDLE_TTL_MS to free memory.
const POOL_IDLE_TTL_MS = 5 * 60 * 1000; // 5 minutes idle → unload model + caches
// Entry shape: refCount counts live borrowers; initPromise resolves to the
// ready indexer; idleTimer is present only while an idle eviction is pending.
const _indexerPool = new Map<string, { indexer: CodebaseIndexer; refCount: number; initPromise: Promise<CodebaseIndexer>; idleTimer?: ReturnType<typeof setTimeout> }>();
1772
2006
 
1773
2007
  /**
1774
2008
  * Get or create a shared CodebaseIndexer for the given project + index.
@@ -1787,6 +2021,11 @@ async function getIndexer(projectRoot: string, indexName: string = "code"): Prom
1787
2021
  const existing = _indexerPool.get(key);
1788
2022
  if (existing) {
1789
2023
  existing.refCount++;
2024
+ // Cancel pending idle eviction — someone is using it again
2025
+ if (existing.idleTimer) {
2026
+ clearTimeout(existing.idleTimer);
2027
+ existing.idleTimer = undefined;
2028
+ }
1790
2029
  return existing.initPromise;
1791
2030
  }
1792
2031
 
@@ -1798,15 +2037,35 @@ async function getIndexer(projectRoot: string, indexName: string = "code"): Prom
1798
2037
 
1799
2038
  /**
1800
2039
  * Release a reference to a shared indexer. When refCount reaches 0,
1801
- * the indexer is kept alive (for future reuse) but model memory is freed.
1802
- * Call destroyIndexer() to fully close and remove from pool.
2040
+ * starts a TTL timer. If no one reuses within POOL_IDLE_TTL_MS,
2041
+ * the indexer is fully unloaded (model disposed, caches freed, DB closed).
1803
2042
  */
1804
2043
  function releaseIndexer(projectRoot: string, indexName: string = "code") {
1805
2044
  const key = `${projectRoot}::${indexName}`;
1806
2045
  const entry = _indexerPool.get(key);
1807
2046
  if (!entry) return;
1808
2047
  entry.refCount = Math.max(0, entry.refCount - 1);
1809
- // Keep in pool — don't unload. Next search reuses the same instance.
2048
+
2049
+ if (entry.refCount === 0) {
2050
+ // Start idle eviction timer — free memory if not reused soon
2051
+ if (entry.idleTimer) clearTimeout(entry.idleTimer);
2052
+ entry.idleTimer = setTimeout(async () => {
2053
+ // Double-check: still idle?
2054
+ const current = _indexerPool.get(key);
2055
+ if (!current || current.refCount > 0) return;
2056
+ _indexerPool.delete(key);
2057
+ try {
2058
+ const indexer = await current.initPromise;
2059
+ await indexer.unloadModel();
2060
+ } catch {
2061
+ // best effort
2062
+ }
2063
+ }, POOL_IDLE_TTL_MS);
2064
+ // Don't keep process alive for idle eviction
2065
+ if (entry.idleTimer && typeof entry.idleTimer === "object" && "unref" in entry.idleTimer) {
2066
+ (entry.idleTimer as NodeJS.Timeout).unref();
2067
+ }
2068
+ }
1810
2069
  }
1811
2070
 
1812
2071
  /**
@@ -1817,6 +2076,7 @@ async function destroyIndexer(projectRoot: string, indexName: string = "code") {
1817
2076
  const key = `${projectRoot}::${indexName}`;
1818
2077
  const entry = _indexerPool.get(key);
1819
2078
  if (!entry) return;
2079
+ if (entry.idleTimer) clearTimeout(entry.idleTimer);
1820
2080
  _indexerPool.delete(key);
1821
2081
  try {
1822
2082
  const indexer = await entry.initPromise;
@@ -1826,4 +2086,8 @@ async function destroyIndexer(projectRoot: string, indexName: string = "code") {
1826
2086
  }
1827
2087
  }
1828
2088
 
1829
- export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getWorkspaceConfig, getIndexer, releaseIndexer, destroyIndexer };
2089
/**
 * Accessor for the active query-decomposition settings (initialised from
 * DEFAULT_DECOMPOSER_CONFIG). Returns the live config object by reference,
 * not a copy.
 */
function getDecomposerConfig() {
  return DECOMPOSER_CONFIG;
}
2092
+
2093
+ export { CodebaseIndexer, INDEX_PRESETS, getEmbeddingModel, getSearchConfig, getWorkspaceConfig, getDecomposerConfig, getIndexer, releaseIndexer, destroyIndexer, disposeSharedModel };