@comfanion/usethis_search 0.1.4 → 0.2.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
/**
 * Hybrid Search — merges vector similarity and BM25 keyword scores.
 *
 * Merges results from two search backends using a weighted linear
 * combination (BM25 scores are min-max normalized before mixing).
 */
7
+
8
+ // ── Types ───────────────────────────────────────────────────────────────────
9
+
10
+ export interface HybridSearchConfig {
11
+ enabled: boolean
12
+ bm25_weight: number // 0.0–1.0, vector_weight = 1 - bm25_weight
13
+ }
14
+
15
+ export const DEFAULT_HYBRID_CONFIG: HybridSearchConfig = {
16
+ enabled: false,
17
+ bm25_weight: 0.3,
18
+ }
19
+
20
+ export interface ScoredResult {
21
+ id: number // index into the results array
22
+ vectorScore: number // 0–1 (1 = best)
23
+ bm25Score: number // raw BM25 score (unnormalized)
24
+ combinedScore: number
25
+ }
26
+
27
+ // ── Merge logic ─────────────────────────────────────────────────────────────
28
+
29
+ /**
30
+ * Normalize BM25 scores to 0–1 range using min-max scaling.
31
+ */
32
+ function normalizeBM25Scores(scores: Map<number, number>): Map<number, number> {
33
+ if (scores.size === 0) return scores
34
+
35
+ let min = Infinity
36
+ let max = -Infinity
37
+ for (const s of scores.values()) {
38
+ if (s < min) min = s
39
+ if (s > max) max = s
40
+ }
41
+
42
+ const range = max - min
43
+ if (range === 0) {
44
+ // All same score → normalize to 0.5
45
+ const result = new Map<number, number>()
46
+ for (const [id] of scores) result.set(id, 0.5)
47
+ return result
48
+ }
49
+
50
+ const result = new Map<number, number>()
51
+ for (const [id, score] of scores) {
52
+ result.set(id, (score - min) / range)
53
+ }
54
+ return result
55
+ }
56
+
57
+ /**
58
+ * Merge vector and BM25 results using weighted linear combination.
59
+ *
60
+ * @param vectorResults Map of chunkIndex → vectorScore (0–1, higher = better)
61
+ * @param bm25Results Map of chunkIndex → raw BM25 score
62
+ * @param config Hybrid search config (weights)
63
+ * @param limit Max results to return
64
+ */
65
+ export function mergeResults(
66
+ vectorResults: Map<number, number>,
67
+ bm25Results: Map<number, number>,
68
+ config: HybridSearchConfig = DEFAULT_HYBRID_CONFIG,
69
+ limit: number = 10,
70
+ ): ScoredResult[] {
71
+ const vectorWeight = 1 - config.bm25_weight
72
+ const bm25Weight = config.bm25_weight
73
+
74
+ const normalizedBM25 = normalizeBM25Scores(bm25Results)
75
+
76
+ // Collect all unique IDs
77
+ const allIds = new Set<number>()
78
+ for (const id of vectorResults.keys()) allIds.add(id)
79
+ for (const id of normalizedBM25.keys()) allIds.add(id)
80
+
81
+ const results: ScoredResult[] = []
82
+
83
+ for (const id of allIds) {
84
+ const vs = vectorResults.get(id) ?? 0
85
+ const bs = normalizedBM25.get(id) ?? 0
86
+
87
+ results.push({
88
+ id,
89
+ vectorScore: vs,
90
+ bm25Score: bm25Results.get(id) ?? 0,
91
+ combinedScore: vectorWeight * vs + bm25Weight * bs,
92
+ })
93
+ }
94
+
95
+ results.sort((a, b) => b.combinedScore - a.combinedScore)
96
+ return results.slice(0, limit)
97
+ }
@@ -1,4 +1,5 @@
1
1
  // OpenCode Vectorizer - Semantic Code Search with Multi-Index Support
2
+ // v2: Content cleaning, semantic chunking, hybrid search, metadata, cache, metrics
2
3
 
3
4
  import { pipeline, env } from "@xenova/transformers";
4
5
  import * as lancedb from "vectordb";
@@ -6,6 +7,15 @@ import fs from "fs/promises";
6
7
  import path from "path";
7
8
  import crypto from "crypto";
8
9
 
10
+ // ── New modules ─────────────────────────────────────────────────────────────
11
+ import { cleanContent, DEFAULT_CLEANING_CONFIG } from "./content-cleaner.ts";
12
+ import { extractFileMetadata, detectFileType, detectLanguage } from "./metadata-extractor.ts";
13
+ import { chunkContent, DEFAULT_CHUNKING_CONFIG } from "./chunkers/chunker-factory.ts";
14
+ import { BM25Index } from "./bm25-index.ts";
15
+ import { mergeResults, DEFAULT_HYBRID_CONFIG } from "./hybrid-search.ts";
16
+ import { QueryCache, DEFAULT_CACHE_CONFIG } from "./query-cache.ts";
17
+ import { SearchMetrics } from "./search-metrics.ts";
18
+
9
19
  // Suppress transformers.js logs unless DEBUG is set
10
20
  const DEBUG = process.env.DEBUG?.includes("vectorizer") || process.env.DEBUG === "*";
11
21
  if (!DEBUG) {
@@ -57,6 +67,13 @@ let GLOBAL_IGNORE = [];
57
67
  // Default embedding model (fast). Can be overridden by config.
58
68
  let EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
59
69
 
70
+ // ── Extended config parsed from YAML ────────────────────────────────────────
71
+ let CLEANING_CONFIG = { ...DEFAULT_CLEANING_CONFIG };
72
+ let CHUNKING_CONFIG = { ...DEFAULT_CHUNKING_CONFIG };
73
+ let HYBRID_CONFIG = { ...DEFAULT_HYBRID_CONFIG };
74
+ let METRICS_ENABLED = false;
75
+ let CACHE_ENABLED = true;
76
+
60
77
  function defaultVectorizerYaml() {
61
78
  return (
62
79
  `vectorizer:\n` +
@@ -64,6 +81,40 @@ function defaultVectorizerYaml() {
64
81
  ` auto_index: true\n` +
65
82
  ` model: \"${EMBEDDING_MODEL}\"\n` +
66
83
  ` debounce_ms: 1000\n` +
84
+ `\n` +
85
+ ` # Content cleaning before chunking\n` +
86
+ ` cleaning:\n` +
87
+ ` remove_toc: true\n` +
88
+ ` remove_frontmatter_metadata: false\n` +
89
+ ` remove_imports: false\n` +
90
+ ` remove_comments: false\n` +
91
+ `\n` +
92
+ ` # Chunking strategy\n` +
93
+ ` chunking:\n` +
94
+ ` strategy: \"semantic\" # fixed | semantic\n` +
95
+ ` markdown:\n` +
96
+ ` split_by_headings: true\n` +
97
+ ` min_chunk_size: 200\n` +
98
+ ` max_chunk_size: 2000\n` +
99
+ ` preserve_heading_hierarchy: true\n` +
100
+ ` code:\n` +
101
+ ` split_by_functions: true\n` +
102
+ ` include_function_signature: true\n` +
103
+ ` min_chunk_size: 300\n` +
104
+ ` max_chunk_size: 1500\n` +
105
+ ` fixed:\n` +
106
+ ` max_chars: 1500\n` +
107
+ `\n` +
108
+ ` # Hybrid search (vector + BM25)\n` +
109
+ ` search:\n` +
110
+ ` hybrid: false\n` +
111
+ ` bm25_weight: 0.3\n` +
112
+ `\n` +
113
+ ` # Quality monitoring\n` +
114
+ ` quality:\n` +
115
+ ` enable_metrics: false\n` +
116
+ ` enable_cache: true\n` +
117
+ `\n` +
67
118
  ` indexes:\n` +
68
119
  ` code:\n` +
69
120
  ` enabled: true\n` +
@@ -104,8 +155,25 @@ async function ensureDefaultConfig(projectRoot) {
104
155
  }
105
156
  }
106
157
 
158
+ // ── YAML mini-parser helpers ────────────────────────────────────────────────
159
+
160
+ function parseBool(section, key, fallback) {
161
+ const m = section.match(new RegExp(`^\\s+${key}:\\s*(true|false)`, "m"));
162
+ return m ? m[1] === "true" : fallback;
163
+ }
164
+
165
+ function parseNumber(section, key, fallback) {
166
+ const m = section.match(new RegExp(`^\\s+${key}:\\s*(\\d+(?:\\.\\d+)?)`, "m"));
167
+ return m ? parseFloat(m[1]) : fallback;
168
+ }
169
+
170
+ function parseString(section, key, fallback) {
171
+ const m = section.match(new RegExp(`^\\s+${key}:\\s*["']?([^"'\\n]+)["']?`, "m"));
172
+ return m ? m[1].trim() : fallback;
173
+ }
174
+
107
175
  /**
108
- * Load index configuration from .opencode/vectorizer.yaml (preferred) or .opencode/config.yaml.
176
+ * Load index configuration from .opencode/vectorizer.yaml.
109
177
  */
110
178
  async function loadConfig(projectRoot) {
111
179
  try {
@@ -142,6 +210,61 @@ async function loadConfig(projectRoot) {
142
210
  if (DEBUG) console.log("[vectorizer] Using model from config:", EMBEDDING_MODEL);
143
211
  }
144
212
 
213
+ // ── Parse cleaning config ───────────────────────────────────────────────
214
+ const cleaningMatch = section.match(/^\s{2}cleaning:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
215
+ if (cleaningMatch) {
216
+ const cs = cleaningMatch[1];
217
+ CLEANING_CONFIG = {
218
+ remove_toc: parseBool(cs, "remove_toc", true),
219
+ remove_frontmatter_metadata: parseBool(cs, "remove_frontmatter_metadata", false),
220
+ remove_imports: parseBool(cs, "remove_imports", false),
221
+ remove_comments: parseBool(cs, "remove_comments", false),
222
+ };
223
+ }
224
+
225
+ // ── Parse chunking config ───────────────────────────────────────────────
226
+ const chunkingMatch = section.match(/^\s{2}chunking:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
227
+ if (chunkingMatch) {
228
+ const cs = chunkingMatch[1];
229
+ const strategy = parseString(cs, "strategy", "semantic");
230
+ CHUNKING_CONFIG = {
231
+ strategy: strategy,
232
+ markdown: {
233
+ split_by_headings: parseBool(cs, "split_by_headings", true),
234
+ min_chunk_size: parseNumber(cs, "min_chunk_size", 200),
235
+ max_chunk_size: parseNumber(cs, "max_chunk_size", 2000),
236
+ preserve_heading_hierarchy: parseBool(cs, "preserve_heading_hierarchy", true),
237
+ },
238
+ code: {
239
+ split_by_functions: parseBool(cs, "split_by_functions", true),
240
+ include_function_signature: parseBool(cs, "include_function_signature", true),
241
+ min_chunk_size: parseNumber(cs, "min_chunk_size", 300),
242
+ max_chunk_size: parseNumber(cs, "max_chunk_size", 1500),
243
+ },
244
+ fixed: {
245
+ max_chars: parseNumber(cs, "max_chars", 1500),
246
+ },
247
+ };
248
+ }
249
+
250
+ // ── Parse search config ─────────────────────────────────────────────────
251
+ const searchMatch = section.match(/^\s{2}search:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
252
+ if (searchMatch) {
253
+ const ss = searchMatch[1];
254
+ HYBRID_CONFIG = {
255
+ enabled: parseBool(ss, "hybrid", false),
256
+ bm25_weight: parseNumber(ss, "bm25_weight", 0.3),
257
+ };
258
+ }
259
+
260
+ // ── Parse quality config ────────────────────────────────────────────────
261
+ const qualityMatch = section.match(/^\s{2}quality:\s*\n([\s\S]*?)(?=^\s{2}[a-zA-Z_\-]+:|\Z)/m);
262
+ if (qualityMatch) {
263
+ const qs = qualityMatch[1];
264
+ METRICS_ENABLED = parseBool(qs, "enable_metrics", false);
265
+ CACHE_ENABLED = parseBool(qs, "enable_cache", true);
266
+ }
267
+
145
268
  // Parse global exclude
146
269
  const excludeMatch = section.match(/^\s{2}exclude:\s*\n((?:\s{4}-\s+.+\n?)*)/m);
147
270
  if (excludeMatch) {
@@ -196,12 +319,25 @@ async function loadConfig(projectRoot) {
196
319
  }
197
320
  }
198
321
 
199
- if (DEBUG) console.log("[vectorizer] Loaded config:", { INDEX_PRESETS, GLOBAL_IGNORE });
322
+ if (DEBUG) console.log("[vectorizer] Loaded config:", { INDEX_PRESETS, GLOBAL_IGNORE, HYBRID_CONFIG, CHUNKING_CONFIG });
200
323
  } catch {
201
324
  if (DEBUG) console.log("[vectorizer] Using default presets (config load failed)");
202
325
  }
203
326
  }
204
327
 
328
+ // ── Shared query cache (singleton per process) ─────────────────────────────
329
+ let _queryCache = null;
330
+ function getQueryCache() {
331
+ if (!_queryCache) _queryCache = new QueryCache(DEFAULT_CACHE_CONFIG);
332
+ return _queryCache;
333
+ }
334
+ function clearQueryCache() {
335
+ if (_queryCache) {
336
+ _queryCache.destroy();
337
+ _queryCache = null;
338
+ }
339
+ }
340
+
205
341
  class CodebaseIndexer {
206
342
  constructor(projectRoot, indexName = "code") {
207
343
  this.root = projectRoot;
@@ -212,6 +348,8 @@ class CodebaseIndexer {
212
348
  this.db = null;
213
349
  this.hashes = {};
214
350
  this.configLoaded = false;
351
+ this.bm25 = null; // lazy-built BM25 index
352
+ this.metrics = null; // lazy-loaded SearchMetrics
215
353
  }
216
354
 
217
355
  async init() {
@@ -227,17 +365,30 @@ class CodebaseIndexer {
227
365
 
228
366
  async loadModel() {
229
367
  if (!this.model) {
230
- if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
231
- this.model = await pipeline("feature-extraction", EMBEDDING_MODEL, {
232
- progress_callback: DEBUG ? undefined : null,
233
- });
234
- if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
368
+ try {
369
+ if (DEBUG) console.log(`[vectorizer] Loading embedding model: ${EMBEDDING_MODEL}...`);
370
+ this.model = await pipeline("feature-extraction", EMBEDDING_MODEL, {
371
+ progress_callback: DEBUG ? undefined : null,
372
+ });
373
+ if (DEBUG) console.log(`[vectorizer] Model loaded: ${EMBEDDING_MODEL}`);
374
+ } catch (error) {
375
+ this.model = null;
376
+ throw new Error(`Model loading failed: ${error.message || error}`);
377
+ }
235
378
  }
236
379
  return this.model;
237
380
  }
238
381
 
239
382
  async unloadModel() {
240
383
  this.model = null;
384
+ // Release BM25 data held in memory
385
+ if (this.bm25) {
386
+ this.bm25.clear();
387
+ this.bm25 = null;
388
+ }
389
+ this._bm25Rows = null;
390
+ this.metrics = null;
391
+ clearQueryCache();
241
392
  if (global.gc) global.gc();
242
393
  }
243
394
 
@@ -274,12 +425,28 @@ class CodebaseIndexer {
274
425
  return false;
275
426
  }
276
427
 
428
+ // ── Embedding (with optional cache) ───────────────────────────────────────
429
+
277
430
  async embed(text) {
278
431
  const model = await this.loadModel();
279
432
  const result = await model(text, { pooling: "mean", normalize: true });
280
433
  return Array.from(result.data);
281
434
  }
282
435
 
436
+ async embedQuery(text) {
437
+ if (CACHE_ENABLED) {
438
+ const cache = getQueryCache();
439
+ const cached = cache.get(text);
440
+ if (cached) return cached;
441
+ const embedding = await this.embed(text);
442
+ cache.set(text, embedding);
443
+ return embedding;
444
+ }
445
+ return this.embed(text);
446
+ }
447
+
448
+ // ── Legacy chunker (kept for backward compat / "fixed" strategy) ──────────
449
+
283
450
  chunkCode(content, maxChars = 1500) {
284
451
  const chunks = [];
285
452
  const lines = content.split("\n");
@@ -309,6 +476,8 @@ class CodebaseIndexer {
309
476
  return this.hashes[relPath] !== currentHash;
310
477
  }
311
478
 
479
+ // ── Index a single file (v2: cleaning + semantic chunking + metadata) ─────
480
+
312
481
  async indexFile(filePath) {
313
482
  const relPath = path.relative(this.root, filePath);
314
483
 
@@ -324,21 +493,39 @@ class CodebaseIndexer {
324
493
  return false;
325
494
  }
326
495
 
327
- const chunks = this.chunkCode(content);
496
+ // Extract metadata
497
+ const fileMeta = await extractFileMetadata(filePath, content);
328
498
  const archived = this.isArchived(relPath, content);
329
- const data = [];
330
499
 
500
+ // Clean content before chunking
501
+ const cleaned = cleanContent(content, fileMeta.file_type, CLEANING_CONFIG);
502
+
503
+ // Semantic chunking
504
+ const chunks = chunkContent(cleaned, fileMeta.file_type, fileMeta.language, CHUNKING_CONFIG);
505
+
506
+ const data = [];
331
507
  for (let i = 0; i < chunks.length; i++) {
332
- const embedding = await this.embed(chunks[i]);
508
+ const embedding = await this.embed(chunks[i].content);
333
509
  data.push({
334
510
  file: relPath,
335
511
  chunk_index: i,
336
- content: chunks[i],
512
+ content: chunks[i].content,
337
513
  vector: embedding,
338
514
  archived: archived,
515
+ // v2 metadata
516
+ file_type: fileMeta.file_type,
517
+ language: fileMeta.language,
518
+ last_modified: fileMeta.last_modified,
519
+ file_size: fileMeta.file_size,
520
+ heading_context: chunks[i].heading_context || "",
521
+ function_name: chunks[i].function_name || "",
522
+ class_name: chunks[i].class_name || "",
523
+ tags: (fileMeta.tags || []).join(","),
339
524
  });
340
525
  }
341
526
 
527
+ if (data.length === 0) return false;
528
+
342
529
  const tableName = "chunks";
343
530
  const tables = await this.db.tableNames();
344
531
  if (tables.includes(tableName)) {
@@ -351,27 +538,189 @@ class CodebaseIndexer {
351
538
  this.hashes[relPath] = hash;
352
539
  await this.saveHashes();
353
540
 
541
+ // Invalidate BM25 index (needs rebuild) — release memory
542
+ if (this.bm25) {
543
+ this.bm25.clear();
544
+ this.bm25 = null;
545
+ }
546
+ this._bm25Rows = null;
547
+
354
548
  return true;
355
549
  }
356
550
 
357
- async search(query, limit = 5, includeArchived = false) {
551
+ // ── BM25 index management ────────────────────────────────────────────────
552
+
553
+ async ensureBM25() {
554
+ if (this.bm25) return this.bm25;
555
+
556
+ const tableName = "chunks";
557
+ const tables = await this.db.tableNames();
558
+ if (!tables.includes(tableName)) return null;
559
+
560
+ const table = await this.db.openTable(tableName);
561
+ const allRows = await table.search([0]).limit(100000).execute();
562
+
563
+ if (allRows.length === 0) return null;
564
+
565
+ // Sort for stable ID mapping between builds
566
+ allRows.sort((a, b) => {
567
+ const ka = `${a.file}:${a.chunk_index}`;
568
+ const kb = `${b.file}:${b.chunk_index}`;
569
+ return ka.localeCompare(kb);
570
+ });
571
+
572
+ // Release previous data before rebuilding
573
+ if (this.bm25) this.bm25.clear();
574
+ this._bm25Rows = null;
575
+
576
+ this.bm25 = new BM25Index();
577
+ this.bm25.build(allRows.map((r) => r.content));
578
+ this._bm25Rows = allRows;
579
+
580
+ return this.bm25;
581
+ }
582
+
583
+ // ── Search (v2: hybrid + metadata filters + metrics) ──────────────────────
584
+
585
+ async search(query, limit = 5, includeArchived = false, options = {}) {
358
586
  const tableName = "chunks";
359
587
  const tables = await this.db.tableNames();
360
588
  if (!tables.includes(tableName)) {
361
589
  return [];
362
590
  }
363
591
 
364
- const queryEmbedding = await this.embed(query);
592
+ const queryEmbedding = await this.embedQuery(query);
365
593
  const table = await this.db.openTable(tableName);
366
594
 
367
- const fetchLimit = includeArchived ? limit : limit * 3;
595
+ // Only over-fetch when filters or hybrid search are active
596
+ const hasFilters = !includeArchived || options.fileType || options.language ||
597
+ options.modifiedAfter || options.modifiedBefore ||
598
+ (options.tags && options.tags.length > 0);
599
+ const isHybrid = HYBRID_CONFIG.enabled || options.hybrid;
600
+ const fetchLimit = (hasFilters || isHybrid) ? Math.max(limit * 3, 50) : limit;
368
601
  let results = await table.search(queryEmbedding).limit(fetchLimit).execute();
369
602
 
603
+ // ── Hybrid search ───────────────────────────────────────────────────────
604
+ if (HYBRID_CONFIG.enabled || options.hybrid) {
605
+ try {
606
+ const bm25 = await this.ensureBM25();
607
+ if (bm25 && this._bm25Rows) {
608
+ const bm25Results = bm25.search(query, fetchLimit);
609
+
610
+ // Build score maps
611
+ const vectorScores = new Map();
612
+ for (let i = 0; i < results.length; i++) {
613
+ const score = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
614
+ vectorScores.set(i, score);
615
+ }
616
+
617
+ const bm25Scores = new Map();
618
+ for (const r of bm25Results) {
619
+ bm25Scores.set(r.id, r.score);
620
+ }
621
+
622
+ // We need a unified ID space. Since vector results and BM25 results
623
+ // reference different row sets, we use the full table rows for BM25
624
+ // and merge by file+chunk_index key.
625
+ const resultMap = new Map();
626
+
627
+ for (let i = 0; i < results.length; i++) {
628
+ const key = `${results[i].file}:${results[i].chunk_index}`;
629
+ const vs = results[i]._distance != null ? 1 - results[i]._distance : 0.5;
630
+ resultMap.set(key, { row: results[i], vectorScore: vs, bm25Score: 0 });
631
+ }
632
+
633
+ for (const br of bm25Results) {
634
+ if (br.id < this._bm25Rows.length) {
635
+ const bRow = this._bm25Rows[br.id];
636
+ const key = `${bRow.file}:${bRow.chunk_index}`;
637
+ if (resultMap.has(key)) {
638
+ resultMap.get(key).bm25Score = br.score;
639
+ } else {
640
+ resultMap.set(key, { row: bRow, vectorScore: 0, bm25Score: br.score });
641
+ }
642
+ }
643
+ }
644
+
645
+ // Normalize BM25 scores
646
+ let maxBM25 = 0;
647
+ for (const v of resultMap.values()) {
648
+ if (v.bm25Score > maxBM25) maxBM25 = v.bm25Score;
649
+ }
650
+
651
+ const bw = (options.bm25_weight ?? HYBRID_CONFIG.bm25_weight) || 0.3;
652
+ const vw = 1 - bw;
653
+
654
+ const merged = [];
655
+ for (const v of resultMap.values()) {
656
+ const normBM25 = maxBM25 > 0 ? v.bm25Score / maxBM25 : 0;
657
+ const combined = vw * v.vectorScore + bw * normBM25;
658
+ merged.push({ ...v.row, _combinedScore: combined, _distance: v.row._distance });
659
+ }
660
+
661
+ merged.sort((a, b) => b._combinedScore - a._combinedScore);
662
+ results = merged;
663
+ }
664
+ } catch (e) {
665
+ if (DEBUG) console.log("[vectorizer] Hybrid search fallback:", e.message);
666
+ // Fall through to vector-only results
667
+ }
668
+ }
669
+
670
+ // ── Metadata filters ──────────────────────────────────────────────────
370
671
  if (!includeArchived) {
371
672
  results = results.filter((r) => !r.archived);
372
673
  }
373
674
 
374
- return results.slice(0, limit);
675
+ if (options.fileType) {
676
+ results = results.filter((r) => r.file_type === options.fileType);
677
+ }
678
+
679
+ if (options.language) {
680
+ results = results.filter((r) => r.language === options.language);
681
+ }
682
+
683
+ if (options.modifiedAfter) {
684
+ const after = new Date(options.modifiedAfter).getTime();
685
+ results = results.filter((r) => r.last_modified && new Date(r.last_modified).getTime() >= after);
686
+ }
687
+
688
+ if (options.modifiedBefore) {
689
+ const before = new Date(options.modifiedBefore).getTime();
690
+ results = results.filter((r) => r.last_modified && new Date(r.last_modified).getTime() <= before);
691
+ }
692
+
693
+ if (options.tags && options.tags.length > 0) {
694
+ results = results.filter((r) => {
695
+ const rowTags = (r.tags || "").split(",").filter(Boolean);
696
+ return options.tags.some((t) => rowTags.includes(t));
697
+ });
698
+ }
699
+
700
+ const finalResults = results.slice(0, limit);
701
+
702
+ // ── Metrics tracking ────────────────────────────────────────────────────
703
+ if (METRICS_ENABLED) {
704
+ try {
705
+ if (!this.metrics) {
706
+ this.metrics = new SearchMetrics(this.root);
707
+ await this.metrics.load();
708
+ }
709
+ const scores = finalResults.map((r) =>
710
+ r._combinedScore != null
711
+ ? r._combinedScore
712
+ : r._distance != null
713
+ ? 1 - r._distance
714
+ : 0
715
+ );
716
+ this.metrics.recordQuery(query, this.indexName, scores, HYBRID_CONFIG.enabled || !!options.hybrid);
717
+ await this.metrics.save();
718
+ } catch {
719
+ // non-fatal
720
+ }
721
+ }
722
+
723
+ return finalResults;
375
724
  }
376
725
 
377
726
  async checkHealth(extraIgnore = []) {
@@ -478,7 +827,14 @@ class CodebaseIndexer {
478
827
 
479
828
  async indexSingleFile(filePath) {
480
829
  const absPath = path.isAbsolute(filePath) ? filePath : path.join(this.root, filePath);
481
- return await this.indexFile(absPath);
830
+ // Prevent path traversal outside project root
831
+ const normalized = path.normalize(absPath);
832
+ const relative = path.relative(this.root, normalized);
833
+ if (relative.startsWith("..") || path.isAbsolute(relative)) {
834
+ if (DEBUG) console.log(`[vectorizer] Path traversal blocked: ${filePath}`);
835
+ return false;
836
+ }
837
+ return await this.indexFile(normalized);
482
838
  }
483
839
 
484
840
  async getStats() {
@@ -500,6 +856,12 @@ class CodebaseIndexer {
500
856
  model: EMBEDDING_MODEL,
501
857
  fileCount,
502
858
  chunkCount,
859
+ features: {
860
+ chunking: CHUNKING_CONFIG.strategy,
861
+ hybrid: HYBRID_CONFIG.enabled,
862
+ metrics: METRICS_ENABLED,
863
+ cache: CACHE_ENABLED,
864
+ },
503
865
  };
504
866
  }
505
867
 
@@ -525,12 +887,19 @@ class CodebaseIndexer {
525
887
  async clear() {
526
888
  await fs.rm(this.cacheDir, { recursive: true, force: true });
527
889
  this.hashes = {};
890
+ if (this.bm25) { this.bm25.clear(); this.bm25 = null; }
891
+ this._bm25Rows = null;
892
+ this.metrics = null;
528
893
  await this.init();
529
894
  }
530
895
 
531
896
  async clearAll() {
532
897
  await fs.rm(this.baseDir, { recursive: true, force: true });
533
898
  this.hashes = {};
899
+ if (this.bm25) { this.bm25.clear(); this.bm25 = null; }
900
+ this._bm25Rows = null;
901
+ this.metrics = null;
902
+ clearQueryCache();
534
903
  await this.init();
535
904
  }
536
905
 
@@ -546,6 +915,16 @@ class CodebaseIndexer {
546
915
  } catch {}
547
916
  return indexes;
548
917
  }
918
+
919
+ // ── Metrics access ────────────────────────────────────────────────────────
920
+
921
+ async getMetrics() {
922
+ if (!this.metrics) {
923
+ this.metrics = new SearchMetrics(this.root);
924
+ await this.metrics.load();
925
+ }
926
+ return this.metrics.getSummary();
927
+ }
549
928
  }
550
929
 
551
930
  function getEmbeddingModel() {