bluera-knowledge 0.35.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/CHANGELOG.md +32 -0
  3. package/README.md +8 -20
  4. package/bun.lock +27 -0
  5. package/dist/{chunk-L2SC6J4K.js → chunk-724FNI27.js} +466 -171
  6. package/dist/chunk-724FNI27.js.map +1 -0
  7. package/dist/{chunk-DNGE7FZ4.js → chunk-AO45YFHO.js} +1386 -42
  8. package/dist/chunk-AO45YFHO.js.map +1 -0
  9. package/dist/{chunk-MQQ46BST.js → chunk-F6DGSS2N.js} +2 -2
  10. package/dist/index.js +72 -5
  11. package/dist/index.js.map +1 -1
  12. package/dist/mcp/server.d.ts +37 -3
  13. package/dist/mcp/server.js +2 -2
  14. package/dist/workers/background-worker-cli.js +2 -2
  15. package/hooks/check-ready.sh +17 -7
  16. package/hooks/hooks.json +17 -1
  17. package/hooks/lib/store_summary.py +111 -0
  18. package/hooks/posttooluse-bk-reminder.py +33 -6
  19. package/hooks/stop-bk-check.py +86 -0
  20. package/hooks/userpromptsubmit-bk-nudge.py +156 -0
  21. package/package.json +3 -1
  22. package/scripts/auto-setup.sh +11 -3
  23. package/scripts/eval-candidates.sh +235 -0
  24. package/skills/advanced-workflows/references/combining-workflows.md +17 -0
  25. package/skills/advanced-workflows/references/error-recovery.md +44 -0
  26. package/skills/advanced-workflows/references/handling-large-results.md +48 -0
  27. package/skills/advanced-workflows/references/multi-store-search.md +42 -0
  28. package/skills/knowledge-search/SKILL.md +1 -1
  29. package/skills/search/statusline.md +75 -0
  30. package/skills/store-lifecycle/references/failure-recovery.md +80 -0
  31. package/skills/store-lifecycle/references/indexing-strategies.md +67 -0
  32. package/skills/store-lifecycle/references/job-monitoring.md +72 -0
  33. package/skills/store-lifecycle/references/lifecycle-checklist.md +20 -0
  34. package/skills/store-lifecycle/references/storage-management.md +43 -0
  35. package/skills/suggest/SKILL.md +13 -6
  36. package/dist/chunk-DNGE7FZ4.js.map +0 -1
  37. package/dist/chunk-L2SC6J4K.js.map +0 -1
  38. package/dist/{chunk-MQQ46BST.js.map → chunk-F6DGSS2N.js.map} +0 -0
@@ -2072,40 +2072,6 @@ import { readFile as readFile2, access } from "fs/promises";
  import { homedir as homedir2 } from "os";
  import { isAbsolute, join as join6, resolve } from "path";

- // src/services/reranker-env.ts
- var logger = createLogger("reranker-env");
- function parseRerankerEnvOverrides(strict) {
- return {
- enabled: parseEnabled(process.env["BK_RERANKER_ENABLED"], strict),
- topK: parseTopK(process.env["BK_RERANKER_TOPK"], strict)
- };
- }
- function parseEnabled(raw, strict) {
- if (raw === void 0 || raw === "") return void 0;
- if (raw === "1") return true;
- if (raw === "0") return false;
- const msg = `BK_RERANKER_ENABLED must be '0' or '1', got: "${raw}"`;
- if (strict) throw new Error(msg);
- logger.warn(msg);
- return void 0;
- }
- function parseTopK(raw, strict) {
- if (raw === void 0 || raw === "") return void 0;
- const parsed = Number.parseInt(raw, 10);
- if (Number.isNaN(parsed) || parsed < 1) {
- const msg = `BK_RERANKER_TOPK must be a positive integer, got: "${raw}"`;
- if (strict) throw new Error(msg);
- logger.warn(msg);
- return void 0;
- }
- return parsed;
- }
-
- // src/db/embeddings.ts
- import { homedir } from "os";
- import { join as join5 } from "path";
- import { pipeline, env } from "@huggingface/transformers";
-
  // src/models/registry.ts
  var MODEL_REGISTRY = {
  // ============================================================
@@ -2362,7 +2328,7 @@ var MODEL_REGISTRY = {
  normalize: true,
  queryPrefix: "",
  docPrefix: "",
- category: "other",
+ category: "jina",
  sizeCategory: "small",
  notes: "8192 token context. Good for long documents."
  },
@@ -2374,12 +2340,102 @@ var MODEL_REGISTRY = {
  normalize: true,
  queryPrefix: "",
  docPrefix: "",
- category: "other",
+ category: "jina",
  sizeCategory: "base",
  notes: "8192 token context. Larger Jina variant."
+ },
+ "jina-embeddings-v2-base-code": {
+ id: "jinaai/jina-embeddings-v2-base-code",
+ name: "Jina Embeddings v2 Base Code",
+ dimensions: 768,
+ pooling: "mean",
+ normalize: true,
+ queryPrefix: "",
+ docPrefix: "",
+ category: "jina",
+ sizeCategory: "base",
+ notes: "161M params. Code-specific (150M+ code QA pairs, 30+ langs). 8K context. May need trust_remote_code."
+ },
+ // ============================================================
+ // Snowflake Arctic Embed - Retrieval-optimized
+ // ============================================================
+ "snowflake-arctic-embed-xs": {
+ id: "Snowflake/snowflake-arctic-embed-xs",
+ name: "Snowflake Arctic Embed XS",
+ dimensions: 384,
+ pooling: "cls",
+ normalize: true,
+ queryPrefix: "Represent this sentence for searching relevant passages: ",
+ docPrefix: "",
+ category: "snowflake",
+ sizeCategory: "small",
+ notes: "22M params. Ultra-small retrieval model. Based on all-MiniLM-L6-v2."
+ },
+ "snowflake-arctic-embed-s": {
+ id: "Snowflake/snowflake-arctic-embed-s",
+ name: "Snowflake Arctic Embed S",
+ dimensions: 384,
+ pooling: "cls",
+ normalize: true,
+ queryPrefix: "Represent this sentence for searching relevant passages: ",
+ docPrefix: "",
+ category: "snowflake",
+ sizeCategory: "small",
+ notes: "33M params. Same size as bge-small, trained for retrieval. Drop-in candidate."
+ },
+ "snowflake-arctic-embed-m-v1.5": {
+ id: "Snowflake/snowflake-arctic-embed-m-v1.5",
+ name: "Snowflake Arctic Embed M v1.5",
+ dimensions: 768,
+ pooling: "cls",
+ normalize: true,
+ queryPrefix: "Represent this sentence for searching relevant passages: ",
+ docPrefix: "",
+ category: "snowflake",
+ sizeCategory: "base",
+ notes: "109M params. BEIR 55.14. Matryoshka (truncate to 256d). 7 ONNX quant variants."
+ },
+ "snowflake-arctic-embed-m-v2.0": {
+ id: "Snowflake/snowflake-arctic-embed-m-v2.0",
+ name: "Snowflake Arctic Embed M v2.0",
+ dimensions: 768,
+ pooling: "cls",
+ normalize: true,
+ queryPrefix: "query: ",
+ docPrefix: "",
+ category: "snowflake",
+ sizeCategory: "base",
+ notes: "305M params. Multilingual, 8K context. Custom GTE arch \u2014 may need trust_remote_code."
+ },
+ // ============================================================
+ // ModernBERT Embedding Models - Latest architecture (2024+)
+ // ============================================================
+ "gte-modernbert-base": {
+ id: "Alibaba-NLP/gte-modernbert-base",
+ name: "GTE ModernBERT Base",
+ dimensions: 768,
+ pooling: "cls",
+ normalize: true,
+ queryPrefix: "",
+ docPrefix: "",
+ category: "gte",
+ sizeCategory: "base",
+ notes: "149M params. CoIR code retrieval 79.31. BEIR 55.33. 8K context. No trust_remote_code. Top candidate."
+ },
+ "modernbert-embed-base": {
+ id: "nomic-ai/modernbert-embed-base",
+ name: "ModernBERT Embed Base (Nomic)",
+ dimensions: 768,
+ pooling: "mean",
+ normalize: true,
+ queryPrefix: "search_query: ",
+ docPrefix: "search_document: ",
+ category: "nomic",
+ sizeCategory: "base",
+ notes: "149M params. BEIR 52.89. Matryoshka (truncate to 256d). 8K context."
  }
  };
- var DEFAULT_MODEL_ID = "bge-small-en-v1.5";
+ var DEFAULT_MODEL_ID = "snowflake-arctic-embed-s";
  function getModelConfig(modelId) {
  if (modelId in MODEL_REGISTRY) {
  return MODEL_REGISTRY[modelId];
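For reference, a minimal sketch of what the new default resolves to, using only fields shown in the registry entry above (it assumes getModelConfig is reachable from the caller):

    // Illustrative lookup of the new default model.
    const cfg = getModelConfig("snowflake-arctic-embed-s");
    // cfg.id === "Snowflake/snowflake-arctic-embed-s"
    // cfg.dimensions === 384, cfg.pooling === "cls"
    // cfg.queryPrefix === "Represent this sentence for searching relevant passages: "
    // cfg.docPrefix === ""  (documents are embedded without a prefix)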
@@ -2403,8 +2459,75 @@ function getConfiguredModelId() {
  }
  return DEFAULT_MODEL_ID;
  }
+ var RERANKER_REGISTRY = {
+ "ms-marco-MiniLM-L-6-v2": {
+ id: "Xenova/ms-marco-MiniLM-L-6-v2",
+ name: "MS MARCO MiniLM L6 v2",
+ notes: "Default reranker. Fast cross-encoder for passage ranking."
+ },
+ "ms-marco-MiniLM-L-12-v2": {
+ id: "Xenova/ms-marco-MiniLM-L-12-v2",
+ name: "MS MARCO MiniLM L12 v2",
+ notes: "Deeper reranker. Better quality but slower."
+ },
+ "bge-reranker-base": {
+ id: "Xenova/bge-reranker-base",
+ name: "BGE Reranker Base",
+ notes: "BGE-family cross-encoder. Better fit for BGE embeddings."
+ },
+ "bge-reranker-large": {
+ id: "Xenova/bge-reranker-large",
+ name: "BGE Reranker Large",
+ notes: "Larger BGE cross-encoder. Higher quality, slower."
+ }
+ };
+
+ // src/services/reranker-env.ts
+ var logger = createLogger("reranker-env");
+ function parseRerankerEnvOverrides(strict) {
+ return {
+ enabled: parseEnabled(process.env["BK_RERANKER_ENABLED"], strict),
+ topK: parseTopK(process.env["BK_RERANKER_TOPK"], strict),
+ model: parseModel(process.env["BK_RERANKER_MODEL"], strict)
+ };
+ }
+ function parseEnabled(raw, strict) {
+ if (raw === void 0 || raw === "") return void 0;
+ if (raw === "1") return true;
+ if (raw === "0") return false;
+ const msg = `BK_RERANKER_ENABLED must be '0' or '1', got: "${raw}"`;
+ if (strict) throw new Error(msg);
+ logger.warn(msg);
+ return void 0;
+ }
+ function parseTopK(raw, strict) {
+ if (raw === void 0 || raw === "") return void 0;
+ const parsed = Number.parseInt(raw, 10);
+ if (Number.isNaN(parsed) || parsed < 1) {
+ const msg = `BK_RERANKER_TOPK must be a positive integer, got: "${raw}"`;
+ if (strict) throw new Error(msg);
+ logger.warn(msg);
+ return void 0;
+ }
+ return parsed;
+ }
+ function parseModel(raw, strict) {
+ if (raw === void 0 || raw === "") return void 0;
+ const entry = RERANKER_REGISTRY[raw];
+ if (entry === void 0) {
+ const valid = Object.keys(RERANKER_REGISTRY).join(", ");
+ const msg = `BK_RERANKER_MODEL must be one of [${valid}], got: "${raw}"`;
+ if (strict) throw new Error(msg);
+ logger.warn(msg);
+ return void 0;
+ }
+ return entry.id;
+ }

  // src/db/embeddings.ts
+ import { homedir } from "os";
+ import { join as join5 } from "path";
+ import { pipeline, env } from "@huggingface/transformers";
  env.cacheDir = join5(homedir(), ".cache", "huggingface-transformers");
  function getFinetunedModelPath() {
  const path4 = process.env["BK_FINETUNED_MODEL"];
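For context, a hedged sketch of how the new BK_RERANKER_MODEL override is resolved by the parsing code above (registry keys map to Xenova model IDs; unknown values warn in lenient mode and throw in strict mode):

    // Illustrative values; behaviour follows parseModel above.
    process.env["BK_RERANKER_ENABLED"] = "1";
    process.env["BK_RERANKER_TOPK"] = "20";
    process.env["BK_RERANKER_MODEL"] = "bge-reranker-base";
    parseRerankerEnvOverrides(true);
    // -> { enabled: true, topK: 20, model: "Xenova/bge-reranker-base" }
    process.env["BK_RERANKER_MODEL"] = "not-a-registered-reranker";
    parseRerankerEnvOverrides(false); // warns and leaves model undefined
    parseRerankerEnvOverrides(true);  // throws: BK_RERANKER_MODEL must be one of [...]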
@@ -2419,14 +2542,16 @@ function buildEmbeddingConfig(modelId, overrides) {
  const envQueryPrefix = process.env["BK_QUERY_PREFIX"];
  const modelConfig = getModelConfig(modelId);
  if (modelConfig === void 0) {
+ const baseModelId = process.env["BK_BASE_MODEL"];
+ const baseConfig = baseModelId !== void 0 && baseModelId !== "" ? getModelConfig(baseModelId) : void 0;
  return {
  model: modelId,
  batchSize: overrides?.batchSize ?? 32,
  dtype: overrides?.dtype ?? "fp32",
- pooling: overrides?.pooling ?? envPooling ?? "mean",
- normalize: overrides?.normalize ?? true,
- queryPrefix: overrides?.queryPrefix ?? envQueryPrefix ?? "",
- docPrefix: overrides?.docPrefix ?? "",
+ pooling: overrides?.pooling ?? envPooling ?? baseConfig?.pooling ?? "mean",
+ normalize: overrides?.normalize ?? baseConfig?.normalize ?? true,
+ queryPrefix: overrides?.queryPrefix ?? envQueryPrefix ?? baseConfig?.queryPrefix ?? "",
+ docPrefix: overrides?.docPrefix ?? baseConfig?.docPrefix ?? "",
  maxInFlightBatches: overrides?.maxInFlightBatches ?? 1
  };
  }
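A sketch of the new fallback chain: when modelId is not in MODEL_REGISTRY (for example a fine-tuned checkpoint), BK_BASE_MODEL can name a registry entry whose pooling and prefixes are inherited. The checkpoint path below is hypothetical, and no overrides or pooling env vars are assumed to be set:

    process.env["BK_BASE_MODEL"] = "snowflake-arctic-embed-s";
    const cfg = buildEmbeddingConfig("/models/my-finetuned-arctic");
    // cfg.pooling === "cls" and cfg.queryPrefix come from the
    // snowflake-arctic-embed-s registry entry rather than the old
    // hard-coded "mean" / "" defaults.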
@@ -2670,10 +2795,10 @@ var DEFAULT_CONFIG = {
  version: 1,
  dataDir: ".bluera/bluera-knowledge/data",
  embedding: {
- model: "Xenova/bge-small-en-v1.5",
+ model: "Snowflake/snowflake-arctic-embed-s",
  batchSize: 32,
  dtype: "fp32",
- pooling: "mean",
+ pooling: "cls",
  normalize: true,
  queryPrefix: "Represent this sentence for searching relevant passages: ",
  docPrefix: "",
@@ -2824,13 +2949,14 @@ var ConfigService = class {
  };
  }
  const rerankerOverrides = parseRerankerEnvOverrides(false);
- if (rerankerOverrides.enabled !== void 0 || rerankerOverrides.topK !== void 0) {
+ if (rerankerOverrides.enabled !== void 0 || rerankerOverrides.topK !== void 0 || rerankerOverrides.model !== void 0) {
  this.config = {
  ...this.config,
  reranker: {
  ...this.config.reranker,
  ...rerankerOverrides.enabled !== void 0 ? { enabled: rerankerOverrides.enabled } : {},
- ...rerankerOverrides.topK !== void 0 ? { topK: rerankerOverrides.topK } : {}
+ ...rerankerOverrides.topK !== void 0 ? { topK: rerankerOverrides.topK } : {},
+ ...rerankerOverrides.model !== void 0 ? { model: rerankerOverrides.model } : {}
  }
  };
  }
@@ -3361,11 +3487,15 @@ var DriftService = class {
  }
  };

- // src/utils/text-extensions.ts
+ // src/services/index.service.ts
+ var minimatch = minimatchFn;
+ var execFileAsync = promisify(execFile);
+ var logger2 = createLogger("index-service");
  var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
  // Text/docs
  ".txt",
  ".md",
+ ".mdx",
  ".rst",
  ".adoc",
  // JavaScript/TypeScript
@@ -3471,11 +3601,33 @@ var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
  ".makefile",
  ".cmake"
  ]);
-
- // src/services/index.service.ts
- var minimatch = minimatchFn;
- var execFileAsync = promisify(execFile);
- var logger2 = createLogger("index-service");
+ var TEXT_BASENAMES = /* @__PURE__ */ new Set([
+ // Build systems
+ "Dockerfile",
+ "Makefile",
+ "Rakefile",
+ "Gemfile",
+ "Procfile",
+ "Vagrantfile",
+ "Justfile",
+ "Brewfile",
+ "Earthfile",
+ "Tiltfile",
+ "CMakeLists.txt",
+ "BUILD",
+ "BUILD.bazel",
+ "WORKSPACE",
+ // Dotfiles (config)
+ ".gitignore",
+ ".gitattributes",
+ ".editorconfig",
+ ".dockerignore",
+ ".eslintignore",
+ ".prettierignore",
+ ".npmignore",
+ ".env.example",
+ ".env.sample"
+ ]);
  function isMinifiedFile(filename) {
  const minPatterns = [
  /\.min\.(js|css|mjs|cjs)$/i,
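Discovery now accepts a file when either its extension is in TEXT_EXTENSIONS or its exact basename is in TEXT_BASENAMES (see the filterFiles and scanDirectory hunks below). A minimal sketch of that combined check; the helper name is illustrative:

    import { basename, extname } from "path";

    // Sketch of the combined extension-or-basename acceptance check.
    function isIndexableName(filePath) {
      const ext = extname(filePath).toLowerCase();
      return TEXT_EXTENSIONS.has(ext) || TEXT_BASENAMES.has(basename(filePath));
    }

    // isIndexableName("services/api/Dockerfile")  -> true  (basename match)
    // isIndexableName("docs/getting-started.mdx") -> true  (new .mdx extension)
    // isIndexableName("assets/logo.png")          -> false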
@@ -3542,6 +3694,7 @@ var EXT_TO_LANGUAGE = {
  ".psm1": "powershell",
  ".sql": "sql",
  ".md": "markdown",
+ ".mdx": "mdx",
  ".rst": "restructuredtext",
  ".lua": "lua",
  ".r": "r",
@@ -3581,6 +3734,19 @@ var EXT_TO_LANGUAGE = {
  ".toml": "toml",
  ".xml": "xml"
  };
+ var BASENAME_TO_LANGUAGE = {
+ Dockerfile: "dockerfile",
+ Makefile: "makefile",
+ Rakefile: "ruby",
+ Gemfile: "ruby",
+ Brewfile: "ruby",
+ "CMakeLists.txt": "cmake",
+ BUILD: "starlark",
+ "BUILD.bazel": "starlark",
+ WORKSPACE: "starlark",
+ Tiltfile: "starlark",
+ Justfile: "just"
+ };
  var ENTRY_POINT_NAMES = /* @__PURE__ */ new Set([
  "index.ts",
  "index.js",
@@ -3969,7 +4135,7 @@ ${rawContent}` : rawContent;
  const ext = extname(filePath).toLowerCase();
  const fileName = basename(filePath).toLowerCase();
  const fileType = this.classifyFileType(ext, fileName, filePath);
- const language = EXT_TO_LANGUAGE[ext];
+ const language = EXT_TO_LANGUAGE[ext] ?? BASENAME_TO_LANGUAGE[basename(filePath)];
  const normalizedRelPath = relativePath.replaceAll("\\", "/");
  const depth = normalizedRelPath.split("/").length - 1;
  const isEntryPoint = ENTRY_POINT_NAMES.has(basename(filePath));
@@ -4058,7 +4224,8 @@ ${rawContent}` : rawContent;
  candidates = await this.scanDirectory(storePath);
  logger2.debug({ storePath, count: candidates.length }, "Using filesystem walk for discovery");
  }
- return this.filterFiles(candidates, storePath, ingestConfig);
+ const { files } = await this.filterFiles(candidates, storePath, ingestConfig);
+ return files;
  }
  /**
  * Apply extension filter, ignore patterns, size limit, and per-store ingest filters.
@@ -4070,23 +4237,35 @@ ${rawContent}` : rawContent;
  const skippedMinified = [];
  const skippedBinary = [];
  const skippedExcluded = [];
+ let skippedExtension = 0;
+ let skippedDirSegment = 0;
+ let skippedIgnorePattern = 0;
+ let skippedStatError = 0;
  const maxFileSize = ingestConfig?.maxFileSizeBytes ?? this.maxFileSizeBytes;
- const skipMinified = ingestConfig?.skipMinified ?? true;
+ const skipMinifiedOpt = ingestConfig?.skipMinified ?? true;
  const skipBinaries = ingestConfig?.skipBinaries ?? true;
  const excludeGlobs = ingestConfig?.excludeGlobs ?? [];
+ const maxFiles = ingestConfig?.maxFiles;
  for (const filePath of files) {
  const ext = extname(filePath).toLowerCase();
  const filename = basename(filePath);
- if (!TEXT_EXTENSIONS.has(ext)) continue;
+ if (!TEXT_EXTENSIONS.has(ext) && !TEXT_BASENAMES.has(filename)) {
+ skippedExtension++;
+ continue;
+ }
  const relativePath = relative(storePath, filePath).replaceAll("\\", "/");
  const pathSegments = relativePath.split("/");
  const dirSegments = pathSegments.slice(0, -1);
  if (dirSegments.some((segment) => this.ignoreDirs.has(segment))) {
+ skippedDirSegment++;
  continue;
  }
  const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(filename));
- if (shouldIgnore) continue;
- if (skipMinified && isMinifiedFile(filename)) {
+ if (shouldIgnore) {
+ skippedIgnorePattern++;
+ continue;
+ }
+ if (skipMinifiedOpt && isMinifiedFile(filename)) {
  skippedMinified.push(filename);
  continue;
  }
@@ -4100,13 +4279,14 @@ ${rawContent}` : rawContent;
  }
  let fileSize;
  try {
- const stats = await stat2(filePath);
- fileSize = stats.size;
+ const stats2 = await stat2(filePath);
+ fileSize = stats2.size;
  if (fileSize > maxFileSize) {
  skippedLarge.push({ path: filePath, size: fileSize });
  continue;
  }
  } catch {
+ skippedStatError++;
  continue;
  }
  if (skipBinaries) {
@@ -4116,17 +4296,23 @@ ${rawContent}` : rawContent;
  continue;
  }
  } catch {
+ skippedStatError++;
  continue;
  }
  }
  result.push(filePath);
  }
+ if (maxFiles !== void 0 && result.length > maxFiles) {
+ logger2.info({ storePath, total: result.length, maxFiles }, "Applying maxFiles cap");
+ result.length = maxFiles;
+ }
+ const skippedLargeBytes = skippedLarge.reduce((sum, f) => sum + f.size, 0);
  if (skippedLarge.length > 0) {
  logger2.info(
  {
  storePath,
  count: skippedLarge.length,
- totalBytes: skippedLarge.reduce((sum, f) => sum + f.size, 0),
+ totalBytes: skippedLargeBytes,
  examples: skippedLarge.slice(0, 5).map((f) => relative(storePath, f.path))
  },
  "Skipped large files"
@@ -4150,7 +4336,21 @@ ${rawContent}` : rawContent;
  "Skipped excluded files"
  );
  }
- return result;
+ const stats = {
+ candidates: files.length,
+ accepted: result.length,
+ skippedExtension,
+ skippedDirSegment,
+ skippedIgnorePattern,
+ skippedMinified: skippedMinified.length,
+ skippedExcluded: skippedExcluded.length,
+ skippedLargeFiles: skippedLarge.length,
+ skippedLargeBytes,
+ skippedBinary: skippedBinary.length,
+ skippedStatError
+ };
+ logger2.info({ storePath, ...stats }, "File filtering complete");
+ return { files: result, stats };
  }
  async scanDirectory(dir) {
  const files = [];
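filterFiles now returns both the accepted paths and a per-reason breakdown, which discoverFiles destructures above. A hedged usage sketch (access to the index service instance is assumed):

    const { files, stats } = await indexService.filterFiles(candidates, storePath, ingestConfig);
    // stats.candidates, stats.accepted, stats.skippedExtension, stats.skippedBinary,
    // stats.skippedLargeFiles, stats.skippedLargeBytes, ... mirror the object built above.
    console.log(`accepted ${stats.accepted} of ${stats.candidates} files`);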
@@ -4167,7 +4367,7 @@ ${rawContent}` : rawContent;
  continue;
  }
  const ext = extname(entry.name).toLowerCase();
- if (TEXT_EXTENSIONS.has(ext)) {
+ if (TEXT_EXTENSIONS.has(ext) || TEXT_BASENAMES.has(entry.name)) {
  files.push(fullPath);
  }
  }
@@ -4761,7 +4961,8 @@ function parseSearchEnvOverrides(strict) {
  return {
  rrfK: parseRrfK(process.env["BK_RRF_K"], strict),
  vectorWeight: parseVectorWeight(process.env["BK_RRF_VECTOR_WEIGHT"], strict),
- candidateMultiplier: parseCandidateMultiplier(process.env["BK_CANDIDATE_MULTIPLIER"], strict)
+ candidateMultiplier: parseCandidateMultiplier(process.env["BK_CANDIDATE_MULTIPLIER"], strict),
+ queryExpansion: parseBoolFlag(process.env["BK_QUERY_EXPANSION"], "BK_QUERY_EXPANSION", strict)
  };
  }
  function parseRrfK(raw, strict) {
@@ -4797,6 +4998,69 @@ function parseCandidateMultiplier(raw, strict) {
  }
  return parsed;
  }
+ function parseBoolFlag(raw, name, strict) {
+ if (raw === void 0 || raw === "") return void 0;
+ if (raw === "1") return true;
+ if (raw === "0") return false;
+ const msg = `${name} must be '0' or '1', got: "${raw}"`;
+ if (strict) throw new Error(msg);
+ logger3.warn(msg);
+ return void 0;
+ }
+
+ // src/utils/code-tokenizer.ts
+ function splitIdentifier(identifier) {
+ const segments = identifier.split(/[_.]/).filter((s) => s.length > 0);
+ const words = [];
+ for (const segment of segments) {
+ const parts = segment.replace(/([a-z])([A-Z])/g, "$1\0$2").replace(/([A-Z]{2,})([A-Z][a-z])/g, "$1\0$2").replace(/([a-zA-Z])(\d)/g, "$1\0$2").replace(/(\d)([a-zA-Z])/g, "$1\0$2").split("\0");
+ for (const part of parts) {
+ if (part.length > 0) {
+ words.push(part);
+ }
+ }
+ }
+ return words;
+ }
+ var IDENTIFIER_PATTERN = /(?:[a-zA-Z_$][\w$]*(?:\.[\w$]+)*)/g;
+ function isSplittable(identifier) {
+ if (/[a-z][A-Z]/.test(identifier)) return true;
+ if (identifier.includes("_")) return true;
+ if (identifier.includes(".")) return true;
+ if (/[A-Z]{2,}[a-z]/.test(identifier)) return true;
+ if (/[a-zA-Z]\d|\d[a-zA-Z]/.test(identifier)) return true;
+ return false;
+ }
+ function extractSplitVariants(text) {
+ const seen = /* @__PURE__ */ new Set();
+ const variants = [];
+ for (const match of text.matchAll(IDENTIFIER_PATTERN)) {
+ const identifier = match[0];
+ if (identifier.length < 3 || seen.has(identifier)) continue;
+ seen.add(identifier);
+ if (!isSplittable(identifier)) continue;
+ const parts = splitIdentifier(identifier);
+ if (parts.length > 1) {
+ variants.push(parts.join(" "));
+ const lower = parts.map((p) => p.toLowerCase()).join(" ");
+ if (lower !== parts.join(" ").toLowerCase()) {
+ variants.push(lower);
+ }
+ }
+ }
+ return variants.join(" ");
+ }
+ function buildFtsContent(originalContent) {
+ const variants = extractSplitVariants(originalContent);
+ if (variants.length === 0) return originalContent;
+ return `${originalContent}
+ ${variants}`;
+ }
+ function normalizeFtsQuery(query) {
+ const variants = extractSplitVariants(query);
+ if (variants.length === 0) return query;
+ return `${query} ${variants}`;
+ }

  // src/services/search.service.ts
  var logger4 = createLogger("search-service");
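Worked examples for the new code tokenizer, traced from the functions above (camelCase, snake_case, dotted names, and letter/digit boundaries are split, and the variants are appended so FTS can match individual words):

    splitIdentifier("getUserById");    // ["get", "User", "By", "Id"]
    splitIdentifier("fts_content");    // ["fts", "content"]
    normalizeFtsQuery("getUserById");  // "getUserById get User By Id"
    buildFtsContent("call getUserById(id)");
    // -> the original text plus a trailing line containing "get User By Id"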
@@ -4868,6 +5132,18 @@ var INTENT_FILE_BOOSTS = {
  changelog: 1.1,
  // Often contains bug fixes and known issues
  other: 1
+ },
+ testing: {
+ "documentation-primary": 0.8,
+ documentation: 0.85,
+ example: 1,
+ source: 0.9,
+ "source-internal": 0.85,
+ test: 1.5,
+ // Tests are exactly what the user wants
+ config: 0.7,
+ changelog: 0.6,
+ other: 0.9
  }
  };
  var FRAMEWORK_PATTERNS = [
@@ -4923,12 +5199,23 @@ var CONCEPTUAL_PATTERNS = [
  /\bhow does .* work\b/i,
  /\bwhat('s| is) the (purpose|point|idea)\b/i
  ];
+ var TESTING_PATTERNS = [
+ /\b(test|tests|testing)\b/i,
+ /\b(mock|mocking|stub|stubs)\b/i,
+ /\b(assert|assertion|expect)\b/i,
+ /\btest\s*(client|runner|helper|fixture|suite)\b/i,
+ /\b(unit|integration|e2e|end-to-end)\s*test/i,
+ /\b(pytest|jest|vitest|mocha|gotest)\b/i
+ ];
  function classifyQueryIntents(query) {
  const q = query.toLowerCase();
  const intents = [];
  if (IMPLEMENTATION_PATTERNS.some((p) => p.test(q))) {
  intents.push({ intent: "implementation", confidence: 0.9 });
  }
+ if (TESTING_PATTERNS.some((p) => p.test(q))) {
+ intents.push({ intent: "testing", confidence: 0.85 });
+ }
  if (DEBUGGING_PATTERNS.some((p) => p.test(q))) {
  intents.push({ intent: "debugging", confidence: 0.85 });
  }
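Worked example for the new testing intent (other pattern lists live outside this hunk, so a query like this may also collect additional intents):

    classifyQueryIntents("how to mock the test client");
    // -> includes { intent: "testing", confidence: 0.85 }
    // With a testing intent, test files pick up the 1.5 multiplier from
    // INTENT_FILE_BOOSTS.testing instead of being capped at 0.6
    // (see the test-file cap change further down).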
@@ -4954,41 +5241,16 @@ function mapSearchIntentToQueryIntent(intent) {
  case "find-pattern":
  case "find-implementation":
  case "find-definition":
- case "find-files":
  return "implementation";
  case "find-usage":
  case "find-documentation":
  return "how-to";
+ case "find-files":
+ return "implementation";
  }
  }
- var INTENT_EXPANSION_TERMS = {
- "find-implementation": "source code implementation function class",
- "find-documentation": "documentation guide tutorial example",
- "find-usage": "usage example how to use",
- "find-pattern": "pattern matching code structure",
- "find-definition": "definition type interface declaration",
- "find-files": "file module path"
- };
- function expandQueryWithIntent(query, intent) {
- if (intent === void 0) return query;
- const expansion = INTENT_EXPANSION_TERMS[intent];
- return `${query} ${expansion}`;
- }
- function isStrongFtsSignal(query, ftsResults) {
- if (ftsResults.length < 2) return false;
- const top = ftsResults[0];
- const second = ftsResults[1];
- if (top === void 0 || second === void 0) return false;
- if (second.score > 0 && top.score / second.score <= 2) return false;
- const queryLower = query.toLowerCase();
- const rawFile = top.metadata["file"] ?? top.metadata["path"];
- const rawName = top.metadata["name"];
- const filePath = typeof rawFile === "string" ? rawFile : "";
- const name = typeof rawName === "string" ? rawName : "";
- return filePath.toLowerCase().includes(queryLower) || name.toLowerCase().includes(queryLower);
- }
  var RRF_PRESETS = {
- code: { k: 25, vectorWeight: 0.75, ftsWeight: 0.25 },
+ code: { k: 25, vectorWeight: 0.35, ftsWeight: 0.65 },
  web: { k: 30, vectorWeight: 0.7, ftsWeight: 0.3 }
  };
  var DEFAULT_CANDIDATE_MULTIPLIER = 2;
@@ -5075,8 +5337,7 @@ var SearchService = class {
  let rerankTimeMs;
  const fetchLimit = limit * 3;
  if (mode === "vector") {
- const expandedQuery = expandQueryWithIntent(query.query, query.intent);
- const rawResults = await this.vectorSearchRaw(expandedQuery, stores, fetchLimit);
+ const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
  maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
  allResults = this.normalizeAndFilterScores(rawResults, query.threshold).slice(0, fetchLimit);
  } else if (mode === "fts") {
@@ -5086,8 +5347,7 @@ var SearchService = class {
  query.query,
  stores,
  fetchLimit,
- query.threshold,
- query.intent
+ query.threshold
  );
  allResults = hybridResult.results;
  maxRawScore = hybridResult.maxRawScore;
@@ -5226,6 +5486,41 @@ var SearchService = class {
  }
  return normalized;
  }
+ /**
+ * Generate query variants for multi-query expansion.
+ * Strips intent prefixes to create a keyword-focused variant.
+ * Returns original + variants (deduplicated).
+ */
+ expandQuery(query) {
+ const queries = [query];
+ const stripped = query.replace(
+ /^(how to |how do I |how does |implement |usage of |find the |what is |show me |where is |where are )/i,
+ ""
+ ).trim();
+ if (stripped !== query && stripped.length >= 5) {
+ queries.push(stripped);
+ }
+ return queries;
+ }
+ /**
+ * Run vector search across multiple query variants and merge results.
+ * Deduplicates by document ID, keeping the highest score.
+ */
+ async multiQueryVectorSearch(queries, stores, limit) {
+ const allResults = await Promise.all(
+ queries.map((q) => this.vectorSearchRaw(q, stores, limit))
+ );
+ const merged = /* @__PURE__ */ new Map();
+ for (const results of allResults) {
+ for (const r of results) {
+ const existing = merged.get(r.id);
+ if (existing === void 0 || r.score > existing.score) {
+ merged.set(r.id, r);
+ }
+ }
+ }
+ return [...merged.values()].sort((a, b) => b.score - a.score).slice(0, limit);
+ }
  /**
  * Fetch raw vector search results without normalization.
  * Returns results with raw cosine similarity scores [0-1].
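Worked example of the opt-in multi-query expansion (enabled via BK_QUERY_EXPANSION=1, parsed by parseSearchEnvOverrides above); searchService stands in for an instance of this class:

    searchService.expandQuery("how to create a store");
    // -> ["how to create a store", "create a store"]
    searchService.expandQuery("create a store");
    // -> ["create a store"]   (no intent prefix to strip)
    searchService.expandQuery("what is it");
    // -> ["what is it"]       (stripped variant "it" is shorter than 5 chars)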
@@ -5249,9 +5544,10 @@ var SearchService = class {
  }
  async ftsSearch(query, stores, limit) {
  const results = [];
+ const normalizedQuery = normalizeFtsQuery(query);
  for (const storeId of stores) {
  try {
- const hits = await this.lanceStore.fullTextSearch(storeId, query, limit);
+ const hits = await this.lanceStore.fullTextSearch(storeId, normalizedQuery, limit);
  results.push(
  ...hits.map((r) => ({
  id: r.id,
@@ -5268,37 +5564,20 @@ var SearchService = class {
  /**
  * Internal hybrid search result with additional metadata for confidence calculation.
  */
- async hybridSearchWithMetadata(query, stores, limit, threshold, searchIntent) {
+ async hybridSearchWithMetadata(query, stores, limit, threshold) {
  const intents = classifyQueryIntents(query);
  const envOverrides = parseSearchEnvOverrides(false);
  const candidateMultiplier = envOverrides.candidateMultiplier ?? DEFAULT_CANDIDATE_MULTIPLIER;
- const ftsResults = await this.ftsSearch(query, stores, limit * candidateMultiplier);
- if (isStrongFtsSignal(query, ftsResults)) {
- logger4.debug(
- { query, topScore: ftsResults[0]?.score },
- "Strong FTS signal \u2014 skipping vector search"
- );
- const sorted2 = ftsResults.slice(0, limit).map((r, i) => ({
- ...r,
- score: Math.round((1 - i / Math.max(ftsResults.length, 1)) * 1e6) / 1e6
- }));
- if (threshold !== void 0) {
- return { results: sorted2.filter((r) => r.score >= threshold), maxRawScore: 0 };
- }
- return { results: sorted2, maxRawScore: 0 };
- }
- const expandedQuery = expandQueryWithIntent(query, searchIntent);
- const rawVectorResults = await this.vectorSearchRaw(
- expandedQuery,
- stores,
- limit * candidateMultiplier
- );
+ const fetchLimit = limit * candidateMultiplier;
+ const useExpansion = envOverrides.queryExpansion === true;
+ const rawVectorResults = useExpansion ? await this.multiQueryVectorSearch(this.expandQuery(query), stores, fetchLimit) : await this.vectorSearchRaw(query, stores, fetchLimit);
  const rawVectorScores = /* @__PURE__ */ new Map();
  rawVectorResults.forEach((r) => {
  rawVectorScores.set(r.id, r.score);
  });
  const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
  const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
+ const ftsResults = await this.ftsSearch(query, stores, limit * candidateMultiplier);
  const vectorRanks = /* @__PURE__ */ new Map();
  const ftsRanks = /* @__PURE__ */ new Map();
  const allDocs = /* @__PURE__ */ new Map();
@@ -5334,6 +5613,8 @@ var SearchService = class {
  const pathKeywordBoost = this.getPathKeywordBoost(query, result2);
  const depthBoost = this.getDepthBoost(result2, getPrimaryIntent(intents));
  const entryPointBoost = this.getEntryPointBoost(result2, getPrimaryIntent(intents));
+ const sectionHeaderBoost = 1;
+ const symbolNameBoost = 1;
  const metadata = {
  vectorRRF,
  ftsRRF,
@@ -5342,7 +5623,9 @@ var SearchService = class {
  urlKeywordBoost,
  pathKeywordBoost,
  depthBoost,
- entryPointBoost
+ entryPointBoost,
+ sectionHeaderBoost,
+ symbolNameBoost
  };
  if (vectorRank !== Infinity) {
  metadata.vectorRank = vectorRank;
@@ -5355,7 +5638,7 @@ var SearchService = class {
  }
  rrfScores.push({
  id,
- score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost * depthBoost * entryPointBoost,
+ score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost * depthBoost * entryPointBoost * sectionHeaderBoost * symbolNameBoost,
  result: result2,
  rawVectorScore,
  metadata
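For orientation, a sketch of how the new preset weights enter the combined score. The exact vectorRRF/ftsRRF computation is outside this diff; the standard reciprocal-rank-fusion form weight / (k + rank) is assumed, with k = 25, vectorWeight = 0.35, ftsWeight = 0.65 from the updated code preset:

    const k = 25;
    const rrf = (weight, rank) => weight / (k + rank); // assumed standard RRF form
    const vectorRRF = rrf(0.35, 1); // document ranked 1st by vector search
    const ftsRRF = rrf(0.65, 3);    // same document ranked 3rd by FTS
    const base = vectorRRF + ftsRRF; // ~0.0135 + ~0.0232, about 0.0367
    // The boost multipliers (fileTypeBoost, frameworkBoost, ...) then scale this
    // base, as in the rrfScores.push(...) call above.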
@@ -5376,20 +5659,10 @@ var SearchService = class {
  reranked.results.forEach((r) => {
  rerankedScores.set(r.id, r.rerankerScore);
  });
- const maxRrfScore = sortedAll[0]?.score ?? 1;
- sorted = sortedAll.map((r, rrfRank) => {
- const rerankerScore = rerankedScores.get(r.id);
- if (rerankerScore === void 0) {
- return { ...r, blendedScore: -Infinity };
- }
- const normalizedRrf = maxRrfScore > 0 ? r.score / maxRrfScore : 0;
- const rrfWeight = rrfRank < 3 ? 0.7 : rrfRank < 10 ? 0.5 : 0.3;
- const rerankerWeight = 1 - rrfWeight;
- return {
- ...r,
- blendedScore: normalizedRrf * rrfWeight + rerankerScore * rerankerWeight
- };
- }).sort((a, b) => b.blendedScore - a.blendedScore).slice(0, limit);
+ sorted = sortedAll.map((r) => ({
+ ...r,
+ rerankerScore: rerankedScores.get(r.id)
+ })).sort((a, b) => (b.rerankerScore ?? -Infinity) - (a.rerankerScore ?? -Infinity)).slice(0, limit);
  } else {
  sorted = sortedAll.slice(0, limit);
  }
@@ -5489,7 +5762,9 @@ var SearchService = class {
  const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
  const finalBoost = baseBoost * blendedMultiplier;
  if (fileType === "test") {
- return Math.min(finalBoost, 0.6);
+ const primaryIntent = intents[0]?.intent;
+ const cap = primaryIntent === "testing" ? 1.5 : 0.6;
+ return Math.min(finalBoost, cap);
  }
  return finalBoost;
  }
@@ -5646,12 +5921,6 @@ var SearchService = class {
  location: `${path4}${codeUnit ? `:${String(codeUnit.startLine)}` : ""}`,
  relevanceReason: this.generateRelevanceReason(result, query)
  };
- if (graph) {
- const relatedFiles = this.getRelatedFilePaths(graph, path4, symbolName);
- if (relatedFiles.length > 0) {
- enhanced.summary = { ...enhanced.summary, relatedFiles };
- }
- }
  if (detail === "contextual" || detail === "full") {
  const usage = this.getUsageFromGraph(graph, path4, symbolName);
  enhanced.context = {
@@ -5867,23 +6136,9 @@ var SearchService = class {
  };
  }
  /**
- * Get related file paths from code graph edges for follow-up reads.
- * Returns unique file paths (max 5) from callers/callees, excluding the result's own file.
+ * Get related code from graph.
+ * Returns callers and callees for the symbol.
  */
- getRelatedFilePaths(graph, filePath, symbolName) {
- if (symbolName === "" || symbolName === "(anonymous)") return [];
- const nodeId = `${filePath}:${symbolName}`;
- const files = /* @__PURE__ */ new Set();
- for (const edge of graph.getIncomingEdges(nodeId)) {
- const [file] = this.parseNodeId(edge.from);
- if (file && file !== filePath) files.add(file);
- }
- for (const edge of graph.getEdges(nodeId)) {
- const [file] = this.parseNodeId(edge.to);
- if (file && file !== filePath) files.add(file);
- }
- return Array.from(files).slice(0, 5);
- }
  getRelatedCodeFromGraph(graph, filePath, symbolName) {
  if (!graph || symbolName === "" || symbolName === "(anonymous)") {
  return [];
@@ -5947,7 +6202,9 @@ var IngestConfigSchema = z3.object({
  /** Skip binary files detected by content heuristic - default true */
  skipBinaries: z3.boolean().optional(),
  /** Override max file size for this store (bytes) */
- maxFileSizeBytes: z3.number().int().positive().optional()
+ maxFileSizeBytes: z3.number().int().positive().optional(),
+ /** Maximum number of files to index (cap applied after all other filters) */
+ maxFiles: z3.number().int().positive().optional()
  });
  var FileStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
  type: z3.literal("file"),
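A hedged example of a per-store ingest block using the new cap; the field names come from IngestConfigSchema and the filterFiles hunk, the values are illustrative, and the surrounding store definition is omitted:

    const ingest = {
      skipMinified: true,
      skipBinaries: true,
      excludeGlobs: ["**/*.snap"],
      maxFileSizeBytes: 1048576, // 1 MiB per file
      maxFiles: 5000             // hard cap applied after all other filters
    };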
@@ -6190,7 +6447,7 @@ async function cloneRepository(options) {
  if (partialClone) {
  args.push("--filter=blob:none");
  }
- args.push("--depth", String(depth), "--single-branch");
+ args.push("--depth", String(depth), "--single-branch", "--no-tags");
  if (branch !== void 0) {
  args.push("--branch", branch);
  }
@@ -6202,12 +6459,17 @@ async function cloneRepository(options) {
  branch,
  depth,
  singleBranch: true,
+ noTags: true,
+ lfsSkipSmudge: true,
  partialClone
  },
  "Starting git clone"
  );
  return new Promise((resolve4) => {
- const git = spawn("git", args, { stdio: ["ignore", "pipe", "pipe"] });
+ const git = spawn("git", args, {
+ stdio: ["ignore", "pipe", "pipe"],
+ env: { ...process.env, GIT_LFS_SKIP_SMUDGE: "1" }
+ });
  let timedOut = false;
  let forceKillTimeout = null;
  const timeout = setTimeout(() => {
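Taken together, the clone changes amount to the spawn shape below; the depth value and the trailing URL/destination arguments are assumptions, only the flags and the env entry come from the diff:

    // args ends up roughly as:
    // ["clone", "--filter=blob:none", "--depth", "1", "--single-branch", "--no-tags", "--branch", "main", repoUrl, targetDir]
    spawn("git", args, {
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, GIT_LFS_SKIP_SMUDGE: "1" } // skip downloading LFS content
    });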
@@ -7212,6 +7474,8 @@ var LanceStore = class {
  const schema = LanceSchema({
  id: new Utf8(),
  content: this.embeddingFunction.sourceField(),
+ fts_content: new Utf8(),
+ // content + identifier-split variants for FTS
  vector: this.embeddingFunction.vectorField(),
  metadata: new Utf8()
  });
@@ -7222,6 +7486,7 @@ var LanceStore = class {
  {
  id: "__init__",
  content: "",
+ fts_content: "",
  vector: new Array(this._dimensions).fill(0),
  metadata: "{}"
  }
@@ -7236,13 +7501,25 @@ var LanceStore = class {
  }
  async addDocuments(storeId, documents) {
  const table = await this.getTable(storeId);
- const lanceDocuments = documents.map((doc) => ({
- id: doc.id,
- content: doc.content,
- vector: [...doc.vector],
- metadata: JSON.stringify(doc.metadata)
- }));
- await table.add(lanceDocuments);
+ const hasFts = await this.hasFtsContentColumn(storeId);
+ if (hasFts) {
+ const lanceDocuments = documents.map((doc) => ({
+ id: doc.id,
+ content: doc.content,
+ fts_content: buildFtsContent(doc.content),
+ vector: [...doc.vector],
+ metadata: JSON.stringify(doc.metadata)
+ }));
+ await table.add(lanceDocuments);
+ } else {
+ const lanceDocuments = documents.map((doc) => ({
+ id: doc.id,
+ content: doc.content,
+ vector: [...doc.vector],
+ metadata: JSON.stringify(doc.metadata)
+ }));
+ await table.add(lanceDocuments);
+ }
  }
  async deleteDocuments(storeId, documentIds) {
  if (documentIds.length === 0) {
@@ -7297,10 +7574,29 @@ var LanceStore = class {
  }
  async createFtsIndex(storeId) {
  const table = await this.getTable(storeId);
- await table.createIndex("content", {
- config: lancedb.Index.fts()
+ const ftsColumn = await this.hasFtsContentColumn(storeId) ? "fts_content" : "content";
+ await table.createIndex(ftsColumn, {
+ config: lancedb.Index.fts({
+ stem: true,
+ removeStopWords: false,
+ lowercase: true,
+ language: "English"
+ })
  });
  }
+ /**
+ * Check if a table has the fts_content column (v3 schema).
+ * Tables created before the FTS improvement only have content.
+ */
+ async hasFtsContentColumn(storeId) {
+ try {
+ const table = await this.getTable(storeId);
+ const schema = await table.schema();
+ return schema.fields.some((f) => f.name === "fts_content");
+ } catch {
+ return false;
+ }
+ }
  async fullTextSearch(storeId, query, limit) {
  const table = await this.getTable(storeId);
  const results = await table.search(query, "fts").limit(limit).toArray();
@@ -7637,7 +7933,6 @@ export {
  ASTParser,
  ok,
  err,
- TEXT_EXTENSIONS,
  classifyWebContentType,
  isFileStoreDefinition,
  isRepoStoreDefinition,
@@ -7650,4 +7945,4 @@ export {
  createServices,
  destroyServices
  };
- //# sourceMappingURL=chunk-L2SC6J4K.js.map
+ //# sourceMappingURL=chunk-724FNI27.js.map