npm - kontext-engine - Versions diffs - 0.1.2 → 0.1.4 - Mend

kontext-engine 0.1.2 → 0.1.4

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/dist/index.js CHANGED Viewed

@@ -1198,12 +1198,15 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
       const rows = db.prepare(
         `SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
                   c.line_start as lineStart, c.line_end as lineEnd,
-                  c.type, c.name, c.parent, c.text
+                  c.type, c.name, c.parent, c.text, c.exports as exports
            FROM chunks c
            JOIN files f ON f.id = c.file_id
            WHERE c.id IN (${placeholders})`
       ).all(...ids);
-      return rows;
+      return rows.map((r) => ({
+        ...r,
+        exports: r.exports === 1
+      }));
     },
     searchChunks(filters, limit) {
       const conditions = [];
@@ -1240,7 +1243,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
       const sql = `
         SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
                c.line_start as lineStart, c.line_end as lineEnd,
-               c.type, c.name, c.parent, c.text
+               c.type, c.name, c.parent, c.text, c.exports as exports
         FROM chunks c
         JOIN files f ON f.id = c.file_id
         ${where}
@@ -1248,7 +1251,11 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
         LIMIT ?
       `;
       params.push(limit);
-      return db.prepare(sql).all(...params);
+      const rows = db.prepare(sql).all(...params);
+      return rows.map((r) => ({
+        ...r,
+        exports: r.exports === 1
+      }));
     },
     deleteChunksByFile(fileId) {
       const chunkRows = stmtGetChunkIdsByFile.all(fileId);
@@ -1346,6 +1353,7 @@ async function vectorSearch(db, embedder, query, limit, filters) {
       lineEnd: chunk.lineEnd,
       name: chunk.name,
       type: chunk.type,
+      exported: chunk.exports,
       text: chunk.text,
       score: distanceToScore(vr.distance),
       language: chunk.language
@@ -1357,7 +1365,15 @@ async function vectorSearch(db, embedder, query, limit, filters) {
 // src/search/fts.ts
 function sanitizeFtsQuery(query) {
-  return query.replace(/[?()":^~{}!+\-\\]/g, " ").replace(/(?<!\w)\*/g, " ").replace(/\s+/g, " ").trim();
+  const tokenized = query.replace(/[^A-Za-z0-9_*]+/g, " ").trim();
+  if (tokenized.length === 0) return "";
+  const sanitizedTerms = tokenized.split(/\s+/).map((term) => {
+    const hasTrailingWildcard = /\*+$/.test(term);
+    const base = term.replace(/\*/g, "");
+    if (base.length === 0) return "";
+    return hasTrailingWildcard ? `${base}*` : base;
+  }).filter((term) => term.length > 0);
+  return sanitizedTerms.join(" ");
 }
 function bm25ToScore(rank) {
   return 1 / (1 + Math.abs(rank));
@@ -1386,6 +1402,7 @@ function ftsSearch(db, query, limit, filters) {
       lineEnd: chunk.lineEnd,
       name: chunk.name,
       type: chunk.type,
+      exported: chunk.exports,
       text: chunk.text,
       score: bm25ToScore(fts.rank),
       language: chunk.language
@@ -1420,6 +1437,7 @@ function astSearch(db, filters, limit) {
     lineEnd: chunk.lineEnd,
     name: chunk.name,
     type: chunk.type,
+    exported: chunk.exports,
     text: chunk.text,
     score,
     language: chunk.language
@@ -1472,6 +1490,7 @@ function pathSearch(db, pattern, limit) {
         lineEnd: chunk.lineEnd,
         name: chunk.name,
         type: chunk.type,
+        exported: chunk.exports,
         text: chunk.text,
         score: 1,
         language: file.language
@@ -1499,26 +1518,56 @@ function pathKeywordSearch(db, query, limit) {
   }
   if (scoredPaths.length === 0) return [];
   scoredPaths.sort((a, b) => b.score - a.score);
-  const results = [];
+  const matchedFiles = [];
   for (const { filePath, score } of scoredPaths) {
-    if (results.length >= limit) break;
     const file = db.getFile(filePath);
     if (!file) continue;
     const chunks = db.getChunksByFile(file.id);
-    for (const chunk of chunks) {
+    if (chunks.length === 0) continue;
+    matchedFiles.push({
+      filePath: file.path,
+      language: file.language,
+      score,
+      chunks
+    });
+  }
+  if (matchedFiles.length === 0) return [];
+  const results = [];
+  const pushChunk = (filePath, language, score, chunk) => {
+    results.push({
+      chunkId: chunk.id,
+      filePath,
+      lineStart: chunk.lineStart,
+      lineEnd: chunk.lineEnd,
+      name: chunk.name,
+      type: chunk.type,
+      exported: chunk.exports,
+      text: chunk.text,
+      score,
+      language
+    });
+  };
+  for (const matched of matchedFiles) {
+    if (results.length >= limit) break;
+    pushChunk(
+      matched.filePath,
+      matched.language,
+      matched.score,
+      matched.chunks[0]
+    );
+  }
+  let offset = 1;
+  while (results.length < limit) {
+    let addedInRound = false;
+    for (const matched of matchedFiles) {
       if (results.length >= limit) break;
-      results.push({
-        chunkId: chunk.id,
-        filePath: file.path,
-        lineStart: chunk.lineStart,
-        lineEnd: chunk.lineEnd,
-        name: chunk.name,
-        type: chunk.type,
-        text: chunk.text,
-        score,
-        language: file.language
-      });
+      const chunk = matched.chunks[offset];
+      if (!chunk) continue;
+      pushChunk(matched.filePath, matched.language, matched.score, chunk);
+      addedInRound = true;
     }
+    if (!addedInRound) break;
+    offset++;
   }
   return results;
 }
@@ -1563,6 +1612,7 @@ function dependencyTrace(db, chunkId, direction, depth) {
         lineEnd: chunk.lineEnd,
         name: chunk.name,
         type: chunk.type,
+        exported: chunk.exports,
         text: chunk.text,
         score,
         language: chunk.language
@@ -1613,11 +1663,24 @@ var PATH_BOOST_DIR_EXACT = 1.5;
 var PATH_BOOST_FILENAME = 1.4;
 var PATH_BOOST_PARTIAL = 1.2;
 var IMPORT_PENALTY = 0.5;
+var TEST_FILE_PENALTY = 0.65;
+var SMALL_SNIPPET_PENALTY = 0.75;
+var PUBLIC_API_BOOST = 1.12;
+var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
+var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
+var SMALL_SNIPPET_MAX_LINES = 3;
+function extractPathBoostTerms(query) {
+  return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
+}
 function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
   const fused = fusionMerge(strategyResults, limit * 3);
   if (fused.length === 0) return [];
   const boosted = applyPathBoost(fused, pathBoostTerms);
-  const adjusted = applyImportDeprioritization(boosted);
+  const importAdjusted = applyImportDeprioritization(boosted);
+  const testAdjusted = applyTestFileDeprioritization(importAdjusted);
+  const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
+  const boostedApi = applyPublicApiBoost(snippetAdjusted);
+  const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
   adjusted.sort((a, b) => b.score - a.score);
   const sliced = adjusted.slice(0, limit);
   return renormalize(sliced);
@@ -1667,6 +1730,76 @@ function applyImportDeprioritization(results) {
     return r;
   });
 }
+function applyTestFileDeprioritization(results) {
+  const hasNonTestFile = results.some((r) => !isTestFilePath(r.filePath));
+  if (!hasNonTestFile) return results;
+  const maxNonTestScore = Math.max(
+    ...results.filter((r) => !isTestFilePath(r.filePath)).map((r) => r.score),
+    0
+  );
+  if (maxNonTestScore === 0) return results;
+  return results.map((r) => {
+    if (isTestFilePath(r.filePath)) {
+      return { ...r, score: r.score * TEST_FILE_PENALTY };
+    }
+    return r;
+  });
+}
+function applySmallSnippetDeprioritization(results) {
+  const hasNonSmallSnippet = results.some((r) => !isSmallSnippet(r));
+  if (!hasNonSmallSnippet) return results;
+  const maxNonSmallScore = Math.max(
+    ...results.filter((r) => !isSmallSnippet(r)).map((r) => r.score),
+    0
+  );
+  if (maxNonSmallScore === 0) return results;
+  return results.map((r) => {
+    if (isSmallSnippet(r)) {
+      return { ...r, score: r.score * SMALL_SNIPPET_PENALTY };
+    }
+    return r;
+  });
+}
+function applyPublicApiBoost(results) {
+  return results.map((r) => {
+    if (isPublicApiSymbol(r)) {
+      return { ...r, score: r.score * PUBLIC_API_BOOST };
+    }
+    return r;
+  });
+}
+function applyFileDiversityDiminishingReturns(results) {
+  if (results.length <= 1) return results;
+  const ranked = [...results].sort((a, b) => b.score - a.score);
+  const seenPerFile = /* @__PURE__ */ new Map();
+  return ranked.map((r) => {
+    const count = (seenPerFile.get(r.filePath) ?? 0) + 1;
+    seenPerFile.set(r.filePath, count);
+    return {
+      ...r,
+      score: r.score * getFileDiversityFactor(count)
+    };
+  });
+}
+function isTestFilePath(filePath) {
+  const normalizedPath = filePath.toLowerCase().replace(/\\/g, "/");
+  return TEST_FILE_DIRECTORY_PATTERN.test(normalizedPath) || TEST_FILE_NAME_PATTERN.test(normalizedPath);
+}
+function isSmallSnippet(result) {
+  const lineCount = Math.max(1, result.lineEnd - result.lineStart + 1);
+  return lineCount <= SMALL_SNIPPET_MAX_LINES;
+}
+function isPublicApiSymbol(result) {
+  if (result.exported === true) return true;
+  const textStart = result.text.trimStart().toLowerCase();
+  return textStart.startsWith("export ");
+}
+function getFileDiversityFactor(fileOccurrence) {
+  if (fileOccurrence <= 1) return 1;
+  if (fileOccurrence === 2) return 0.9;
+  if (fileOccurrence === 3) return 0.8;
+  return 0.7;
+}
 function renormalize(results) {
   if (results.length === 0) return results;
   const maxScore = Math.max(...results.map((r) => r.score));
@@ -1677,25 +1810,210 @@ function renormalize(results) {
   }));
 }
-// src/steering/llm.ts
-var PLAN_SYSTEM_PROMPT = `You are a code search strategy planner. Given a user query about code, output a JSON object with:
-- "interpretation": a one-line summary of what the user is looking for
-- "strategies": an array of search strategy objects, each with:
-  - "strategy": one of "vector", "fts", "ast", "path", "dependency"
-  - "query": the optimized query string for that strategy
-  - "weight": a number 0-1 indicating importance
-  - "reason": brief explanation of why this strategy is used
+// src/steering/prompts.ts
+var PLAN_SYSTEM_PROMPT = `You are a code-search strategy planner for a TypeScript/JavaScript codebase.
+Given a user query, produce a JSON object with:
+- "interpretation": one sentence summarising what the user wants to find.
+- "strategies": an ordered array of search strategies (most important first).
+Each strategy object has:
+  "strategy" \u2014 one of "vector", "fts", "ast", "path", "dependency"
+  "query"    \u2014 the optimised search string for that strategy (see rules below)
+  "weight"   \u2014 importance 0\u20131 (highest-priority strategy gets 1.0)
+  "reason"   \u2014 one sentence explaining why this strategy helps
+## Strategy selection rules
+| Signal in query | Primary strategy | Supporting strategies |
+|---|---|---|
+| Conceptual / "how does X work" / natural language | vector | fts, ast |
+| Exact keyword, identifier, or error message | fts | ast |
+| Symbol name (function, class, type, variable) | ast | fts |
+| File path, glob, or extension (e.g. "*.test.ts") | path | fts |
+| Import chain / "what depends on X" | dependency | ast, fts |
+| Mixed: natural language + code symbol | vector + ast | fts |
+## Query optimisation rules
+- **vector**: keep the query close to natural language; rephrase for semantic similarity.
+- **fts**: extract the most distinctive keywords/identifiers; drop stop words.
+- **ast**: use only the symbol name (camelCase, snake_case, or PascalCase). Strip surrounding prose.
+- **path**: use a glob or slash-separated path segment (e.g. "src/auth/*.ts").
+- **dependency**: use the bare module or file name being imported.
+## Edge cases
+- **Vague query** (e.g. "help me understand this"): use vector with the full query; add fts with any nouns present.
+- **Multi-concept query** (e.g. "authentication and rate limiting"): create separate strategies for each concept, both at high weight.
+- **Code symbol mixed with prose** (e.g. "where is the validateToken function called"): use ast for the symbol and vector for the intent.
+- **Query is just a symbol** (e.g. "createPool"): use ast at weight 1.0 and fts at weight 0.7. Skip vector.
+## Examples
+User: "how does authentication work"
+\`\`\`json
+{
+  "interpretation": "Understand the authentication flow and related middleware.",
+  "strategies": [
+    { "strategy": "vector", "query": "authentication flow middleware", "weight": 1.0, "reason": "Conceptual question best served by semantic search." },
+    { "strategy": "fts", "query": "authentication middleware auth", "weight": 0.7, "reason": "Keyword fallback for auth-related identifiers." },
+    { "strategy": "ast", "query": "authenticate", "weight": 0.6, "reason": "Likely function or class name." }
+  ]
+}
+\`\`\`
+User: "validateToken"
+\`\`\`json
+{
+  "interpretation": "Find the validateToken symbol definition and usages.",
+  "strategies": [
+    { "strategy": "ast", "query": "validateToken", "weight": 1.0, "reason": "Exact symbol lookup." },
+    { "strategy": "fts", "query": "validateToken", "weight": 0.7, "reason": "Catch references in comments or strings." }
+  ]
+}
+\`\`\`
+User: "where is rate limiting configured in src/middleware"
+\`\`\`json
+{
+  "interpretation": "Locate rate-limiting configuration inside the middleware directory.",
+  "strategies": [
+    { "strategy": "path", "query": "src/middleware/*", "weight": 0.9, "reason": "Scope results to the specified directory." },
+    { "strategy": "vector", "query": "rate limiting configuration", "weight": 1.0, "reason": "Semantic match for the concept." },
+    { "strategy": "fts", "query": "rateLimit rateLimiter", "weight": 0.7, "reason": "Common identifier variants." }
+  ]
+}
+\`\`\`
+User: "authentication and database connection pooling"
+\`\`\`json
+{
+  "interpretation": "Find code related to both authentication and database connection pooling.",
+  "strategies": [
+    { "strategy": "vector", "query": "authentication login", "weight": 1.0, "reason": "Semantic search for the auth concept." },
+    { "strategy": "vector", "query": "database connection pool", "weight": 1.0, "reason": "Semantic search for the DB pooling concept." },
+    { "strategy": "fts", "query": "auth createPool connectionPool", "weight": 0.7, "reason": "Keyword fallback for likely identifiers." }
+  ]
+}
+\`\`\`
+Output ONLY the JSON object. No markdown fences, no commentary.`;
+var SYNTHESIZE_SYSTEM_PROMPT = `You are a code-search assistant. Given a user query and ranked search results, produce a concise, actionable summary.
+## Output structure (plain text, no markdown)
-Choose strategies based on query type:
-- Conceptual/natural language \u2192 vector (semantic search)
-- Keywords/identifiers \u2192 fts (full-text search)
-- Symbol names (functions, classes) \u2192 ast (structural search)
-- File paths or patterns \u2192 path (path glob search)
-- Import/dependency chains \u2192 dependency
+1. **Key finding** (1\u20132 sentences): the most important result or answer first.
+2. **Supporting locations** (bulleted, max 5): each line is "filePath:lineStart \u2013 brief description".
+3. **Additional context** (0\u20132 sentences, optional): relationships between results, patterns, or next steps.
-Output ONLY valid JSON, no markdown.`;
-var SYNTHESIZE_SYSTEM_PROMPT = `You are a code search assistant. Given search results, write a brief, helpful explanation of what was found. Be concise (2-4 sentences). Reference specific files and function names. Do not use markdown.`;
+## Rules
+- Always reference file paths and line numbers from the search results.
+- Mention specific symbol names (functions, classes, types) when they appear in results.
+- If no result clearly answers the query, say so and suggest a refined search.
+- Be concise \u2014 aim for 4\u20138 lines total. Do not repeat the query back.
+- Do not use markdown formatting (no #, *, \`, or fences). Use plain text only.
+- Group related results rather than listing every result individually.
+## Example
+Query: "how does token validation work"
+Results include validateToken in src/auth/tokens.ts:42 and authMiddleware in src/middleware/auth.ts:15.
+Good output:
+Token validation is handled by validateToken (src/auth/tokens.ts:42), which decodes a JWT and checks expiry and signature against the configured secret.
+Related locations:
+- src/auth/tokens.ts:42 \u2013 validateToken: core JWT decode + verify logic
+- src/middleware/auth.ts:15 \u2013 authMiddleware: calls validateToken on every protected route
+- src/auth/types.ts:5 \u2013 TokenPayload type definition
+The middleware extracts the Bearer token from the Authorization header before passing it to validateToken.`;
+// src/steering/classify.ts
+var SYMBOL_CAMEL_RE = /^[a-z][a-zA-Z0-9]*$/;
+var SYMBOL_PASCAL_RE = /^[A-Z][a-zA-Z0-9]*$/;
+var SYMBOL_SNAKE_RE = /^[a-z]+(?:_[a-z]+)+$/;
+var SYMBOL_UPPER_RE = /^[A-Z]+(?:_[A-Z]+)*$/;
+var PATH_EXTENSION_RE = /\.(?:ts|tsx|js|jsx|mjs|cjs|py|go|rs|java|kt|swift|rb|php|cs|cpp|c|h|hpp|json|yaml|yml|toml|md|sql|sh|bash)$/i;
+var QUESTION_WORDS = /* @__PURE__ */ new Set([
+  "how",
+  "what",
+  "where",
+  "why",
+  "when",
+  "which",
+  "show",
+  "explain",
+  "find",
+  "list"
+]);
 var STOP_WORDS = /* @__PURE__ */ new Set([
+  "the",
+  "a",
+  "an",
+  "is",
+  "are",
+  "was",
+  "were",
+  "do",
+  "does",
+  "did",
+  "to",
+  "for",
+  "of",
+  "in",
+  "on",
+  "with",
+  "by",
+  "and",
+  "or"
+]);
+function defaultMultipliers() {
+  return {
+    vector: 1,
+    fts: 1,
+    ast: 1,
+    path: 1,
+    dependency: 1
+  };
+}
+function isSymbolQuery(query) {
+  return SYMBOL_CAMEL_RE.test(query) || SYMBOL_PASCAL_RE.test(query) || SYMBOL_SNAKE_RE.test(query) || SYMBOL_UPPER_RE.test(query);
+}
+function isPathQuery(query) {
+  return query.includes("/") || PATH_EXTENSION_RE.test(query);
+}
+function isNaturalLanguageQuery(query) {
+  const lower = query.toLowerCase();
+  const words = lower.split(/\s+/).filter((w) => w.length > 0);
+  const hasQuestionWord = words.some((w) => QUESTION_WORDS.has(w));
+  const hasStopWord = words.some((w) => STOP_WORDS.has(w));
+  return hasQuestionWord || words.length >= 4 && hasStopWord;
+}
+function classifyQuery(query) {
+  const trimmed = query.trim();
+  const multipliers = defaultMultipliers();
+  if (isPathQuery(trimmed)) {
+    multipliers.path = 2;
+    multipliers.ast = 0.5;
+    return { kind: "path", multipliers };
+  }
+  if (isSymbolQuery(trimmed)) {
+    multipliers.ast = 1.5;
+    multipliers.vector = 0.5;
+    return { kind: "symbol", multipliers };
+  }
+  if (isNaturalLanguageQuery(trimmed)) {
+    multipliers.vector = 1.5;
+    multipliers.path = 1.2;
+    multipliers.ast = 0.7;
+    return { kind: "natural_language", multipliers };
+  }
+  return { kind: "keyword", multipliers };
+}
+// src/steering/llm.ts
+var STOP_WORDS2 = /* @__PURE__ */ new Set([
+  // Interrogatives & conjunctions
   "how",
   "does",
   "what",
@@ -1705,6 +2023,7 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "which",
   "who",
   "whom",
+  // Be-verbs
   "is",
   "are",
   "was",
@@ -1712,10 +2031,12 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "be",
   "been",
   "being",
+  // Do-verbs
   "do",
   "did",
   "doing",
   "done",
+  // Articles, connectors, prepositions
   "the",
   "a",
   "an",
@@ -1734,12 +2055,30 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "by",
   "from",
   "about",
+  "into",
+  "through",
+  "between",
+  "after",
+  "before",
+  "during",
+  // Pronouns & demonstratives
   "it",
   "its",
   "this",
   "that",
   "these",
   "those",
+  "i",
+  "me",
+  "my",
+  "we",
+  "our",
+  "you",
+  "your",
+  "he",
+  "she",
+  "they",
+  // Modals
   "can",
   "could",
   "should",
@@ -1748,32 +2087,150 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "shall",
   "may",
   "might",
+  // Have-verbs
   "has",
   "have",
   "had",
   "having",
-  "i",
-  "me",
-  "my",
-  "we",
-  "our",
-  "you",
-  "your",
-  "he",
-  "she",
-  "they",
+  // Common imperative verbs that carry no search value
   "find",
   "show",
   "get",
-  "tell"
+  "tell",
+  "look",
+  "give",
+  "list",
+  "explain",
+  // Misc filler
+  "all",
+  "any",
+  "some",
+  "each",
+  "every",
+  "much",
+  "many",
+  "also",
+  "just",
+  "like",
+  "then",
+  "there",
+  "here",
+  "very",
+  "really",
+  "use",
+  "used",
+  "using"
 ]);
+var CODE_IDENT_RE = /^(?:[a-z]+(?:[A-Z][a-z]*)+|[A-Z][a-zA-Z]+|[a-z]+(?:_[a-z]+)+|[A-Z]+(?:_[A-Z]+)+)$/;
+var DOTTED_IDENT_RE = /[a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)+/g;
+var COMMON_STEMS = {
+  authentication: "auth",
+  authorization: "auth",
+  configuration: "config",
+  initialization: "init",
+  initialize: "init",
+  initializing: "init",
+  implementation: "impl",
+  implements: "impl",
+  implementing: "impl",
+  dependency: "dep",
+  dependencies: "dep",
+  middleware: "middleware",
+  validation: "valid",
+  validator: "valid",
+  serialize: "serial",
+  serialization: "serial",
+  deserialize: "deserial",
+  database: "db",
+  logging: "log",
+  logger: "log",
+  testing: "test",
+  handler: "handle",
+  handling: "handle",
+  callback: "callback",
+  subscriber: "subscribe",
+  subscription: "subscribe",
+  rendering: "render",
+  renderer: "render",
+  transformer: "transform",
+  transformation: "transform",
+  connection: "connect",
+  connector: "connect",
+  migration: "migrate",
+  scheduling: "schedule",
+  scheduler: "schedule",
+  parsing: "parse",
+  parser: "parse",
+  routing: "route",
+  router: "route",
+  indexing: "index",
+  indexer: "index"
+};
+var STEM_SUFFIXES = [
+  "tion",
+  "sion",
+  "ment",
+  "ness",
+  "ing",
+  "er",
+  "or",
+  "able",
+  "ible",
+  "ity",
+  "ous",
+  "ive",
+  "ful",
+  "less",
+  "ly"
+];
+function getStemVariant(term) {
+  const lower = term.toLowerCase();
+  const mapped = COMMON_STEMS[lower];
+  if (mapped && mapped !== lower) return mapped;
+  if (!/^[a-z][a-z0-9_]*$/.test(lower)) return null;
+  for (const suffix of STEM_SUFFIXES) {
+    if (!lower.endsWith(suffix)) continue;
+    const stem = lower.slice(0, -suffix.length);
+    if (stem.length >= 4 && stem !== lower) {
+      return stem;
+    }
+  }
+  return null;
+}
 function extractSearchTerms(query) {
-  const words = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2 && !STOP_WORDS.has(w.toLowerCase()));
-  if (words.length === 0) {
+  const terms = [];
+  const seen = /* @__PURE__ */ new Set();
+  const addUnique = (term) => {
+    const key = term.toLowerCase();
+    if (!seen.has(key)) {
+      seen.add(key);
+      terms.push(term);
+    }
+  };
+  const addTermAndVariants = (term) => {
+    addUnique(term);
+    const variant = getStemVariant(term);
+    if (variant && variant !== term.toLowerCase()) {
+      addUnique(variant);
+    }
+  };
+  const dottedMatches = query.match(DOTTED_IDENT_RE) ?? [];
+  for (const m of dottedMatches) addTermAndVariants(m);
+  const pathTokens = query.split(/\s+/).filter((t) => t.includes("/"));
+  for (const p of pathTokens) addTermAndVariants(p.replace(/[?!,;]+$/g, ""));
+  const words = query.replace(/[^a-zA-Z0-9_.\s/-]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
+  for (const w of words) {
+    const lower = w.toLowerCase();
+    if (seen.has(lower)) continue;
+    if (STOP_WORDS2.has(lower) && !CODE_IDENT_RE.test(w)) continue;
+    addTermAndVariants(w);
+  }
+  if (terms.length === 0) {
     const allWords = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
-    return allWords.sort((a, b) => b.length - a.length)[0] ?? query;
+    const longest = allWords.sort((a, b) => b.length - a.length)[0];
+    return longest ?? query;
   }
-  return words.join(" ");
+  return terms.join(" ");
 }
 var VALID_STRATEGIES = /* @__PURE__ */ new Set([
   "vector",
@@ -1783,17 +2240,42 @@ var VALID_STRATEGIES = /* @__PURE__ */ new Set([
   "dependency"
 ]);
 function buildFallbackPlan(query) {
-  const keywords = extractSearchTerms(query);
-  const strategies = [
-    { strategy: "fts", query: keywords, weight: 0.8, reason: "Full-text keyword search" },
-    { strategy: "ast", query: keywords, weight: 0.9, reason: "Structural symbol search" },
-    { strategy: "path", query: keywords, weight: 0.7, reason: "Path keyword search" }
-  ];
+  const strategies = buildFallbackStrategies(query);
   return {
     interpretation: `Searching for: ${query}`,
     strategies
   };
 }
+function buildFallbackStrategies(query) {
+  const keywords = extractSearchTerms(query);
+  const { multipliers } = classifyQuery(query);
+  return [
+    {
+      strategy: "vector",
+      query,
+      weight: 1 * multipliers.vector,
+      reason: "Semantic search over natural language intent"
+    },
+    {
+      strategy: "fts",
+      query: keywords,
+      weight: 0.8 * multipliers.fts,
+      reason: "Full-text keyword search"
+    },
+    {
+      strategy: "ast",
+      query: keywords,
+      weight: 0.9 * multipliers.ast,
+      reason: "Structural symbol search"
+    },
+    {
+      strategy: "path",
+      query: keywords,
+      weight: 0.7 * multipliers.path,
+      reason: "Path keyword search"
+    }
+  ];
+}
 function parseSearchPlan(raw, query) {
   const jsonMatch = raw.match(/\{[\s\S]*\}/);
   if (!jsonMatch) return buildFallbackPlan(query);
@@ -1873,8 +2355,8 @@ async function steer(provider, query, limit, searchExecutor) {
 }
 // src/cli/commands/init.ts
-import fs5 from "fs";
-import path4 from "path";
+import fs6 from "fs";
+import path5 from "path";
 // src/utils/errors.ts
 var ErrorCode = {
@@ -1962,33 +2444,79 @@ function createLogger(options) {
   };
 }
+// src/cli/commands/config.ts
+import fs5 from "fs";
+import path4 from "path";
+var DEFAULT_CONFIG = {
+  embedder: {
+    provider: "local",
+    model: "Xenova/all-MiniLM-L6-v2",
+    dimensions: 384
+  },
+  search: {
+    defaultLimit: 10,
+    strategies: ["vector", "fts", "ast", "path"],
+    weights: { vector: 1, fts: 0.8, ast: 0.9, path: 0.7, dependency: 0.6 }
+  },
+  watch: {
+    debounceMs: 500,
+    ignored: []
+  },
+  llm: {
+    provider: null,
+    model: null
+  }
+};
+var VALID_EMBEDDER_PROVIDERS = /* @__PURE__ */ new Set(["local", "voyage", "openai"]);
+var VALID_LLM_PROVIDERS = /* @__PURE__ */ new Set(["gemini", "openai", "anthropic"]);
+var VALIDATION_RULES = {
+  "embedder.provider": {
+    validate: (v) => typeof v === "string" && VALID_EMBEDDER_PROVIDERS.has(v),
+    message: `Must be one of: ${[...VALID_EMBEDDER_PROVIDERS].join(", ")}`
+  },
+  "embedder.dimensions": {
+    validate: (v) => typeof v === "number" && v > 0 && Number.isInteger(v),
+    message: "Must be a positive integer"
+  },
+  "search.defaultLimit": {
+    validate: (v) => typeof v === "number" && v > 0 && Number.isInteger(v),
+    message: "Must be a positive integer"
+  },
+  "watch.debounceMs": {
+    validate: (v) => typeof v === "number" && v >= 0 && Number.isInteger(v),
+    message: "Must be a non-negative integer"
+  },
+  "llm.provider": {
+    validate: (v) => v === null || typeof v === "string" && VALID_LLM_PROVIDERS.has(v),
+    message: `Must be null or one of: ${[...VALID_LLM_PROVIDERS].join(", ")}`
+  }
+};
 // src/cli/commands/init.ts
 var CTX_DIR = ".ctx";
 var DB_FILENAME = "index.db";
 var CONFIG_FILENAME = "config.json";
 var GITIGNORE_ENTRY = ".ctx/";
 function ensureGitignore(projectRoot) {
-  const gitignorePath = path4.join(projectRoot, ".gitignore");
-  if (fs5.existsSync(gitignorePath)) {
-    const content = fs5.readFileSync(gitignorePath, "utf-8");
+  const gitignorePath = path5.join(projectRoot, ".gitignore");
+  if (fs6.existsSync(gitignorePath)) {
+    const content = fs6.readFileSync(gitignorePath, "utf-8");
     if (content.includes(GITIGNORE_ENTRY)) return;
     const suffix = content.endsWith("\n") ? "" : "\n";
-    fs5.writeFileSync(gitignorePath, `${content}${suffix}${GITIGNORE_ENTRY}
+    fs6.writeFileSync(gitignorePath, `${content}${suffix}${GITIGNORE_ENTRY}
 `);
   } else {
-    fs5.writeFileSync(gitignorePath, `${GITIGNORE_ENTRY}
+    fs6.writeFileSync(gitignorePath, `${GITIGNORE_ENTRY}
 `);
   }
 }
 function ensureConfig(ctxDir) {
-  const configPath = path4.join(ctxDir, CONFIG_FILENAME);
-  if (fs5.existsSync(configPath)) return;
-  const config = {
-    version: 1,
-    dimensions: 384,
-    model: "all-MiniLM-L6-v2"
-  };
-  fs5.writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n");
+  const configPath = path5.join(ctxDir, CONFIG_FILENAME);
+  if (fs6.existsSync(configPath)) return;
+  fs6.writeFileSync(
+    configPath,
+    JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n"
+  );
 }
 function formatDuration(ms) {
   if (ms < 1e3) return `${Math.round(ms)}ms`;
@@ -2005,14 +2533,14 @@ function formatLanguageSummary(counts) {
 }
 async function runInit(projectPath, options = {}) {
   const log = options.log ?? console.log;
-  const absoluteRoot = path4.resolve(projectPath);
+  const absoluteRoot = path5.resolve(projectPath);
   const start = performance.now();
   log(`Indexing ${absoluteRoot}...`);
-  const ctxDir = path4.join(absoluteRoot, CTX_DIR);
-  if (!fs5.existsSync(ctxDir)) fs5.mkdirSync(ctxDir, { recursive: true });
+  const ctxDir = path5.join(absoluteRoot, CTX_DIR);
+  if (!fs6.existsSync(ctxDir)) fs6.mkdirSync(ctxDir, { recursive: true });
   ensureGitignore(absoluteRoot);
   ensureConfig(ctxDir);
-  const dbPath = path4.join(ctxDir, DB_FILENAME);
+  const dbPath = path5.join(ctxDir, DB_FILENAME);
   const db = createDatabase(dbPath);
   try {
     const discovered = await discoverFiles({
@@ -2117,7 +2645,7 @@ async function runInit(projectPath, options = {}) {
       vectorsCreated = vectors.length;
     }
     const durationMs = performance.now() - start;
-    const dbSize = fs5.existsSync(dbPath) ? fs5.statSync(dbPath).size : 0;
+    const dbSize = fs6.existsSync(dbPath) ? fs6.statSync(dbPath).size : 0;
     log("");
     log(`\u2713 Indexed in ${formatDuration(durationMs)}`);
     log(
@@ -2144,8 +2672,8 @@ async function createEmbedder() {
 }
 // src/cli/commands/query.ts
-import fs6 from "fs";
-import path5 from "path";
+import fs7 from "fs";
+import path6 from "path";
 var CTX_DIR2 = ".ctx";
 var DB_FILENAME2 = "index.db";
 var SNIPPET_MAX_LENGTH = 200;
@@ -2156,6 +2684,20 @@ var STRATEGY_WEIGHTS = {
   path: 0.7,
   dependency: 0.6
 };
+function getEffectiveStrategyWeights(query) {
+  const { multipliers } = classifyQuery(query);
+  return {
+    vector: STRATEGY_WEIGHTS.vector * multipliers.vector,
+    fts: STRATEGY_WEIGHTS.fts * multipliers.fts,
+    ast: STRATEGY_WEIGHTS.ast * multipliers.ast,
+    path: STRATEGY_WEIGHTS.path * multipliers.path,
+    dependency: STRATEGY_WEIGHTS.dependency * multipliers.dependency
+  };
+}
+function normalizeLimit(limit) {
+  if (!Number.isFinite(limit)) return 0;
+  return Math.max(0, Math.trunc(limit));
+}
 function truncateSnippet(text) {
   const oneLine = text.replace(/\n/g, " ").replace(/\s+/g, " ").trim();
   if (oneLine.length <= SNIPPET_MAX_LENGTH) return oneLine;
@@ -2195,13 +2737,11 @@ function extractSymbolNames(query) {
 function isPathLike(query) {
   return query.includes("/") || query.includes("*") || query.includes(".");
 }
-function extractPathBoostTerms(query) {
-  return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
-}
 async function runQuery(projectPath, query, options) {
-  const absoluteRoot = path5.resolve(projectPath);
-  const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
-  if (!fs6.existsSync(dbPath)) {
+  const limit = normalizeLimit(options.limit);
+  const absoluteRoot = path6.resolve(projectPath);
+  const dbPath = path6.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
+  if (!fs7.existsSync(dbPath)) {
     throw new KontextError(
       `Project not initialized. Run "ctx init" first. (${CTX_DIR2}/${DB_FILENAME2} not found)`,
       ErrorCode.NOT_INITIALIZED
@@ -2210,9 +2750,9 @@ async function runQuery(projectPath, query, options) {
   const start = performance.now();
   const db = createDatabase(dbPath);
   try {
-    const strategyResults = await runStrategies(db, query, options);
+    const strategyResults = await runStrategies(db, query, { ...options, limit });
     const pathBoostTerms = extractPathBoostTerms(query);
-    const fused = fusionMergeWithPathBoost(strategyResults, options.limit, pathBoostTerms);
+    const fused = fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
     const outputResults = fused.map(toOutputResult);
     const searchTimeMs = Math.round(performance.now() - start);
     const text = options.format === "text" ? formatTextOutput(query, outputResults) : void 0;
@@ -2234,8 +2774,9 @@ async function runStrategies(db, query, options) {
   const results = [];
   const filters = options.language ? { language: options.language } : void 0;
   const limit = options.limit * 3;
+  const effectiveWeights = getEffectiveStrategyWeights(query);
   for (const strategy of options.strategies) {
-    const weight = STRATEGY_WEIGHTS[strategy];
+    const weight = effectiveWeights[strategy];
     const searchResults = await executeStrategy(
       db,
       strategy,
@@ -2292,12 +2833,16 @@ async function loadEmbedder() {
 }
 // src/cli/commands/ask.ts
-import fs7 from "fs";
-import path6 from "path";
+import fs8 from "fs";
+import path7 from "path";
 var CTX_DIR3 = ".ctx";
 var DB_FILENAME3 = "index.db";
 var SNIPPET_MAX_LENGTH2 = 200;
 var FALLBACK_NOTICE = "No LLM provider configured. Set CTX_GEMINI_KEY, CTX_OPENAI_KEY, or CTX_ANTHROPIC_KEY. Running basic search instead.";
+function normalizeLimit2(limit) {
+  if (!Number.isFinite(limit)) return 0;
+  return Math.max(0, Math.trunc(limit));
+}
 function truncateSnippet2(text) {
   const oneLine = text.replace(/\n/g, " ").replace(/\s+/g, " ").trim();
   if (oneLine.length <= SNIPPET_MAX_LENGTH2) return oneLine;
@@ -2350,7 +2895,8 @@ function formatTextOutput2(output) {
   );
   return lines.join("\n");
 }
-function createSearchExecutor(db) {
+function createSearchExecutor(db, query) {
+  const pathBoostTerms = extractPathBoostTerms(query);
   return async (strategies, limit) => {
     const strategyResults = [];
     const fetchLimit = limit * 3;
@@ -2364,7 +2910,7 @@ function createSearchExecutor(db) {
         });
       }
     }
-    return fusionMerge(strategyResults, limit);
+    return fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
   };
 }
 function extractSymbolNames2(query) {
@@ -2412,13 +2958,8 @@ async function loadEmbedder2() {
   return embedderInstance2;
 }
 async function fallbackSearch(db, query, limit) {
-  const executor = createSearchExecutor(db);
-  const keywords = extractSearchTerms(query);
-  const fallbackStrategies = [
-    { strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
-    { strategy: "ast", query: keywords, weight: 0.9, reason: "fallback structural search" },
-    { strategy: "path", query: keywords, weight: 0.7, reason: "fallback path search" }
-  ];
+  const executor = createSearchExecutor(db, query);
+  const fallbackStrategies = buildFallbackStrategies(query);
   const results = await executor(fallbackStrategies, limit);
   return {
     query,
@@ -2435,9 +2976,10 @@ async function fallbackSearch(db, query, limit) {
   };
 }
 async function runAsk(projectPath, query, options) {
-  const absoluteRoot = path6.resolve(projectPath);
-  const dbPath = path6.join(absoluteRoot, CTX_DIR3, DB_FILENAME3);
-  if (!fs7.existsSync(dbPath)) {
+  const limit = normalizeLimit2(options.limit);
+  const absoluteRoot = path7.resolve(projectPath);
+  const dbPath = path7.join(absoluteRoot, CTX_DIR3, DB_FILENAME3);
+  if (!fs8.existsSync(dbPath)) {
     throw new KontextError(
       `Project not initialized. Run "ctx init" first. (${CTX_DIR3}/${DB_FILENAME3} not found)`,
       ErrorCode.NOT_INITIALIZED
@@ -2447,25 +2989,25 @@ async function runAsk(projectPath, query, options) {
   try {
     const provider = options.provider ?? null;
     if (!provider) {
-      const output = await fallbackSearch(db, query, options.limit);
+      const output = await fallbackSearch(db, query, limit);
       output.warning = FALLBACK_NOTICE;
       if (options.format === "text") {
         output.text = formatTextOutput2(output);
       }
       return output;
     }
-    const executor = createSearchExecutor(db);
+    const executor = createSearchExecutor(db, query);
     if (options.noExplain) {
-      return await runNoExplain(provider, query, options, executor);
+      return await runNoExplain(provider, query, limit, options, executor);
     }
-    return await runWithSteering(provider, query, options, executor);
+    return await runWithSteering(provider, query, limit, options, executor);
   } finally {
     db.close();
   }
 }
-async function runNoExplain(provider, query, options, executor) {
+async function runNoExplain(provider, query, limit, options, executor) {
   const plan = await planSearch(provider, query);
-  const results = await executor(plan.strategies, options.limit);
+  const results = await executor(plan.strategies, limit);
   const output = {
     query,
     interpretation: plan.interpretation,
@@ -2483,8 +3025,8 @@ async function runNoExplain(provider, query, options, executor) {
   }
   return output;
 }
-async function runWithSteering(provider, query, options, executor) {
-  const result = await steer(provider, query, options.limit, executor);
+async function runWithSteering(provider, query, limit, options, executor) {
+  const result = await steer(provider, query, limit, executor);
   const output = {
     query,
     interpretation: result.interpretation,
@@ -2504,8 +3046,8 @@ async function runWithSteering(provider, query, options, executor) {
 }
 // src/cli/commands/status.ts
-import fs8 from "fs";
-import path7 from "path";
+import fs9 from "fs";
+import path8 from "path";
 var CTX_DIR4 = ".ctx";
 var DB_FILENAME4 = "index.db";
 var CONFIG_FILENAME2 = "config.json";
@@ -2524,14 +3066,15 @@ function capitalize(s) {
   return s.charAt(0).toUpperCase() + s.slice(1);
 }
 function readConfig(ctxDir) {
-  const configPath = path7.join(ctxDir, CONFIG_FILENAME2);
-  if (!fs8.existsSync(configPath)) return null;
+  const configPath = path8.join(ctxDir, CONFIG_FILENAME2);
+  if (!fs9.existsSync(configPath)) return null;
   try {
-    const raw = fs8.readFileSync(configPath, "utf-8");
+    const raw = fs9.readFileSync(configPath, "utf-8");
     const parsed = JSON.parse(raw);
+    const embedder = parsed.embedder;
     return {
-      model: parsed.model ?? "unknown",
-      dimensions: parsed.dimensions ?? 0
+      model: embedder?.model ?? parsed.model ?? "unknown",
+      dimensions: embedder?.dimensions ?? parsed.dimensions ?? 0
     };
   } catch {
     return null;
@@ -2580,10 +3123,10 @@ function formatStatus(projectPath, output) {
   return lines.join("\n");
 }
 async function runStatus(projectPath) {
-  const absoluteRoot = path7.resolve(projectPath);
-  const ctxDir = path7.join(absoluteRoot, CTX_DIR4);
-  const dbPath = path7.join(ctxDir, DB_FILENAME4);
-  if (!fs8.existsSync(dbPath)) {
+  const absoluteRoot = path8.resolve(projectPath);
+  const ctxDir = path8.join(absoluteRoot, CTX_DIR4);
+  const dbPath = path8.join(ctxDir, DB_FILENAME4);
+  if (!fs9.existsSync(dbPath)) {
     const output = {
       initialized: false,
       fileCount: 0,
@@ -2605,7 +3148,7 @@ async function runStatus(projectPath) {
     const languages = db.getLanguageBreakdown();
     const lastIndexed = db.getLastIndexed();
     const config = readConfig(ctxDir);
-    const dbSizeBytes = fs8.statSync(dbPath).size;
+    const dbSizeBytes = fs9.statSync(dbPath).size;
     const output = {
       initialized: true,
       fileCount,