npm - kontext-engine - Versions diffs - 0.1.2 → 0.1.3 - Mend

kontext-engine 0.1.2 → 0.1.3

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (6)

package/dist/cli/index.js (changed)

@@ -1208,12 +1208,15 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
       const rows = db.prepare(
         `SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
                   c.line_start as lineStart, c.line_end as lineEnd,
-                  c.type, c.name, c.parent, c.text
+                  c.type, c.name, c.parent, c.text, c.exports as exports
            FROM chunks c
            JOIN files f ON f.id = c.file_id
            WHERE c.id IN (${placeholders})`
       ).all(...ids);
-      return rows;
+      return rows.map((r) => ({
+        ...r,
+        exports: r.exports === 1
+      }));
     },
     searchChunks(filters, limit) {
       const conditions = [];
@@ -1250,7 +1253,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
       const sql = `
         SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
                c.line_start as lineStart, c.line_end as lineEnd,
-               c.type, c.name, c.parent, c.text
+               c.type, c.name, c.parent, c.text, c.exports as exports
         FROM chunks c
         JOIN files f ON f.id = c.file_id
         ${where}
@@ -1258,7 +1261,11 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
         LIMIT ?
       `;
       params.push(limit);
-      return db.prepare(sql).all(...params);
+      const rows = db.prepare(sql).all(...params);
+      return rows.map((r) => ({
+        ...r,
+        exports: r.exports === 1
+      }));
     },
     deleteChunksByFile(fileId) {
       const chunkRows = stmtGetChunkIdsByFile.all(fileId);
@@ -1558,6 +1565,7 @@ async function vectorSearch(db, embedder, query, limit, filters) {
       lineEnd: chunk.lineEnd,
       name: chunk.name,
       type: chunk.type,
+      exported: chunk.exports,
       text: chunk.text,
       score: distanceToScore(vr.distance),
       language: chunk.language
@@ -1598,6 +1606,7 @@ function ftsSearch(db, query, limit, filters) {
       lineEnd: chunk.lineEnd,
       name: chunk.name,
       type: chunk.type,
+      exported: chunk.exports,
       text: chunk.text,
       score: bm25ToScore(fts.rank),
       language: chunk.language
@@ -1632,6 +1641,7 @@ function astSearch(db, filters, limit) {
     lineEnd: chunk.lineEnd,
     name: chunk.name,
     type: chunk.type,
+    exported: chunk.exports,
     text: chunk.text,
     score,
     language: chunk.language
@@ -1684,6 +1694,7 @@ function pathSearch(db, pattern, limit) {
         lineEnd: chunk.lineEnd,
         name: chunk.name,
         type: chunk.type,
+        exported: chunk.exports,
         text: chunk.text,
         score: 1,
         language: file.language
@@ -1726,6 +1737,7 @@ function pathKeywordSearch(db, query, limit) {
         lineEnd: chunk.lineEnd,
         name: chunk.name,
         type: chunk.type,
+        exported: chunk.exports,
         text: chunk.text,
         score,
         language: file.language
@@ -1782,11 +1794,24 @@ var PATH_BOOST_DIR_EXACT = 1.5;
 var PATH_BOOST_FILENAME = 1.4;
 var PATH_BOOST_PARTIAL = 1.2;
 var IMPORT_PENALTY = 0.5;
+var TEST_FILE_PENALTY = 0.65;
+var SMALL_SNIPPET_PENALTY = 0.75;
+var PUBLIC_API_BOOST = 1.12;
+var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
+var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
+var SMALL_SNIPPET_MAX_LINES = 3;
+function extractPathBoostTerms(query) {
+  return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
+}
 function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
   const fused = fusionMerge(strategyResults, limit * 3);
   if (fused.length === 0) return [];
   const boosted = applyPathBoost(fused, pathBoostTerms);
-  const adjusted = applyImportDeprioritization(boosted);
+  const importAdjusted = applyImportDeprioritization(boosted);
+  const testAdjusted = applyTestFileDeprioritization(importAdjusted);
+  const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
+  const boostedApi = applyPublicApiBoost(snippetAdjusted);
+  const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
   adjusted.sort((a, b) => b.score - a.score);
   const sliced = adjusted.slice(0, limit);
   return renormalize(sliced);
@@ -1836,6 +1861,76 @@ function applyImportDeprioritization(results) {
     return r;
   });
 }
+function applyTestFileDeprioritization(results) {
+  const hasNonTestFile = results.some((r) => !isTestFilePath(r.filePath));
+  if (!hasNonTestFile) return results;
+  const maxNonTestScore = Math.max(
+    ...results.filter((r) => !isTestFilePath(r.filePath)).map((r) => r.score),
+    0
+  );
+  if (maxNonTestScore === 0) return results;
+  return results.map((r) => {
+    if (isTestFilePath(r.filePath)) {
+      return { ...r, score: r.score * TEST_FILE_PENALTY };
+    }
+    return r;
+  });
+}
+function applySmallSnippetDeprioritization(results) {
+  const hasNonSmallSnippet = results.some((r) => !isSmallSnippet(r));
+  if (!hasNonSmallSnippet) return results;
+  const maxNonSmallScore = Math.max(
+    ...results.filter((r) => !isSmallSnippet(r)).map((r) => r.score),
+    0
+  );
+  if (maxNonSmallScore === 0) return results;
+  return results.map((r) => {
+    if (isSmallSnippet(r)) {
+      return { ...r, score: r.score * SMALL_SNIPPET_PENALTY };
+    }
+    return r;
+  });
+}
+function applyPublicApiBoost(results) {
+  return results.map((r) => {
+    if (isPublicApiSymbol(r)) {
+      return { ...r, score: r.score * PUBLIC_API_BOOST };
+    }
+    return r;
+  });
+}
+function applyFileDiversityDiminishingReturns(results) {
+  if (results.length <= 1) return results;
+  const ranked = [...results].sort((a, b) => b.score - a.score);
+  const seenPerFile = /* @__PURE__ */ new Map();
+  return ranked.map((r) => {
+    const count = (seenPerFile.get(r.filePath) ?? 0) + 1;
+    seenPerFile.set(r.filePath, count);
+    return {
+      ...r,
+      score: r.score * getFileDiversityFactor(count)
+    };
+  });
+}
+function isTestFilePath(filePath) {
+  const normalizedPath = filePath.toLowerCase().replace(/\\/g, "/");
+  return TEST_FILE_DIRECTORY_PATTERN.test(normalizedPath) || TEST_FILE_NAME_PATTERN.test(normalizedPath);
+}
+function isSmallSnippet(result) {
+  const lineCount = Math.max(1, result.lineEnd - result.lineStart + 1);
+  return lineCount <= SMALL_SNIPPET_MAX_LINES;
+}
+function isPublicApiSymbol(result) {
+  if (result.exported === true) return true;
+  const textStart = result.text.trimStart().toLowerCase();
+  return textStart.startsWith("export ");
+}
+function getFileDiversityFactor(fileOccurrence) {
+  if (fileOccurrence <= 1) return 1;
+  if (fileOccurrence === 2) return 0.9;
+  if (fileOccurrence === 3) return 0.8;
+  return 0.7;
+}
 function renormalize(results) {
   if (results.length === 0) return results;
   const maxScore = Math.max(...results.map((r) => r.score));
@@ -1896,9 +1991,6 @@ function extractSymbolNames(query) {
 function isPathLike(query) {
   return query.includes("/") || query.includes("*") || query.includes(".");
 }
-function extractPathBoostTerms(query) {
-  return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
-}
 async function runQuery(projectPath, query, options) {
   const absoluteRoot = path5.resolve(projectPath);
   const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
@@ -2028,27 +2120,128 @@ function registerQueryCommand(program2) {
 import fs7 from "fs";
 import path6 from "path";
+// src/steering/prompts.ts
+var PLAN_SYSTEM_PROMPT = `You are a code-search strategy planner for a TypeScript/JavaScript codebase.
+Given a user query, produce a JSON object with:
+- "interpretation": one sentence summarising what the user wants to find.
+- "strategies": an ordered array of search strategies (most important first).
+Each strategy object has:
+  "strategy" \u2014 one of "vector", "fts", "ast", "path", "dependency"
+  "query"    \u2014 the optimised search string for that strategy (see rules below)
+  "weight"   \u2014 importance 0\u20131 (highest-priority strategy gets 1.0)
+  "reason"   \u2014 one sentence explaining why this strategy helps
+## Strategy selection rules
+| Signal in query | Primary strategy | Supporting strategies |
+|---|---|---|
+| Conceptual / "how does X work" / natural language | vector | fts, ast |
+| Exact keyword, identifier, or error message | fts | ast |
+| Symbol name (function, class, type, variable) | ast | fts |
+| File path, glob, or extension (e.g. "*.test.ts") | path | fts |
+| Import chain / "what depends on X" | dependency | ast, fts |
+| Mixed: natural language + code symbol | vector + ast | fts |
+## Query optimisation rules
+- **vector**: keep the query close to natural language; rephrase for semantic similarity.
+- **fts**: extract the most distinctive keywords/identifiers; drop stop words.
+- **ast**: use only the symbol name (camelCase, snake_case, or PascalCase). Strip surrounding prose.
+- **path**: use a glob or slash-separated path segment (e.g. "src/auth/*.ts").
+- **dependency**: use the bare module or file name being imported.
+## Edge cases
+- **Vague query** (e.g. "help me understand this"): use vector with the full query; add fts with any nouns present.
+- **Multi-concept query** (e.g. "authentication and rate limiting"): create separate strategies for each concept, both at high weight.
+- **Code symbol mixed with prose** (e.g. "where is the validateToken function called"): use ast for the symbol and vector for the intent.
+- **Query is just a symbol** (e.g. "createPool"): use ast at weight 1.0 and fts at weight 0.7. Skip vector.
+## Examples
+User: "how does authentication work"
+\`\`\`json
+{
+  "interpretation": "Understand the authentication flow and related middleware.",
+  "strategies": [
+    { "strategy": "vector", "query": "authentication flow middleware", "weight": 1.0, "reason": "Conceptual question best served by semantic search." },
+    { "strategy": "fts", "query": "authentication middleware auth", "weight": 0.7, "reason": "Keyword fallback for auth-related identifiers." },
+    { "strategy": "ast", "query": "authenticate", "weight": 0.6, "reason": "Likely function or class name." }
+  ]
+}
+\`\`\`
+User: "validateToken"
+\`\`\`json
+{
+  "interpretation": "Find the validateToken symbol definition and usages.",
+  "strategies": [
+    { "strategy": "ast", "query": "validateToken", "weight": 1.0, "reason": "Exact symbol lookup." },
+    { "strategy": "fts", "query": "validateToken", "weight": 0.7, "reason": "Catch references in comments or strings." }
+  ]
+}
+\`\`\`
+User: "where is rate limiting configured in src/middleware"
+\`\`\`json
+{
+  "interpretation": "Locate rate-limiting configuration inside the middleware directory.",
+  "strategies": [
+    { "strategy": "path", "query": "src/middleware/*", "weight": 0.9, "reason": "Scope results to the specified directory." },
+    { "strategy": "vector", "query": "rate limiting configuration", "weight": 1.0, "reason": "Semantic match for the concept." },
+    { "strategy": "fts", "query": "rateLimit rateLimiter", "weight": 0.7, "reason": "Common identifier variants." }
+  ]
+}
+\`\`\`
+User: "authentication and database connection pooling"
+\`\`\`json
+{
+  "interpretation": "Find code related to both authentication and database connection pooling.",
+  "strategies": [
+    { "strategy": "vector", "query": "authentication login", "weight": 1.0, "reason": "Semantic search for the auth concept." },
+    { "strategy": "vector", "query": "database connection pool", "weight": 1.0, "reason": "Semantic search for the DB pooling concept." },
+    { "strategy": "fts", "query": "auth createPool connectionPool", "weight": 0.7, "reason": "Keyword fallback for likely identifiers." }
+  ]
+}
+\`\`\`
+Output ONLY the JSON object. No markdown fences, no commentary.`;
+var SYNTHESIZE_SYSTEM_PROMPT = `You are a code-search assistant. Given a user query and ranked search results, produce a concise, actionable summary.
+## Output structure (plain text, no markdown)
+1. **Key finding** (1\u20132 sentences): the most important result or answer first.
+2. **Supporting locations** (bulleted, max 5): each line is "filePath:lineStart \u2013 brief description".
+3. **Additional context** (0\u20132 sentences, optional): relationships between results, patterns, or next steps.
+## Rules
+- Always reference file paths and line numbers from the search results.
+- Mention specific symbol names (functions, classes, types) when they appear in results.
+- If no result clearly answers the query, say so and suggest a refined search.
+- Be concise \u2014 aim for 4\u20138 lines total. Do not repeat the query back.
+- Do not use markdown formatting (no #, *, \`, or fences). Use plain text only.
+- Group related results rather than listing every result individually.
+## Example
+Query: "how does token validation work"
+Results include validateToken in src/auth/tokens.ts:42 and authMiddleware in src/middleware/auth.ts:15.
+Good output:
+Token validation is handled by validateToken (src/auth/tokens.ts:42), which decodes a JWT and checks expiry and signature against the configured secret.
+Related locations:
+- src/auth/tokens.ts:42 \u2013 validateToken: core JWT decode + verify logic
+- src/middleware/auth.ts:15 \u2013 authMiddleware: calls validateToken on every protected route
+- src/auth/types.ts:5 \u2013 TokenPayload type definition
+The middleware extracts the Bearer token from the Authorization header before passing it to validateToken.`;
 // src/steering/llm.ts
 var GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-preview:generateContent";
 var OPENAI_URL = "https://api.openai.com/v1/responses";
 var ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
-var PLAN_SYSTEM_PROMPT = `You are a code search strategy planner. Given a user query about code, output a JSON object with:
-- "interpretation": a one-line summary of what the user is looking for
-- "strategies": an array of search strategy objects, each with:
-  - "strategy": one of "vector", "fts", "ast", "path", "dependency"
-  - "query": the optimized query string for that strategy
-  - "weight": a number 0-1 indicating importance
-  - "reason": brief explanation of why this strategy is used
-Choose strategies based on query type:
-- Conceptual/natural language \u2192 vector (semantic search)
-- Keywords/identifiers \u2192 fts (full-text search)
-- Symbol names (functions, classes) \u2192 ast (structural search)
-- File paths or patterns \u2192 path (path glob search)
-- Import/dependency chains \u2192 dependency
-Output ONLY valid JSON, no markdown.`;
-var SYNTHESIZE_SYSTEM_PROMPT = `You are a code search assistant. Given search results, write a brief, helpful explanation of what was found. Be concise (2-4 sentences). Reference specific files and function names. Do not use markdown.`;
 function createGeminiProvider(apiKey) {
   return {
     name: "gemini",
@@ -2156,6 +2349,7 @@ function createAnthropicProvider(apiKey) {
   };
 }
 var STOP_WORDS = /* @__PURE__ */ new Set([
+  // Interrogatives & conjunctions
   "how",
   "does",
   "what",
@@ -2165,6 +2359,7 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "which",
   "who",
   "whom",
+  // Be-verbs
   "is",
   "are",
   "was",
@@ -2172,10 +2367,12 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "be",
   "been",
   "being",
+  // Do-verbs
   "do",
   "did",
   "doing",
   "done",
+  // Articles, connectors, prepositions
   "the",
   "a",
   "an",
@@ -2194,12 +2391,30 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "by",
   "from",
   "about",
+  "into",
+  "through",
+  "between",
+  "after",
+  "before",
+  "during",
+  // Pronouns & demonstratives
   "it",
   "its",
   "this",
   "that",
   "these",
   "those",
+  "i",
+  "me",
+  "my",
+  "we",
+  "our",
+  "you",
+  "your",
+  "he",
+  "she",
+  "they",
+  // Modals
   "can",
   "could",
   "should",
@@ -2208,32 +2423,69 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "shall",
   "may",
   "might",
+  // Have-verbs
   "has",
   "have",
   "had",
   "having",
-  "i",
-  "me",
-  "my",
-  "we",
-  "our",
-  "you",
-  "your",
-  "he",
-  "she",
-  "they",
+  // Common imperative verbs that carry no search value
   "find",
   "show",
   "get",
-  "tell"
+  "tell",
+  "look",
+  "give",
+  "list",
+  "explain",
+  // Misc filler
+  "all",
+  "any",
+  "some",
+  "each",
+  "every",
+  "much",
+  "many",
+  "also",
+  "just",
+  "like",
+  "then",
+  "there",
+  "here",
+  "very",
+  "really",
+  "use",
+  "used",
+  "using"
 ]);
+var CODE_IDENT_RE = /^(?:[a-z]+(?:[A-Z][a-z]*)+|[A-Z][a-zA-Z]+|[a-z]+(?:_[a-z]+)+|[A-Z]+(?:_[A-Z]+)+)$/;
+var DOTTED_IDENT_RE = /[a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)+/g;
 function extractSearchTerms(query) {
-  const words = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2 && !STOP_WORDS.has(w.toLowerCase()));
-  if (words.length === 0) {
+  const terms = [];
+  const seen = /* @__PURE__ */ new Set();
+  const addUnique = (term) => {
+    const key = term.toLowerCase();
+    if (!seen.has(key)) {
+      seen.add(key);
+      terms.push(term);
+    }
+  };
+  const dottedMatches = query.match(DOTTED_IDENT_RE) ?? [];
+  for (const m of dottedMatches) addUnique(m);
+  const pathTokens = query.split(/\s+/).filter((t) => t.includes("/"));
+  for (const p of pathTokens) addUnique(p.replace(/[?!,;]+$/g, ""));
+  const words = query.replace(/[^a-zA-Z0-9_.\s/-]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
+  for (const w of words) {
+    const lower = w.toLowerCase();
+    if (seen.has(lower)) continue;
+    if (STOP_WORDS.has(lower) && !CODE_IDENT_RE.test(w)) continue;
+    addUnique(w);
+  }
+  if (terms.length === 0) {
     const allWords = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
-    return allWords.sort((a, b) => b.length - a.length)[0] ?? query;
+    const longest = allWords.sort((a, b) => b.length - a.length)[0];
+    return longest ?? query;
   }
-  return words.join(" ");
+  return terms.join(" ");
 }
 var VALID_STRATEGIES = /* @__PURE__ */ new Set([
   "vector",
@@ -2418,7 +2670,8 @@ function formatTextOutput2(output) {
   );
   return lines.join("\n");
 }
-function createSearchExecutor(db) {
+function createSearchExecutor(db, query) {
+  const pathBoostTerms = extractPathBoostTerms(query);
   return async (strategies, limit) => {
     const strategyResults = [];
     const fetchLimit = limit * 3;
@@ -2432,7 +2685,7 @@ function createSearchExecutor(db) {
         });
       }
     }
-    return fusionMerge(strategyResults, limit);
+    return fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
   };
 }
 function extractSymbolNames2(query) {
@@ -2480,7 +2733,7 @@ async function loadEmbedder2() {
   return embedderInstance2;
 }
 async function fallbackSearch(db, query, limit) {
-  const executor = createSearchExecutor(db);
+  const executor = createSearchExecutor(db, query);
   const keywords = extractSearchTerms(query);
   const fallbackStrategies = [
     { strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
@@ -2522,7 +2775,7 @@ async function runAsk(projectPath, query, options) {
       }
       return output;
     }
-    const executor = createSearchExecutor(db);
+    const executor = createSearchExecutor(db, query);
     if (options.noExplain) {
       return await runNoExplain(provider, query, options, executor);
     }