kontext-engine 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1568,12 +1568,17 @@ async function vectorSearch(db, embedder, query, limit, filters) {
1568
1568
  }
1569
1569
 
1570
1570
  // src/search/fts.ts
/**
 * Reduce a free-form user query to text that is safe to hand to the
 * full-text index.
 *
 * Only ASCII letters, digits, underscores, whitespace, non-ASCII characters
 * and a `*` that directly follows a word character are kept. Everything else
 * is replaced by a space, whitespace runs are collapsed, and the result is
 * trimmed. The previous implementation removed only a short list of
 * operators, so characters such as `.`, `/`, `'`, `,`, `[` or `<` were
 * passed through unchanged.
 *
 * NOTE(review): assumes db.searchFTS forwards this string to an SQLite FTS5
 * MATCH expression, where such characters outside quotes are syntax errors —
 * confirm against the database layer.
 *
 * @param {string} query - Raw query text typed by the user.
 * @returns {string} Sanitized query; empty when nothing searchable remains.
 */
function sanitizeFtsQuery(query) {
  return query
    // Whitelist instead of blacklist: anything that is not a word character,
    // whitespace, `*` or non-ASCII becomes a separator.
    .replace(/[^A-Za-z0-9_*\s\u0080-\uFFFF]/g, " ")
    // A `*` is only meaningful as a prefix marker right after a word.
    .replace(/(?<!\w)\*/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Convert a BM25 rank into a score in the range (0, 1].
 *
 * The sign of the rank is ignored: a rank of 0 yields 1, and the score
 * shrinks toward 0 as the magnitude of the rank grows.
 *
 * @param {number} rank - Rank value reported by the full-text search.
 * @returns {number} 1 / (1 + |rank|).
 */
function bm25ToScore(rank) {
  const magnitude = Math.abs(rank);
  return 1 / (magnitude + 1);
}
1574
1577
  function ftsSearch(db, query, limit, filters) {
1578
+ const safeQuery = sanitizeFtsQuery(query);
1579
+ if (safeQuery.length === 0) return [];
1575
1580
  const fetchLimit = filters?.language ? limit * 3 : limit;
1576
- const ftsResults = db.searchFTS(query, fetchLimit);
1581
+ const ftsResults = db.searchFTS(safeQuery, fetchLimit);
1577
1582
  if (ftsResults.length === 0) return [];
1578
1583
  const chunkIds = ftsResults.map((r) => r.chunkId);
1579
1584
  const chunks = db.getChunksByIds(chunkIds);
@@ -1687,6 +1692,61 @@ function pathSearch(db, pattern, limit) {
1687
1692
  }
1688
1693
  return results;
1689
1694
  }
// Scores returned by scorePathMatch, from strongest to weakest match:
// a directory segment equal to the term, a file name (without extension)
// equal to the term, and the term appearing anywhere in the path.
var SCORE_DIR_EXACT = 1;
var SCORE_FILENAME = 0.9;
var SCORE_PARTIAL = 0.7;
/**
 * Search indexed files by matching query keywords against their paths.
 *
 * The query is lower-cased and split on whitespace; words shorter than two
 * characters are ignored. Each file path is scored with the best
 * scorePathMatch value over all keywords. Paths that score above zero are
 * visited from highest to lowest score and their chunks are emitted until
 * `limit` results have been collected. Every chunk carries its file's score.
 *
 * @param {object} db - Database handle providing getAllFilePaths(),
 *   getFile(path) and getChunksByFile(fileId).
 * @param {string} query - Whitespace-separated keywords.
 * @param {number} limit - Maximum number of chunk results to return.
 * @returns {Array<object>} Chunk results ordered by path score.
 */
function pathKeywordSearch(db, query, limit) {
  const keywords = query
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length >= 2);
  if (keywords.length === 0) return [];

  // Rank every indexed path by its single best keyword match.
  const ranked = [];
  for (const filePath of db.getAllFilePaths()) {
    const score = keywords.reduce(
      (best, word) => Math.max(best, scorePathMatch(filePath, word)),
      0
    );
    if (score > 0) ranked.push({ filePath, score });
  }
  if (ranked.length === 0) return [];
  ranked.sort((left, right) => right.score - left.score);

  // Expand ranked files into chunk results, stopping once the limit is hit.
  const hits = [];
  for (const entry of ranked) {
    if (hits.length >= limit) break;
    const file = db.getFile(entry.filePath);
    if (!file) continue;
    for (const chunk of db.getChunksByFile(file.id)) {
      if (hits.length >= limit) break;
      hits.push({
        chunkId: chunk.id,
        filePath: file.path,
        lineStart: chunk.lineStart,
        lineEnd: chunk.lineEnd,
        name: chunk.name,
        type: chunk.type,
        text: chunk.text,
        score: entry.score,
        language: file.language
      });
    }
  }
  return hits;
}
/**
 * Score how well a single lower-cased term matches a file path.
 *
 * Checks, in order of precedence:
 *   1. a directory segment equal to the term      -> SCORE_DIR_EXACT
 *   2. the file name without extension equal to it -> SCORE_FILENAME
 *   3. the term appearing anywhere in the path     -> SCORE_PARTIAL
 * and returns 0 when none apply. The path is lower-cased before comparing
 * and split on "/".
 *
 * Dotfiles are handled explicitly: the extension is only stripped when there
 * is at least one character before its dot, so ".gitignore" keeps its name
 * instead of collapsing to an empty stem, and a leading dot is optional when
 * comparing (".eslintrc.json" matches both ".eslintrc" and "eslintrc").
 *
 * @param {string} filePath - Path of the indexed file.
 * @param {string} queryLower - Search term, already lower-cased by the caller.
 * @returns {number} One of the SCORE_* constants, or 0 for no match.
 */
function scorePathMatch(filePath, queryLower) {
  const pathLower = filePath.toLowerCase();
  const dirSegments = pathLower.split("/");
  // After pop(), dirSegments holds only the directory part of the path.
  const fileName = dirSegments.pop();
  if (dirSegments.includes(queryLower)) return SCORE_DIR_EXACT;

  // Require a character before the extension dot so dotfiles keep a stem.
  const stem = fileName.replace(/(?<=.)\.[^.]+$/, "");
  const bareStem = stem.startsWith(".") ? stem.slice(1) : stem;
  if (stem === queryLower || bareStem === queryLower) return SCORE_FILENAME;

  return pathLower.includes(queryLower) ? SCORE_PARTIAL : 0;
}
1690
1750
 
1691
1751
  // src/search/fusion.ts
1692
1752
  var K = 60;
@@ -1718,6 +1778,73 @@ function fusionMerge(strategyResults, limit) {
1718
1778
  }
1719
1779
  return results;
1720
1780
  }
// Multipliers chosen by getPathBoostFactor when a query term equals a
// directory segment, equals the file name without extension, or merely
// appears somewhere in the path.
var PATH_BOOST_DIR_EXACT = 1.5;
var PATH_BOOST_FILENAME = 1.4;
var PATH_BOOST_PARTIAL = 1.2;
// Multiplier applied by applyImportDeprioritization to results whose type
// is "import".
var IMPORT_PENALTY = 0.5;
/**
 * Fuse strategy results, then re-rank them with path boosts and an import
 * penalty.
 *
 * Three times `limit` candidates are requested from fusionMerge so that
 * re-ranking has room to promote results. The candidates are boosted by
 * path match, import results are down-weighted, the list is sorted by
 * descending score, cut to `limit`, and finally renormalized.
 *
 * @param {Array} strategyResults - Per-strategy results, passed to fusionMerge.
 * @param {number} limit - Maximum number of results to return.
 * @param {string[]} pathBoostTerms - Terms to match against result paths.
 * @returns {Array<object>} At most `limit` re-ranked results.
 */
function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
  const candidates = fusionMerge(strategyResults, limit * 3);
  if (candidates.length === 0) return [];

  const rescored = applyImportDeprioritization(
    applyPathBoost(candidates, pathBoostTerms)
  );
  rescored.sort((left, right) => right.score - left.score);
  return renormalize(rescored.slice(0, limit));
}
/**
 * Multiply each result's score by the boost its file path earns.
 *
 * When `terms` is empty the input array is returned as-is. Otherwise a new
 * array of shallow copies is returned; the input objects are not modified.
 *
 * @param {Array<object>} results - Results carrying `filePath` and `score`.
 * @param {string[]} terms - Terms to match against each path.
 * @returns {Array<object>} Results with boosted scores.
 */
function applyPathBoost(results, terms) {
  if (terms.length === 0) return results;

  const boosted = [];
  for (const result of results) {
    const factor = getPathBoostFactor(result.filePath, terms);
    boosted.push({ ...result, score: result.score * factor });
  }
  return boosted;
}
/**
 * Compute the score multiplier a file path earns from a list of terms.
 *
 * For every term (compared case-insensitively) the strongest applicable
 * boost is considered:
 *   - a directory segment equal to the term      -> PATH_BOOST_DIR_EXACT
 *   - the file name without extension equal to it -> PATH_BOOST_FILENAME
 *   - the term appearing anywhere in the path     -> PATH_BOOST_PARTIAL
 * The result is the largest boost over all terms, or 1 when nothing matches.
 *
 * Dotfiles are handled explicitly: the extension is only stripped when there
 * is at least one character before its dot, so ".gitignore" keeps its name
 * instead of collapsing to an empty stem, and a leading dot is optional when
 * comparing (".eslintrc.json" matches both ".eslintrc" and "eslintrc").
 *
 * @param {string} filePath - Path of the result's file, split on "/".
 * @param {string[]} terms - Terms to match against the path.
 * @returns {number} Multiplier, at least 1.
 */
function getPathBoostFactor(filePath, terms) {
  const pathLower = filePath.toLowerCase();
  const segments = pathLower.split("/");
  // After pop(), segments holds only the directory part of the path.
  const fileName = segments.pop();
  const dirNames = new Set(segments);

  // Require a character before the extension dot so dotfiles keep a stem.
  const stem = fileName.replace(/(?<=.)\.[^.]+$/, "");
  const bareStem = stem.startsWith(".") ? stem.slice(1) : stem;

  let maxBoost = 1;
  for (const term of terms) {
    const termLower = term.toLowerCase();
    if (dirNames.has(termLower)) {
      maxBoost = Math.max(maxBoost, PATH_BOOST_DIR_EXACT);
    }
    if (termLower === stem || termLower === bareStem) {
      maxBoost = Math.max(maxBoost, PATH_BOOST_FILENAME);
    }
    if (pathLower.includes(termLower)) {
      maxBoost = Math.max(maxBoost, PATH_BOOST_PARTIAL);
    }
  }
  return maxBoost;
}
/**
 * Down-weight results of type "import" when other kinds of results exist.
 *
 * The input array is returned unchanged when every result is an import, or
 * when the best non-import score (floored at 0) is exactly 0. Otherwise a
 * new array is returned in which import results are shallow copies with
 * their score multiplied by IMPORT_PENALTY; other results are reused as-is.
 *
 * @param {Array<object>} results - Results carrying `type` and `score`.
 * @returns {Array<object>} Results with import scores reduced.
 */
function applyImportDeprioritization(results) {
  const nonImportScores = results
    .filter((result) => result.type !== "import")
    .map((result) => result.score);
  if (nonImportScores.length === 0) return results;
  if (Math.max(0, ...nonImportScores) === 0) return results;

  return results.map((result) =>
    result.type === "import"
      ? { ...result, score: result.score * IMPORT_PENALTY }
      : result
  );
}
/**
 * Rescale scores so that the highest one becomes 1.
 *
 * The input array is returned unchanged when it is empty or when its
 * highest score is exactly 0. Otherwise a new array of shallow copies is
 * returned with every score divided by the highest score.
 *
 * @param {Array<object>} results - Results carrying a numeric `score`.
 * @returns {Array<object>} Results with rescaled scores.
 */
function renormalize(results) {
  if (results.length === 0) return results;

  const peak = results.reduce(
    (highest, result) => Math.max(highest, result.score),
    -Infinity
  );
  if (peak === 0) return results;

  return results.map((result) => ({ ...result, score: result.score / peak }));
}
1721
1848
 
1722
1849
  // src/cli/commands/query.ts
1723
1850
  var CTX_DIR2 = ".ctx";
@@ -1769,6 +1896,9 @@ function extractSymbolNames(query) {
/**
 * Tell whether a query looks like a path or glob pattern.
 *
 * @param {string} query - Raw query text.
 * @returns {boolean} True when the query contains "/", "*" or ".".
 */
function isPathLike(query) {
  return ["/", "*", "."].some((marker) => query.includes(marker));
}
/**
 * Split a query into the terms used for path boosting.
 *
 * The query is split on whitespace; pieces shorter than two characters are
 * dropped. Case and punctuation are left untouched.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Terms of at least two characters, in query order.
 */
function extractPathBoostTerms(query) {
  const terms = [];
  for (const piece of query.split(/\s+/)) {
    const term = piece.trim();
    if (term.length >= 2) terms.push(term);
  }
  return terms;
}
1772
1902
  async function runQuery(projectPath, query, options) {
1773
1903
  const absoluteRoot = path5.resolve(projectPath);
1774
1904
  const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
@@ -1782,7 +1912,8 @@ async function runQuery(projectPath, query, options) {
1782
1912
  const db = createDatabase(dbPath);
1783
1913
  try {
1784
1914
  const strategyResults = await runStrategies(db, query, options);
1785
- const fused = fusionMerge(strategyResults, options.limit);
1915
+ const pathBoostTerms = extractPathBoostTerms(query);
1916
+ const fused = fusionMergeWithPathBoost(strategyResults, options.limit, pathBoostTerms);
1786
1917
  const outputResults = fused.map(toOutputResult);
1787
1918
  const searchTimeMs = Math.round(performance.now() - start);
1788
1919
  const text = options.format === "text" ? formatTextOutput(query, outputResults) : void 0;
@@ -1847,8 +1978,8 @@ async function executeStrategy(db, strategy, query, limit, filters) {
1847
1978
  });
1848
1979
  }
1849
1980
  case "path": {
1850
- if (!isPathLike(query)) return [];
1851
- return pathSearch(db, query, limit);
1981
+ if (isPathLike(query)) return pathSearch(db, query, limit);
1982
+ return pathKeywordSearch(db, query, limit);
1852
1983
  }
1853
1984
  case "dependency":
1854
1985
  return [];
@@ -1864,12 +1995,12 @@ function registerQueryCommand(program2) {
1864
1995
  program2.command("query <query>").description("Multi-strategy code search").option("-l, --limit <n>", "Max results", "10").option(
1865
1996
  "-s, --strategy <list>",
1866
1997
  "Comma-separated strategies: vector,fts,ast,path",
1867
- "fts,ast"
1998
+ "fts,ast,path"
1868
1999
  ).option("--language <lang>", "Filter by language").option("-f, --format <fmt>", "Output format: json|text", "json").option("--no-vectors", "Skip vector search").action(async (query, opts) => {
1869
2000
  const projectPath = process.cwd();
1870
2001
  const verbose = program2.opts()["verbose"] === true;
1871
2002
  const logger = createLogger({ level: verbose ? LogLevel.DEBUG : LogLevel.INFO });
1872
- const strategies = (opts["strategy"] ?? "fts,ast").split(",").map((s) => s.trim());
2003
+ const strategies = (opts["strategy"] ?? "fts,ast,path").split(",").map((s) => s.trim());
1873
2004
  try {
1874
2005
  const output = await runQuery(projectPath, query, {
1875
2006
  limit: parseInt(opts["limit"] ?? "10", 10),
@@ -2024,6 +2155,86 @@ function createAnthropicProvider(apiKey) {
2024
2155
  }
2025
2156
  };
2026
2157
  }
// Lower-case words that extractSearchTerms drops from a natural-language
// question before the remaining keywords are searched. Grouped by role for
// readability; membership is what matters.
var STOP_WORDS = /* @__PURE__ */ new Set([
  // question words
  "how", "does", "what", "where", "when", "why", "which", "who", "whom",
  // forms of "be" and "do"
  "is", "are", "was", "were", "be", "been", "being",
  "do", "did", "doing", "done",
  // articles and conjunctions
  "the", "a", "an",
  "and", "or", "not", "no", "nor",
  // prepositions
  "in", "on", "at", "to", "for", "of", "with", "by", "from", "about",
  // demonstratives
  "it", "its", "this", "that", "these", "those",
  // modal and auxiliary verbs
  "can", "could", "should", "would", "will", "shall", "may", "might",
  "has", "have", "had", "having",
  // pronouns
  "i", "me", "my", "we", "our", "you", "your", "he", "she", "they",
  // request verbs
  "find", "show", "get", "tell"
]);
/**
 * Reduce a natural-language question to its search keywords.
 *
 * Characters other than letters, combining marks, digits, underscores and
 * whitespace are replaced by spaces, and the text is split on whitespace.
 * Words shorter than two characters and words found in STOP_WORDS (compared
 * lower-cased) are dropped; the rest are joined with single spaces.
 *
 * If nothing survives the stop-word filter, the longest word of at least
 * two characters is returned (the first one on ties); if there is no such
 * word, the original query is returned unchanged.
 *
 * Letters are matched with Unicode property escapes so that non-ASCII words
 * such as "señal" stay intact instead of being split at the accented
 * character.
 *
 * @param {string} query - Raw question text.
 * @returns {string} Space-separated keywords, or a fallback as described.
 */
function extractSearchTerms(query) {
  // Tokenise once; both the keyword path and the fallback use these words.
  const words = query
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length >= 2);

  const keywords = words.filter((word) => !STOP_WORDS.has(word.toLowerCase()));
  if (keywords.length > 0) return keywords.join(" ");

  // Only stop words (or nothing) left: fall back to the longest word.
  let longest;
  for (const word of words) {
    if (longest === undefined || word.length > longest.length) longest = word;
  }
  return longest ?? query;
}
2027
2238
  var VALID_STRATEGIES = /* @__PURE__ */ new Set([
2028
2239
  "vector",
2029
2240
  "fts",
@@ -2032,9 +2243,11 @@ var VALID_STRATEGIES = /* @__PURE__ */ new Set([
2032
2243
  "dependency"
2033
2244
  ]);
2034
2245
  function buildFallbackPlan(query) {
2246
+ const keywords = extractSearchTerms(query);
2035
2247
  const strategies = [
2036
- { strategy: "fts", query, weight: 0.8, reason: "Full-text keyword search" },
2037
- { strategy: "ast", query, weight: 0.9, reason: "Structural symbol search" }
2248
+ { strategy: "fts", query: keywords, weight: 0.8, reason: "Full-text keyword search" },
2249
+ { strategy: "ast", query: keywords, weight: 0.9, reason: "Structural symbol search" },
2250
+ { strategy: "path", query: keywords, weight: 0.7, reason: "Path keyword search" }
2038
2251
  ];
2039
2252
  return {
2040
2253
  interpretation: `Searching for: ${query}`,
@@ -2222,6 +2435,13 @@ function createSearchExecutor(db) {
2222
2435
  return fusionMerge(strategyResults, limit);
2223
2436
  };
2224
2437
  }
/**
 * Pull identifier-like tokens out of a query for symbol lookup.
 *
 * Three shapes are recognised, tried in this order at each position:
 *   1. snake_case         (lower-case runs joined by underscores)
 *   2. camelCase / PascalCase / plain lower-case words
 *   3. a capital letter followed by more letters (e.g. "HTTPServer")
 *
 * The snake_case alternative must come first: a regular-expression
 * alternation takes the first alternative that matches, and the camelCase
 * pattern always matches the leading lower-case run, which previously made
 * the snake_case branch unreachable ("foo_bar" came back as "foo", "bar").
 *
 * @param {string} query - Query text to scan.
 * @returns {string[]} Matched tokens in order of appearance; empty if none.
 */
function extractSymbolNames2(query) {
  const symbolPattern =
    /[a-z]+(?:_[a-z]+)+|[A-Z]?[a-z]+(?:[A-Z][a-z]+)*|[A-Z][a-zA-Z]+/g;
  return query.match(symbolPattern) ?? [];
}
/**
 * Tell whether a planned query looks like a path or glob pattern.
 *
 * @param {string} query - Query text from a strategy plan.
 * @returns {boolean} True when the query contains "/", "*" or ".".
 */
function isPathLike2(query) {
  for (const marker of "/*.") {
    if (query.includes(marker)) return true;
  }
  return false;
}
2225
2445
  async function executeStrategy2(db, plan, limit) {
2226
2446
  switch (plan.strategy) {
2227
2447
  case "vector": {
@@ -2230,10 +2450,25 @@ async function executeStrategy2(db, plan, limit) {
2230
2450
  }
2231
2451
  case "fts":
2232
2452
  return ftsSearch(db, plan.query, limit);
2233
- case "ast":
2234
- return astSearch(db, { name: plan.query }, limit);
2235
- case "path":
2236
- return pathSearch(db, plan.query, limit);
2453
+ case "ast": {
2454
+ const symbols = extractSymbolNames2(plan.query);
2455
+ if (symbols.length === 0) return [];
2456
+ const allResults = [];
2457
+ for (const name of symbols) {
2458
+ const results = astSearch(db, { name }, limit);
2459
+ allResults.push(...results);
2460
+ }
2461
+ const seen = /* @__PURE__ */ new Set();
2462
+ return allResults.filter((r) => {
2463
+ if (seen.has(r.chunkId)) return false;
2464
+ seen.add(r.chunkId);
2465
+ return true;
2466
+ });
2467
+ }
2468
+ case "path": {
2469
+ if (isPathLike2(plan.query)) return pathSearch(db, plan.query, limit);
2470
+ return pathKeywordSearch(db, plan.query, limit);
2471
+ }
2237
2472
  case "dependency":
2238
2473
  return [];
2239
2474
  }
@@ -2246,9 +2481,11 @@ async function loadEmbedder2() {
2246
2481
  }
2247
2482
  async function fallbackSearch(db, query, limit) {
2248
2483
  const executor = createSearchExecutor(db);
2484
+ const keywords = extractSearchTerms(query);
2249
2485
  const fallbackStrategies = [
2250
- { strategy: "fts", query, weight: 0.8, reason: "fallback keyword search" },
2251
- { strategy: "ast", query, weight: 0.9, reason: "fallback structural search" }
2486
+ { strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
2487
+ { strategy: "ast", query: keywords, weight: 0.9, reason: "fallback structural search" },
2488
+ { strategy: "path", query: keywords, weight: 0.7, reason: "fallback path search" }
2252
2489
  ];
2253
2490
  const results = await executor(fallbackStrategies, limit);
2254
2491
  return {
@@ -2279,6 +2516,7 @@ async function runAsk(projectPath, query, options) {
2279
2516
  const provider = options.provider ?? null;
2280
2517
  if (!provider) {
2281
2518
  const output = await fallbackSearch(db, query, options.limit);
2519
+ output.warning = FALLBACK_NOTICE;
2282
2520
  if (options.format === "text") {
2283
2521
  output.text = formatTextOutput2(output);
2284
2522
  }
@@ -2346,6 +2584,9 @@ function registerAskCommand(program2) {
2346
2584
  provider: provider ?? void 0,
2347
2585
  noExplain: opts["explain"] === false
2348
2586
  });
2587
+ if (output.warning) {
2588
+ console.error(`\u26A0 ${output.warning}`);
2589
+ }
2349
2590
  if (output.text) {
2350
2591
  console.log(output.text);
2351
2592
  } else {