kontext-engine 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1568,12 +1568,17 @@ async function vectorSearch(db, embedder, query, limit, filters) {
1568
1568
  }
1569
1569
 
1570
1570
  // src/search/fts.ts
1571
/**
 * Reduce free-form user text to a string that is safe to hand to the
 * full-text index as a query.
 *
 * Every ASCII character that is not a letter, digit, underscore, whitespace
 * or `*` is replaced by a space. A blacklist of known operators is not
 * enough: characters such as `.`, `,`, `'` or `/` are also illegal outside a
 * quoted string in SQLite FTS5 query syntax and make the query fail.
 * NOTE(review): assumes `db.searchFTS` forwards this string to an FTS5
 * `MATCH` expression — confirm against the storage layer.
 *
 * Non-ASCII characters are kept untouched so that non-English terms and
 * identifiers stay searchable. A `*` is kept only when it directly follows a
 * word character (prefix search such as `auth*`); stray asterisks are dropped.
 *
 * @param {string} query - Raw query text.
 * @returns {string} Sanitized query with single spaces between terms and no
 *   leading/trailing whitespace; empty when nothing searchable remains.
 */
function sanitizeFtsQuery(query) {
  return query
    // Whitelist: word characters, whitespace, `*`, and anything non-ASCII.
    .replace(/[^\w\s*\u0080-\uFFFF]/g, " ")
    // An asterisk is only meaningful as a suffix on a term.
    .replace(/(?<!\w)\*/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
1571
1574
  function bm25ToScore(rank) {
1572
1575
  return 1 / (1 + Math.abs(rank));
1573
1576
  }
1574
1577
  function ftsSearch(db, query, limit, filters) {
1578
+ const safeQuery = sanitizeFtsQuery(query);
1579
+ if (safeQuery.length === 0) return [];
1575
1580
  const fetchLimit = filters?.language ? limit * 3 : limit;
1576
- const ftsResults = db.searchFTS(query, fetchLimit);
1581
+ const ftsResults = db.searchFTS(safeQuery, fetchLimit);
1577
1582
  if (ftsResults.length === 0) return [];
1578
1583
  const chunkIds = ftsResults.map((r) => r.chunkId);
1579
1584
  const chunks = db.getChunksByIds(chunkIds);
@@ -1691,13 +1696,17 @@ var SCORE_DIR_EXACT = 1;
1691
1696
  var SCORE_FILENAME = 0.9;
1692
1697
  var SCORE_PARTIAL = 0.7;
1693
1698
  function pathKeywordSearch(db, query, limit) {
1694
- const queryLower = query.toLowerCase();
1699
+ const terms = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
1700
+ if (terms.length === 0) return [];
1695
1701
  const allPaths = db.getAllFilePaths();
1696
1702
  const scoredPaths = [];
1697
1703
  for (const filePath of allPaths) {
1698
- const score = scorePathMatch(filePath, queryLower);
1699
- if (score > 0) {
1700
- scoredPaths.push({ filePath, score });
1704
+ let bestScore = 0;
1705
+ for (const term of terms) {
1706
+ bestScore = Math.max(bestScore, scorePathMatch(filePath, term));
1707
+ }
1708
+ if (bestScore > 0) {
1709
+ scoredPaths.push({ filePath, score: bestScore });
1701
1710
  }
1702
1711
  }
1703
1712
  if (scoredPaths.length === 0) return [];
@@ -2146,6 +2155,86 @@ function createAnthropicProvider(apiKey) {
2146
2155
  }
2147
2156
  };
2148
2157
  }
2158
/**
 * Lower-case English function words and generic request verbs that are
 * dropped from a natural-language query before keyword search.
 */
var STOP_WORDS = /* @__PURE__ */ new Set([
  "how", "does", "what", "where", "when", "why", "which", "who", "whom",
  "is", "are", "was", "were", "be", "been", "being",
  "do", "did", "doing", "done",
  "the", "a", "an", "and", "or", "not", "no", "nor",
  "in", "on", "at", "to", "for", "of", "with", "by", "from", "about",
  "it", "its", "this", "that", "these", "those",
  "can", "could", "should", "would", "will", "shall", "may", "might",
  "has", "have", "had", "having",
  "i", "me", "my", "we", "our", "you", "your", "he", "she", "they",
  "find", "show", "get", "tell"
]);

/**
 * Extract the keywords of a natural-language query.
 *
 * Punctuation is replaced by spaces, the text is split on whitespace, and
 * words shorter than two characters or listed in STOP_WORDS (compared
 * case-insensitively) are dropped. Letters, combining marks and digits of any
 * script are kept, so non-ASCII words and identifiers are not split apart.
 *
 * @param {string} query - Raw user query.
 * @returns {string} The remaining keywords joined by single spaces. When every
 *   word is a stop word, the longest word of at least two characters (the
 *   first one on ties). When there is no such word, the query unchanged.
 */
function extractSearchTerms(query) {
  // Tokenise once; both the keyword path and the fallback use this list.
  const candidates = query
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length >= 2);

  const keywords = candidates.filter((word) => !STOP_WORDS.has(word.toLowerCase()));
  if (keywords.length > 0) {
    return keywords.join(" ");
  }

  // Only stop words remain: fall back to the longest one so the search still
  // has something to match on. Strict `>` keeps the first word on ties.
  let longest;
  for (const word of candidates) {
    if (longest === undefined || word.length > longest.length) {
      longest = word;
    }
  }
  return longest ?? query;
}
2149
2238
  var VALID_STRATEGIES = /* @__PURE__ */ new Set([
2150
2239
  "vector",
2151
2240
  "fts",
@@ -2154,9 +2243,11 @@ var VALID_STRATEGIES = /* @__PURE__ */ new Set([
2154
2243
  "dependency"
2155
2244
  ]);
2156
2245
  function buildFallbackPlan(query) {
2246
+ const keywords = extractSearchTerms(query);
2157
2247
  const strategies = [
2158
- { strategy: "fts", query, weight: 0.8, reason: "Full-text keyword search" },
2159
- { strategy: "ast", query, weight: 0.9, reason: "Structural symbol search" }
2248
+ { strategy: "fts", query: keywords, weight: 0.8, reason: "Full-text keyword search" },
2249
+ { strategy: "ast", query: keywords, weight: 0.9, reason: "Structural symbol search" },
2250
+ { strategy: "path", query: keywords, weight: 0.7, reason: "Path keyword search" }
2160
2251
  ];
2161
2252
  return {
2162
2253
  interpretation: `Searching for: ${query}`,
@@ -2344,6 +2435,13 @@ function createSearchExecutor(db) {
2344
2435
  return fusionMerge(strategyResults, limit);
2345
2436
  };
2346
2437
  }
2438
/**
 * Pull identifier-like tokens out of a query string.
 *
 * Three shapes are recognised, tried in this order at each position:
 *   1. snake_case   (`get_user_by_id`)
 *   2. camelCase / PascalCase / plain lowercase words (`getUser`, `User`, `user`)
 *   3. capitalised runs of letters (`JSON`, `XMLParser`)
 *
 * snake_case must be tried first: the camelCase alternative also matches a
 * bare lowercase run, so if it came first it would consume `get` from
 * `get_user` and the snake_case alternative could never match.
 *
 * @param {string} query - Text to scan.
 * @returns {string[]} Matched names in order of appearance; empty when none.
 */
function extractSymbolNames2(query) {
  const symbolPattern = /[a-z]+(?:_[a-z]+)+|[A-Z]?[a-z]+(?:[A-Z][a-z]+)*|[A-Z][a-zA-Z]+/g;
  return query.match(symbolPattern) ?? [];
}
2442
/**
 * Decide whether a query should be treated as a path or glob pattern.
 *
 * @param {string} query - Query text to inspect.
 * @returns {boolean} True when the query contains "/", "*" or ".".
 */
function isPathLike2(query) {
  const pathMarkers = ["/", "*", "."];
  return pathMarkers.some((marker) => query.includes(marker));
}
2347
2445
  async function executeStrategy2(db, plan, limit) {
2348
2446
  switch (plan.strategy) {
2349
2447
  case "vector": {
@@ -2352,10 +2450,25 @@ async function executeStrategy2(db, plan, limit) {
2352
2450
  }
2353
2451
  case "fts":
2354
2452
  return ftsSearch(db, plan.query, limit);
2355
- case "ast":
2356
- return astSearch(db, { name: plan.query }, limit);
2357
- case "path":
2358
- return pathSearch(db, plan.query, limit);
2453
+ case "ast": {
2454
+ const symbols = extractSymbolNames2(plan.query);
2455
+ if (symbols.length === 0) return [];
2456
+ const allResults = [];
2457
+ for (const name of symbols) {
2458
+ const results = astSearch(db, { name }, limit);
2459
+ allResults.push(...results);
2460
+ }
2461
+ const seen = /* @__PURE__ */ new Set();
2462
+ return allResults.filter((r) => {
2463
+ if (seen.has(r.chunkId)) return false;
2464
+ seen.add(r.chunkId);
2465
+ return true;
2466
+ });
2467
+ }
2468
+ case "path": {
2469
+ if (isPathLike2(plan.query)) return pathSearch(db, plan.query, limit);
2470
+ return pathKeywordSearch(db, plan.query, limit);
2471
+ }
2359
2472
  case "dependency":
2360
2473
  return [];
2361
2474
  }
@@ -2368,9 +2481,11 @@ async function loadEmbedder2() {
2368
2481
  }
2369
2482
  async function fallbackSearch(db, query, limit) {
2370
2483
  const executor = createSearchExecutor(db);
2484
+ const keywords = extractSearchTerms(query);
2371
2485
  const fallbackStrategies = [
2372
- { strategy: "fts", query, weight: 0.8, reason: "fallback keyword search" },
2373
- { strategy: "ast", query, weight: 0.9, reason: "fallback structural search" }
2486
+ { strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
2487
+ { strategy: "ast", query: keywords, weight: 0.9, reason: "fallback structural search" },
2488
+ { strategy: "path", query: keywords, weight: 0.7, reason: "fallback path search" }
2374
2489
  ];
2375
2490
  const results = await executor(fallbackStrategies, limit);
2376
2491
  return {
@@ -2401,6 +2516,7 @@ async function runAsk(projectPath, query, options) {
2401
2516
  const provider = options.provider ?? null;
2402
2517
  if (!provider) {
2403
2518
  const output = await fallbackSearch(db, query, options.limit);
2519
+ output.warning = FALLBACK_NOTICE;
2404
2520
  if (options.format === "text") {
2405
2521
  output.text = formatTextOutput2(output);
2406
2522
  }
@@ -2468,6 +2584,9 @@ function registerAskCommand(program2) {
2468
2584
  provider: provider ?? void 0,
2469
2585
  noExplain: opts["explain"] === false
2470
2586
  });
2587
+ if (output.warning) {
2588
+ console.error(`\u26A0 ${output.warning}`);
2589
+ }
2471
2590
  if (output.text) {
2472
2591
  console.log(output.text);
2473
2592
  } else {