kontext-engine 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +255 -14
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +250 -12
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -1568,12 +1568,17 @@ async function vectorSearch(db, embedder, query, limit, filters) {
|
|
|
1568
1568
|
}
|
|
1569
1569
|
|
|
1570
1570
|
// src/search/fts.ts
|
|
1571
|
+
/**
 * Reduce free-form user input to text that the full-text query parser can accept.
 *
 * Works as an allow-list: every ASCII character that is not a letter, digit,
 * underscore, `*` or whitespace becomes a space. The previous deny-list only
 * covered `? ( ) " : ^ ~ { } ! + - \`, so punctuation such as `.`, `,`, `'`,
 * `/`, `[` or `=` (e.g. the query `index.ts`) still reached the parser.
 * Non-ASCII characters are left untouched.
 *
 * NOTE(review): assumes db.searchFTS passes the string to an SQLite FTS5 MATCH
 * expression, where barewords may only contain alphanumerics, `_` and
 * non-ASCII characters - confirm against the database layer.
 * NOTE(review): uppercase AND / OR / NOT / NEAR are passed through unchanged.
 *
 * @param {string} query Raw user query.
 * @returns {string} Sanitized query with single spaces between tokens; may be empty.
 */
function sanitizeFtsQuery(query) {
  return query
    // Blank out every ASCII character that cannot be part of a bareword.
    .replace(/[^A-Za-z0-9_*\s\u0080-\uFFFF]/g, " ")
    // A `*` is only kept when it directly follows a word character (prefix form "foo*").
    .replace(/(?<!\w)\*/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
1571
1574
|
/**
 * Convert a raw rank value into a relevance score.
 *
 * Only the magnitude of `rank` is used, so the sign does not matter:
 * a rank of 0 maps to 1 and larger magnitudes approach 0.
 *
 * @param {number} rank Raw rank value.
 * @returns {number} 1 / (1 + |rank|).
 */
function bm25ToScore(rank) {
  const magnitude = Math.abs(rank);
  return 1 / (magnitude + 1);
}
|
|
1574
1577
|
function ftsSearch(db, query, limit, filters) {
|
|
1578
|
+
const safeQuery = sanitizeFtsQuery(query);
|
|
1579
|
+
if (safeQuery.length === 0) return [];
|
|
1575
1580
|
const fetchLimit = filters?.language ? limit * 3 : limit;
|
|
1576
|
-
const ftsResults = db.searchFTS(
|
|
1581
|
+
const ftsResults = db.searchFTS(safeQuery, fetchLimit);
|
|
1577
1582
|
if (ftsResults.length === 0) return [];
|
|
1578
1583
|
const chunkIds = ftsResults.map((r) => r.chunkId);
|
|
1579
1584
|
const chunks = db.getChunksByIds(chunkIds);
|
|
@@ -1687,6 +1692,61 @@ function pathSearch(db, pattern, limit) {
|
|
|
1687
1692
|
}
|
|
1688
1693
|
return results;
|
|
1689
1694
|
}
|
|
1695
|
+
// Scores returned by scorePathMatch, strongest match first.
// A directory segment of the path equals the search term.
var SCORE_DIR_EXACT = 1;
|
|
1696
|
+
// The file name with its last extension removed equals the search term.
var SCORE_FILENAME = 0.9;
|
|
1697
|
+
// The search term occurs somewhere in the path.
var SCORE_PARTIAL = 0.7;
|
|
1698
|
+
/**
 * Find chunks whose file path matches any keyword in the query.
 *
 * The query is lower-cased and split on whitespace; words shorter than two
 * characters are ignored. Each indexed path is given the best score that any
 * keyword achieves via scorePathMatch, paths are ordered by that score
 * (highest first), and their chunks are emitted until `limit` results exist.
 *
 * @param {object} db Database handle; uses getAllFilePaths, getFile and getChunksByFile.
 * @param {string} query Whitespace-separated keywords.
 * @param {number} limit Maximum number of chunk results to return.
 * @returns {Array<object>} Chunk results carrying the score of their file path.
 */
function pathKeywordSearch(db, query, limit) {
  const keywords = query
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length >= 2);
  if (keywords.length === 0) return [];

  const ranked = [];
  for (const candidate of db.getAllFilePaths()) {
    const top = keywords.reduce(
      (best, word) => Math.max(best, scorePathMatch(candidate, word)),
      0
    );
    if (top > 0) {
      ranked.push({ filePath: candidate, score: top });
    }
  }
  if (ranked.length === 0) return [];
  ranked.sort((left, right) => right.score - left.score);

  const hits = [];
  for (const entry of ranked) {
    if (hits.length >= limit) break;
    const file = db.getFile(entry.filePath);
    // Paths without a file record are skipped.
    if (!file) continue;
    for (const chunk of db.getChunksByFile(file.id)) {
      if (hits.length >= limit) break;
      hits.push({
        chunkId: chunk.id,
        filePath: file.path,
        lineStart: chunk.lineStart,
        lineEnd: chunk.lineEnd,
        name: chunk.name,
        type: chunk.type,
        text: chunk.text,
        score: entry.score,
        language: file.language
      });
    }
  }
  return hits;
}
|
|
1737
|
+
/**
 * Score how well one lower-cased term matches a file path.
 *
 * The path is lower-cased and split on "/". Checks run from strongest to
 * weakest: a directory segment equal to the term, then the file name
 * (last extension removed) equal to the term, then the term appearing
 * anywhere in the path.
 *
 * @param {string} filePath Path to test.
 * @param {string} queryLower Search term; compared as-is against the lower-cased path.
 * @returns {number} SCORE_DIR_EXACT, SCORE_FILENAME, SCORE_PARTIAL, or 0 when nothing matches.
 */
function scorePathMatch(filePath, queryLower) {
  const normalized = filePath.toLowerCase();
  const parts = normalized.split("/");
  const directories = parts.slice(0, -1);
  if (directories.some((dir) => dir === queryLower)) {
    return SCORE_DIR_EXACT;
  }
  const baseName = parts[parts.length - 1];
  const stem = baseName.replace(/\.[^.]+$/, "");
  if (stem === queryLower) {
    return SCORE_FILENAME;
  }
  return normalized.includes(queryLower) ? SCORE_PARTIAL : 0;
}
|
|
1690
1750
|
|
|
1691
1751
|
// src/search/fusion.ts
|
|
1692
1752
|
var K = 60;
|
|
@@ -1718,6 +1778,73 @@ function fusionMerge(strategyResults, limit) {
|
|
|
1718
1778
|
}
|
|
1719
1779
|
return results;
|
|
1720
1780
|
}
|
|
1781
|
+
// Score multipliers chosen by getPathBoostFactor, strongest match first.
// A directory segment of the result's path equals a query term.
var PATH_BOOST_DIR_EXACT = 1.5;
|
|
1782
|
+
// The file name with its last extension removed equals a query term.
var PATH_BOOST_FILENAME = 1.4;
|
|
1783
|
+
// A query term occurs somewhere in the path.
var PATH_BOOST_PARTIAL = 1.2;
|
|
1784
|
+
// Multiplier applied by applyImportDeprioritization to results whose type is "import".
var IMPORT_PENALTY = 0.5;
|
|
1785
|
+
/**
 * Fuse strategy results, then re-rank them with path boosts and import penalties.
 *
 * Three times `limit` candidates are requested from fusionMerge so that
 * re-ranking can promote entries from beyond the first `limit`. The re-ranked
 * list is sorted by score (descending), cut to `limit`, and renormalized.
 *
 * @param {Array} strategyResults Per-strategy result sets, passed through to fusionMerge.
 * @param {number} limit Maximum number of results to return.
 * @param {string[]} pathBoostTerms Terms whose presence in a path boosts a result.
 * @returns {Array<object>} At most `limit` results.
 */
function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
  const candidates = fusionMerge(strategyResults, limit * 3);
  if (candidates.length === 0) return [];

  const reranked = applyImportDeprioritization(
    applyPathBoost(candidates, pathBoostTerms)
  );
  reranked.sort((left, right) => right.score - left.score);

  return renormalize(reranked.slice(0, limit));
}
|
|
1794
|
+
/**
 * Multiply each result's score by the boost factor of its file path.
 *
 * @param {Array<object>} results Results with `filePath` and `score`.
 * @param {string[]} terms Query terms used to compute the boost.
 * @returns {Array<object>} The input array itself when `terms` is empty,
 *   otherwise a new array of copied results with adjusted scores.
 */
function applyPathBoost(results, terms) {
  if (terms.length === 0) {
    return results;
  }
  return results.map((entry) => {
    const { filePath, score } = entry;
    return { ...entry, score: score * getPathBoostFactor(filePath, terms) };
  });
}
|
|
1801
|
+
/**
 * Work out the strongest score multiplier that any term earns for a path.
 *
 * Path and terms are compared in lower case. Per term: a directory segment
 * equal to the term earns PATH_BOOST_DIR_EXACT, a file name (last extension
 * removed) equal to the term earns PATH_BOOST_FILENAME, and the term
 * appearing anywhere in the path earns PATH_BOOST_PARTIAL. The largest
 * factor seen across all terms is returned.
 *
 * @param {string} filePath Path of the result being boosted.
 * @param {Iterable<string>} terms Query terms.
 * @returns {number} Multiplier; 1 when nothing matches.
 */
function getPathBoostFactor(filePath, terms) {
  const normalized = filePath.toLowerCase();
  const parts = normalized.split("/");
  const folders = parts.slice(0, -1);
  const stem = parts[parts.length - 1].replace(/\.[^.]+$/, "");

  let factor = 1;
  for (const rawTerm of terms) {
    const needle = rawTerm.toLowerCase();
    if (folders.some((folder) => folder === needle)) {
      factor = Math.max(factor, PATH_BOOST_DIR_EXACT);
    }
    if (stem === needle) {
      factor = Math.max(factor, PATH_BOOST_FILENAME);
    }
    // The substring test is skipped once an equal or stronger boost is in place.
    if (factor < PATH_BOOST_PARTIAL && normalized.includes(needle)) {
      factor = Math.max(factor, PATH_BOOST_PARTIAL);
    }
  }
  return factor;
}
|
|
1824
|
+
/**
 * Scale down results of type "import" when other kinds of results are present.
 *
 * The input array is returned untouched when it holds no non-import result,
 * or when the largest non-import score (floored at 0) is exactly 0. Otherwise
 * a new array is returned in which every import result is a copy with its
 * score multiplied by IMPORT_PENALTY; other entries are reused as-is.
 *
 * @param {Array<object>} results Results with `type` and `score`.
 * @returns {Array<object>} Adjusted results.
 */
function applyImportDeprioritization(results) {
  const isImport = (entry) => entry.type === "import";

  const otherScores = results
    .filter((entry) => !isImport(entry))
    .map((entry) => entry.score);
  if (otherScores.length === 0) return results;
  if (Math.max(0, ...otherScores) === 0) return results;

  return results.map((entry) =>
    isImport(entry) ? { ...entry, score: entry.score * IMPORT_PENALTY } : entry
  );
}
|
|
1839
|
+
/**
 * Rescale scores so that the highest one becomes 1.
 *
 * @param {Array<object>} results Results with a numeric `score`.
 * @returns {Array<object>} The input array itself when it is empty or its
 *   highest score is 0; otherwise a new array of copies with each score
 *   divided by the highest score.
 */
function renormalize(results) {
  const scores = results.map(({ score }) => score);
  const peak = scores.length > 0 ? Math.max(...scores) : 0;
  if (peak === 0) {
    return results;
  }
  return results.map((entry) => ({ ...entry, score: entry.score / peak }));
}
|
|
1721
1848
|
|
|
1722
1849
|
// src/cli/commands/query.ts
|
|
1723
1850
|
var CTX_DIR2 = ".ctx";
|
|
@@ -1769,6 +1896,9 @@ function extractSymbolNames(query) {
|
|
|
1769
1896
|
/**
 * Tell whether a query contains one of the markers "/", "*" or ".".
 *
 * @param {string} query User query.
 * @returns {boolean} True when at least one marker is present.
 */
function isPathLike(query) {
  return ["/", "*", "."].some((marker) => query.includes(marker));
}
|
|
1899
|
+
/**
 * Split a query into the terms used for path boosting.
 *
 * The query is split on runs of whitespace and pieces shorter than two
 * characters are dropped. Case and punctuation are left as typed.
 *
 * @param {string} query User query.
 * @returns {string[]} Terms in their original order.
 */
function extractPathBoostTerms(query) {
  const terms = [];
  for (const piece of query.split(/\s+/)) {
    const cleaned = piece.trim();
    if (cleaned.length >= 2) {
      terms.push(cleaned);
    }
  }
  return terms;
}
|
|
1772
1902
|
async function runQuery(projectPath, query, options) {
|
|
1773
1903
|
const absoluteRoot = path5.resolve(projectPath);
|
|
1774
1904
|
const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
|
|
@@ -1782,7 +1912,8 @@ async function runQuery(projectPath, query, options) {
|
|
|
1782
1912
|
const db = createDatabase(dbPath);
|
|
1783
1913
|
try {
|
|
1784
1914
|
const strategyResults = await runStrategies(db, query, options);
|
|
1785
|
-
const
|
|
1915
|
+
const pathBoostTerms = extractPathBoostTerms(query);
|
|
1916
|
+
const fused = fusionMergeWithPathBoost(strategyResults, options.limit, pathBoostTerms);
|
|
1786
1917
|
const outputResults = fused.map(toOutputResult);
|
|
1787
1918
|
const searchTimeMs = Math.round(performance.now() - start);
|
|
1788
1919
|
const text = options.format === "text" ? formatTextOutput(query, outputResults) : void 0;
|
|
@@ -1847,8 +1978,8 @@ async function executeStrategy(db, strategy, query, limit, filters) {
|
|
|
1847
1978
|
});
|
|
1848
1979
|
}
|
|
1849
1980
|
case "path": {
|
|
1850
|
-
if (
|
|
1851
|
-
return
|
|
1981
|
+
if (isPathLike(query)) return pathSearch(db, query, limit);
|
|
1982
|
+
return pathKeywordSearch(db, query, limit);
|
|
1852
1983
|
}
|
|
1853
1984
|
case "dependency":
|
|
1854
1985
|
return [];
|
|
@@ -1864,12 +1995,12 @@ function registerQueryCommand(program2) {
|
|
|
1864
1995
|
program2.command("query <query>").description("Multi-strategy code search").option("-l, --limit <n>", "Max results", "10").option(
|
|
1865
1996
|
"-s, --strategy <list>",
|
|
1866
1997
|
"Comma-separated strategies: vector,fts,ast,path",
|
|
1867
|
-
"fts,ast"
|
|
1998
|
+
"fts,ast,path"
|
|
1868
1999
|
).option("--language <lang>", "Filter by language").option("-f, --format <fmt>", "Output format: json|text", "json").option("--no-vectors", "Skip vector search").action(async (query, opts) => {
|
|
1869
2000
|
const projectPath = process.cwd();
|
|
1870
2001
|
const verbose = program2.opts()["verbose"] === true;
|
|
1871
2002
|
const logger = createLogger({ level: verbose ? LogLevel.DEBUG : LogLevel.INFO });
|
|
1872
|
-
const strategies = (opts["strategy"] ?? "fts,ast").split(",").map((s) => s.trim());
|
|
2003
|
+
const strategies = (opts["strategy"] ?? "fts,ast,path").split(",").map((s) => s.trim());
|
|
1873
2004
|
try {
|
|
1874
2005
|
const output = await runQuery(projectPath, query, {
|
|
1875
2006
|
limit: parseInt(opts["limit"] ?? "10", 10),
|
|
@@ -2024,6 +2155,86 @@ function createAnthropicProvider(apiKey) {
|
|
|
2024
2155
|
}
|
|
2025
2156
|
};
|
|
2026
2157
|
}
|
|
2158
|
+
// Lower-case English question, filler and command words that extractSearchTerms
// removes from natural-language queries.
var STOP_WORDS = /* @__PURE__ */ new Set([
  "how", "does", "what", "where", "when", "why", "which", "who", "whom",
  "is", "are", "was", "were", "be", "been", "being",
  "do", "did", "doing", "done",
  "the", "a", "an", "and", "or", "not", "no", "nor",
  "in", "on", "at", "to", "for", "of", "with", "by", "from", "about",
  "it", "its", "this", "that", "these", "those",
  "can", "could", "should", "would", "will", "shall", "may", "might",
  "has", "have", "had", "having",
  "i", "me", "my", "we", "our", "you", "your", "he", "she", "they",
  "find", "show", "get", "tell"
]);

/**
 * Reduce a natural-language question to its search keywords.
 *
 * Everything that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. Words shorter than two characters and
 * stop words (compared in lower case) are dropped, and the rest are joined
 * with single spaces.
 *
 * Letters are matched with Unicode property escapes, so non-ASCII words such
 * as "größe" stay intact; the previous ASCII-only class split them apart.
 * ASCII input yields the same output as before.
 *
 * @param {string} query User query.
 * @returns {string} Space-joined keywords. When every word is a stop word the
 *   longest word is returned; when no word of two or more characters exists
 *   the original query is returned unchanged.
 */
function extractSearchTerms(query) {
  // Tokenize once; both the keyword list and the fallback use this list.
  const candidates = query
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((word) => word.length >= 2);

  const keywords = candidates.filter((word) => !STOP_WORDS.has(word.toLowerCase()));
  if (keywords.length > 0) {
    return keywords.join(" ");
  }

  // Only stop words (or nothing) left: fall back to the longest word. The sort
  // is stable, so the earliest word wins a tie.
  const [longest] = [...candidates].sort((a, b) => b.length - a.length);
  return longest ?? query;
}
|
|
2027
2238
|
var VALID_STRATEGIES = /* @__PURE__ */ new Set([
|
|
2028
2239
|
"vector",
|
|
2029
2240
|
"fts",
|
|
@@ -2032,9 +2243,11 @@ var VALID_STRATEGIES = /* @__PURE__ */ new Set([
|
|
|
2032
2243
|
"dependency"
|
|
2033
2244
|
]);
|
|
2034
2245
|
function buildFallbackPlan(query) {
|
|
2246
|
+
const keywords = extractSearchTerms(query);
|
|
2035
2247
|
const strategies = [
|
|
2036
|
-
{ strategy: "fts", query, weight: 0.8, reason: "Full-text keyword search" },
|
|
2037
|
-
{ strategy: "ast", query, weight: 0.9, reason: "Structural symbol search" }
|
|
2248
|
+
{ strategy: "fts", query: keywords, weight: 0.8, reason: "Full-text keyword search" },
|
|
2249
|
+
{ strategy: "ast", query: keywords, weight: 0.9, reason: "Structural symbol search" },
|
|
2250
|
+
{ strategy: "path", query: keywords, weight: 0.7, reason: "Path keyword search" }
|
|
2038
2251
|
];
|
|
2039
2252
|
return {
|
|
2040
2253
|
interpretation: `Searching for: ${query}`,
|
|
@@ -2222,6 +2435,13 @@ function createSearchExecutor(db) {
|
|
|
2222
2435
|
return fusionMerge(strategyResults, limit);
|
|
2223
2436
|
};
|
|
2224
2437
|
}
|
|
2438
|
+
/**
 * Pull identifier-like tokens out of a query.
 *
 * Alternatives are tried in this order at each position:
 *   1. snake_case      - lowercase runs joined by underscores (`parse_config`)
 *   2. camel/Pascal    - optional capital, lowercase run, then Capitalized parts
 *                        (`getUser`, `UserService`, plain words like `index`)
 *   3. capital-led run - a capital followed by one or more letters (`HTTPServer`)
 *
 * snake_case has to come first: the camel/Pascal alternative also matches a
 * bare lowercase run, so when it was tried first `parse_config` came back as
 * the two names `parse` and `config` and the snake_case form could never match.
 *
 * Digits are not part of any alternative, and UPPER_SNAKE names are still
 * split at the underscores.
 *
 * @param {string} query Search text.
 * @returns {string[]} Matched names in order of appearance; empty when none.
 */
function extractSymbolNames2(query) {
  const matches = query.match(
    /[a-z]+(?:_[a-z]+)+|[A-Z]?[a-z]+(?:[A-Z][a-z]+)*|[A-Z][a-zA-Z]+/g
  );
  return matches ?? [];
}
|
|
2442
|
+
/**
 * Report whether a query contains "/", "*" or ".".
 *
 * @param {string} query Query text from a search plan.
 * @returns {boolean} True as soon as one of the three markers is found.
 */
function isPathLike2(query) {
  for (const marker of "/*.") {
    if (query.includes(marker)) {
      return true;
    }
  }
  return false;
}
|
|
2225
2445
|
async function executeStrategy2(db, plan, limit) {
|
|
2226
2446
|
switch (plan.strategy) {
|
|
2227
2447
|
case "vector": {
|
|
@@ -2230,10 +2450,25 @@ async function executeStrategy2(db, plan, limit) {
|
|
|
2230
2450
|
}
|
|
2231
2451
|
case "fts":
|
|
2232
2452
|
return ftsSearch(db, plan.query, limit);
|
|
2233
|
-
case "ast":
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2453
|
+
case "ast": {
|
|
2454
|
+
const symbols = extractSymbolNames2(plan.query);
|
|
2455
|
+
if (symbols.length === 0) return [];
|
|
2456
|
+
const allResults = [];
|
|
2457
|
+
for (const name of symbols) {
|
|
2458
|
+
const results = astSearch(db, { name }, limit);
|
|
2459
|
+
allResults.push(...results);
|
|
2460
|
+
}
|
|
2461
|
+
const seen = /* @__PURE__ */ new Set();
|
|
2462
|
+
return allResults.filter((r) => {
|
|
2463
|
+
if (seen.has(r.chunkId)) return false;
|
|
2464
|
+
seen.add(r.chunkId);
|
|
2465
|
+
return true;
|
|
2466
|
+
});
|
|
2467
|
+
}
|
|
2468
|
+
case "path": {
|
|
2469
|
+
if (isPathLike2(plan.query)) return pathSearch(db, plan.query, limit);
|
|
2470
|
+
return pathKeywordSearch(db, plan.query, limit);
|
|
2471
|
+
}
|
|
2237
2472
|
case "dependency":
|
|
2238
2473
|
return [];
|
|
2239
2474
|
}
|
|
@@ -2246,9 +2481,11 @@ async function loadEmbedder2() {
|
|
|
2246
2481
|
}
|
|
2247
2482
|
async function fallbackSearch(db, query, limit) {
|
|
2248
2483
|
const executor = createSearchExecutor(db);
|
|
2484
|
+
const keywords = extractSearchTerms(query);
|
|
2249
2485
|
const fallbackStrategies = [
|
|
2250
|
-
{ strategy: "fts", query, weight: 0.8, reason: "fallback keyword search" },
|
|
2251
|
-
{ strategy: "ast", query, weight: 0.9, reason: "fallback structural search" }
|
|
2486
|
+
{ strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
|
|
2487
|
+
{ strategy: "ast", query: keywords, weight: 0.9, reason: "fallback structural search" },
|
|
2488
|
+
{ strategy: "path", query: keywords, weight: 0.7, reason: "fallback path search" }
|
|
2252
2489
|
];
|
|
2253
2490
|
const results = await executor(fallbackStrategies, limit);
|
|
2254
2491
|
return {
|
|
@@ -2279,6 +2516,7 @@ async function runAsk(projectPath, query, options) {
|
|
|
2279
2516
|
const provider = options.provider ?? null;
|
|
2280
2517
|
if (!provider) {
|
|
2281
2518
|
const output = await fallbackSearch(db, query, options.limit);
|
|
2519
|
+
output.warning = FALLBACK_NOTICE;
|
|
2282
2520
|
if (options.format === "text") {
|
|
2283
2521
|
output.text = formatTextOutput2(output);
|
|
2284
2522
|
}
|
|
@@ -2346,6 +2584,9 @@ function registerAskCommand(program2) {
|
|
|
2346
2584
|
provider: provider ?? void 0,
|
|
2347
2585
|
noExplain: opts["explain"] === false
|
|
2348
2586
|
});
|
|
2587
|
+
if (output.warning) {
|
|
2588
|
+
console.error(`\u26A0 ${output.warning}`);
|
|
2589
|
+
}
|
|
2349
2590
|
if (output.text) {
|
|
2350
2591
|
console.log(output.text);
|
|
2351
2592
|
} else {
|