kontext-engine 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +297 -44
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.js +297 -43
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -1208,12 +1208,15 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1208
1208
|
const rows = db.prepare(
|
|
1209
1209
|
`SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1210
1210
|
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1211
|
-
c.type, c.name, c.parent, c.text
|
|
1211
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1212
1212
|
FROM chunks c
|
|
1213
1213
|
JOIN files f ON f.id = c.file_id
|
|
1214
1214
|
WHERE c.id IN (${placeholders})`
|
|
1215
1215
|
).all(...ids);
|
|
1216
|
-
return rows
|
|
1216
|
+
return rows.map((r) => ({
|
|
1217
|
+
...r,
|
|
1218
|
+
exports: r.exports === 1
|
|
1219
|
+
}));
|
|
1217
1220
|
},
|
|
1218
1221
|
searchChunks(filters, limit) {
|
|
1219
1222
|
const conditions = [];
|
|
@@ -1250,7 +1253,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1250
1253
|
const sql = `
|
|
1251
1254
|
SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1252
1255
|
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1253
|
-
c.type, c.name, c.parent, c.text
|
|
1256
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1254
1257
|
FROM chunks c
|
|
1255
1258
|
JOIN files f ON f.id = c.file_id
|
|
1256
1259
|
${where}
|
|
@@ -1258,7 +1261,11 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1258
1261
|
LIMIT ?
|
|
1259
1262
|
`;
|
|
1260
1263
|
params.push(limit);
|
|
1261
|
-
|
|
1264
|
+
const rows = db.prepare(sql).all(...params);
|
|
1265
|
+
return rows.map((r) => ({
|
|
1266
|
+
...r,
|
|
1267
|
+
exports: r.exports === 1
|
|
1268
|
+
}));
|
|
1262
1269
|
},
|
|
1263
1270
|
deleteChunksByFile(fileId) {
|
|
1264
1271
|
const chunkRows = stmtGetChunkIdsByFile.all(fileId);
|
|
@@ -1558,6 +1565,7 @@ async function vectorSearch(db, embedder, query, limit, filters) {
|
|
|
1558
1565
|
lineEnd: chunk.lineEnd,
|
|
1559
1566
|
name: chunk.name,
|
|
1560
1567
|
type: chunk.type,
|
|
1568
|
+
exported: chunk.exports,
|
|
1561
1569
|
text: chunk.text,
|
|
1562
1570
|
score: distanceToScore(vr.distance),
|
|
1563
1571
|
language: chunk.language
|
|
@@ -1598,6 +1606,7 @@ function ftsSearch(db, query, limit, filters) {
|
|
|
1598
1606
|
lineEnd: chunk.lineEnd,
|
|
1599
1607
|
name: chunk.name,
|
|
1600
1608
|
type: chunk.type,
|
|
1609
|
+
exported: chunk.exports,
|
|
1601
1610
|
text: chunk.text,
|
|
1602
1611
|
score: bm25ToScore(fts.rank),
|
|
1603
1612
|
language: chunk.language
|
|
@@ -1632,6 +1641,7 @@ function astSearch(db, filters, limit) {
|
|
|
1632
1641
|
lineEnd: chunk.lineEnd,
|
|
1633
1642
|
name: chunk.name,
|
|
1634
1643
|
type: chunk.type,
|
|
1644
|
+
exported: chunk.exports,
|
|
1635
1645
|
text: chunk.text,
|
|
1636
1646
|
score,
|
|
1637
1647
|
language: chunk.language
|
|
@@ -1684,6 +1694,7 @@ function pathSearch(db, pattern, limit) {
|
|
|
1684
1694
|
lineEnd: chunk.lineEnd,
|
|
1685
1695
|
name: chunk.name,
|
|
1686
1696
|
type: chunk.type,
|
|
1697
|
+
exported: chunk.exports,
|
|
1687
1698
|
text: chunk.text,
|
|
1688
1699
|
score: 1,
|
|
1689
1700
|
language: file.language
|
|
@@ -1726,6 +1737,7 @@ function pathKeywordSearch(db, query, limit) {
|
|
|
1726
1737
|
lineEnd: chunk.lineEnd,
|
|
1727
1738
|
name: chunk.name,
|
|
1728
1739
|
type: chunk.type,
|
|
1740
|
+
exported: chunk.exports,
|
|
1729
1741
|
text: chunk.text,
|
|
1730
1742
|
score,
|
|
1731
1743
|
language: file.language
|
|
@@ -1782,11 +1794,24 @@ var PATH_BOOST_DIR_EXACT = 1.5;
|
|
|
1782
1794
|
var PATH_BOOST_FILENAME = 1.4;
|
|
1783
1795
|
var PATH_BOOST_PARTIAL = 1.2;
|
|
1784
1796
|
var IMPORT_PENALTY = 0.5;
|
|
1797
|
+
var TEST_FILE_PENALTY = 0.65;
|
|
1798
|
+
var SMALL_SNIPPET_PENALTY = 0.75;
|
|
1799
|
+
var PUBLIC_API_BOOST = 1.12;
|
|
1800
|
+
var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
|
|
1801
|
+
var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
|
|
1802
|
+
var SMALL_SNIPPET_MAX_LINES = 3;
|
|
1803
|
+
/**
 * Break a raw query into the whitespace-delimited tokens that are later
 * handed to the path-boost stage of result fusion.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Tokens of at least two characters, in query order.
 */
function extractPathBoostTerms(query) {
  const terms = [];
  for (const piece of query.split(/\s+/)) {
    const token = piece.trim();
    // Drops single characters and the empty strings that leading/trailing
    // whitespace leaves behind after the split.
    if (token.length >= 2) {
      terms.push(token);
    }
  }
  return terms;
}
|
|
1785
1806
|
function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
|
|
1786
1807
|
const fused = fusionMerge(strategyResults, limit * 3);
|
|
1787
1808
|
if (fused.length === 0) return [];
|
|
1788
1809
|
const boosted = applyPathBoost(fused, pathBoostTerms);
|
|
1789
|
-
const
|
|
1810
|
+
const importAdjusted = applyImportDeprioritization(boosted);
|
|
1811
|
+
const testAdjusted = applyTestFileDeprioritization(importAdjusted);
|
|
1812
|
+
const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
|
|
1813
|
+
const boostedApi = applyPublicApiBoost(snippetAdjusted);
|
|
1814
|
+
const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
|
|
1790
1815
|
adjusted.sort((a, b) => b.score - a.score);
|
|
1791
1816
|
const sliced = adjusted.slice(0, limit);
|
|
1792
1817
|
return renormalize(sliced);
|
|
@@ -1836,6 +1861,76 @@ function applyImportDeprioritization(results) {
|
|
|
1836
1861
|
return r;
|
|
1837
1862
|
});
|
|
1838
1863
|
}
|
|
1864
|
+
/**
 * Scale down the score of every result whose path is classified as a test
 * file by `isTestFilePath`.
 *
 * The input array is returned as-is when it contains no non-test result, or
 * when the highest non-test score (floored at 0) is exactly 0.
 *
 * @param {Array<object>} results - Results carrying `filePath` and `score`.
 * @returns {Array<object>} Either `results` itself, or a new array in which
 *   test-file entries are shallow copies with `score * TEST_FILE_PENALTY`.
 */
function applyTestFileDeprioritization(results) {
  // Classify each result once; the flags are reused by every step below.
  const testFlags = results.map((entry) => isTestFilePath(entry.filePath));
  if (testFlags.every(Boolean)) return results;

  let bestNonTestScore = 0;
  results.forEach((entry, idx) => {
    if (!testFlags[idx]) {
      bestNonTestScore = Math.max(bestNonTestScore, entry.score);
    }
  });
  if (bestNonTestScore === 0) return results;

  return results.map((entry, idx) =>
    testFlags[idx] ? { ...entry, score: entry.score * TEST_FILE_PENALTY } : entry
  );
}
|
|
1879
|
+
/**
 * Scale down the score of every result that `isSmallSnippet` classifies as a
 * small snippet.
 *
 * The input array is returned as-is when every result is a small snippet, or
 * when the highest score among the larger results (floored at 0) is exactly 0.
 *
 * @param {Array<object>} results - Results carrying `lineStart`, `lineEnd` and `score`.
 * @returns {Array<object>} Either `results` itself, or a new array in which
 *   small-snippet entries are shallow copies with `score * SMALL_SNIPPET_PENALTY`.
 */
function applySmallSnippetDeprioritization(results) {
  // Classify each result once; the flags are reused by every step below.
  const smallFlags = results.map((entry) => isSmallSnippet(entry));
  if (smallFlags.every(Boolean)) return results;

  let bestLargeScore = 0;
  results.forEach((entry, idx) => {
    if (!smallFlags[idx]) {
      bestLargeScore = Math.max(bestLargeScore, entry.score);
    }
  });
  if (bestLargeScore === 0) return results;

  return results.map((entry, idx) =>
    smallFlags[idx] ? { ...entry, score: entry.score * SMALL_SNIPPET_PENALTY } : entry
  );
}
|
|
1894
|
+
/**
 * Multiply the score of every result that `isPublicApiSymbol` accepts by
 * `PUBLIC_API_BOOST`; all other results are passed through unchanged.
 *
 * @param {Array<object>} results - Results carrying `score`.
 * @returns {Array<object>} A new array; boosted entries are shallow copies,
 *   the rest are the original objects.
 */
function applyPublicApiBoost(results) {
  return results.map((entry) =>
    isPublicApiSymbol(entry)
      ? { ...entry, score: entry.score * PUBLIC_API_BOOST }
      : entry
  );
}
|
|
1902
|
+
/**
 * Apply a per-file diminishing factor so repeated hits from the same file
 * count for progressively less.
 *
 * Results are visited from highest to lowest score; the n-th result seen for
 * a given `filePath` has its score multiplied by `getFileDiversityFactor(n)`.
 *
 * @param {Array<object>} results - Results carrying `filePath` and `score`.
 * @returns {Array<object>} `results` itself when it has at most one entry;
 *   otherwise a new array of shallow copies, ordered by the pre-adjustment
 *   score (descending).
 */
function applyFileDiversityDiminishingReturns(results) {
  if (results.length <= 1) return results;

  const occurrences = /* @__PURE__ */ new Map();
  // Sort a copy so the caller's array keeps its order.
  const byScoreDesc = Array.from(results).sort((left, right) => right.score - left.score);
  const adjusted = [];
  for (const entry of byScoreDesc) {
    const nth = occurrences.has(entry.filePath)
      ? occurrences.get(entry.filePath) + 1
      : 1;
    occurrences.set(entry.filePath, nth);
    adjusted.push({ ...entry, score: entry.score * getFileDiversityFactor(nth) });
  }
  return adjusted;
}
|
|
1915
|
+
/**
 * Decide whether a path refers to a test file.
 *
 * The path is lower-cased and its backslashes are turned into forward
 * slashes before it is checked against `TEST_FILE_DIRECTORY_PATTERN` and
 * `TEST_FILE_NAME_PATTERN`.
 *
 * @param {string} filePath - Path to classify.
 * @returns {boolean} True when either pattern matches the normalised path.
 */
function isTestFilePath(filePath) {
  const candidate = filePath.toLowerCase().split("\\").join("/");
  if (TEST_FILE_DIRECTORY_PATTERN.test(candidate)) {
    return true;
  }
  return TEST_FILE_NAME_PATTERN.test(candidate);
}
|
|
1919
|
+
/**
 * Report whether a result spans at most `SMALL_SNIPPET_MAX_LINES` lines.
 *
 * @param {{lineStart: number, lineEnd: number}} result - Result with an inclusive line range.
 * @returns {boolean} True when the inclusive line span, treated as at least 1, is within the limit.
 */
function isSmallSnippet(result) {
  const span = result.lineEnd - result.lineStart + 1;
  // An inverted range (lineEnd before lineStart) still counts as a single line.
  const lineCount = span < 1 ? 1 : span;
  return lineCount <= SMALL_SNIPPET_MAX_LINES;
}
|
|
1923
|
+
/**
 * Decide whether a search result represents an exported (public API) symbol.
 *
 * A result qualifies when its `exported` flag is strictly `true`, or when its
 * text, ignoring leading whitespace, begins with the `export` keyword followed
 * by whitespace, `{` or `*` (so `export const`, `export\tfunction`,
 * `export{ a }` and `export* from` are all recognised). The keyword match is
 * case-insensitive.
 *
 * @param {{exported?: boolean, text?: string}} result - Search result to inspect.
 * @returns {boolean} True when the result looks like a public API symbol.
 */
function isPublicApiSymbol(result) {
  if (result.exported === true) return true;
  const { text } = result;
  // A result without usable text cannot be inspected; treat it as non-public
  // rather than throwing and aborting the whole ranking pass.
  if (typeof text !== "string") return false;
  // The lookahead keeps identifiers such as `exporter` or `exports` from matching.
  return /^\s*export(?=[\s{*])/i.test(text);
}
|
|
1928
|
+
/**
 * Map the ordinal of a result within its file to a score multiplier.
 *
 * @param {number} fileOccurrence - 1 for the first result seen from a file, 2 for the second, and so on.
 * @returns {number} 1 for the first occurrence (or any value <= 1), 0.9 for the
 *   second, 0.8 for the third, and 0.7 for everything else.
 */
function getFileDiversityFactor(fileOccurrence) {
  if (fileOccurrence <= 1) return 1;
  switch (fileOccurrence) {
    case 2:
      return 0.9;
    case 3:
      return 0.8;
    default:
      return 0.7;
  }
}
|
|
1839
1934
|
function renormalize(results) {
|
|
1840
1935
|
if (results.length === 0) return results;
|
|
1841
1936
|
const maxScore = Math.max(...results.map((r) => r.score));
|
|
@@ -1896,9 +1991,6 @@ function extractSymbolNames(query) {
|
|
|
1896
1991
|
/**
 * Heuristic check for queries that look like a file path or glob.
 *
 * @param {string} query - Raw user query.
 * @returns {boolean} True when the query contains a slash, an asterisk or a dot.
 */
function isPathLike(query) {
  const pathMarkers = ["/", "*", "."];
  return pathMarkers.some((marker) => query.includes(marker));
}
|
|
1899
|
-
function extractPathBoostTerms(query) {
|
|
1900
|
-
return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
|
|
1901
|
-
}
|
|
1902
1994
|
async function runQuery(projectPath, query, options) {
|
|
1903
1995
|
const absoluteRoot = path5.resolve(projectPath);
|
|
1904
1996
|
const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
|
|
@@ -2028,27 +2120,128 @@ function registerQueryCommand(program2) {
|
|
|
2028
2120
|
import fs7 from "fs";
|
|
2029
2121
|
import path6 from "path";
|
|
2030
2122
|
|
|
2123
|
+
// src/steering/prompts.ts
|
|
2124
|
+
var PLAN_SYSTEM_PROMPT = `You are a code-search strategy planner for a TypeScript/JavaScript codebase.
|
|
2125
|
+
|
|
2126
|
+
Given a user query, produce a JSON object with:
|
|
2127
|
+
- "interpretation": one sentence summarising what the user wants to find.
|
|
2128
|
+
- "strategies": an ordered array of search strategies (most important first).
|
|
2129
|
+
|
|
2130
|
+
Each strategy object has:
|
|
2131
|
+
"strategy" \u2014 one of "vector", "fts", "ast", "path", "dependency"
|
|
2132
|
+
"query" \u2014 the optimised search string for that strategy (see rules below)
|
|
2133
|
+
"weight" \u2014 importance 0\u20131 (highest-priority strategy gets 1.0)
|
|
2134
|
+
"reason" \u2014 one sentence explaining why this strategy helps
|
|
2135
|
+
|
|
2136
|
+
## Strategy selection rules
|
|
2137
|
+
|
|
2138
|
+
| Signal in query | Primary strategy | Supporting strategies |
|
|
2139
|
+
|---|---|---|
|
|
2140
|
+
| Conceptual / "how does X work" / natural language | vector | fts, ast |
|
|
2141
|
+
| Exact keyword, identifier, or error message | fts | ast |
|
|
2142
|
+
| Symbol name (function, class, type, variable) | ast | fts |
|
|
2143
|
+
| File path, glob, or extension (e.g. "*.test.ts") | path | fts |
|
|
2144
|
+
| Import chain / "what depends on X" | dependency | ast, fts |
|
|
2145
|
+
| Mixed: natural language + code symbol | vector + ast | fts |
|
|
2146
|
+
|
|
2147
|
+
## Query optimisation rules
|
|
2148
|
+
- **vector**: keep the query close to natural language; rephrase for semantic similarity.
|
|
2149
|
+
- **fts**: extract the most distinctive keywords/identifiers; drop stop words.
|
|
2150
|
+
- **ast**: use only the symbol name (camelCase, snake_case, or PascalCase). Strip surrounding prose.
|
|
2151
|
+
- **path**: use a glob or slash-separated path segment (e.g. "src/auth/*.ts").
|
|
2152
|
+
- **dependency**: use the bare module or file name being imported.
|
|
2153
|
+
|
|
2154
|
+
## Edge cases
|
|
2155
|
+
- **Vague query** (e.g. "help me understand this"): use vector with the full query; add fts with any nouns present.
|
|
2156
|
+
- **Multi-concept query** (e.g. "authentication and rate limiting"): create separate strategies for each concept, both at high weight.
|
|
2157
|
+
- **Code symbol mixed with prose** (e.g. "where is the validateToken function called"): use ast for the symbol and vector for the intent.
|
|
2158
|
+
- **Query is just a symbol** (e.g. "createPool"): use ast at weight 1.0 and fts at weight 0.7. Skip vector.
|
|
2159
|
+
|
|
2160
|
+
## Examples
|
|
2161
|
+
|
|
2162
|
+
User: "how does authentication work"
|
|
2163
|
+
\`\`\`json
|
|
2164
|
+
{
|
|
2165
|
+
"interpretation": "Understand the authentication flow and related middleware.",
|
|
2166
|
+
"strategies": [
|
|
2167
|
+
{ "strategy": "vector", "query": "authentication flow middleware", "weight": 1.0, "reason": "Conceptual question best served by semantic search." },
|
|
2168
|
+
{ "strategy": "fts", "query": "authentication middleware auth", "weight": 0.7, "reason": "Keyword fallback for auth-related identifiers." },
|
|
2169
|
+
{ "strategy": "ast", "query": "authenticate", "weight": 0.6, "reason": "Likely function or class name." }
|
|
2170
|
+
]
|
|
2171
|
+
}
|
|
2172
|
+
\`\`\`
|
|
2173
|
+
|
|
2174
|
+
User: "validateToken"
|
|
2175
|
+
\`\`\`json
|
|
2176
|
+
{
|
|
2177
|
+
"interpretation": "Find the validateToken symbol definition and usages.",
|
|
2178
|
+
"strategies": [
|
|
2179
|
+
{ "strategy": "ast", "query": "validateToken", "weight": 1.0, "reason": "Exact symbol lookup." },
|
|
2180
|
+
{ "strategy": "fts", "query": "validateToken", "weight": 0.7, "reason": "Catch references in comments or strings." }
|
|
2181
|
+
]
|
|
2182
|
+
}
|
|
2183
|
+
\`\`\`
|
|
2184
|
+
|
|
2185
|
+
User: "where is rate limiting configured in src/middleware"
|
|
2186
|
+
\`\`\`json
|
|
2187
|
+
{
|
|
2188
|
+
"interpretation": "Locate rate-limiting configuration inside the middleware directory.",
|
|
2189
|
+
"strategies": [
|
|
2190
|
+
{ "strategy": "path", "query": "src/middleware/*", "weight": 0.9, "reason": "Scope results to the specified directory." },
|
|
2191
|
+
{ "strategy": "vector", "query": "rate limiting configuration", "weight": 1.0, "reason": "Semantic match for the concept." },
|
|
2192
|
+
{ "strategy": "fts", "query": "rateLimit rateLimiter", "weight": 0.7, "reason": "Common identifier variants." }
|
|
2193
|
+
]
|
|
2194
|
+
}
|
|
2195
|
+
\`\`\`
|
|
2196
|
+
|
|
2197
|
+
User: "authentication and database connection pooling"
|
|
2198
|
+
\`\`\`json
|
|
2199
|
+
{
|
|
2200
|
+
"interpretation": "Find code related to both authentication and database connection pooling.",
|
|
2201
|
+
"strategies": [
|
|
2202
|
+
{ "strategy": "vector", "query": "authentication login", "weight": 1.0, "reason": "Semantic search for the auth concept." },
|
|
2203
|
+
{ "strategy": "vector", "query": "database connection pool", "weight": 1.0, "reason": "Semantic search for the DB pooling concept." },
|
|
2204
|
+
{ "strategy": "fts", "query": "auth createPool connectionPool", "weight": 0.7, "reason": "Keyword fallback for likely identifiers." }
|
|
2205
|
+
]
|
|
2206
|
+
}
|
|
2207
|
+
\`\`\`
|
|
2208
|
+
|
|
2209
|
+
Output ONLY the JSON object. No markdown fences, no commentary.`;
|
|
2210
|
+
var SYNTHESIZE_SYSTEM_PROMPT = `You are a code-search assistant. Given a user query and ranked search results, produce a concise, actionable summary.
|
|
2211
|
+
|
|
2212
|
+
## Output structure (plain text, no markdown)
|
|
2213
|
+
|
|
2214
|
+
1. **Key finding** (1\u20132 sentences): the most important result or answer first.
|
|
2215
|
+
2. **Supporting locations** (bulleted, max 5): each line is "filePath:lineStart \u2013 brief description".
|
|
2216
|
+
3. **Additional context** (0\u20132 sentences, optional): relationships between results, patterns, or next steps.
|
|
2217
|
+
|
|
2218
|
+
## Rules
|
|
2219
|
+
- Always reference file paths and line numbers from the search results.
|
|
2220
|
+
- Mention specific symbol names (functions, classes, types) when they appear in results.
|
|
2221
|
+
- If no result clearly answers the query, say so and suggest a refined search.
|
|
2222
|
+
- Be concise \u2014 aim for 4\u20138 lines total. Do not repeat the query back.
|
|
2223
|
+
- Do not use markdown formatting (no #, *, \`, or fences). Use plain text only.
|
|
2224
|
+
- Group related results rather than listing every result individually.
|
|
2225
|
+
|
|
2226
|
+
## Example
|
|
2227
|
+
|
|
2228
|
+
Query: "how does token validation work"
|
|
2229
|
+
Results include validateToken in src/auth/tokens.ts:42 and authMiddleware in src/middleware/auth.ts:15.
|
|
2230
|
+
|
|
2231
|
+
Good output:
|
|
2232
|
+
Token validation is handled by validateToken (src/auth/tokens.ts:42), which decodes a JWT and checks expiry and signature against the configured secret.
|
|
2233
|
+
|
|
2234
|
+
Related locations:
|
|
2235
|
+
- src/auth/tokens.ts:42 \u2013 validateToken: core JWT decode + verify logic
|
|
2236
|
+
- src/middleware/auth.ts:15 \u2013 authMiddleware: calls validateToken on every protected route
|
|
2237
|
+
- src/auth/types.ts:5 \u2013 TokenPayload type definition
|
|
2238
|
+
|
|
2239
|
+
The middleware extracts the Bearer token from the Authorization header before passing it to validateToken.`;
|
|
2240
|
+
|
|
2031
2241
|
// src/steering/llm.ts
|
|
2032
2242
|
var GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-preview:generateContent";
|
|
2033
2243
|
var OPENAI_URL = "https://api.openai.com/v1/responses";
|
|
2034
2244
|
var ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
|
|
2035
|
-
var PLAN_SYSTEM_PROMPT = `You are a code search strategy planner. Given a user query about code, output a JSON object with:
|
|
2036
|
-
- "interpretation": a one-line summary of what the user is looking for
|
|
2037
|
-
- "strategies": an array of search strategy objects, each with:
|
|
2038
|
-
- "strategy": one of "vector", "fts", "ast", "path", "dependency"
|
|
2039
|
-
- "query": the optimized query string for that strategy
|
|
2040
|
-
- "weight": a number 0-1 indicating importance
|
|
2041
|
-
- "reason": brief explanation of why this strategy is used
|
|
2042
|
-
|
|
2043
|
-
Choose strategies based on query type:
|
|
2044
|
-
- Conceptual/natural language \u2192 vector (semantic search)
|
|
2045
|
-
- Keywords/identifiers \u2192 fts (full-text search)
|
|
2046
|
-
- Symbol names (functions, classes) \u2192 ast (structural search)
|
|
2047
|
-
- File paths or patterns \u2192 path (path glob search)
|
|
2048
|
-
- Import/dependency chains \u2192 dependency
|
|
2049
|
-
|
|
2050
|
-
Output ONLY valid JSON, no markdown.`;
|
|
2051
|
-
var SYNTHESIZE_SYSTEM_PROMPT = `You are a code search assistant. Given search results, write a brief, helpful explanation of what was found. Be concise (2-4 sentences). Reference specific files and function names. Do not use markdown.`;
|
|
2052
2245
|
function createGeminiProvider(apiKey) {
|
|
2053
2246
|
return {
|
|
2054
2247
|
name: "gemini",
|
|
@@ -2156,6 +2349,7 @@ function createAnthropicProvider(apiKey) {
|
|
|
2156
2349
|
};
|
|
2157
2350
|
}
|
|
2158
2351
|
var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
2352
|
+
// Interrogatives & conjunctions
|
|
2159
2353
|
"how",
|
|
2160
2354
|
"does",
|
|
2161
2355
|
"what",
|
|
@@ -2165,6 +2359,7 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
2165
2359
|
"which",
|
|
2166
2360
|
"who",
|
|
2167
2361
|
"whom",
|
|
2362
|
+
// Be-verbs
|
|
2168
2363
|
"is",
|
|
2169
2364
|
"are",
|
|
2170
2365
|
"was",
|
|
@@ -2172,10 +2367,12 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
2172
2367
|
"be",
|
|
2173
2368
|
"been",
|
|
2174
2369
|
"being",
|
|
2370
|
+
// Do-verbs
|
|
2175
2371
|
"do",
|
|
2176
2372
|
"did",
|
|
2177
2373
|
"doing",
|
|
2178
2374
|
"done",
|
|
2375
|
+
// Articles, connectors, prepositions
|
|
2179
2376
|
"the",
|
|
2180
2377
|
"a",
|
|
2181
2378
|
"an",
|
|
@@ -2194,12 +2391,30 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
2194
2391
|
"by",
|
|
2195
2392
|
"from",
|
|
2196
2393
|
"about",
|
|
2394
|
+
"into",
|
|
2395
|
+
"through",
|
|
2396
|
+
"between",
|
|
2397
|
+
"after",
|
|
2398
|
+
"before",
|
|
2399
|
+
"during",
|
|
2400
|
+
// Pronouns & demonstratives
|
|
2197
2401
|
"it",
|
|
2198
2402
|
"its",
|
|
2199
2403
|
"this",
|
|
2200
2404
|
"that",
|
|
2201
2405
|
"these",
|
|
2202
2406
|
"those",
|
|
2407
|
+
"i",
|
|
2408
|
+
"me",
|
|
2409
|
+
"my",
|
|
2410
|
+
"we",
|
|
2411
|
+
"our",
|
|
2412
|
+
"you",
|
|
2413
|
+
"your",
|
|
2414
|
+
"he",
|
|
2415
|
+
"she",
|
|
2416
|
+
"they",
|
|
2417
|
+
// Modals
|
|
2203
2418
|
"can",
|
|
2204
2419
|
"could",
|
|
2205
2420
|
"should",
|
|
@@ -2208,32 +2423,69 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
2208
2423
|
"shall",
|
|
2209
2424
|
"may",
|
|
2210
2425
|
"might",
|
|
2426
|
+
// Have-verbs
|
|
2211
2427
|
"has",
|
|
2212
2428
|
"have",
|
|
2213
2429
|
"had",
|
|
2214
2430
|
"having",
|
|
2215
|
-
|
|
2216
|
-
"me",
|
|
2217
|
-
"my",
|
|
2218
|
-
"we",
|
|
2219
|
-
"our",
|
|
2220
|
-
"you",
|
|
2221
|
-
"your",
|
|
2222
|
-
"he",
|
|
2223
|
-
"she",
|
|
2224
|
-
"they",
|
|
2431
|
+
// Common imperative verbs that carry no search value
|
|
2225
2432
|
"find",
|
|
2226
2433
|
"show",
|
|
2227
2434
|
"get",
|
|
2228
|
-
"tell"
|
|
2435
|
+
"tell",
|
|
2436
|
+
"look",
|
|
2437
|
+
"give",
|
|
2438
|
+
"list",
|
|
2439
|
+
"explain",
|
|
2440
|
+
// Misc filler
|
|
2441
|
+
"all",
|
|
2442
|
+
"any",
|
|
2443
|
+
"some",
|
|
2444
|
+
"each",
|
|
2445
|
+
"every",
|
|
2446
|
+
"much",
|
|
2447
|
+
"many",
|
|
2448
|
+
"also",
|
|
2449
|
+
"just",
|
|
2450
|
+
"like",
|
|
2451
|
+
"then",
|
|
2452
|
+
"there",
|
|
2453
|
+
"here",
|
|
2454
|
+
"very",
|
|
2455
|
+
"really",
|
|
2456
|
+
"use",
|
|
2457
|
+
"used",
|
|
2458
|
+
"using"
|
|
2229
2459
|
]);
|
|
2460
|
+
var CODE_IDENT_RE = /^(?:[a-z]+(?:[A-Z][a-z]*)+|[A-Z][a-zA-Z]+|[a-z]+(?:_[a-z]+)+|[A-Z]+(?:_[A-Z]+)+)$/;
|
|
2461
|
+
var DOTTED_IDENT_RE = /[a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)+/g;
|
|
2230
2462
|
/**
 * Reduce a free-form query to a space-separated keyword string.
 *
 * Terms are collected in three passes and de-duplicated case-insensitively
 * (the first spelling seen wins):
 *   1. dotted identifiers matched by `DOTTED_IDENT_RE` (e.g. `foo.bar`);
 *   2. whitespace-delimited tokens containing `/`, with trailing `?!,;` removed;
 *   3. the remaining words, skipping entries of `STOP_WORDS` unless the word
 *      matches `CODE_IDENT_RE`.
 *
 * When nothing survives, the longest alphanumeric word of the query is
 * returned, or the query itself if there is none.
 *
 * @param {string} query - Raw user query.
 * @returns {string} Space-joined search terms, or the fallback described above.
 */
function extractSearchTerms(query) {
  const terms = [];
  const seen = /* @__PURE__ */ new Set();
  const addUnique = (term) => {
    const key = term.toLowerCase();
    if (!seen.has(key)) {
      seen.add(key);
      terms.push(term);
    }
  };

  for (const dotted of query.match(DOTTED_IDENT_RE) ?? []) {
    addUnique(dotted);
  }

  for (const token of query.split(/\s+/)) {
    if (token.includes("/")) {
      addUnique(token.replace(/[?!,;]+$/g, ""));
    }
  }

  const rawWords = query.replace(/[^a-zA-Z0-9_.\s/-]/g, " ").split(/\s+/);
  for (const raw of rawWords) {
    // "." is kept by the character class above so dotted names survive, which
    // also leaves sentence-final periods glued to plain words ("this.",
    // "loaded."). Strip them so stop-word filtering and de-duplication see the
    // bare word. Path-like tokens are left exactly as the path pass emitted
    // them, and leading dots are preserved (".env").
    const word = raw.includes("/") ? raw : raw.replace(/\.+$/, "");
    if (word.length < 2) continue;
    const lower = word.toLowerCase();
    if (seen.has(lower)) continue;
    // NOTE(review): CODE_IDENT_RE also accepts any capitalised word, so a
    // sentence-initial stop word such as "How" is kept — confirm this is intended.
    if (STOP_WORDS.has(lower) && !CODE_IDENT_RE.test(word)) continue;
    addUnique(word);
  }

  if (terms.length === 0) {
    const allWords = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
    const longest = allWords.sort((a, b) => b.length - a.length)[0];
    return longest ?? query;
  }
  return terms.join(" ");
}
|
|
2238
2490
|
var VALID_STRATEGIES = /* @__PURE__ */ new Set([
|
|
2239
2491
|
"vector",
|
|
@@ -2418,7 +2670,8 @@ function formatTextOutput2(output) {
|
|
|
2418
2670
|
);
|
|
2419
2671
|
return lines.join("\n");
|
|
2420
2672
|
}
|
|
2421
|
-
function createSearchExecutor(db) {
|
|
2673
|
+
function createSearchExecutor(db, query) {
|
|
2674
|
+
const pathBoostTerms = extractPathBoostTerms(query);
|
|
2422
2675
|
return async (strategies, limit) => {
|
|
2423
2676
|
const strategyResults = [];
|
|
2424
2677
|
const fetchLimit = limit * 3;
|
|
@@ -2432,7 +2685,7 @@ function createSearchExecutor(db) {
|
|
|
2432
2685
|
});
|
|
2433
2686
|
}
|
|
2434
2687
|
}
|
|
2435
|
-
return
|
|
2688
|
+
return fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
|
|
2436
2689
|
};
|
|
2437
2690
|
}
|
|
2438
2691
|
function extractSymbolNames2(query) {
|
|
@@ -2480,7 +2733,7 @@ async function loadEmbedder2() {
|
|
|
2480
2733
|
return embedderInstance2;
|
|
2481
2734
|
}
|
|
2482
2735
|
async function fallbackSearch(db, query, limit) {
|
|
2483
|
-
const executor = createSearchExecutor(db);
|
|
2736
|
+
const executor = createSearchExecutor(db, query);
|
|
2484
2737
|
const keywords = extractSearchTerms(query);
|
|
2485
2738
|
const fallbackStrategies = [
|
|
2486
2739
|
{ strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
|
|
@@ -2522,7 +2775,7 @@ async function runAsk(projectPath, query, options) {
|
|
|
2522
2775
|
}
|
|
2523
2776
|
return output;
|
|
2524
2777
|
}
|
|
2525
|
-
const executor = createSearchExecutor(db);
|
|
2778
|
+
const executor = createSearchExecutor(db, query);
|
|
2526
2779
|
if (options.noExplain) {
|
|
2527
2780
|
return await runNoExplain(provider, query, options, executor);
|
|
2528
2781
|
}
|