kontext-engine 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +414 -42
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.js +411 -41
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -1208,12 +1208,15 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1208
1208
|
const rows = db.prepare(
|
|
1209
1209
|
`SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1210
1210
|
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1211
|
-
c.type, c.name, c.parent, c.text
|
|
1211
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1212
1212
|
FROM chunks c
|
|
1213
1213
|
JOIN files f ON f.id = c.file_id
|
|
1214
1214
|
WHERE c.id IN (${placeholders})`
|
|
1215
1215
|
).all(...ids);
|
|
1216
|
-
return rows
|
|
1216
|
+
return rows.map((r) => ({
|
|
1217
|
+
...r,
|
|
1218
|
+
exports: r.exports === 1
|
|
1219
|
+
}));
|
|
1217
1220
|
},
|
|
1218
1221
|
searchChunks(filters, limit) {
|
|
1219
1222
|
const conditions = [];
|
|
@@ -1250,7 +1253,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1250
1253
|
const sql = `
|
|
1251
1254
|
SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1252
1255
|
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1253
|
-
c.type, c.name, c.parent, c.text
|
|
1256
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1254
1257
|
FROM chunks c
|
|
1255
1258
|
JOIN files f ON f.id = c.file_id
|
|
1256
1259
|
${where}
|
|
@@ -1258,7 +1261,11 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1258
1261
|
LIMIT ?
|
|
1259
1262
|
`;
|
|
1260
1263
|
params.push(limit);
|
|
1261
|
-
|
|
1264
|
+
const rows = db.prepare(sql).all(...params);
|
|
1265
|
+
return rows.map((r) => ({
|
|
1266
|
+
...r,
|
|
1267
|
+
exports: r.exports === 1
|
|
1268
|
+
}));
|
|
1262
1269
|
},
|
|
1263
1270
|
deleteChunksByFile(fileId) {
|
|
1264
1271
|
const chunkRows = stmtGetChunkIdsByFile.all(fileId);
|
|
@@ -1558,6 +1565,7 @@ async function vectorSearch(db, embedder, query, limit, filters) {
|
|
|
1558
1565
|
lineEnd: chunk.lineEnd,
|
|
1559
1566
|
name: chunk.name,
|
|
1560
1567
|
type: chunk.type,
|
|
1568
|
+
exported: chunk.exports,
|
|
1561
1569
|
text: chunk.text,
|
|
1562
1570
|
score: distanceToScore(vr.distance),
|
|
1563
1571
|
language: chunk.language
|
|
@@ -1568,12 +1576,17 @@ async function vectorSearch(db, embedder, query, limit, filters) {
|
|
|
1568
1576
|
}
|
|
1569
1577
|
|
|
1570
1578
|
// src/search/fts.ts
|
|
1579
|
+
/**
 * Reduce free-form user text to something that can be passed to a full-text
 * MATCH expression without being parsed as query syntax.
 *
 * NOTE(review): assumes the backing index is SQLite FTS5 (results are ranked
 * with bm25) — confirm. In FTS5 an unquoted term may only contain ASCII
 * letters, digits, underscore and non-ASCII characters; any other character
 * (".", "/", "'", ",", "[", "<", "=", ...) is a syntax error, so all of them
 * are replaced, not just a hand-picked list of operators.
 *
 * @param {string} query - Raw query text.
 * @returns {string} Space-separated terms; a trailing `*` directly after a
 *   word character is kept as a prefix marker. May be the empty string.
 */
function sanitizeFtsQuery(query) {
  return query
    // Only ASCII characters can break the query grammar; non-ASCII text is
    // left alone so accented / CJK terms survive.
    .replace(/[^A-Za-z0-9_*\s\u0080-\u{10FFFF}]/gu, " ")
    // A star is only meaningful as a prefix marker glued to a word.
    .replace(/(?<!\w)\*/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
1571
1582
|
/**
 * Map a bm25 rank onto (0, 1]: a rank of 0 gives 1 and the score shrinks as
 * the rank's magnitude grows. The sign of the rank is ignored.
 *
 * @param {number} rank - Rank value reported for a full-text hit.
 * @returns {number} Score in (0, 1].
 */
function bm25ToScore(rank) {
  const magnitude = Math.abs(rank);
  const denominator = 1 + magnitude;
  return 1 / denominator;
}
|
|
1574
1585
|
function ftsSearch(db, query, limit, filters) {
|
|
1586
|
+
const safeQuery = sanitizeFtsQuery(query);
|
|
1587
|
+
if (safeQuery.length === 0) return [];
|
|
1575
1588
|
const fetchLimit = filters?.language ? limit * 3 : limit;
|
|
1576
|
-
const ftsResults = db.searchFTS(
|
|
1589
|
+
const ftsResults = db.searchFTS(safeQuery, fetchLimit);
|
|
1577
1590
|
if (ftsResults.length === 0) return [];
|
|
1578
1591
|
const chunkIds = ftsResults.map((r) => r.chunkId);
|
|
1579
1592
|
const chunks = db.getChunksByIds(chunkIds);
|
|
@@ -1593,6 +1606,7 @@ function ftsSearch(db, query, limit, filters) {
|
|
|
1593
1606
|
lineEnd: chunk.lineEnd,
|
|
1594
1607
|
name: chunk.name,
|
|
1595
1608
|
type: chunk.type,
|
|
1609
|
+
exported: chunk.exports,
|
|
1596
1610
|
text: chunk.text,
|
|
1597
1611
|
score: bm25ToScore(fts.rank),
|
|
1598
1612
|
language: chunk.language
|
|
@@ -1627,6 +1641,7 @@ function astSearch(db, filters, limit) {
|
|
|
1627
1641
|
lineEnd: chunk.lineEnd,
|
|
1628
1642
|
name: chunk.name,
|
|
1629
1643
|
type: chunk.type,
|
|
1644
|
+
exported: chunk.exports,
|
|
1630
1645
|
text: chunk.text,
|
|
1631
1646
|
score,
|
|
1632
1647
|
language: chunk.language
|
|
@@ -1679,6 +1694,7 @@ function pathSearch(db, pattern, limit) {
|
|
|
1679
1694
|
lineEnd: chunk.lineEnd,
|
|
1680
1695
|
name: chunk.name,
|
|
1681
1696
|
type: chunk.type,
|
|
1697
|
+
exported: chunk.exports,
|
|
1682
1698
|
text: chunk.text,
|
|
1683
1699
|
score: 1,
|
|
1684
1700
|
language: file.language
|
|
@@ -1691,13 +1707,17 @@ var SCORE_DIR_EXACT = 1;
|
|
|
1691
1707
|
var SCORE_FILENAME = 0.9;
|
|
1692
1708
|
var SCORE_PARTIAL = 0.7;
|
|
1693
1709
|
function pathKeywordSearch(db, query, limit) {
|
|
1694
|
-
const
|
|
1710
|
+
const terms = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 2);
|
|
1711
|
+
if (terms.length === 0) return [];
|
|
1695
1712
|
const allPaths = db.getAllFilePaths();
|
|
1696
1713
|
const scoredPaths = [];
|
|
1697
1714
|
for (const filePath of allPaths) {
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1715
|
+
let bestScore = 0;
|
|
1716
|
+
for (const term of terms) {
|
|
1717
|
+
bestScore = Math.max(bestScore, scorePathMatch(filePath, term));
|
|
1718
|
+
}
|
|
1719
|
+
if (bestScore > 0) {
|
|
1720
|
+
scoredPaths.push({ filePath, score: bestScore });
|
|
1701
1721
|
}
|
|
1702
1722
|
}
|
|
1703
1723
|
if (scoredPaths.length === 0) return [];
|
|
@@ -1717,6 +1737,7 @@ function pathKeywordSearch(db, query, limit) {
|
|
|
1717
1737
|
lineEnd: chunk.lineEnd,
|
|
1718
1738
|
name: chunk.name,
|
|
1719
1739
|
type: chunk.type,
|
|
1740
|
+
exported: chunk.exports,
|
|
1720
1741
|
text: chunk.text,
|
|
1721
1742
|
score,
|
|
1722
1743
|
language: file.language
|
|
@@ -1773,11 +1794,24 @@ var PATH_BOOST_DIR_EXACT = 1.5;
|
|
|
1773
1794
|
var PATH_BOOST_FILENAME = 1.4;
|
|
1774
1795
|
var PATH_BOOST_PARTIAL = 1.2;
|
|
1775
1796
|
var IMPORT_PENALTY = 0.5;
|
|
1797
|
+
var TEST_FILE_PENALTY = 0.65;
|
|
1798
|
+
var SMALL_SNIPPET_PENALTY = 0.75;
|
|
1799
|
+
var PUBLIC_API_BOOST = 1.12;
|
|
1800
|
+
var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
|
|
1801
|
+
var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
|
|
1802
|
+
var SMALL_SNIPPET_MAX_LINES = 3;
|
|
1803
|
+
/**
 * Split a query on whitespace and keep the pieces that are at least two
 * characters long, in their original order and casing.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Candidate terms for path boosting.
 */
function extractPathBoostTerms(query) {
  const terms = [];
  for (const piece of query.split(/\s+/)) {
    const term = piece.trim();
    if (term.length >= 2) {
      terms.push(term);
    }
  }
  return terms;
}
|
|
1776
1806
|
function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
|
|
1777
1807
|
const fused = fusionMerge(strategyResults, limit * 3);
|
|
1778
1808
|
if (fused.length === 0) return [];
|
|
1779
1809
|
const boosted = applyPathBoost(fused, pathBoostTerms);
|
|
1780
|
-
const
|
|
1810
|
+
const importAdjusted = applyImportDeprioritization(boosted);
|
|
1811
|
+
const testAdjusted = applyTestFileDeprioritization(importAdjusted);
|
|
1812
|
+
const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
|
|
1813
|
+
const boostedApi = applyPublicApiBoost(snippetAdjusted);
|
|
1814
|
+
const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
|
|
1781
1815
|
adjusted.sort((a, b) => b.score - a.score);
|
|
1782
1816
|
const sliced = adjusted.slice(0, limit);
|
|
1783
1817
|
return renormalize(sliced);
|
|
@@ -1827,6 +1861,76 @@ function applyImportDeprioritization(results) {
|
|
|
1827
1861
|
return r;
|
|
1828
1862
|
});
|
|
1829
1863
|
}
|
|
1864
|
+
/**
 * Scale down the score of results that live in test files, but only when the
 * result set also contains at least one non-test result with a positive top
 * score. Otherwise the input array is returned untouched.
 *
 * @param {Array<{filePath: string, score: number}>} results - Ranked results.
 * @returns {Array} The same array when nothing is penalised, otherwise a new
 *   array in which test-file entries are copies with `score` multiplied by
 *   TEST_FILE_PENALTY and the other entries are passed through as-is.
 */
function applyTestFileDeprioritization(results) {
  // Classify every result once; the flags are reused for both the guard
  // checks and the final mapping.
  const fromTestFile = results.map((entry) => isTestFilePath(entry.filePath));

  const nonTestScores = [];
  results.forEach((entry, index) => {
    if (!fromTestFile[index]) {
      nonTestScores.push(entry.score);
    }
  });

  // Nothing to rank test files against.
  if (nonTestScores.length === 0) return results;
  // Non-test results carry no positive score, so penalising is pointless.
  if (Math.max(0, ...nonTestScores) === 0) return results;

  return results.map((entry, index) =>
    fromTestFile[index] ? { ...entry, score: entry.score * TEST_FILE_PENALTY } : entry
  );
}
|
|
1879
|
+
/**
 * Scale down the score of very short snippets, but only when the result set
 * also contains at least one larger snippet with a positive top score.
 * Otherwise the input array is returned untouched.
 *
 * @param {Array<{lineStart: number, lineEnd: number, score: number}>} results
 * @returns {Array} The same array when nothing is penalised, otherwise a new
 *   array in which small snippets are copies with `score` multiplied by
 *   SMALL_SNIPPET_PENALTY and the other entries are passed through as-is.
 */
function applySmallSnippetDeprioritization(results) {
  // Classify every result once and reuse the flags below.
  const tiny = results.map((entry) => isSmallSnippet(entry));

  const regularScores = [];
  results.forEach((entry, index) => {
    if (!tiny[index]) {
      regularScores.push(entry.score);
    }
  });

  // Every result is small: there is nothing to prefer over them.
  if (regularScores.length === 0) return results;
  // Larger snippets carry no positive score, so penalising is pointless.
  if (Math.max(0, ...regularScores) === 0) return results;

  return results.map((entry, index) =>
    tiny[index] ? { ...entry, score: entry.score * SMALL_SNIPPET_PENALTY } : entry
  );
}
|
|
1894
|
+
/**
 * Multiply the score of every result recognised as a public API symbol by
 * PUBLIC_API_BOOST. Other results are passed through unchanged (same object).
 *
 * @param {Array<{score: number}>} results - Ranked results.
 * @returns {Array} A new array of the same length and order.
 */
function applyPublicApiBoost(results) {
  const boost = (entry) => ({ ...entry, score: entry.score * PUBLIC_API_BOOST });
  return results.map((entry) => (isPublicApiSymbol(entry) ? boost(entry) : entry));
}
|
|
1902
|
+
/**
 * Dampen repeated hits from the same file. Results are visited from highest
 * to lowest score; the n-th result seen for a given `filePath` has its score
 * multiplied by getFileDiversityFactor(n).
 *
 * @param {Array<{filePath: string, score: number}>} results - Ranked results.
 * @returns {Array} The input array itself when it has at most one entry;
 *   otherwise a new array of copies, ordered by the pre-adjustment score
 *   (descending). The input array and its entries are not modified.
 */
function applyFileDiversityDiminishingReturns(results) {
  if (results.length <= 1) return results;

  const occurrencesByFile = /* @__PURE__ */ new Map();
  const byScoreDescending = Array.from(results).sort((left, right) => right.score - left.score);

  const adjusted = [];
  for (const entry of byScoreDescending) {
    const nth = (occurrencesByFile.get(entry.filePath) ?? 0) + 1;
    occurrencesByFile.set(entry.filePath, nth);
    adjusted.push({ ...entry, score: entry.score * getFileDiversityFactor(nth) });
  }
  return adjusted;
}
|
|
1915
|
+
/**
 * Decide whether a path points at a test file. The path is lower-cased and
 * backslashes are turned into forward slashes before it is checked against
 * TEST_FILE_DIRECTORY_PATTERN and TEST_FILE_NAME_PATTERN.
 *
 * @param {string} filePath - File path with either separator style.
 * @returns {boolean} True when either pattern matches.
 */
function isTestFilePath(filePath) {
  const posixPath = filePath.toLowerCase().split("\\").join("/");
  if (TEST_FILE_DIRECTORY_PATTERN.test(posixPath)) {
    return true;
  }
  return TEST_FILE_NAME_PATTERN.test(posixPath);
}
|
|
1919
|
+
/**
 * Tell whether a result spans at most SMALL_SNIPPET_MAX_LINES lines. The span
 * is `lineEnd - lineStart + 1`, clamped to a minimum of one line.
 *
 * @param {{lineStart: number, lineEnd: number}} result - Search result.
 * @returns {boolean} True for snippets at or below the line threshold.
 */
function isSmallSnippet(result) {
  const { lineStart, lineEnd } = result;
  const spannedLines = lineEnd - lineStart + 1;
  return Math.max(1, spannedLines) <= SMALL_SNIPPET_MAX_LINES;
}
|
|
1923
|
+
/**
 * Tell whether a result looks like part of the public API: either it is
 * flagged `exported === true`, or its text, ignoring leading whitespace and
 * letter case, begins with "export ".
 *
 * @param {{exported?: boolean, text: string}} result - Search result.
 * @returns {boolean}
 */
function isPublicApiSymbol(result) {
  const { exported, text } = result;
  // The text is only inspected when the explicit flag is not set.
  return exported === true || text.trimStart().toLowerCase().startsWith("export ");
}
|
|
1928
|
+
/**
 * Score multiplier for the n-th result coming from the same file:
 * first (or lower) -> 1, second -> 0.9, third -> 0.8, anything else -> 0.7.
 *
 * @param {number} fileOccurrence - 1-based occurrence count within a file.
 * @returns {number} Multiplier to apply to the result's score.
 */
function getFileDiversityFactor(fileOccurrence) {
  if (fileOccurrence <= 1) return 1;
  switch (fileOccurrence) {
    case 2:
      return 0.9;
    case 3:
      return 0.8;
    default:
      return 0.7;
  }
}
|
|
1830
1934
|
function renormalize(results) {
|
|
1831
1935
|
if (results.length === 0) return results;
|
|
1832
1936
|
const maxScore = Math.max(...results.map((r) => r.score));
|
|
@@ -1887,9 +1991,6 @@ function extractSymbolNames(query) {
|
|
|
1887
1991
|
/**
 * Tell whether a query contains any character that marks it as a path or
 * glob: a slash, an asterisk or a dot.
 *
 * @param {string} query - Query text.
 * @returns {boolean}
 */
function isPathLike(query) {
  const pathMarkers = ["/", "*", "."];
  return pathMarkers.some((marker) => query.includes(marker));
}
|
|
1890
|
-
function extractPathBoostTerms(query) {
|
|
1891
|
-
return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
|
|
1892
|
-
}
|
|
1893
1994
|
async function runQuery(projectPath, query, options) {
|
|
1894
1995
|
const absoluteRoot = path5.resolve(projectPath);
|
|
1895
1996
|
const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
|
|
@@ -2019,27 +2120,128 @@ function registerQueryCommand(program2) {
|
|
|
2019
2120
|
import fs7 from "fs";
|
|
2020
2121
|
import path6 from "path";
|
|
2021
2122
|
|
|
2123
|
+
// src/steering/prompts.ts
|
|
2124
|
+
var PLAN_SYSTEM_PROMPT = `You are a code-search strategy planner for a TypeScript/JavaScript codebase.
|
|
2125
|
+
|
|
2126
|
+
Given a user query, produce a JSON object with:
|
|
2127
|
+
- "interpretation": one sentence summarising what the user wants to find.
|
|
2128
|
+
- "strategies": an ordered array of search strategies (most important first).
|
|
2129
|
+
|
|
2130
|
+
Each strategy object has:
|
|
2131
|
+
"strategy" \u2014 one of "vector", "fts", "ast", "path", "dependency"
|
|
2132
|
+
"query" \u2014 the optimised search string for that strategy (see rules below)
|
|
2133
|
+
"weight" \u2014 importance 0\u20131 (highest-priority strategy gets 1.0)
|
|
2134
|
+
"reason" \u2014 one sentence explaining why this strategy helps
|
|
2135
|
+
|
|
2136
|
+
## Strategy selection rules
|
|
2137
|
+
|
|
2138
|
+
| Signal in query | Primary strategy | Supporting strategies |
|
|
2139
|
+
|---|---|---|
|
|
2140
|
+
| Conceptual / "how does X work" / natural language | vector | fts, ast |
|
|
2141
|
+
| Exact keyword, identifier, or error message | fts | ast |
|
|
2142
|
+
| Symbol name (function, class, type, variable) | ast | fts |
|
|
2143
|
+
| File path, glob, or extension (e.g. "*.test.ts") | path | fts |
|
|
2144
|
+
| Import chain / "what depends on X" | dependency | ast, fts |
|
|
2145
|
+
| Mixed: natural language + code symbol | vector + ast | fts |
|
|
2146
|
+
|
|
2147
|
+
## Query optimisation rules
|
|
2148
|
+
- **vector**: keep the query close to natural language; rephrase for semantic similarity.
|
|
2149
|
+
- **fts**: extract the most distinctive keywords/identifiers; drop stop words.
|
|
2150
|
+
- **ast**: use only the symbol name (camelCase, snake_case, or PascalCase). Strip surrounding prose.
|
|
2151
|
+
- **path**: use a glob or slash-separated path segment (e.g. "src/auth/*.ts").
|
|
2152
|
+
- **dependency**: use the bare module or file name being imported.
|
|
2153
|
+
|
|
2154
|
+
## Edge cases
|
|
2155
|
+
- **Vague query** (e.g. "help me understand this"): use vector with the full query; add fts with any nouns present.
|
|
2156
|
+
- **Multi-concept query** (e.g. "authentication and rate limiting"): create separate strategies for each concept, both at high weight.
|
|
2157
|
+
- **Code symbol mixed with prose** (e.g. "where is the validateToken function called"): use ast for the symbol and vector for the intent.
|
|
2158
|
+
- **Query is just a symbol** (e.g. "createPool"): use ast at weight 1.0 and fts at weight 0.7. Skip vector.
|
|
2159
|
+
|
|
2160
|
+
## Examples
|
|
2161
|
+
|
|
2162
|
+
User: "how does authentication work"
|
|
2163
|
+
\`\`\`json
|
|
2164
|
+
{
|
|
2165
|
+
"interpretation": "Understand the authentication flow and related middleware.",
|
|
2166
|
+
"strategies": [
|
|
2167
|
+
{ "strategy": "vector", "query": "authentication flow middleware", "weight": 1.0, "reason": "Conceptual question best served by semantic search." },
|
|
2168
|
+
{ "strategy": "fts", "query": "authentication middleware auth", "weight": 0.7, "reason": "Keyword fallback for auth-related identifiers." },
|
|
2169
|
+
{ "strategy": "ast", "query": "authenticate", "weight": 0.6, "reason": "Likely function or class name." }
|
|
2170
|
+
]
|
|
2171
|
+
}
|
|
2172
|
+
\`\`\`
|
|
2173
|
+
|
|
2174
|
+
User: "validateToken"
|
|
2175
|
+
\`\`\`json
|
|
2176
|
+
{
|
|
2177
|
+
"interpretation": "Find the validateToken symbol definition and usages.",
|
|
2178
|
+
"strategies": [
|
|
2179
|
+
{ "strategy": "ast", "query": "validateToken", "weight": 1.0, "reason": "Exact symbol lookup." },
|
|
2180
|
+
{ "strategy": "fts", "query": "validateToken", "weight": 0.7, "reason": "Catch references in comments or strings." }
|
|
2181
|
+
]
|
|
2182
|
+
}
|
|
2183
|
+
\`\`\`
|
|
2184
|
+
|
|
2185
|
+
User: "where is rate limiting configured in src/middleware"
|
|
2186
|
+
\`\`\`json
|
|
2187
|
+
{
|
|
2188
|
+
"interpretation": "Locate rate-limiting configuration inside the middleware directory.",
|
|
2189
|
+
"strategies": [
|
|
2190
|
+
{ "strategy": "path", "query": "src/middleware/*", "weight": 0.9, "reason": "Scope results to the specified directory." },
|
|
2191
|
+
{ "strategy": "vector", "query": "rate limiting configuration", "weight": 1.0, "reason": "Semantic match for the concept." },
|
|
2192
|
+
{ "strategy": "fts", "query": "rateLimit rateLimiter", "weight": 0.7, "reason": "Common identifier variants." }
|
|
2193
|
+
]
|
|
2194
|
+
}
|
|
2195
|
+
\`\`\`
|
|
2196
|
+
|
|
2197
|
+
User: "authentication and database connection pooling"
|
|
2198
|
+
\`\`\`json
|
|
2199
|
+
{
|
|
2200
|
+
"interpretation": "Find code related to both authentication and database connection pooling.",
|
|
2201
|
+
"strategies": [
|
|
2202
|
+
{ "strategy": "vector", "query": "authentication login", "weight": 1.0, "reason": "Semantic search for the auth concept." },
|
|
2203
|
+
{ "strategy": "vector", "query": "database connection pool", "weight": 1.0, "reason": "Semantic search for the DB pooling concept." },
|
|
2204
|
+
{ "strategy": "fts", "query": "auth createPool connectionPool", "weight": 0.7, "reason": "Keyword fallback for likely identifiers." }
|
|
2205
|
+
]
|
|
2206
|
+
}
|
|
2207
|
+
\`\`\`
|
|
2208
|
+
|
|
2209
|
+
Output ONLY the JSON object. No markdown fences, no commentary.`;
|
|
2210
|
+
var SYNTHESIZE_SYSTEM_PROMPT = `You are a code-search assistant. Given a user query and ranked search results, produce a concise, actionable summary.
|
|
2211
|
+
|
|
2212
|
+
## Output structure (plain text, no markdown)
|
|
2213
|
+
|
|
2214
|
+
1. **Key finding** (1\u20132 sentences): the most important result or answer first.
|
|
2215
|
+
2. **Supporting locations** (bulleted, max 5): each line is "filePath:lineStart \u2013 brief description".
|
|
2216
|
+
3. **Additional context** (0\u20132 sentences, optional): relationships between results, patterns, or next steps.
|
|
2217
|
+
|
|
2218
|
+
## Rules
|
|
2219
|
+
- Always reference file paths and line numbers from the search results.
|
|
2220
|
+
- Mention specific symbol names (functions, classes, types) when they appear in results.
|
|
2221
|
+
- If no result clearly answers the query, say so and suggest a refined search.
|
|
2222
|
+
- Be concise \u2014 aim for 4\u20138 lines total. Do not repeat the query back.
|
|
2223
|
+
- Do not use markdown formatting (no #, *, \`, or fences). Use plain text only.
|
|
2224
|
+
- Group related results rather than listing every result individually.
|
|
2225
|
+
|
|
2226
|
+
## Example
|
|
2227
|
+
|
|
2228
|
+
Query: "how does token validation work"
|
|
2229
|
+
Results include validateToken in src/auth/tokens.ts:42 and authMiddleware in src/middleware/auth.ts:15.
|
|
2230
|
+
|
|
2231
|
+
Good output:
|
|
2232
|
+
Token validation is handled by validateToken (src/auth/tokens.ts:42), which decodes a JWT and checks expiry and signature against the configured secret.
|
|
2233
|
+
|
|
2234
|
+
Related locations:
|
|
2235
|
+
- src/auth/tokens.ts:42 \u2013 validateToken: core JWT decode + verify logic
|
|
2236
|
+
- src/middleware/auth.ts:15 \u2013 authMiddleware: calls validateToken on every protected route
|
|
2237
|
+
- src/auth/types.ts:5 \u2013 TokenPayload type definition
|
|
2238
|
+
|
|
2239
|
+
The middleware extracts the Bearer token from the Authorization header before passing it to validateToken.`;
|
|
2240
|
+
|
|
2022
2241
|
// src/steering/llm.ts
|
|
2023
2242
|
var GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-preview:generateContent";
|
|
2024
2243
|
var OPENAI_URL = "https://api.openai.com/v1/responses";
|
|
2025
2244
|
var ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
|
|
2026
|
-
var PLAN_SYSTEM_PROMPT = `You are a code search strategy planner. Given a user query about code, output a JSON object with:
|
|
2027
|
-
- "interpretation": a one-line summary of what the user is looking for
|
|
2028
|
-
- "strategies": an array of search strategy objects, each with:
|
|
2029
|
-
- "strategy": one of "vector", "fts", "ast", "path", "dependency"
|
|
2030
|
-
- "query": the optimized query string for that strategy
|
|
2031
|
-
- "weight": a number 0-1 indicating importance
|
|
2032
|
-
- "reason": brief explanation of why this strategy is used
|
|
2033
|
-
|
|
2034
|
-
Choose strategies based on query type:
|
|
2035
|
-
- Conceptual/natural language \u2192 vector (semantic search)
|
|
2036
|
-
- Keywords/identifiers \u2192 fts (full-text search)
|
|
2037
|
-
- Symbol names (functions, classes) \u2192 ast (structural search)
|
|
2038
|
-
- File paths or patterns \u2192 path (path glob search)
|
|
2039
|
-
- Import/dependency chains \u2192 dependency
|
|
2040
|
-
|
|
2041
|
-
Output ONLY valid JSON, no markdown.`;
|
|
2042
|
-
var SYNTHESIZE_SYSTEM_PROMPT = `You are a code search assistant. Given search results, write a brief, helpful explanation of what was found. Be concise (2-4 sentences). Reference specific files and function names. Do not use markdown.`;
|
|
2043
2245
|
function createGeminiProvider(apiKey) {
|
|
2044
2246
|
return {
|
|
2045
2247
|
name: "gemini",
|
|
@@ -2146,6 +2348,145 @@ function createAnthropicProvider(apiKey) {
|
|
|
2146
2348
|
}
|
|
2147
2349
|
};
|
|
2148
2350
|
}
|
|
2351
|
+
// Lower-case English filler words that are dropped from keyword queries.
var STOP_WORDS = /* @__PURE__ */ new Set([
  // Interrogatives & conjunctions
  "how", "does", "what", "where", "when", "why", "which", "who", "whom",
  // Be-verbs
  "is", "are", "was", "were", "be", "been", "being",
  // Do-verbs
  "do", "did", "doing", "done",
  // Articles, connectors, prepositions
  "the", "a", "an", "and", "or", "not", "no", "nor",
  "in", "on", "at", "to", "for", "of", "with", "by",
  "from", "about", "into", "through", "between", "after", "before", "during",
  // Pronouns & demonstratives
  "it", "its", "this", "that", "these", "those",
  "i", "me", "my", "we", "our", "you", "your", "he", "she", "they",
  // Modals
  "can", "could", "should", "would", "will", "shall", "may", "might",
  // Have-verbs
  "has", "have", "had", "having",
  // Common imperative verbs that carry no search value
  "find", "show", "get", "tell", "look", "give", "list", "explain",
  // Misc filler
  "all", "any", "some", "each", "every", "much", "many", "also", "just",
  "like", "then", "there", "here", "very", "really", "use", "used", "using"
]);
// Whole-word shapes treated as code identifiers: camelCase, Capitalised /
// PascalCase, snake_case and SCREAMING_SNAKE_CASE.
var CODE_IDENT_RE = /^(?:[a-z]+(?:[A-Z][a-z]*)+|[A-Z][a-zA-Z]+|[a-z]+(?:_[a-z]+)+|[A-Z]+(?:_[A-Z]+)+)$/;
// Dotted member chains such as `db.getChunksByIds` or `a.b.c`.
var DOTTED_IDENT_RE = /[a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)+/g;
/**
 * Boil a natural-language query down to its distinctive search terms.
 *
 * Terms are collected in this order, de-duplicated case-insensitively:
 *   1. dotted identifiers (`obj.method`),
 *   2. whitespace-separated tokens that contain a "/",
 *   3. remaining words of two or more characters that are not stop words.
 * Sentence punctuation trailing a word or path ("work.", "src/auth.") is
 * removed before the term is filtered or recorded, so that it cannot hide a
 * stop word or produce a near-duplicate of a term already captured.
 *
 * If nothing survives, the longest alphanumeric word of the query is
 * returned, or the query itself when it has no such word.
 *
 * NOTE(review): a stop word that starts with a capital letter ("How",
 * "List") satisfies CODE_IDENT_RE and is therefore kept; this looks
 * deliberate for PascalCase symbols but also lets sentence-initial words
 * through — confirm the intended behaviour.
 *
 * @param {string} query - Raw user query.
 * @returns {string} Space-joined search terms.
 */
function extractSearchTerms(query) {
  const terms = [];
  const seen = /* @__PURE__ */ new Set();
  const addUnique = (term) => {
    const key = term.toLowerCase();
    if (!seen.has(key)) {
      seen.add(key);
      terms.push(term);
    }
  };

  for (const dotted of query.match(DOTTED_IDENT_RE) ?? []) {
    addUnique(dotted);
  }

  for (const token of query.split(/\s+/)) {
    if (!token.includes("/")) continue;
    // Drop punctuation that belongs to the sentence, not to the path.
    const pathTerm = token.replace(/[?!,;:.]+$/, "");
    if (pathTerm.length >= 2) addUnique(pathTerm);
  }

  const rawWords = query.replace(/[^a-zA-Z0-9_.\s/-]/g, " ").split(/\s+/);
  for (const rawWord of rawWords) {
    // "." survives the replacement above because it is needed inside dotted
    // names, so a sentence-final period has to be trimmed here.
    const word = rawWord.replace(/\.+$/, "");
    if (word.length < 2) continue;
    const lower = word.toLowerCase();
    if (seen.has(lower)) continue;
    if (STOP_WORDS.has(lower) && !CODE_IDENT_RE.test(word)) continue;
    addUnique(word);
  }

  if (terms.length === 0) {
    const allWords = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
    const longest = allWords.sort((a, b) => b.length - a.length)[0];
    return longest ?? query;
  }
  return terms.join(" ");
}
|
|
2149
2490
|
var VALID_STRATEGIES = /* @__PURE__ */ new Set([
|
|
2150
2491
|
"vector",
|
|
2151
2492
|
"fts",
|
|
@@ -2154,9 +2495,11 @@ var VALID_STRATEGIES = /* @__PURE__ */ new Set([
|
|
|
2154
2495
|
"dependency"
|
|
2155
2496
|
]);
|
|
2156
2497
|
function buildFallbackPlan(query) {
|
|
2498
|
+
const keywords = extractSearchTerms(query);
|
|
2157
2499
|
const strategies = [
|
|
2158
|
-
{ strategy: "fts", query, weight: 0.8, reason: "Full-text keyword search" },
|
|
2159
|
-
{ strategy: "ast", query, weight: 0.9, reason: "Structural symbol search" }
|
|
2500
|
+
{ strategy: "fts", query: keywords, weight: 0.8, reason: "Full-text keyword search" },
|
|
2501
|
+
{ strategy: "ast", query: keywords, weight: 0.9, reason: "Structural symbol search" },
|
|
2502
|
+
{ strategy: "path", query: keywords, weight: 0.7, reason: "Path keyword search" }
|
|
2160
2503
|
];
|
|
2161
2504
|
return {
|
|
2162
2505
|
interpretation: `Searching for: ${query}`,
|
|
@@ -2327,7 +2670,8 @@ function formatTextOutput2(output) {
|
|
|
2327
2670
|
);
|
|
2328
2671
|
return lines.join("\n");
|
|
2329
2672
|
}
|
|
2330
|
-
function createSearchExecutor(db) {
|
|
2673
|
+
function createSearchExecutor(db, query) {
|
|
2674
|
+
const pathBoostTerms = extractPathBoostTerms(query);
|
|
2331
2675
|
return async (strategies, limit) => {
|
|
2332
2676
|
const strategyResults = [];
|
|
2333
2677
|
const fetchLimit = limit * 3;
|
|
@@ -2341,9 +2685,16 @@ function createSearchExecutor(db) {
|
|
|
2341
2685
|
});
|
|
2342
2686
|
}
|
|
2343
2687
|
}
|
|
2344
|
-
return
|
|
2688
|
+
return fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
|
|
2345
2689
|
};
|
|
2346
2690
|
}
|
|
2691
|
+
/**
 * Pull identifier-shaped substrings out of a query: snake_case names,
 * camelCase / lower-case / Capitalised words, and mixed-case names that start
 * with a capital letter.
 *
 * The snake_case alternative is listed first on purpose. Regex alternation
 * takes the first alternative that matches, and the camelCase alternative
 * matches the leading lower-case run of any snake_case name, so with the
 * other order `validate_token` would be reported as `validate` and `token`.
 *
 * @param {string} query - Query text.
 * @returns {string[]} Matches in order of appearance; empty when none.
 */
function extractSymbolNames2(query) {
  const matches = query.match(/[a-z]+(?:_[a-z]+)+|[A-Z]?[a-z]+(?:[A-Z][a-z]+)*|[A-Z][a-zA-Z]+/g);
  return matches ?? [];
}
|
|
2695
|
+
/**
 * Tell whether a query should be treated as a path or glob, i.e. whether it
 * contains a slash, an asterisk or a dot.
 *
 * @param {string} query - Query text.
 * @returns {boolean}
 */
function isPathLike2(query) {
  for (const marker of ["/", "*", "."]) {
    if (query.includes(marker)) {
      return true;
    }
  }
  return false;
}
|
|
2347
2698
|
async function executeStrategy2(db, plan, limit) {
|
|
2348
2699
|
switch (plan.strategy) {
|
|
2349
2700
|
case "vector": {
|
|
@@ -2352,10 +2703,25 @@ async function executeStrategy2(db, plan, limit) {
|
|
|
2352
2703
|
}
|
|
2353
2704
|
case "fts":
|
|
2354
2705
|
return ftsSearch(db, plan.query, limit);
|
|
2355
|
-
case "ast":
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
|
|
2706
|
+
case "ast": {
|
|
2707
|
+
const symbols = extractSymbolNames2(plan.query);
|
|
2708
|
+
if (symbols.length === 0) return [];
|
|
2709
|
+
const allResults = [];
|
|
2710
|
+
for (const name of symbols) {
|
|
2711
|
+
const results = astSearch(db, { name }, limit);
|
|
2712
|
+
allResults.push(...results);
|
|
2713
|
+
}
|
|
2714
|
+
const seen = /* @__PURE__ */ new Set();
|
|
2715
|
+
return allResults.filter((r) => {
|
|
2716
|
+
if (seen.has(r.chunkId)) return false;
|
|
2717
|
+
seen.add(r.chunkId);
|
|
2718
|
+
return true;
|
|
2719
|
+
});
|
|
2720
|
+
}
|
|
2721
|
+
case "path": {
|
|
2722
|
+
if (isPathLike2(plan.query)) return pathSearch(db, plan.query, limit);
|
|
2723
|
+
return pathKeywordSearch(db, plan.query, limit);
|
|
2724
|
+
}
|
|
2359
2725
|
case "dependency":
|
|
2360
2726
|
return [];
|
|
2361
2727
|
}
|
|
@@ -2367,10 +2733,12 @@ async function loadEmbedder2() {
|
|
|
2367
2733
|
return embedderInstance2;
|
|
2368
2734
|
}
|
|
2369
2735
|
async function fallbackSearch(db, query, limit) {
|
|
2370
|
-
const executor = createSearchExecutor(db);
|
|
2736
|
+
const executor = createSearchExecutor(db, query);
|
|
2737
|
+
const keywords = extractSearchTerms(query);
|
|
2371
2738
|
const fallbackStrategies = [
|
|
2372
|
-
{ strategy: "fts", query, weight: 0.8, reason: "fallback keyword search" },
|
|
2373
|
-
{ strategy: "ast", query, weight: 0.9, reason: "fallback structural search" }
|
|
2739
|
+
{ strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
|
|
2740
|
+
{ strategy: "ast", query: keywords, weight: 0.9, reason: "fallback structural search" },
|
|
2741
|
+
{ strategy: "path", query: keywords, weight: 0.7, reason: "fallback path search" }
|
|
2374
2742
|
];
|
|
2375
2743
|
const results = await executor(fallbackStrategies, limit);
|
|
2376
2744
|
return {
|
|
@@ -2401,12 +2769,13 @@ async function runAsk(projectPath, query, options) {
|
|
|
2401
2769
|
const provider = options.provider ?? null;
|
|
2402
2770
|
if (!provider) {
|
|
2403
2771
|
const output = await fallbackSearch(db, query, options.limit);
|
|
2772
|
+
output.warning = FALLBACK_NOTICE;
|
|
2404
2773
|
if (options.format === "text") {
|
|
2405
2774
|
output.text = formatTextOutput2(output);
|
|
2406
2775
|
}
|
|
2407
2776
|
return output;
|
|
2408
2777
|
}
|
|
2409
|
-
const executor = createSearchExecutor(db);
|
|
2778
|
+
const executor = createSearchExecutor(db, query);
|
|
2410
2779
|
if (options.noExplain) {
|
|
2411
2780
|
return await runNoExplain(provider, query, options, executor);
|
|
2412
2781
|
}
|
|
@@ -2468,6 +2837,9 @@ function registerAskCommand(program2) {
|
|
|
2468
2837
|
provider: provider ?? void 0,
|
|
2469
2838
|
noExplain: opts["explain"] === false
|
|
2470
2839
|
});
|
|
2840
|
+
if (output.warning) {
|
|
2841
|
+
console.error(`\u26A0 ${output.warning}`);
|
|
2842
|
+
}
|
|
2471
2843
|
if (output.text) {
|
|
2472
2844
|
console.log(output.text);
|
|
2473
2845
|
} else {
|