kontext-engine 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +297 -44
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.js +297 -43
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ interface SearchResult {
|
|
|
6
6
|
lineEnd: number;
|
|
7
7
|
name: string | null;
|
|
8
8
|
type: string;
|
|
9
|
+
exported?: boolean;
|
|
9
10
|
text: string;
|
|
10
11
|
score: number;
|
|
11
12
|
language: string;
|
|
@@ -69,6 +70,7 @@ interface ChunkWithFile {
|
|
|
69
70
|
name: string | null;
|
|
70
71
|
parent: string | null;
|
|
71
72
|
text: string;
|
|
73
|
+
exports: boolean;
|
|
72
74
|
}
|
|
73
75
|
interface ChunkSearchFilters {
|
|
74
76
|
name?: string;
|
|
@@ -261,6 +263,7 @@ interface SteeringResult {
|
|
|
261
263
|
costEstimate: number;
|
|
262
264
|
}
|
|
263
265
|
type SearchExecutor = (strategies: StrategyPlan[], limit: number) => Promise<SearchResult[]>;
|
|
266
|
+
|
|
264
267
|
/** Ask the LLM to interpret a query and plan which search strategies to use. */
|
|
265
268
|
declare function planSearch(provider: LLMProvider, query: string): Promise<SearchPlan>;
|
|
266
269
|
/** Full steering pipeline: plan → search → synthesize. Falls back to basic search on failure. */
|
package/dist/index.js
CHANGED
|
@@ -1198,12 +1198,15 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1198
1198
|
const rows = db.prepare(
|
|
1199
1199
|
`SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1200
1200
|
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1201
|
-
c.type, c.name, c.parent, c.text
|
|
1201
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1202
1202
|
FROM chunks c
|
|
1203
1203
|
JOIN files f ON f.id = c.file_id
|
|
1204
1204
|
WHERE c.id IN (${placeholders})`
|
|
1205
1205
|
).all(...ids);
|
|
1206
|
-
return rows
|
|
1206
|
+
return rows.map((r) => ({
|
|
1207
|
+
...r,
|
|
1208
|
+
exports: r.exports === 1
|
|
1209
|
+
}));
|
|
1207
1210
|
},
|
|
1208
1211
|
searchChunks(filters, limit) {
|
|
1209
1212
|
const conditions = [];
|
|
@@ -1240,7 +1243,7 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1240
1243
|
const sql = `
|
|
1241
1244
|
SELECT c.id, c.file_id as fileId, f.path as filePath, f.language,
|
|
1242
1245
|
c.line_start as lineStart, c.line_end as lineEnd,
|
|
1243
|
-
c.type, c.name, c.parent, c.text
|
|
1246
|
+
c.type, c.name, c.parent, c.text, c.exports as exports
|
|
1244
1247
|
FROM chunks c
|
|
1245
1248
|
JOIN files f ON f.id = c.file_id
|
|
1246
1249
|
${where}
|
|
@@ -1248,7 +1251,11 @@ function createDatabase(dbPath, dimensions = DEFAULT_DIMENSIONS) {
|
|
|
1248
1251
|
LIMIT ?
|
|
1249
1252
|
`;
|
|
1250
1253
|
params.push(limit);
|
|
1251
|
-
|
|
1254
|
+
const rows = db.prepare(sql).all(...params);
|
|
1255
|
+
return rows.map((r) => ({
|
|
1256
|
+
...r,
|
|
1257
|
+
exports: r.exports === 1
|
|
1258
|
+
}));
|
|
1252
1259
|
},
|
|
1253
1260
|
deleteChunksByFile(fileId) {
|
|
1254
1261
|
const chunkRows = stmtGetChunkIdsByFile.all(fileId);
|
|
@@ -1346,6 +1353,7 @@ async function vectorSearch(db, embedder, query, limit, filters) {
|
|
|
1346
1353
|
lineEnd: chunk.lineEnd,
|
|
1347
1354
|
name: chunk.name,
|
|
1348
1355
|
type: chunk.type,
|
|
1356
|
+
exported: chunk.exports,
|
|
1349
1357
|
text: chunk.text,
|
|
1350
1358
|
score: distanceToScore(vr.distance),
|
|
1351
1359
|
language: chunk.language
|
|
@@ -1386,6 +1394,7 @@ function ftsSearch(db, query, limit, filters) {
|
|
|
1386
1394
|
lineEnd: chunk.lineEnd,
|
|
1387
1395
|
name: chunk.name,
|
|
1388
1396
|
type: chunk.type,
|
|
1397
|
+
exported: chunk.exports,
|
|
1389
1398
|
text: chunk.text,
|
|
1390
1399
|
score: bm25ToScore(fts.rank),
|
|
1391
1400
|
language: chunk.language
|
|
@@ -1420,6 +1429,7 @@ function astSearch(db, filters, limit) {
|
|
|
1420
1429
|
lineEnd: chunk.lineEnd,
|
|
1421
1430
|
name: chunk.name,
|
|
1422
1431
|
type: chunk.type,
|
|
1432
|
+
exported: chunk.exports,
|
|
1423
1433
|
text: chunk.text,
|
|
1424
1434
|
score,
|
|
1425
1435
|
language: chunk.language
|
|
@@ -1472,6 +1482,7 @@ function pathSearch(db, pattern, limit) {
|
|
|
1472
1482
|
lineEnd: chunk.lineEnd,
|
|
1473
1483
|
name: chunk.name,
|
|
1474
1484
|
type: chunk.type,
|
|
1485
|
+
exported: chunk.exports,
|
|
1475
1486
|
text: chunk.text,
|
|
1476
1487
|
score: 1,
|
|
1477
1488
|
language: file.language
|
|
@@ -1514,6 +1525,7 @@ function pathKeywordSearch(db, query, limit) {
|
|
|
1514
1525
|
lineEnd: chunk.lineEnd,
|
|
1515
1526
|
name: chunk.name,
|
|
1516
1527
|
type: chunk.type,
|
|
1528
|
+
exported: chunk.exports,
|
|
1517
1529
|
text: chunk.text,
|
|
1518
1530
|
score,
|
|
1519
1531
|
language: file.language
|
|
@@ -1563,6 +1575,7 @@ function dependencyTrace(db, chunkId, direction, depth) {
|
|
|
1563
1575
|
lineEnd: chunk.lineEnd,
|
|
1564
1576
|
name: chunk.name,
|
|
1565
1577
|
type: chunk.type,
|
|
1578
|
+
exported: chunk.exports,
|
|
1566
1579
|
text: chunk.text,
|
|
1567
1580
|
score,
|
|
1568
1581
|
language: chunk.language
|
|
@@ -1613,11 +1626,24 @@ var PATH_BOOST_DIR_EXACT = 1.5;
|
|
|
1613
1626
|
var PATH_BOOST_FILENAME = 1.4;
|
|
1614
1627
|
var PATH_BOOST_PARTIAL = 1.2;
|
|
1615
1628
|
var IMPORT_PENALTY = 0.5;
|
|
1629
|
+
// Score multiplier applied by applyTestFileDeprioritization to results whose path looks like a test file.
var TEST_FILE_PENALTY = 0.65;
|
|
1630
|
+
// Score multiplier applied by applySmallSnippetDeprioritization to results that isSmallSnippet flags as tiny.
var SMALL_SNIPPET_PENALTY = 0.75;
|
|
1631
|
+
// Score multiplier applied by applyPublicApiBoost to results that isPublicApiSymbol identifies as exported.
var PUBLIC_API_BOOST = 1.12;
|
|
1632
|
+
// Matches a whole path segment named "tests" or "__tests__"; isTestFilePath runs it on a lower-cased, forward-slash path.
var TEST_FILE_DIRECTORY_PATTERN = /(?:^|\/)(?:tests|__tests__)(?:\/|$)/;
|
|
1633
|
+
// Matches file names ending in ".test" or ".spec" followed by a JS/TS extension (js, ts, jsx, tsx, with optional c/m prefix).
var TEST_FILE_NAME_PATTERN = /(?:^|\/)[^/]*\.(?:test|spec)\.[cm]?[jt]sx?$/;
|
|
1634
|
+
// Inclusive upper bound on a result's line count for isSmallSnippet to treat it as a small snippet.
var SMALL_SNIPPET_MAX_LINES = 3;
|
|
1635
|
+
/**
 * Break a raw query into whitespace-delimited tokens for path boosting.
 *
 * @param query Raw query string.
 * @returns The tokens that are at least two characters long, in query order.
 */
function extractPathBoostTerms(query) {
  const kept = [];
  for (const piece of query.split(/\s+/)) {
    const token = piece.trim();
    // One-character tokens are discarded.
    if (token.length >= 2) {
      kept.push(token);
    }
  }
  return kept;
}
|
|
1616
1638
|
function fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms) {
|
|
1617
1639
|
const fused = fusionMerge(strategyResults, limit * 3);
|
|
1618
1640
|
if (fused.length === 0) return [];
|
|
1619
1641
|
const boosted = applyPathBoost(fused, pathBoostTerms);
|
|
1620
|
-
const
|
|
1642
|
+
const importAdjusted = applyImportDeprioritization(boosted);
|
|
1643
|
+
const testAdjusted = applyTestFileDeprioritization(importAdjusted);
|
|
1644
|
+
const snippetAdjusted = applySmallSnippetDeprioritization(testAdjusted);
|
|
1645
|
+
const boostedApi = applyPublicApiBoost(snippetAdjusted);
|
|
1646
|
+
const adjusted = applyFileDiversityDiminishingReturns(boostedApi);
|
|
1621
1647
|
adjusted.sort((a, b) => b.score - a.score);
|
|
1622
1648
|
const sliced = adjusted.slice(0, limit);
|
|
1623
1649
|
return renormalize(sliced);
|
|
@@ -1667,6 +1693,76 @@ function applyImportDeprioritization(results) {
|
|
|
1667
1693
|
return r;
|
|
1668
1694
|
});
|
|
1669
1695
|
}
|
|
1696
|
+
/**
 * Scale down the score of results that live in test files.
 *
 * The input array is returned untouched when every result is a test file, or
 * when the best non-test score (floored at 0) is exactly 0. Otherwise a new
 * array is returned in which each test-file result is a copy with its score
 * multiplied by TEST_FILE_PENALTY; other results are passed through as-is.
 *
 * @param results Search results carrying `filePath` and `score`.
 * @returns The original array or a penalised copy, in the same order.
 */
function applyTestFileDeprioritization(results) {
  // Classify every result once and reuse the answer below.
  const testFlags = results.map((entry) => isTestFilePath(entry.filePath));
  let sawNonTest = false;
  let bestNonTestScore = 0;
  results.forEach((entry, idx) => {
    if (testFlags[idx]) return;
    sawNonTest = true;
    bestNonTestScore = Math.max(bestNonTestScore, entry.score);
  });
  if (!sawNonTest || bestNonTestScore === 0) return results;
  return results.map(
    (entry, idx) => testFlags[idx] ? { ...entry, score: entry.score * TEST_FILE_PENALTY } : entry
  );
}
|
|
1711
|
+
/**
 * Scale down the score of very short snippets.
 *
 * The input array is returned untouched when every result is a small snippet,
 * or when the best score among the larger results (floored at 0) is exactly 0.
 * Otherwise a new array is returned in which each small snippet is a copy with
 * its score multiplied by SMALL_SNIPPET_PENALTY.
 *
 * @param results Search results carrying `lineStart`, `lineEnd` and `score`.
 * @returns The original array or a penalised copy, in the same order.
 */
function applySmallSnippetDeprioritization(results) {
  const smallFlags = results.map((candidate) => isSmallSnippet(candidate));
  const largerResults = results.filter((_candidate, position) => !smallFlags[position]);
  if (largerResults.length === 0) return results;
  const topLargerScore = largerResults.reduce(
    (best, candidate) => Math.max(best, candidate.score),
    0
  );
  if (topLargerScore === 0) return results;
  return results.map((candidate, position) => {
    if (!smallFlags[position]) return candidate;
    return { ...candidate, score: candidate.score * SMALL_SNIPPET_PENALTY };
  });
}
|
|
1726
|
+
/**
 * Multiply the score of every result that isPublicApiSymbol accepts by
 * PUBLIC_API_BOOST. Boosted results are shallow copies; the rest are passed
 * through unchanged.
 *
 * @param results Search results to adjust.
 * @returns A new array in the same order as `results`.
 */
function applyPublicApiBoost(results) {
  return results.map(
    (entry) => isPublicApiSymbol(entry) ? { ...entry, score: entry.score * PUBLIC_API_BOOST } : entry
  );
}
|
|
1734
|
+
/**
 * Dampen repeated hits from the same file.
 *
 * Results are ranked by descending score; the n-th result seen for a given
 * `filePath` has its score multiplied by getFileDiversityFactor(n). Inputs of
 * length 0 or 1 are returned as-is.
 *
 * @param results Search results carrying `filePath` and `score`.
 * @returns A new array of adjusted copies in descending pre-adjustment score
 *          order (the adjusted scores are not re-sorted here).
 */
function applyFileDiversityDiminishingReturns(results) {
  if (results.length <= 1) return results;
  const occurrences = /* @__PURE__ */ new Map();
  const byScoreDesc = Array.from(results).sort((left, right) => right.score - left.score);
  const adjusted = [];
  for (const entry of byScoreDesc) {
    const nth = (occurrences.get(entry.filePath) ?? 0) + 1;
    occurrences.set(entry.filePath, nth);
    adjusted.push({ ...entry, score: entry.score * getFileDiversityFactor(nth) });
  }
  return adjusted;
}
|
|
1747
|
+
/**
 * Decide whether a path points at a test file.
 *
 * The path is lower-cased and backslashes are converted to forward slashes
 * before it is checked against TEST_FILE_DIRECTORY_PATTERN and
 * TEST_FILE_NAME_PATTERN.
 *
 * @param filePath Path of the file a result came from.
 * @returns true when either pattern matches.
 */
function isTestFilePath(filePath) {
  const posixLower = filePath.toLowerCase().replace(/\\/g, "/");
  if (TEST_FILE_DIRECTORY_PATTERN.test(posixLower)) return true;
  return TEST_FILE_NAME_PATTERN.test(posixLower);
}
|
|
1751
|
+
/**
 * Report whether a result spans at most SMALL_SNIPPET_MAX_LINES lines.
 *
 * The line count is inclusive of both ends and never treated as less than 1.
 *
 * @param result Search result carrying `lineStart` and `lineEnd`.
 * @returns true when the snippet counts as small.
 */
function isSmallSnippet(result) {
  const inclusiveSpan = result.lineEnd - result.lineStart + 1;
  const lineCount = Math.max(inclusiveSpan, 1);
  return lineCount <= SMALL_SNIPPET_MAX_LINES;
}
|
|
1755
|
+
/**
 * Decide whether a search result represents an exported (public API) symbol.
 *
 * A result qualifies when its `exported` flag is exactly `true`, or when its
 * text begins (after leading whitespace) with the `export` keyword.
 *
 * @param result Search result carrying an optional `exported` flag and `text`.
 * @returns true when the result should be treated as public API.
 */
function isPublicApiSymbol(result) {
  if (result.exported === true) return true;
  // A missing or non-string text cannot start with `export`; answer false rather than throw.
  if (typeof result.text !== "string") return false;
  // Test only the prefix instead of lower-casing the whole chunk. The keyword may be
  // followed by any whitespace, `{` (export{ a }) or `*` (export*from "x"); the
  // lookahead keeps identifiers such as `exports` or `exported` from matching.
  // Matching stays case-insensitive, as before.
  return /^\s*export(?=[\s{*])/i.test(result.text);
}
|
|
1760
|
+
/**
 * Score multiplier for the n-th result coming from the same file.
 *
 * @param fileOccurrence 1-based count of results seen so far for the file.
 * @returns 1 for the first hit (or any value <= 1), 0.9 for the second,
 *          0.8 for the third, and 0.7 for everything else.
 */
function getFileDiversityFactor(fileOccurrence) {
  if (fileOccurrence <= 1) return 1;
  switch (fileOccurrence) {
    case 2:
      return 0.9;
    case 3:
      return 0.8;
    default:
      return 0.7;
  }
}
|
|
1670
1766
|
function renormalize(results) {
|
|
1671
1767
|
if (results.length === 0) return results;
|
|
1672
1768
|
const maxScore = Math.max(...results.map((r) => r.score));
|
|
@@ -1677,25 +1773,127 @@ function renormalize(results) {
|
|
|
1677
1773
|
}));
|
|
1678
1774
|
}
|
|
1679
1775
|
|
|
1680
|
-
// src/steering/
|
|
1681
|
-
var PLAN_SYSTEM_PROMPT = `You are a code
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1776
|
+
// src/steering/prompts.ts
|
|
1777
|
+
var PLAN_SYSTEM_PROMPT = `You are a code-search strategy planner for a TypeScript/JavaScript codebase.
|
|
1778
|
+
|
|
1779
|
+
Given a user query, produce a JSON object with:
|
|
1780
|
+
- "interpretation": one sentence summarising what the user wants to find.
|
|
1781
|
+
- "strategies": an ordered array of search strategies (most important first).
|
|
1782
|
+
|
|
1783
|
+
Each strategy object has:
|
|
1784
|
+
"strategy" \u2014 one of "vector", "fts", "ast", "path", "dependency"
|
|
1785
|
+
"query" \u2014 the optimised search string for that strategy (see rules below)
|
|
1786
|
+
"weight" \u2014 importance 0\u20131 (highest-priority strategy gets 1.0)
|
|
1787
|
+
"reason" \u2014 one sentence explaining why this strategy helps
|
|
1788
|
+
|
|
1789
|
+
## Strategy selection rules
|
|
1688
1790
|
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1791
|
+
| Signal in query | Primary strategy | Supporting strategies |
|
|
1792
|
+
|---|---|---|
|
|
1793
|
+
| Conceptual / "how does X work" / natural language | vector | fts, ast |
|
|
1794
|
+
| Exact keyword, identifier, or error message | fts | ast |
|
|
1795
|
+
| Symbol name (function, class, type, variable) | ast | fts |
|
|
1796
|
+
| File path, glob, or extension (e.g. "*.test.ts") | path | fts |
|
|
1797
|
+
| Import chain / "what depends on X" | dependency | ast, fts |
|
|
1798
|
+
| Mixed: natural language + code symbol | vector + ast | fts |
|
|
1695
1799
|
|
|
1696
|
-
|
|
1697
|
-
|
|
1800
|
+
## Query optimisation rules
|
|
1801
|
+
- **vector**: keep the query close to natural language; rephrase for semantic similarity.
|
|
1802
|
+
- **fts**: extract the most distinctive keywords/identifiers; drop stop words.
|
|
1803
|
+
- **ast**: use only the symbol name (camelCase, snake_case, or PascalCase). Strip surrounding prose.
|
|
1804
|
+
- **path**: use a glob or slash-separated path segment (e.g. "src/auth/*.ts").
|
|
1805
|
+
- **dependency**: use the bare module or file name being imported.
|
|
1806
|
+
|
|
1807
|
+
## Edge cases
|
|
1808
|
+
- **Vague query** (e.g. "help me understand this"): use vector with the full query; add fts with any nouns present.
|
|
1809
|
+
- **Multi-concept query** (e.g. "authentication and rate limiting"): create separate strategies for each concept, both at high weight.
|
|
1810
|
+
- **Code symbol mixed with prose** (e.g. "where is the validateToken function called"): use ast for the symbol and vector for the intent.
|
|
1811
|
+
- **Query is just a symbol** (e.g. "createPool"): use ast at weight 1.0 and fts at weight 0.7. Skip vector.
|
|
1812
|
+
|
|
1813
|
+
## Examples
|
|
1814
|
+
|
|
1815
|
+
User: "how does authentication work"
|
|
1816
|
+
\`\`\`json
|
|
1817
|
+
{
|
|
1818
|
+
"interpretation": "Understand the authentication flow and related middleware.",
|
|
1819
|
+
"strategies": [
|
|
1820
|
+
{ "strategy": "vector", "query": "authentication flow middleware", "weight": 1.0, "reason": "Conceptual question best served by semantic search." },
|
|
1821
|
+
{ "strategy": "fts", "query": "authentication middleware auth", "weight": 0.7, "reason": "Keyword fallback for auth-related identifiers." },
|
|
1822
|
+
{ "strategy": "ast", "query": "authenticate", "weight": 0.6, "reason": "Likely function or class name." }
|
|
1823
|
+
]
|
|
1824
|
+
}
|
|
1825
|
+
\`\`\`
|
|
1826
|
+
|
|
1827
|
+
User: "validateToken"
|
|
1828
|
+
\`\`\`json
|
|
1829
|
+
{
|
|
1830
|
+
"interpretation": "Find the validateToken symbol definition and usages.",
|
|
1831
|
+
"strategies": [
|
|
1832
|
+
{ "strategy": "ast", "query": "validateToken", "weight": 1.0, "reason": "Exact symbol lookup." },
|
|
1833
|
+
{ "strategy": "fts", "query": "validateToken", "weight": 0.7, "reason": "Catch references in comments or strings." }
|
|
1834
|
+
]
|
|
1835
|
+
}
|
|
1836
|
+
\`\`\`
|
|
1837
|
+
|
|
1838
|
+
User: "where is rate limiting configured in src/middleware"
|
|
1839
|
+
\`\`\`json
|
|
1840
|
+
{
|
|
1841
|
+
"interpretation": "Locate rate-limiting configuration inside the middleware directory.",
|
|
1842
|
+
"strategies": [
|
|
1843
|
+
{ "strategy": "path", "query": "src/middleware/*", "weight": 0.9, "reason": "Scope results to the specified directory." },
|
|
1844
|
+
{ "strategy": "vector", "query": "rate limiting configuration", "weight": 1.0, "reason": "Semantic match for the concept." },
|
|
1845
|
+
{ "strategy": "fts", "query": "rateLimit rateLimiter", "weight": 0.7, "reason": "Common identifier variants." }
|
|
1846
|
+
]
|
|
1847
|
+
}
|
|
1848
|
+
\`\`\`
|
|
1849
|
+
|
|
1850
|
+
User: "authentication and database connection pooling"
|
|
1851
|
+
\`\`\`json
|
|
1852
|
+
{
|
|
1853
|
+
"interpretation": "Find code related to both authentication and database connection pooling.",
|
|
1854
|
+
"strategies": [
|
|
1855
|
+
{ "strategy": "vector", "query": "authentication login", "weight": 1.0, "reason": "Semantic search for the auth concept." },
|
|
1856
|
+
{ "strategy": "vector", "query": "database connection pool", "weight": 1.0, "reason": "Semantic search for the DB pooling concept." },
|
|
1857
|
+
{ "strategy": "fts", "query": "auth createPool connectionPool", "weight": 0.7, "reason": "Keyword fallback for likely identifiers." }
|
|
1858
|
+
]
|
|
1859
|
+
}
|
|
1860
|
+
\`\`\`
|
|
1861
|
+
|
|
1862
|
+
Output ONLY the JSON object. No markdown fences, no commentary.`;
|
|
1863
|
+
var SYNTHESIZE_SYSTEM_PROMPT = `You are a code-search assistant. Given a user query and ranked search results, produce a concise, actionable summary.
|
|
1864
|
+
|
|
1865
|
+
## Output structure (plain text, no markdown)
|
|
1866
|
+
|
|
1867
|
+
1. **Key finding** (1\u20132 sentences): the most important result or answer first.
|
|
1868
|
+
2. **Supporting locations** (bulleted, max 5): each line is "filePath:lineStart \u2013 brief description".
|
|
1869
|
+
3. **Additional context** (0\u20132 sentences, optional): relationships between results, patterns, or next steps.
|
|
1870
|
+
|
|
1871
|
+
## Rules
|
|
1872
|
+
- Always reference file paths and line numbers from the search results.
|
|
1873
|
+
- Mention specific symbol names (functions, classes, types) when they appear in results.
|
|
1874
|
+
- If no result clearly answers the query, say so and suggest a refined search.
|
|
1875
|
+
- Be concise \u2014 aim for 4\u20138 lines total. Do not repeat the query back.
|
|
1876
|
+
- Do not use markdown formatting (no #, *, \`, or fences). Use plain text only.
|
|
1877
|
+
- Group related results rather than listing every result individually.
|
|
1878
|
+
|
|
1879
|
+
## Example
|
|
1880
|
+
|
|
1881
|
+
Query: "how does token validation work"
|
|
1882
|
+
Results include validateToken in src/auth/tokens.ts:42 and authMiddleware in src/middleware/auth.ts:15.
|
|
1883
|
+
|
|
1884
|
+
Good output:
|
|
1885
|
+
Token validation is handled by validateToken (src/auth/tokens.ts:42), which decodes a JWT and checks expiry and signature against the configured secret.
|
|
1886
|
+
|
|
1887
|
+
Related locations:
|
|
1888
|
+
- src/auth/tokens.ts:42 \u2013 validateToken: core JWT decode + verify logic
|
|
1889
|
+
- src/middleware/auth.ts:15 \u2013 authMiddleware: calls validateToken on every protected route
|
|
1890
|
+
- src/auth/types.ts:5 \u2013 TokenPayload type definition
|
|
1891
|
+
|
|
1892
|
+
The middleware extracts the Bearer token from the Authorization header before passing it to validateToken.`;
|
|
1893
|
+
|
|
1894
|
+
// src/steering/llm.ts
|
|
1698
1895
|
var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
1896
|
+
// Interrogatives & conjunctions
|
|
1699
1897
|
"how",
|
|
1700
1898
|
"does",
|
|
1701
1899
|
"what",
|
|
@@ -1705,6 +1903,7 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
1705
1903
|
"which",
|
|
1706
1904
|
"who",
|
|
1707
1905
|
"whom",
|
|
1906
|
+
// Be-verbs
|
|
1708
1907
|
"is",
|
|
1709
1908
|
"are",
|
|
1710
1909
|
"was",
|
|
@@ -1712,10 +1911,12 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
1712
1911
|
"be",
|
|
1713
1912
|
"been",
|
|
1714
1913
|
"being",
|
|
1914
|
+
// Do-verbs
|
|
1715
1915
|
"do",
|
|
1716
1916
|
"did",
|
|
1717
1917
|
"doing",
|
|
1718
1918
|
"done",
|
|
1919
|
+
// Articles, connectors, prepositions
|
|
1719
1920
|
"the",
|
|
1720
1921
|
"a",
|
|
1721
1922
|
"an",
|
|
@@ -1734,12 +1935,30 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
1734
1935
|
"by",
|
|
1735
1936
|
"from",
|
|
1736
1937
|
"about",
|
|
1938
|
+
"into",
|
|
1939
|
+
"through",
|
|
1940
|
+
"between",
|
|
1941
|
+
"after",
|
|
1942
|
+
"before",
|
|
1943
|
+
"during",
|
|
1944
|
+
// Pronouns & demonstratives
|
|
1737
1945
|
"it",
|
|
1738
1946
|
"its",
|
|
1739
1947
|
"this",
|
|
1740
1948
|
"that",
|
|
1741
1949
|
"these",
|
|
1742
1950
|
"those",
|
|
1951
|
+
"i",
|
|
1952
|
+
"me",
|
|
1953
|
+
"my",
|
|
1954
|
+
"we",
|
|
1955
|
+
"our",
|
|
1956
|
+
"you",
|
|
1957
|
+
"your",
|
|
1958
|
+
"he",
|
|
1959
|
+
"she",
|
|
1960
|
+
"they",
|
|
1961
|
+
// Modals
|
|
1743
1962
|
"can",
|
|
1744
1963
|
"could",
|
|
1745
1964
|
"should",
|
|
@@ -1748,32 +1967,69 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
|
1748
1967
|
"shall",
|
|
1749
1968
|
"may",
|
|
1750
1969
|
"might",
|
|
1970
|
+
// Have-verbs
|
|
1751
1971
|
"has",
|
|
1752
1972
|
"have",
|
|
1753
1973
|
"had",
|
|
1754
1974
|
"having",
|
|
1755
|
-
|
|
1756
|
-
"me",
|
|
1757
|
-
"my",
|
|
1758
|
-
"we",
|
|
1759
|
-
"our",
|
|
1760
|
-
"you",
|
|
1761
|
-
"your",
|
|
1762
|
-
"he",
|
|
1763
|
-
"she",
|
|
1764
|
-
"they",
|
|
1975
|
+
// Common imperative verbs that carry no search value
|
|
1765
1976
|
"find",
|
|
1766
1977
|
"show",
|
|
1767
1978
|
"get",
|
|
1768
|
-
"tell"
|
|
1979
|
+
"tell",
|
|
1980
|
+
"look",
|
|
1981
|
+
"give",
|
|
1982
|
+
"list",
|
|
1983
|
+
"explain",
|
|
1984
|
+
// Misc filler
|
|
1985
|
+
"all",
|
|
1986
|
+
"any",
|
|
1987
|
+
"some",
|
|
1988
|
+
"each",
|
|
1989
|
+
"every",
|
|
1990
|
+
"much",
|
|
1991
|
+
"many",
|
|
1992
|
+
"also",
|
|
1993
|
+
"just",
|
|
1994
|
+
"like",
|
|
1995
|
+
"then",
|
|
1996
|
+
"there",
|
|
1997
|
+
"here",
|
|
1998
|
+
"very",
|
|
1999
|
+
"really",
|
|
2000
|
+
"use",
|
|
2001
|
+
"used",
|
|
2002
|
+
"using"
|
|
1769
2003
|
]);
|
|
2004
|
+
// Whole-token test for identifier-shaped words: camelCase, a capitalised/PascalCase word,
// lower snake_case, or UPPER_SNAKE_CASE (the snake forms require at least one underscore).
// extractSearchTerms uses it to keep a token even when its lower-cased form is a stop word.
var CODE_IDENT_RE = /^(?:[a-z]+(?:[A-Z][a-z]*)+|[A-Z][a-zA-Z]+|[a-z]+(?:_[a-z]+)+|[A-Z]+(?:_[A-Z]+)+)$/;
|
|
2005
|
+
// Global pattern for dot-joined identifier chains such as `foo.bar` or `a.b.c`
// (two or more segments); extractSearchTerms collects every match via String.prototype.match.
var DOTTED_IDENT_RE = /[a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)+/g;
|
|
1770
2006
|
/**
 * Reduce a free-form query to a space-separated keyword string.
 *
 * Terms are collected in three passes and de-duplicated case-insensitively,
 * keeping first-seen order and original casing:
 *   1. dotted identifier chains (DOTTED_IDENT_RE), e.g. `db.prepare`;
 *   2. whitespace-delimited tokens containing "/", with trailing `?!,;` removed;
 *   3. remaining words of two or more characters, skipping stop words unless the
 *      word as written matches CODE_IDENT_RE.
 * Periods trailing a word character are stripped from path tokens and words, so
 * sentence punctuation ("work.", "src/auth.") neither leaks into the result nor
 * defeats de-duplication against passes 1 and 2.
 *
 * @param query Raw user query.
 * @returns The joined terms. If nothing survives, the longest alphanumeric word
 *          of the query, or the query itself when there is none.
 */
function extractSearchTerms(query) {
  const terms = [];
  const seen = /* @__PURE__ */ new Set();
  const addUnique = (term) => {
    const key = term.toLowerCase();
    if (!seen.has(key)) {
      seen.add(key);
      terms.push(term);
    }
  };
  // Only dots that directly follow a word character are removed, so tokens such
  // as ".." or "./" are left alone.
  const dropTrailingDots = (token) => token.replace(/(?<=\w)\.+$/, "");
  const dottedMatches = query.match(DOTTED_IDENT_RE) ?? [];
  for (const m of dottedMatches) addUnique(m);
  const pathTokens = query.split(/\s+/).filter((t) => t.includes("/"));
  for (const p of pathTokens) addUnique(dropTrailingDots(p.replace(/[?!,;]+$/g, "")));
  const words = query
    .replace(/[^a-zA-Z0-9_.\s/-]/g, " ")
    .split(/\s+/)
    .map(dropTrailingDots)
    .filter((w) => w.length >= 2);
  for (const w of words) {
    const lower = w.toLowerCase();
    if (seen.has(lower)) continue;
    // A stop word is kept when it is written like a code identifier (e.g. "List").
    if (STOP_WORDS.has(lower) && !CODE_IDENT_RE.test(w)) continue;
    addUnique(w);
  }
  if (terms.length === 0) {
    // Everything was filtered out: fall back to the longest plain word.
    const allWords = query.replace(/[^a-zA-Z0-9_\s]/g, " ").split(/\s+/).filter((w) => w.length >= 2);
    const longest = allWords.sort((a, b) => b.length - a.length)[0];
    return longest ?? query;
  }
  return terms.join(" ");
}
|
|
1778
2034
|
var VALID_STRATEGIES = /* @__PURE__ */ new Set([
|
|
1779
2035
|
"vector",
|
|
@@ -2195,9 +2451,6 @@ function extractSymbolNames(query) {
|
|
|
2195
2451
|
function isPathLike(query) {
|
|
2196
2452
|
return query.includes("/") || query.includes("*") || query.includes(".");
|
|
2197
2453
|
}
|
|
2198
|
-
function extractPathBoostTerms(query) {
|
|
2199
|
-
return query.split(/\s+/).map((t) => t.trim()).filter((t) => t.length >= 2);
|
|
2200
|
-
}
|
|
2201
2454
|
async function runQuery(projectPath, query, options) {
|
|
2202
2455
|
const absoluteRoot = path5.resolve(projectPath);
|
|
2203
2456
|
const dbPath = path5.join(absoluteRoot, CTX_DIR2, DB_FILENAME2);
|
|
@@ -2350,7 +2603,8 @@ function formatTextOutput2(output) {
|
|
|
2350
2603
|
);
|
|
2351
2604
|
return lines.join("\n");
|
|
2352
2605
|
}
|
|
2353
|
-
function createSearchExecutor(db) {
|
|
2606
|
+
function createSearchExecutor(db, query) {
|
|
2607
|
+
const pathBoostTerms = extractPathBoostTerms(query);
|
|
2354
2608
|
return async (strategies, limit) => {
|
|
2355
2609
|
const strategyResults = [];
|
|
2356
2610
|
const fetchLimit = limit * 3;
|
|
@@ -2364,7 +2618,7 @@ function createSearchExecutor(db) {
|
|
|
2364
2618
|
});
|
|
2365
2619
|
}
|
|
2366
2620
|
}
|
|
2367
|
-
return
|
|
2621
|
+
return fusionMergeWithPathBoost(strategyResults, limit, pathBoostTerms);
|
|
2368
2622
|
};
|
|
2369
2623
|
}
|
|
2370
2624
|
function extractSymbolNames2(query) {
|
|
@@ -2412,7 +2666,7 @@ async function loadEmbedder2() {
|
|
|
2412
2666
|
return embedderInstance2;
|
|
2413
2667
|
}
|
|
2414
2668
|
async function fallbackSearch(db, query, limit) {
|
|
2415
|
-
const executor = createSearchExecutor(db);
|
|
2669
|
+
const executor = createSearchExecutor(db, query);
|
|
2416
2670
|
const keywords = extractSearchTerms(query);
|
|
2417
2671
|
const fallbackStrategies = [
|
|
2418
2672
|
{ strategy: "fts", query: keywords, weight: 0.8, reason: "fallback keyword search" },
|
|
@@ -2454,7 +2708,7 @@ async function runAsk(projectPath, query, options) {
|
|
|
2454
2708
|
}
|
|
2455
2709
|
return output;
|
|
2456
2710
|
}
|
|
2457
|
-
const executor = createSearchExecutor(db);
|
|
2711
|
+
const executor = createSearchExecutor(db, query);
|
|
2458
2712
|
if (options.noExplain) {
|
|
2459
2713
|
return await runNoExplain(provider, query, options, executor);
|
|
2460
2714
|
}
|