@liendev/lien 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CURSOR_RULES_TEMPLATE.md +66 -519
- package/dist/index.js +842 -382
- package/dist/index.js.map +1 -1
- package/package.json +9 -2
package/dist/index.js
CHANGED
|
@@ -161,7 +161,7 @@ function migrateConfig(oldConfig) {
|
|
|
161
161
|
path: ".",
|
|
162
162
|
enabled: true,
|
|
163
163
|
config: {
|
|
164
|
-
include: oldConfig.indexing.include ?? ["**/*.{ts,tsx,js,jsx,py,go,rs,java,c,cpp,cs}"],
|
|
164
|
+
include: oldConfig.indexing.include ?? ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,c,cpp,cs}"],
|
|
165
165
|
exclude: oldConfig.indexing.exclude ?? [
|
|
166
166
|
"**/node_modules/**",
|
|
167
167
|
"**/dist/**",
|
|
@@ -181,7 +181,7 @@ function migrateConfig(oldConfig) {
|
|
|
181
181
|
path: ".",
|
|
182
182
|
enabled: true,
|
|
183
183
|
config: {
|
|
184
|
-
include: ["**/*.{ts,tsx,js,jsx,py,go,rs,java,c,cpp,cs}"],
|
|
184
|
+
include: ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,c,cpp,cs}"],
|
|
185
185
|
exclude: [
|
|
186
186
|
"**/node_modules/**",
|
|
187
187
|
"**/dist/**",
|
|
@@ -1030,7 +1030,7 @@ async function scanCodebase(options) {
|
|
|
1030
1030
|
".lien/**",
|
|
1031
1031
|
...excludePatterns
|
|
1032
1032
|
]);
|
|
1033
|
-
const patterns = includePatterns.length > 0 ? includePatterns : ["**/*.{ts,tsx,js,jsx,py,go,rs,java,cpp,c,h,md,mdx}"];
|
|
1033
|
+
const patterns = includePatterns.length > 0 ? includePatterns : ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,cpp,c,cs,h,md,mdx}"];
|
|
1034
1034
|
const allFiles = [];
|
|
1035
1035
|
for (const pattern of patterns) {
|
|
1036
1036
|
const files = await glob(pattern, {
|
|
@@ -1388,6 +1388,7 @@ import Parser from "tree-sitter";
|
|
|
1388
1388
|
import TypeScript from "tree-sitter-typescript";
|
|
1389
1389
|
import JavaScript from "tree-sitter-javascript";
|
|
1390
1390
|
import PHPParser from "tree-sitter-php";
|
|
1391
|
+
import Python from "tree-sitter-python";
|
|
1391
1392
|
import { extname } from "path";
|
|
1392
1393
|
function getParser(language) {
|
|
1393
1394
|
if (!parserCache.has(language)) {
|
|
@@ -1414,6 +1415,8 @@ function detectLanguage2(filePath) {
|
|
|
1414
1415
|
return "javascript";
|
|
1415
1416
|
case "php":
|
|
1416
1417
|
return "php";
|
|
1418
|
+
case "py":
|
|
1419
|
+
return "python";
|
|
1417
1420
|
default:
|
|
1418
1421
|
return null;
|
|
1419
1422
|
}
|
|
@@ -1447,8 +1450,9 @@ var init_parser = __esm({
|
|
|
1447
1450
|
languageConfig = {
|
|
1448
1451
|
typescript: TypeScript.typescript,
|
|
1449
1452
|
javascript: JavaScript,
|
|
1450
|
-
php: PHPParser.php
|
|
1453
|
+
php: PHPParser.php,
|
|
1451
1454
|
// Note: tree-sitter-php exports both 'php' (mixed HTML/PHP) and 'php_only'
|
|
1455
|
+
python: Python
|
|
1452
1456
|
};
|
|
1453
1457
|
}
|
|
1454
1458
|
});
|
|
@@ -1524,7 +1528,35 @@ function extractInterfaceInfo(node, _content, _parentClass) {
|
|
|
1524
1528
|
signature: `interface ${nameNode.text}`
|
|
1525
1529
|
};
|
|
1526
1530
|
}
|
|
1527
|
-
function
|
|
1531
|
+
/**
 * Build the symbol record for a Python function definition node.
 *
 * @param {object} node - Syntax node exposing `childForFieldName`, `startPosition` and `endPosition`.
 * @param {string} content - Source text, forwarded to the signature and parameter helpers.
 * @param {string} [parentClass] - Name of the enclosing class; when truthy the symbol is typed "method".
 * @returns {object|null} Symbol description, or null when the node has no `name` field.
 */
function extractPythonFunctionInfo(node, content, parentClass) {
  const identifier = node.childForFieldName("name");
  if (identifier) {
    const kind = parentClass ? "method" : "function";
    return {
      name: identifier.text,
      type: kind,
      // Reported line numbers are the node's row positions plus one.
      startLine: node.startPosition.row + 1,
      endLine: node.endPosition.row + 1,
      parentClass,
      signature: extractSignature(node, content),
      parameters: extractParameters(node, content),
      complexity: calculateComplexity(node)
    };
  }
  return null;
}
|
|
1545
|
+
/**
 * Describe a Python class definition node as a symbol record.
 *
 * @param {object} node - Syntax node exposing `childForFieldName`, `startPosition` and `endPosition`.
 * @param {string} _content - Unused; accepted so this extractor has the same call shape as the others.
 * @param {string} [_parentClass] - Unused; accepted for the same reason.
 * @returns {object|null} Class symbol (line numbers are row positions plus one), or null when
 *   the node has no `name` field.
 */
function extractPythonClassInfo(node, _content, _parentClass) {
  const identifier = node.childForFieldName("name");
  if (identifier) {
    const className = identifier.text;
    return {
      name: className,
      type: "class",
      startLine: node.startPosition.row + 1,
      endLine: node.endPosition.row + 1,
      signature: `class ${className}`
    };
  }
  return null;
}
|
|
1556
|
+
/**
 * Produce symbol metadata for a syntax node by dispatching on its node type.
 *
 * The node type "function_definition" is registered for PHP in `symbolExtractors`, so Python
 * nodes of that type are routed explicitly by the `language` argument before the table lookup.
 *
 * @param {object} node - Syntax node; only `node.type` is read here.
 * @param {string} content - Source text, forwarded to the chosen extractor.
 * @param {string} [parentClass] - Enclosing class name, forwarded to the chosen extractor.
 * @param {string} [language] - Language identifier; only "python" changes the dispatch.
 * @returns {object|null} Whatever the chosen extractor returns, or null when no extractor is
 *   registered for the node type.
 */
function extractSymbolInfo(node, content, parentClass, language) {
  const isPythonDef = node.type === "function_definition" && language === "python";
  if (isPythonDef) {
    return extractPythonFunctionInfo(node, content, parentClass);
  }
  const handler = symbolExtractors[node.type];
  if (!handler) {
    return null;
  }
  return handler(node, content, parentClass);
}
|
|
@@ -1563,23 +1595,39 @@ function extractReturnType(node, _content) {
|
|
|
1563
1595
|
function calculateComplexity(node) {
|
|
1564
1596
|
let complexity = 1;
|
|
1565
1597
|
const decisionPoints = [
|
|
1566
|
-
// TypeScript/JavaScript
|
|
1598
|
+
// Common across languages (TypeScript/JavaScript/Python/PHP)
|
|
1567
1599
|
"if_statement",
|
|
1600
|
+
// if conditions
|
|
1568
1601
|
"while_statement",
|
|
1569
|
-
|
|
1570
|
-
// do...while loops
|
|
1602
|
+
// while loops
|
|
1571
1603
|
"for_statement",
|
|
1572
|
-
|
|
1573
|
-
"for_of_statement",
|
|
1574
|
-
// for...of loops
|
|
1604
|
+
// for loops
|
|
1575
1605
|
"switch_case",
|
|
1606
|
+
// switch/case statements
|
|
1576
1607
|
"catch_clause",
|
|
1608
|
+
// try/catch error handling
|
|
1577
1609
|
"ternary_expression",
|
|
1610
|
+
// Ternary operator (a ? b : c)
|
|
1578
1611
|
"binary_expression",
|
|
1579
|
-
// For && and ||
|
|
1580
|
-
//
|
|
1581
|
-
"
|
|
1612
|
+
// For && and || logical operators
|
|
1613
|
+
// TypeScript/JavaScript specific
|
|
1614
|
+
"do_statement",
|
|
1615
|
+
// do...while loops
|
|
1616
|
+
"for_in_statement",
|
|
1617
|
+
// for...in loops
|
|
1618
|
+
"for_of_statement",
|
|
1619
|
+
// for...of loops
|
|
1620
|
+
// PHP specific
|
|
1621
|
+
"foreach_statement",
|
|
1582
1622
|
// PHP foreach loops
|
|
1623
|
+
// Python specific
|
|
1624
|
+
"elif_clause",
|
|
1625
|
+
// Python elif (adds decision point)
|
|
1626
|
+
// Note: 'else_clause' is NOT a decision point (it's the default path)
|
|
1627
|
+
"except_clause",
|
|
1628
|
+
// Python except (try/except)
|
|
1629
|
+
"conditional_expression"
|
|
1630
|
+
// Python ternary (x if cond else y)
|
|
1583
1631
|
];
|
|
1584
1632
|
function traverse(n) {
|
|
1585
1633
|
if (decisionPoints.includes(n.type)) {
|
|
@@ -1608,7 +1656,13 @@ function extractImports(rootNode) {
|
|
|
1608
1656
|
if (sourceNode) {
|
|
1609
1657
|
const importPath = sourceNode.text.replace(/['"]/g, "");
|
|
1610
1658
|
imports.push(importPath);
|
|
1659
|
+
} else {
|
|
1660
|
+
const importText = node.text.split("\n")[0];
|
|
1661
|
+
imports.push(importText);
|
|
1611
1662
|
}
|
|
1663
|
+
} else if (node.type === "import_from_statement") {
|
|
1664
|
+
const importText = node.text.split("\n")[0];
|
|
1665
|
+
imports.push(importText);
|
|
1612
1666
|
}
|
|
1613
1667
|
if (node === rootNode) {
|
|
1614
1668
|
for (let i = 0; i < node.namedChildCount; i++) {
|
|
@@ -1635,9 +1689,16 @@ var init_symbols = __esm({
|
|
|
1635
1689
|
"interface_declaration": extractInterfaceInfo,
|
|
1636
1690
|
// PHP
|
|
1637
1691
|
"function_definition": extractFunctionInfo,
|
|
1638
|
-
// PHP functions
|
|
1639
|
-
"method_declaration": extractMethodInfo
|
|
1692
|
+
// PHP functions (Python handled via language check in extractSymbolInfo)
|
|
1693
|
+
"method_declaration": extractMethodInfo,
|
|
1640
1694
|
// PHP methods
|
|
1695
|
+
// Python
|
|
1696
|
+
"async_function_definition": extractPythonFunctionInfo,
|
|
1697
|
+
// Python async functions
|
|
1698
|
+
"class_definition": extractPythonClassInfo
|
|
1699
|
+
// Python classes
|
|
1700
|
+
// Note: Python regular functions use 'function_definition' (same as PHP)
|
|
1701
|
+
// They are dispatched to extractPythonFunctionInfo via language check in extractSymbolInfo()
|
|
1641
1702
|
};
|
|
1642
1703
|
}
|
|
1643
1704
|
});
|
|
@@ -1795,6 +1856,69 @@ var init_php = __esm({
|
|
|
1795
1856
|
}
|
|
1796
1857
|
});
|
|
1797
1858
|
|
|
1859
|
+
// src/indexer/ast/traversers/python.ts
|
|
1860
|
+
var PythonTraverser;
var init_python = __esm({
  "src/indexer/ast/traversers/python.ts"() {
    "use strict";
    /**
     * Traversal rules for Python syntax trees: which node types are extracted,
     * which ones are containers, and how to walk from a node to its enclosing class.
     */
    PythonTraverser = class {
      // Node types that are extracted as symbols.
      targetNodeTypes = ["function_definition", "async_function_definition"];
      // Containers: their methods are extracted, not the class itself.
      containerTypes = ["class_definition"];
      // Empty: Python has no const/let/var declarations like JS/TS;
      // functions are always defined with 'def' or 'async def'.
      declarationTypes = [];
      functionTypes = ["function_definition", "async_function_definition"];

      /** True when the node's type is one of `containerTypes`. */
      shouldExtractChildren(node) {
        const { type } = node;
        return this.containerTypes.includes(type);
      }

      /** Always false: no declaration node wraps a function in Python. */
      isDeclarationWithFunction(_node) {
        return false;
      }

      /** Returns the `body` field of a class definition, or null for any other node type. */
      getContainerBody(node) {
        return node.type === "class_definition" ? node.childForFieldName("body") : null;
      }

      /** Only "module" (top-level Python file) and "block" nodes are descended into. */
      shouldTraverseChildren(node) {
        switch (node.type) {
          case "module":
          case "block":
            return true;
          default:
            return false;
        }
      }

      /**
       * Walk up the `parent` chain and return the name of the nearest enclosing class.
       *
       * @returns {string|undefined} The class name; undefined when no class ancestor exists
       *   or the class node has no `name` field.
       */
      findParentContainerName(node) {
        for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
          if (ancestor.type === "class_definition") {
            return ancestor.childForFieldName("name")?.text;
          }
        }
        return undefined;
      }

      /**
       * Python doesn't have this pattern (const x = () => {});
       * functions are always defined with 'def' or 'async def'.
       */
      findFunctionInDeclaration(_node) {
        return { hasFunction: false, functionNode: null };
      }
    };
  }
});
|
|
1921
|
+
|
|
1798
1922
|
// src/indexer/ast/traversers/index.ts
|
|
1799
1923
|
function getTraverser(language) {
|
|
1800
1924
|
const traverser = traverserRegistry[language];
|
|
@@ -1809,10 +1933,12 @@ var init_traversers = __esm({
|
|
|
1809
1933
|
"use strict";
|
|
1810
1934
|
init_typescript();
|
|
1811
1935
|
init_php();
|
|
1936
|
+
init_python();
|
|
1812
1937
|
traverserRegistry = {
|
|
1813
1938
|
typescript: new TypeScriptTraverser(),
|
|
1814
1939
|
javascript: new JavaScriptTraverser(),
|
|
1815
|
-
php: new PHPTraverser()
|
|
1940
|
+
php: new PHPTraverser(),
|
|
1941
|
+
python: new PythonTraverser()
|
|
1816
1942
|
};
|
|
1817
1943
|
}
|
|
1818
1944
|
});
|
|
@@ -1843,7 +1969,7 @@ function chunkByAST(filepath, content, options = {}) {
|
|
|
1843
1969
|
}
|
|
1844
1970
|
}
|
|
1845
1971
|
const parentClassName = traverser.findParentContainerName(actualNode);
|
|
1846
|
-
const symbolInfo = extractSymbolInfo(actualNode, content, parentClassName);
|
|
1972
|
+
const symbolInfo = extractSymbolInfo(actualNode, content, parentClassName, language);
|
|
1847
1973
|
const nodeContent = getNodeContent(node, lines);
|
|
1848
1974
|
chunks.push(createChunk(filepath, node, nodeContent, symbolInfo, fileImports, language));
|
|
1849
1975
|
}
|
|
@@ -2398,26 +2524,73 @@ var init_relevance = __esm({
|
|
|
2398
2524
|
});
|
|
2399
2525
|
|
|
2400
2526
|
// src/vectordb/intent-classifier.ts
|
|
2527
|
+
/**
 * Return the intent rules ordered by descending `priority`.
 *
 * The ordering is computed once on a copy of `INTENT_RULES` (the source array is not
 * reordered) and kept in `cachedSortedRules`; later calls return that same array.
 *
 * @returns {Array<object>} The cached, priority-sorted rule list.
 */
function getSortedRules() {
  if (cachedSortedRules !== null) {
    return cachedSortedRules;
  }
  const byPriorityDesc = INTENT_RULES.slice();
  byPriorityDesc.sort((left, right) => right.priority - left.priority);
  cachedSortedRules = byPriorityDesc;
  return cachedSortedRules;
}
|
|
2401
2533
|
/**
 * Classify a search query by the first intent rule that matches it.
 *
 * The query is lower-cased and trimmed, then tested against each rule's patterns in the
 * order given by `getSortedRules()`.
 *
 * @param {string} query - Raw query text.
 * @returns {string} The matching rule's `intent`, or "implementation" when no rule matches.
 */
function classifyQueryIntent(query) {
  const normalized = query.toLowerCase().trim();
  const matchedRule = getSortedRules().find(
    (rule) => rule.patterns.some((pattern) => pattern.test(normalized))
  );
  if (matchedRule) {
    return matchedRule.intent;
  }
  return "implementation" /* IMPLEMENTATION */;
}
|
|
2543
|
+
var INTENT_RULES;
var INITIAL_RULE_COUNT;
var cachedSortedRules;
var init_intent_classifier = __esm({
  "src/vectordb/intent-classifier.ts"() {
    "use strict";
    // LOCATION intent (highest priority - most specific)
    const locationRule = {
      intent: "location" /* LOCATION */,
      priority: 3,
      patterns: [
        /where\s+(is|are|does|can\s+i\s+find)/,
        /find\s+the\s+/,
        /locate\s+/
      ]
    };
    // CONCEPTUAL intent (medium priority)
    const conceptualRule = {
      intent: "conceptual" /* CONCEPTUAL */,
      priority: 2,
      patterns: [
        /how\s+does\s+.*\s+work/,
        /what\s+(is|are|does)/,
        /explain\s+/,
        /understand\s+/,
        /\b(process|workflow|architecture)\b/
      ]
    };
    // IMPLEMENTATION intent (low priority - catches "how is X implemented")
    const implementationRule = {
      intent: "implementation" /* IMPLEMENTATION */,
      priority: 1,
      patterns: [
        /how\s+(is|are)\s+.*\s+(implemented|built|coded)/,
        /implementation\s+of/,
        /source\s+code\s+for/
      ]
    };
    INTENT_RULES = [locationRule, conceptualRule, implementationRule];
    // Number of rules defined here, captured at initialization time.
    INITIAL_RULE_COUNT = INTENT_RULES.length;
    // null means "no sorted copy built yet"; getSortedRules() fills it on first use.
    cachedSortedRules = null;
  }
});
|
|
2419
2585
|
|
|
2420
|
-
// src/vectordb/
|
|
2586
|
+
// src/vectordb/boosting/types.ts
|
|
2587
|
+
// Initializer for src/vectordb/boosting/types.ts. The module body contains no runtime
// statements, so running it has no effect beyond the "use strict" directive.
var init_types2 = __esm({
  "src/vectordb/boosting/types.ts"() {
    "use strict";
  }
});
|
|
2592
|
+
|
|
2593
|
+
// src/vectordb/boosting/strategies.ts
|
|
2421
2594
|
import path13 from "path";
|
|
2422
2595
|
function isDocumentationFile(filepath) {
|
|
2423
2596
|
const lower = filepath.toLowerCase();
|
|
@@ -2455,106 +2628,162 @@ function isUtilityFile(filepath) {
|
|
|
2455
2628
|
}
|
|
2456
2629
|
return false;
|
|
2457
2630
|
}
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2481
|
-
|
|
2482
|
-
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
}
|
|
2495
|
-
|
|
2496
|
-
|
|
2497
|
-
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
|
|
2504
|
-
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2631
|
+
var PathBoostingStrategy;
var FilenameBoostingStrategy;
var FileTypeBoostingStrategy;
var init_strategies = __esm({
  "src/vectordb/boosting/strategies.ts"() {
    "use strict";
    init_intent_classifier();

    /**
     * Scales the score by 0.9 for every query token (longer than two characters)
     * that appears inside any "/"-separated segment of the lower-cased path.
     * NOTE(review): a factor below 1 lowers the score; callers appear to pass a
     * distance where lower ranks higher - confirm against the caller.
     */
    PathBoostingStrategy = class {
      name = "path-matching";
      apply(query, filepath, baseScore) {
        const tokens = query.toLowerCase().split(/\s+/);
        const segments = filepath.toLowerCase().split("/");
        const factor = tokens.reduce((acc, token) => {
          // Tokens of one or two characters are ignored.
          if (token.length <= 2) return acc;
          const inPath = segments.some((segment) => segment.includes(token));
          return inPath ? acc * 0.9 : acc;
        }, 1);
        return baseScore * factor;
      }
    };

    /**
     * Scales the score by 0.7 when a query token equals the file's base name
     * (extension removed, lower-cased), or by 0.8 when the base name merely contains it.
     */
    FilenameBoostingStrategy = class {
      name = "filename-matching";
      apply(query, filepath, baseScore) {
        const extension = path13.extname(filepath);
        const stem = path13.basename(filepath, extension).toLowerCase();
        const tokens = query.toLowerCase().split(/\s+/);
        const factor = tokens.reduce((acc, token) => {
          if (token.length <= 2) return acc;
          if (stem === token) return acc * 0.7;
          if (stem.includes(token)) return acc * 0.8;
          return acc;
        }, 1);
        return baseScore * factor;
      }
    };

    /**
     * Adjusts the score according to the kind of file, using the rule set that
     * belongs to the intent given at construction time.
     */
    FileTypeBoostingStrategy = class {
      constructor(intent) {
        this.intent = intent;
      }
      name = "file-type";

      /** Route to the per-intent rule; an unrecognized intent leaves the score unchanged. */
      apply(query, filepath, baseScore) {
        const intent = this.intent;
        if (intent === "location" /* LOCATION */) {
          return this.applyLocationBoosting(query, filepath, baseScore);
        }
        if (intent === "conceptual" /* CONCEPTUAL */) {
          return this.applyConceptualBoosting(query, filepath, baseScore);
        }
        if (intent === "implementation" /* IMPLEMENTATION */) {
          return this.applyImplementationBoosting(query, filepath, baseScore);
        }
        return baseScore;
      }

      /** Test files get their score multiplied by 1.1; other files are unchanged. */
      applyLocationBoosting(_query, filepath, score) {
        return isTestFile(filepath) ? score * 1.1 : score;
      }

      /**
       * Documentation files: x0.65, and a further x0.9 when the path mentions
       * "architecture", "workflow" or "flow". Utility files: x0.95.
       */
      applyConceptualBoosting(_query, filepath, score) {
        let adjusted = score;
        if (isDocumentationFile(filepath)) {
          adjusted *= 0.65;
          const lowered = filepath.toLowerCase();
          const designKeywords = ["architecture", "workflow", "flow"];
          if (designKeywords.some((keyword) => lowered.includes(keyword))) {
            adjusted *= 0.9;
          }
        }
        if (isUtilityFile(filepath)) {
          adjusted *= 0.95;
        }
        return adjusted;
      }

      /** Test files get their score multiplied by 1.1; other files are unchanged. */
      applyImplementationBoosting(_query, filepath, score) {
        return isTestFile(filepath) ? score * 1.1 : score;
      }
    };
  }
});
|
|
2713
|
+
|
|
2714
|
+
// src/vectordb/boosting/composer.ts
|
|
2715
|
+
var BoostingComposer;
var init_composer = __esm({
  "src/vectordb/boosting/composer.ts"() {
    "use strict";
    /**
     * Ordered pipeline of boosting strategies. Each strategy receives the score
     * produced by the one before it.
     */
    BoostingComposer = class {
      strategies = [];

      /**
       * Append a strategy to the end of the pipeline.
       *
       * @param strategy - Object with a `name` and an `apply(query, filepath, score)` method
       * @returns This composer, so calls can be chained
       */
      addStrategy(strategy) {
        this.strategies.push(strategy);
        return this;
      }

      /**
       * Run every strategy in insertion order, feeding each one the previous result.
       *
       * @param query - The search query
       * @param filepath - The file path being scored
       * @param baseScore - The starting score
       * @returns The score after the last strategy; `baseScore` itself when the pipeline is empty
       */
      apply(query, filepath, baseScore) {
        return this.strategies.reduce(
          (running, strategy) => strategy.apply(query, filepath, running),
          baseScore
        );
      }

      /**
       * List the `name` of every strategy, in pipeline order.
       */
      getStrategyNames() {
        return Array.from(this.strategies, (strategy) => strategy.name);
      }

      /**
       * Number of strategies currently in the pipeline.
       */
      getStrategyCount() {
        const { length } = this.strategies;
        return length;
      }

      /**
       * Drop all strategies by replacing the list with a fresh empty array.
       */
      clear() {
        this.strategies = [];
      }
    };
  }
});
|
|
2769
|
+
|
|
2770
|
+
// src/vectordb/boosting/index.ts
|
|
2771
|
+
// Initializer for src/vectordb/boosting/index.ts: runs the initializers of the
// three boosting submodules (types, strategies, composer) in that order.
var init_boosting = __esm({
  "src/vectordb/boosting/index.ts"() {
    "use strict";
    init_types2();
    init_strategies();
    init_composer();
  }
});
|
|
2779
|
+
|
|
2780
|
+
// src/vectordb/query.ts
|
|
2543
2781
|
/**
 * Adjust a result's score using the boosting pipeline that matches the query's intent.
 *
 * @param {string} query - Search query; when falsy (empty or missing) no boosting is applied.
 * @param {string} filepath - Path of the file the score belongs to.
 * @param {number} baseScore - Score before boosting.
 * @returns {number} The boosted score, or `baseScore` unchanged when there is no query.
 */
function applyRelevanceBoosting(query, filepath, baseScore) {
  if (!query) {
    return baseScore;
  }
  const intent = classifyQueryIntent(query);
  // An intent with no registered composer falls back to the implementation pipeline
  // instead of throwing on `undefined.apply`.
  const composer = BOOSTING_COMPOSERS[intent] ?? BOOSTING_COMPOSERS["implementation" /* IMPLEMENTATION */];
  return composer.apply(query, filepath, baseScore);
}
|
|
2559
2788
|
function dbRecordToSearchResult(r, query) {
|
|
2560
2789
|
const baseScore = r._distance ?? 0;
|
|
@@ -2736,6 +2965,7 @@ async function querySymbols(table, options) {
|
|
|
2736
2965
|
throw wrapError(error, "Failed to query symbols");
|
|
2737
2966
|
}
|
|
2738
2967
|
}
|
|
2968
|
+
var PATH_STRATEGY, FILENAME_STRATEGY, FILE_TYPE_STRATEGIES, BOOSTING_COMPOSERS;
|
|
2739
2969
|
var init_query = __esm({
|
|
2740
2970
|
"src/vectordb/query.ts"() {
|
|
2741
2971
|
"use strict";
|
|
@@ -2743,6 +2973,19 @@ var init_query = __esm({
|
|
|
2743
2973
|
init_errors();
|
|
2744
2974
|
init_relevance();
|
|
2745
2975
|
init_intent_classifier();
|
|
2976
|
+
init_boosting();
|
|
2977
|
+
PATH_STRATEGY = new PathBoostingStrategy();
|
|
2978
|
+
FILENAME_STRATEGY = new FilenameBoostingStrategy();
|
|
2979
|
+
FILE_TYPE_STRATEGIES = {
|
|
2980
|
+
["location" /* LOCATION */]: new FileTypeBoostingStrategy("location" /* LOCATION */),
|
|
2981
|
+
["conceptual" /* CONCEPTUAL */]: new FileTypeBoostingStrategy("conceptual" /* CONCEPTUAL */),
|
|
2982
|
+
["implementation" /* IMPLEMENTATION */]: new FileTypeBoostingStrategy("implementation" /* IMPLEMENTATION */)
|
|
2983
|
+
};
|
|
2984
|
+
BOOSTING_COMPOSERS = {
|
|
2985
|
+
["location" /* LOCATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["location" /* LOCATION */]),
|
|
2986
|
+
["conceptual" /* CONCEPTUAL */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["conceptual" /* CONCEPTUAL */]),
|
|
2987
|
+
["implementation" /* IMPLEMENTATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["implementation" /* IMPLEMENTATION */])
|
|
2988
|
+
};
|
|
2746
2989
|
}
|
|
2747
2990
|
});
|
|
2748
2991
|
|
|
@@ -3667,6 +3910,22 @@ var init_change_detector = __esm({
|
|
|
3667
3910
|
}
|
|
3668
3911
|
});
|
|
3669
3912
|
|
|
3913
|
+
// src/utils/result.ts
|
|
3914
|
+
/**
 * Wrap a value as a successful result.
 *
 * @param {*} value - Payload of the successful operation.
 * @returns {{ok: true, value: *}} Result object flagged `ok: true`.
 */
function Ok(value) {
  const success = { ok: true, value: value };
  return success;
}
|
|
3917
|
+
/**
 * Wrap an error as a failed result.
 *
 * @param {*} error - Description of what went wrong (stored as given).
 * @returns {{ok: false, error: *}} Result object flagged `ok: false`.
 */
function Err(error) {
  const failure = { ok: false, error: error };
  return failure;
}
|
|
3920
|
+
/**
 * Report whether a result represents success.
 *
 * @param {{ok: boolean}} result - Result object as produced by `Ok` or `Err`.
 * @returns {boolean} The result's `ok` flag, returned as-is.
 */
function isOk(result) {
  const { ok } = result;
  return ok;
}
|
|
3923
|
+
// Initializer for src/utils/result.ts. The module has no initialization-time
// statements; Ok, Err and isOk are plain function declarations.
var init_result = __esm({
  "src/utils/result.ts"() {
    "use strict";
  }
});
|
|
3928
|
+
|
|
3670
3929
|
// src/indexer/incremental.ts
|
|
3671
3930
|
import fs16 from "fs/promises";
|
|
3672
3931
|
async function processFileContent(filepath, content, embeddings, config, verbose) {
|
|
@@ -3748,36 +4007,29 @@ async function indexSingleFile(filepath, vectorDB, embeddings, config, options =
|
|
|
3748
4007
|
console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
|
|
3749
4008
|
}
|
|
3750
4009
|
}
|
|
4010
|
+
/**
 * Read one file from disk and run its content through `processFileContent`.
 *
 * Failures while stat-ing, reading or processing are caught and reported as an
 * `Err` result instead of being thrown.
 *
 * @param {string} filepath - Path of the file to process.
 * @param {*} embeddings - Forwarded unchanged to `processFileContent`.
 * @param {*} config - Forwarded unchanged to `processFileContent`.
 * @param {boolean} verbose - Forwarded unchanged to `processFileContent`.
 * @returns {Promise<object>} `Ok({ filepath, result, mtime })`, where `mtime` is the file's
 *   `mtimeMs`, or `Err(message)` on failure.
 */
async function processSingleFileForIndexing(filepath, embeddings, config, verbose) {
  try {
    const fileStats = await fs16.stat(filepath);
    const text = await fs16.readFile(filepath, "utf-8");
    const processed = await processFileContent(filepath, text, embeddings, config, verbose);
    const payload = {
      filepath,
      result: processed,
      mtime: fileStats.mtimeMs
    };
    return Ok(payload);
  } catch (failure) {
    return Err(`Failed to process ${filepath}: ${failure}`);
  }
}
|
|
3751
4024
|
async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, options = {}) {
|
|
3752
4025
|
const { verbose } = options;
|
|
3753
4026
|
let processedCount = 0;
|
|
3754
4027
|
const manifestEntries = [];
|
|
3755
4028
|
for (const filepath of filepaths) {
|
|
3756
|
-
|
|
3757
|
-
|
|
3758
|
-
|
|
3759
|
-
|
|
3760
|
-
fileMtime = stats.mtimeMs;
|
|
3761
|
-
content = await fs16.readFile(filepath, "utf-8");
|
|
3762
|
-
} catch (error) {
|
|
3763
|
-
if (verbose) {
|
|
3764
|
-
console.error(`[Lien] File not readable: ${filepath}`);
|
|
3765
|
-
}
|
|
3766
|
-
try {
|
|
3767
|
-
await vectorDB.deleteByFile(filepath);
|
|
3768
|
-
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
3769
|
-
await manifest.removeFile(filepath);
|
|
3770
|
-
} catch (error2) {
|
|
3771
|
-
if (verbose) {
|
|
3772
|
-
console.error(`[Lien] Note: ${filepath} not in index`);
|
|
3773
|
-
}
|
|
3774
|
-
}
|
|
3775
|
-
processedCount++;
|
|
3776
|
-
continue;
|
|
3777
|
-
}
|
|
3778
|
-
try {
|
|
3779
|
-
const result = await processFileContent(filepath, content, embeddings, config, verbose || false);
|
|
3780
|
-
if (result === null) {
|
|
4029
|
+
const result = await processSingleFileForIndexing(filepath, embeddings, config, verbose || false);
|
|
4030
|
+
if (isOk(result)) {
|
|
4031
|
+
const { result: processResult, mtime } = result.value;
|
|
4032
|
+
if (processResult === null) {
|
|
3781
4033
|
try {
|
|
3782
4034
|
await vectorDB.deleteByFile(filepath);
|
|
3783
4035
|
} catch (error) {
|
|
@@ -3785,7 +4037,7 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
|
|
|
3785
4037
|
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
3786
4038
|
await manifest.updateFile(filepath, {
|
|
3787
4039
|
filepath,
|
|
3788
|
-
lastModified:
|
|
4040
|
+
lastModified: mtime,
|
|
3789
4041
|
chunkCount: 0
|
|
3790
4042
|
});
|
|
3791
4043
|
processedCount++;
|
|
@@ -3796,21 +4048,33 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
|
|
|
3796
4048
|
} catch (error) {
|
|
3797
4049
|
}
|
|
3798
4050
|
await vectorDB.insertBatch(
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
4051
|
+
processResult.vectors,
|
|
4052
|
+
processResult.chunks.map((c) => c.metadata),
|
|
4053
|
+
processResult.texts
|
|
3802
4054
|
);
|
|
3803
4055
|
manifestEntries.push({
|
|
3804
4056
|
filepath,
|
|
3805
|
-
chunkCount:
|
|
3806
|
-
mtime
|
|
4057
|
+
chunkCount: processResult.chunkCount,
|
|
4058
|
+
mtime
|
|
3807
4059
|
});
|
|
3808
4060
|
if (verbose) {
|
|
3809
|
-
console.error(`[Lien] \u2713 Updated ${filepath} (${
|
|
4061
|
+
console.error(`[Lien] \u2713 Updated ${filepath} (${processResult.chunkCount} chunks)`);
|
|
4062
|
+
}
|
|
4063
|
+
processedCount++;
|
|
4064
|
+
} else {
|
|
4065
|
+
if (verbose) {
|
|
4066
|
+
console.error(`[Lien] ${result.error}`);
|
|
4067
|
+
}
|
|
4068
|
+
try {
|
|
4069
|
+
await vectorDB.deleteByFile(filepath);
|
|
4070
|
+
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
4071
|
+
await manifest.removeFile(filepath);
|
|
4072
|
+
} catch (error) {
|
|
4073
|
+
if (verbose) {
|
|
4074
|
+
console.error(`[Lien] Note: ${filepath} not in index`);
|
|
4075
|
+
}
|
|
3810
4076
|
}
|
|
3811
4077
|
processedCount++;
|
|
3812
|
-
} catch (error) {
|
|
3813
|
-
console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
|
|
3814
4078
|
}
|
|
3815
4079
|
}
|
|
3816
4080
|
if (manifestEntries.length > 0) {
|
|
@@ -3833,6 +4097,7 @@ var init_incremental = __esm({
|
|
|
3833
4097
|
init_schema();
|
|
3834
4098
|
init_manifest();
|
|
3835
4099
|
init_constants();
|
|
4100
|
+
init_result();
|
|
3836
4101
|
}
|
|
3837
4102
|
});
|
|
3838
4103
|
|
|
@@ -3918,6 +4183,99 @@ var init_loading_messages = __esm({
|
|
|
3918
4183
|
}
|
|
3919
4184
|
});
|
|
3920
4185
|
|
|
4186
|
+
// src/indexer/progress-tracker.ts
|
|
4187
|
+
var IndexingProgressTracker;
|
|
4188
|
+
var init_progress_tracker = __esm({
|
|
4189
|
+
"src/indexer/progress-tracker.ts"() {
|
|
4190
|
+
"use strict";
|
|
4191
|
+
init_loading_messages();
|
|
4192
|
+
IndexingProgressTracker = class _IndexingProgressTracker {
|
|
4193
|
+
processedFiles = 0;
|
|
4194
|
+
totalFiles;
|
|
4195
|
+
wittyMessage;
|
|
4196
|
+
spinner;
|
|
4197
|
+
updateInterval;
|
|
4198
|
+
// Configuration constants
|
|
4199
|
+
static SPINNER_UPDATE_INTERVAL_MS = 200;
|
|
4200
|
+
// How often to update spinner
|
|
4201
|
+
static MESSAGE_ROTATION_INTERVAL_MS = 8e3;
|
|
4202
|
+
// How often to rotate message
|
|
4203
|
+
constructor(totalFiles, spinner) {
|
|
4204
|
+
this.totalFiles = totalFiles;
|
|
4205
|
+
this.spinner = spinner;
|
|
4206
|
+
this.wittyMessage = getIndexingMessage();
|
|
4207
|
+
}
|
|
4208
|
+
/**
|
|
4209
|
+
* Start the progress tracker.
|
|
4210
|
+
* Sets up periodic updates for spinner and message rotation.
|
|
4211
|
+
*
|
|
4212
|
+
* Safe to call multiple times - will not create duplicate intervals.
|
|
4213
|
+
*/
|
|
4214
|
+
start() {
|
|
4215
|
+
if (this.updateInterval) {
|
|
4216
|
+
return;
|
|
4217
|
+
}
|
|
4218
|
+
const MESSAGE_ROTATION_TICKS = Math.floor(
|
|
4219
|
+
_IndexingProgressTracker.MESSAGE_ROTATION_INTERVAL_MS / _IndexingProgressTracker.SPINNER_UPDATE_INTERVAL_MS
|
|
4220
|
+
);
|
|
4221
|
+
let spinnerTick = 0;
|
|
4222
|
+
this.updateInterval = setInterval(() => {
|
|
4223
|
+
spinnerTick++;
|
|
4224
|
+
if (spinnerTick >= MESSAGE_ROTATION_TICKS) {
|
|
4225
|
+
this.wittyMessage = getIndexingMessage();
|
|
4226
|
+
spinnerTick = 0;
|
|
4227
|
+
}
|
|
4228
|
+
this.spinner.text = `${this.processedFiles}/${this.totalFiles} files | ${this.wittyMessage}`;
|
|
4229
|
+
}, _IndexingProgressTracker.SPINNER_UPDATE_INTERVAL_MS);
|
|
4230
|
+
}
|
|
4231
|
+
/**
|
|
4232
|
+
* Increment the count of processed files.
|
|
4233
|
+
*
|
|
4234
|
+
* Safe for async operations in Node.js's single-threaded event loop.
|
|
4235
|
+
* Note: Not thread-safe for true concurrent operations (e.g., worker threads).
|
|
4236
|
+
*/
|
|
4237
|
+
incrementFiles() {
|
|
4238
|
+
this.processedFiles++;
|
|
4239
|
+
}
|
|
4240
|
+
/**
|
|
4241
|
+
* Set a custom message (e.g., for special operations like embedding generation).
|
|
4242
|
+
* The message will be displayed until the next automatic rotation.
|
|
4243
|
+
*/
|
|
4244
|
+
setMessage(message) {
|
|
4245
|
+
this.wittyMessage = message;
|
|
4246
|
+
}
|
|
4247
|
+
/**
|
|
4248
|
+
* Stop the progress tracker and clean up intervals.
|
|
4249
|
+
* Must be called when indexing completes or fails.
|
|
4250
|
+
*/
|
|
4251
|
+
stop() {
|
|
4252
|
+
if (this.updateInterval) {
|
|
4253
|
+
clearInterval(this.updateInterval);
|
|
4254
|
+
this.updateInterval = void 0;
|
|
4255
|
+
}
|
|
4256
|
+
}
|
|
4257
|
+
/**
|
|
4258
|
+
* Get the current count of processed files.
|
|
4259
|
+
*/
|
|
4260
|
+
getProcessedCount() {
|
|
4261
|
+
return this.processedFiles;
|
|
4262
|
+
}
|
|
4263
|
+
/**
|
|
4264
|
+
* Get the total number of files to process.
|
|
4265
|
+
*/
|
|
4266
|
+
getTotalFiles() {
|
|
4267
|
+
return this.totalFiles;
|
|
4268
|
+
}
|
|
4269
|
+
/**
|
|
4270
|
+
* Get the current message being displayed.
|
|
4271
|
+
*/
|
|
4272
|
+
getCurrentMessage() {
|
|
4273
|
+
return this.wittyMessage;
|
|
4274
|
+
}
|
|
4275
|
+
};
|
|
4276
|
+
}
|
|
4277
|
+
});
|
|
4278
|
+
|
|
3921
4279
|
// src/indexer/index.ts
|
|
3922
4280
|
var indexer_exports = {};
|
|
3923
4281
|
__export(indexer_exports, {
|
|
@@ -3927,162 +4285,171 @@ import fs17 from "fs/promises";
|
|
|
3927
4285
|
import ora from "ora";
|
|
3928
4286
|
import chalk5 from "chalk";
|
|
3929
4287
|
import pLimit from "p-limit";
|
|
3930
|
-
async function
|
|
3931
|
-
const
|
|
3932
|
-
const
|
|
3933
|
-
|
|
4288
|
+
async function updateGitState(rootDir, vectorDB, manifest) {
|
|
4289
|
+
const { isGitAvailable: isGitAvailable2, isGitRepo: isGitRepo2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
|
|
4290
|
+
const { GitStateTracker: GitStateTracker2 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
|
|
4291
|
+
const gitAvailable = await isGitAvailable2();
|
|
4292
|
+
const isRepo = await isGitRepo2(rootDir);
|
|
4293
|
+
if (!gitAvailable || !isRepo) {
|
|
4294
|
+
return;
|
|
4295
|
+
}
|
|
4296
|
+
const gitTracker = new GitStateTracker2(rootDir, vectorDB.dbPath);
|
|
4297
|
+
await gitTracker.initialize();
|
|
4298
|
+
const gitState = gitTracker.getState();
|
|
4299
|
+
if (gitState) {
|
|
4300
|
+
await manifest.updateGitState(gitState);
|
|
4301
|
+
}
|
|
4302
|
+
}
|
|
4303
|
+
async function handleDeletions(deletedFiles, vectorDB, manifest, spinner) {
|
|
4304
|
+
if (deletedFiles.length === 0) {
|
|
4305
|
+
return;
|
|
4306
|
+
}
|
|
4307
|
+
spinner.start(`Removing ${deletedFiles.length} deleted files...`);
|
|
4308
|
+
let removedCount = 0;
|
|
4309
|
+
for (const filepath of deletedFiles) {
|
|
4310
|
+
try {
|
|
4311
|
+
await vectorDB.deleteByFile(filepath);
|
|
4312
|
+
await manifest.removeFile(filepath);
|
|
4313
|
+
removedCount++;
|
|
4314
|
+
} catch (err) {
|
|
4315
|
+
spinner.warn(
|
|
4316
|
+
`Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`
|
|
4317
|
+
);
|
|
4318
|
+
}
|
|
4319
|
+
}
|
|
4320
|
+
spinner.succeed(`Removed ${removedCount}/${deletedFiles.length} deleted files`);
|
|
4321
|
+
}
|
|
4322
|
+
async function handleUpdates(addedFiles, modifiedFiles, vectorDB, embeddings, config, options, spinner) {
|
|
4323
|
+
const filesToIndex = [...addedFiles, ...modifiedFiles];
|
|
4324
|
+
if (filesToIndex.length === 0) {
|
|
4325
|
+
return;
|
|
4326
|
+
}
|
|
4327
|
+
spinner.start(`Reindexing ${filesToIndex.length} changed files...`);
|
|
4328
|
+
const count = await indexMultipleFiles(
|
|
4329
|
+
filesToIndex,
|
|
4330
|
+
vectorDB,
|
|
4331
|
+
embeddings,
|
|
4332
|
+
config,
|
|
4333
|
+
{ verbose: options.verbose }
|
|
4334
|
+
);
|
|
4335
|
+
await writeVersionFile(vectorDB.dbPath);
|
|
4336
|
+
spinner.succeed(
|
|
4337
|
+
`Incremental reindex complete: ${count}/${filesToIndex.length} files indexed successfully`
|
|
4338
|
+
);
|
|
4339
|
+
}
|
|
4340
|
+
async function tryIncrementalIndex(rootDir, vectorDB, config, options, spinner) {
|
|
4341
|
+
spinner.text = "Checking for changes...";
|
|
4342
|
+
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
4343
|
+
const savedManifest = await manifest.load();
|
|
4344
|
+
if (!savedManifest) {
|
|
4345
|
+
return false;
|
|
4346
|
+
}
|
|
4347
|
+
const changes = await detectChanges(rootDir, vectorDB, config);
|
|
4348
|
+
if (changes.reason === "full") {
|
|
4349
|
+
spinner.text = "Full reindex required...";
|
|
4350
|
+
return false;
|
|
4351
|
+
}
|
|
4352
|
+
const totalChanges = changes.added.length + changes.modified.length;
|
|
4353
|
+
const totalDeleted = changes.deleted.length;
|
|
4354
|
+
if (totalChanges === 0 && totalDeleted === 0) {
|
|
4355
|
+
spinner.succeed("No changes detected - index is up to date!");
|
|
4356
|
+
return true;
|
|
4357
|
+
}
|
|
4358
|
+
spinner.succeed(
|
|
4359
|
+
`Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
|
|
4360
|
+
);
|
|
4361
|
+
spinner.start(getModelLoadingMessage());
|
|
4362
|
+
const embeddings = new LocalEmbeddings();
|
|
4363
|
+
await embeddings.initialize();
|
|
4364
|
+
spinner.succeed("Embedding model loaded");
|
|
4365
|
+
await handleDeletions(changes.deleted, vectorDB, manifest, spinner);
|
|
4366
|
+
await handleUpdates(changes.added, changes.modified, vectorDB, embeddings, config, options, spinner);
|
|
4367
|
+
await updateGitState(rootDir, vectorDB, manifest);
|
|
4368
|
+
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4369
|
+
return true;
|
|
4370
|
+
}
|
|
4371
|
+
async function performFullIndex(rootDir, vectorDB, config, options, spinner) {
|
|
4372
|
+
spinner.text = "Scanning codebase...";
|
|
4373
|
+
let files;
|
|
4374
|
+
if (isModernConfig(config) && config.frameworks.length > 0) {
|
|
4375
|
+
files = await scanCodebaseWithFrameworks(rootDir, config);
|
|
4376
|
+
} else if (isLegacyConfig(config)) {
|
|
4377
|
+
files = await scanCodebase({
|
|
4378
|
+
rootDir,
|
|
4379
|
+
includePatterns: config.indexing.include,
|
|
4380
|
+
excludePatterns: config.indexing.exclude
|
|
4381
|
+
});
|
|
4382
|
+
} else {
|
|
4383
|
+
files = await scanCodebase({
|
|
4384
|
+
rootDir,
|
|
4385
|
+
includePatterns: [],
|
|
4386
|
+
excludePatterns: []
|
|
4387
|
+
});
|
|
4388
|
+
}
|
|
4389
|
+
if (files.length === 0) {
|
|
4390
|
+
spinner.fail("No files found to index");
|
|
4391
|
+
return;
|
|
4392
|
+
}
|
|
4393
|
+
spinner.text = `Found ${files.length} files`;
|
|
4394
|
+
spinner.text = getModelLoadingMessage();
|
|
4395
|
+
const embeddings = new LocalEmbeddings();
|
|
4396
|
+
await embeddings.initialize();
|
|
4397
|
+
spinner.succeed("Embedding model loaded");
|
|
4398
|
+
const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
|
|
4399
|
+
const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
|
|
4400
|
+
const vectorDBBatchSize = 100;
|
|
4401
|
+
spinner.start(`Processing files with ${concurrency}x concurrency...`);
|
|
4402
|
+
const startTime = Date.now();
|
|
4403
|
+
let processedChunks = 0;
|
|
4404
|
+
const chunkAccumulator = [];
|
|
4405
|
+
const limit = pLimit(concurrency);
|
|
4406
|
+
const indexedFileEntries = [];
|
|
4407
|
+
const progressTracker = new IndexingProgressTracker(files.length, spinner);
|
|
4408
|
+
progressTracker.start();
|
|
3934
4409
|
try {
|
|
3935
|
-
|
|
3936
|
-
|
|
3937
|
-
|
|
3938
|
-
|
|
3939
|
-
|
|
3940
|
-
|
|
3941
|
-
|
|
3942
|
-
const manifest2 = new ManifestManager(vectorDB.dbPath);
|
|
3943
|
-
const savedManifest = await manifest2.load();
|
|
3944
|
-
if (savedManifest) {
|
|
3945
|
-
const changes = await detectChanges(rootDir, vectorDB, config);
|
|
3946
|
-
if (changes.reason !== "full") {
|
|
3947
|
-
const totalChanges = changes.added.length + changes.modified.length;
|
|
3948
|
-
const totalDeleted = changes.deleted.length;
|
|
3949
|
-
if (totalChanges === 0 && totalDeleted === 0) {
|
|
3950
|
-
spinner.succeed("No changes detected - index is up to date!");
|
|
3951
|
-
return;
|
|
3952
|
-
}
|
|
3953
|
-
spinner.succeed(
|
|
3954
|
-
`Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
|
|
3955
|
-
);
|
|
3956
|
-
spinner.start(getModelLoadingMessage());
|
|
3957
|
-
const embeddings2 = new LocalEmbeddings();
|
|
3958
|
-
await embeddings2.initialize();
|
|
3959
|
-
spinner.succeed("Embedding model loaded");
|
|
3960
|
-
if (totalDeleted > 0) {
|
|
3961
|
-
spinner.start(`Removing ${totalDeleted} deleted files...`);
|
|
3962
|
-
let removedCount = 0;
|
|
3963
|
-
for (const filepath of changes.deleted) {
|
|
3964
|
-
try {
|
|
3965
|
-
await vectorDB.deleteByFile(filepath);
|
|
3966
|
-
await manifest2.removeFile(filepath);
|
|
3967
|
-
removedCount++;
|
|
3968
|
-
} catch (err) {
|
|
3969
|
-
spinner.warn(`Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`);
|
|
3970
|
-
}
|
|
3971
|
-
}
|
|
3972
|
-
spinner.succeed(`Removed ${removedCount}/${totalDeleted} deleted files`);
|
|
3973
|
-
}
|
|
3974
|
-
if (totalChanges > 0) {
|
|
3975
|
-
spinner.start(`Reindexing ${totalChanges} changed files...`);
|
|
3976
|
-
const filesToIndex = [...changes.added, ...changes.modified];
|
|
3977
|
-
const count = await indexMultipleFiles(
|
|
3978
|
-
filesToIndex,
|
|
3979
|
-
vectorDB,
|
|
3980
|
-
embeddings2,
|
|
3981
|
-
config,
|
|
3982
|
-
{ verbose: options.verbose }
|
|
3983
|
-
);
|
|
3984
|
-
await writeVersionFile(vectorDB.dbPath);
|
|
3985
|
-
spinner.succeed(
|
|
3986
|
-
`Incremental reindex complete: ${count}/${totalChanges} files indexed successfully`
|
|
3987
|
-
);
|
|
3988
|
-
}
|
|
3989
|
-
const { isGitAvailable: isGitAvailable3, isGitRepo: isGitRepo3 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
|
|
3990
|
-
const { GitStateTracker: GitStateTracker3 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
|
|
3991
|
-
const gitAvailable2 = await isGitAvailable3();
|
|
3992
|
-
const isRepo2 = await isGitRepo3(rootDir);
|
|
3993
|
-
if (gitAvailable2 && isRepo2) {
|
|
3994
|
-
const gitTracker = new GitStateTracker3(rootDir, vectorDB.dbPath);
|
|
3995
|
-
await gitTracker.initialize();
|
|
3996
|
-
const gitState = gitTracker.getState();
|
|
3997
|
-
if (gitState) {
|
|
3998
|
-
await manifest2.updateGitState(gitState);
|
|
3999
|
-
}
|
|
4000
|
-
}
|
|
4001
|
-
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4002
|
-
return;
|
|
4003
|
-
}
|
|
4004
|
-
spinner.text = "Full reindex required...";
|
|
4410
|
+
let addChunksLock = null;
|
|
4411
|
+
let processingQueue = null;
|
|
4412
|
+
const processAccumulatedChunks = async () => {
|
|
4413
|
+
if (processingQueue) {
|
|
4414
|
+
processingQueue = processingQueue.then(() => doProcessChunks());
|
|
4415
|
+
} else {
|
|
4416
|
+
processingQueue = doProcessChunks();
|
|
4005
4417
|
}
|
|
4006
|
-
|
|
4007
|
-
spinner.text = "Force flag enabled, performing full reindex...";
|
|
4008
|
-
}
|
|
4009
|
-
spinner.text = "Scanning codebase...";
|
|
4010
|
-
let files;
|
|
4011
|
-
if (isModernConfig(config) && config.frameworks.length > 0) {
|
|
4012
|
-
files = await scanCodebaseWithFrameworks(rootDir, config);
|
|
4013
|
-
} else if (isLegacyConfig(config)) {
|
|
4014
|
-
files = await scanCodebase({
|
|
4015
|
-
rootDir,
|
|
4016
|
-
includePatterns: config.indexing.include,
|
|
4017
|
-
excludePatterns: config.indexing.exclude
|
|
4018
|
-
});
|
|
4019
|
-
} else {
|
|
4020
|
-
files = await scanCodebase({
|
|
4021
|
-
rootDir,
|
|
4022
|
-
includePatterns: [],
|
|
4023
|
-
excludePatterns: []
|
|
4024
|
-
});
|
|
4025
|
-
}
|
|
4026
|
-
if (files.length === 0) {
|
|
4027
|
-
spinner.fail("No files found to index");
|
|
4028
|
-
return;
|
|
4029
|
-
}
|
|
4030
|
-
spinner.text = `Found ${files.length} files`;
|
|
4031
|
-
spinner.text = getModelLoadingMessage();
|
|
4032
|
-
const embeddings = new LocalEmbeddings();
|
|
4033
|
-
await embeddings.initialize();
|
|
4034
|
-
spinner.succeed("Embedding model loaded");
|
|
4035
|
-
const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
|
|
4036
|
-
const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
|
|
4037
|
-
const vectorDBBatchSize = 100;
|
|
4038
|
-
spinner.start(`Processing files with ${concurrency}x concurrency...`);
|
|
4039
|
-
const startTime = Date.now();
|
|
4040
|
-
let processedFiles = 0;
|
|
4041
|
-
let processedChunks = 0;
|
|
4042
|
-
const chunkAccumulator = [];
|
|
4043
|
-
const limit = pLimit(concurrency);
|
|
4044
|
-
const indexedFileEntries = [];
|
|
4045
|
-
const progressState = {
|
|
4046
|
-
processedFiles: 0,
|
|
4047
|
-
totalFiles: files.length,
|
|
4048
|
-
wittyMessage: getIndexingMessage()
|
|
4418
|
+
return processingQueue;
|
|
4049
4419
|
};
|
|
4050
|
-
const
|
|
4051
|
-
|
|
4052
|
-
|
|
4053
|
-
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
4060
|
-
|
|
4061
|
-
|
|
4062
|
-
|
|
4063
|
-
|
|
4064
|
-
|
|
4065
|
-
|
|
4066
|
-
|
|
4067
|
-
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4073
|
-
|
|
4420
|
+
const doProcessChunks = async () => {
|
|
4421
|
+
if (chunkAccumulator.length === 0) {
|
|
4422
|
+
return;
|
|
4423
|
+
}
|
|
4424
|
+
const currentPromise = processingQueue;
|
|
4425
|
+
try {
|
|
4426
|
+
const toProcess = chunkAccumulator.splice(0, chunkAccumulator.length);
|
|
4427
|
+
for (let i = 0; i < toProcess.length; i += embeddingBatchSize) {
|
|
4428
|
+
const batch = toProcess.slice(i, Math.min(i + embeddingBatchSize, toProcess.length));
|
|
4429
|
+
progressTracker.setMessage(getEmbeddingMessage());
|
|
4430
|
+
const texts = batch.map((item) => item.content);
|
|
4431
|
+
const embeddingVectors = [];
|
|
4432
|
+
for (let j = 0; j < texts.length; j += EMBEDDING_MICRO_BATCH_SIZE) {
|
|
4433
|
+
const microBatch = texts.slice(j, Math.min(j + EMBEDDING_MICRO_BATCH_SIZE, texts.length));
|
|
4434
|
+
const microResults = await embeddings.embedBatch(microBatch);
|
|
4435
|
+
embeddingVectors.push(...microResults);
|
|
4436
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
4437
|
+
}
|
|
4438
|
+
processedChunks += batch.length;
|
|
4439
|
+
progressTracker.setMessage(`Inserting ${batch.length} chunks into vector space...`);
|
|
4440
|
+
await vectorDB.insertBatch(
|
|
4441
|
+
embeddingVectors,
|
|
4442
|
+
batch.map((item) => item.chunk.metadata),
|
|
4443
|
+
texts
|
|
4444
|
+
);
|
|
4074
4445
|
await new Promise((resolve) => setImmediate(resolve));
|
|
4075
4446
|
}
|
|
4076
|
-
|
|
4077
|
-
|
|
4078
|
-
|
|
4079
|
-
|
|
4080
|
-
|
|
4081
|
-
texts
|
|
4082
|
-
);
|
|
4083
|
-
await new Promise((resolve) => setImmediate(resolve));
|
|
4447
|
+
progressTracker.setMessage(getIndexingMessage());
|
|
4448
|
+
} finally {
|
|
4449
|
+
if (processingQueue === currentPromise) {
|
|
4450
|
+
processingQueue = null;
|
|
4451
|
+
}
|
|
4084
4452
|
}
|
|
4085
|
-
progressState.wittyMessage = getIndexingMessage();
|
|
4086
4453
|
};
|
|
4087
4454
|
const filePromises = files.map(
|
|
4088
4455
|
(file) => limit(async () => {
|
|
@@ -4100,73 +4467,91 @@ async function indexCodebase(options = {}) {
|
|
|
4100
4467
|
astFallback
|
|
4101
4468
|
});
|
|
4102
4469
|
if (chunks.length === 0) {
|
|
4103
|
-
|
|
4104
|
-
progressState.processedFiles = processedFiles;
|
|
4470
|
+
progressTracker.incrementFiles();
|
|
4105
4471
|
return;
|
|
4106
4472
|
}
|
|
4107
|
-
|
|
4108
|
-
|
|
4109
|
-
|
|
4110
|
-
|
|
4473
|
+
{
|
|
4474
|
+
if (addChunksLock) {
|
|
4475
|
+
await addChunksLock;
|
|
4476
|
+
}
|
|
4477
|
+
let releaseAddLock;
|
|
4478
|
+
addChunksLock = new Promise((resolve) => {
|
|
4479
|
+
releaseAddLock = resolve;
|
|
4111
4480
|
});
|
|
4112
|
-
|
|
4113
|
-
|
|
4114
|
-
|
|
4115
|
-
|
|
4116
|
-
|
|
4117
|
-
|
|
4118
|
-
|
|
4119
|
-
|
|
4120
|
-
|
|
4121
|
-
|
|
4481
|
+
try {
|
|
4482
|
+
for (const chunk of chunks) {
|
|
4483
|
+
chunkAccumulator.push({
|
|
4484
|
+
chunk,
|
|
4485
|
+
content: chunk.content
|
|
4486
|
+
});
|
|
4487
|
+
}
|
|
4488
|
+
indexedFileEntries.push({
|
|
4489
|
+
filepath: file,
|
|
4490
|
+
chunkCount: chunks.length,
|
|
4491
|
+
mtime: stats.mtimeMs
|
|
4492
|
+
});
|
|
4493
|
+
progressTracker.incrementFiles();
|
|
4494
|
+
if (chunkAccumulator.length >= vectorDBBatchSize) {
|
|
4495
|
+
await processAccumulatedChunks();
|
|
4496
|
+
}
|
|
4497
|
+
} finally {
|
|
4498
|
+
releaseAddLock();
|
|
4499
|
+
addChunksLock = null;
|
|
4500
|
+
}
|
|
4122
4501
|
}
|
|
4123
4502
|
} catch (error) {
|
|
4124
4503
|
if (options.verbose) {
|
|
4125
4504
|
console.error(chalk5.yellow(`
|
|
4126
4505
|
\u26A0\uFE0F Skipping ${file}: ${error}`));
|
|
4127
4506
|
}
|
|
4128
|
-
|
|
4129
|
-
progressState.processedFiles = processedFiles;
|
|
4507
|
+
progressTracker.incrementFiles();
|
|
4130
4508
|
}
|
|
4131
4509
|
})
|
|
4132
4510
|
);
|
|
4133
4511
|
await Promise.all(filePromises);
|
|
4134
|
-
|
|
4512
|
+
progressTracker.setMessage("Processing final chunks...");
|
|
4135
4513
|
await processAccumulatedChunks();
|
|
4136
|
-
|
|
4137
|
-
|
|
4138
|
-
|
|
4139
|
-
|
|
4140
|
-
|
|
4141
|
-
|
|
4142
|
-
|
|
4143
|
-
|
|
4144
|
-
|
|
4145
|
-
|
|
4146
|
-
|
|
4147
|
-
|
|
4148
|
-
|
|
4149
|
-
|
|
4150
|
-
|
|
4151
|
-
|
|
4152
|
-
|
|
4153
|
-
|
|
4154
|
-
|
|
4155
|
-
|
|
4156
|
-
|
|
4514
|
+
} finally {
|
|
4515
|
+
progressTracker.stop();
|
|
4516
|
+
}
|
|
4517
|
+
spinner.start("Saving index manifest...");
|
|
4518
|
+
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
4519
|
+
await manifest.updateFiles(
|
|
4520
|
+
indexedFileEntries.map((entry) => ({
|
|
4521
|
+
filepath: entry.filepath,
|
|
4522
|
+
// Use actual file mtime for accurate change detection
|
|
4523
|
+
lastModified: entry.mtime,
|
|
4524
|
+
chunkCount: entry.chunkCount
|
|
4525
|
+
}))
|
|
4526
|
+
);
|
|
4527
|
+
await updateGitState(rootDir, vectorDB, manifest);
|
|
4528
|
+
spinner.succeed("Manifest saved");
|
|
4529
|
+
await writeVersionFile(vectorDB.dbPath);
|
|
4530
|
+
const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
4531
|
+
spinner.succeed(
|
|
4532
|
+
`Indexed ${progressTracker.getProcessedCount()} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
|
|
4533
|
+
);
|
|
4534
|
+
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4535
|
+
}
|
|
4536
|
+
async function indexCodebase(options = {}) {
|
|
4537
|
+
const rootDir = options.rootDir ?? process.cwd();
|
|
4538
|
+
const spinner = ora("Starting indexing process...").start();
|
|
4539
|
+
try {
|
|
4540
|
+
spinner.text = "Loading configuration...";
|
|
4541
|
+
const config = await configService.load(rootDir);
|
|
4542
|
+
spinner.text = "Initializing vector database...";
|
|
4543
|
+
const vectorDB = new VectorDB(rootDir);
|
|
4544
|
+
await vectorDB.initialize();
|
|
4545
|
+
if (!options.force) {
|
|
4546
|
+
const completed = await tryIncrementalIndex(rootDir, vectorDB, config, options, spinner);
|
|
4547
|
+
if (completed) {
|
|
4548
|
+
return;
|
|
4157
4549
|
}
|
|
4550
|
+
} else {
|
|
4551
|
+
spinner.text = "Force flag enabled, performing full reindex...";
|
|
4158
4552
|
}
|
|
4159
|
-
|
|
4160
|
-
await writeVersionFile(vectorDB.dbPath);
|
|
4161
|
-
const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
4162
|
-
spinner.succeed(
|
|
4163
|
-
`Indexed ${processedFiles} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
|
|
4164
|
-
);
|
|
4165
|
-
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4553
|
+
await performFullIndex(rootDir, vectorDB, config, options, spinner);
|
|
4166
4554
|
} catch (error) {
|
|
4167
|
-
if (updateInterval) {
|
|
4168
|
-
clearInterval(updateInterval);
|
|
4169
|
-
}
|
|
4170
4555
|
spinner.fail(`Indexing failed: ${error}`);
|
|
4171
4556
|
throw error;
|
|
4172
4557
|
}
|
|
@@ -4186,6 +4571,7 @@ var init_indexer = __esm({
|
|
|
4186
4571
|
init_incremental();
|
|
4187
4572
|
init_loading_messages();
|
|
4188
4573
|
init_constants();
|
|
4574
|
+
init_progress_tracker();
|
|
4189
4575
|
}
|
|
4190
4576
|
});
|
|
4191
4577
|
|
|
@@ -4926,7 +5312,7 @@ async function createNewConfig(rootDir, options) {
|
|
|
4926
5312
|
path: ".",
|
|
4927
5313
|
enabled: true,
|
|
4928
5314
|
config: {
|
|
4929
|
-
include: ["**/*.{ts,tsx,js,jsx,py,go,rs,java,c,cpp,cs}"],
|
|
5315
|
+
include: ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,c,cpp,cs}"],
|
|
4930
5316
|
exclude: [
|
|
4931
5317
|
"**/node_modules/**",
|
|
4932
5318
|
"**/dist/**",
|
|
@@ -5279,9 +5665,12 @@ var FindSimilarSchema = z2.object({
|
|
|
5279
5665
|
|
|
5280
5666
|
// src/mcp/schemas/file.schema.ts
|
|
5281
5667
|
import { z as z3 } from "zod";
|
|
5282
|
-
var
|
|
5283
|
-
|
|
5284
|
-
"
|
|
5668
|
+
var GetFilesContextSchema = z3.object({
|
|
5669
|
+
filepaths: z3.union([
|
|
5670
|
+
z3.string().min(1, "Filepath cannot be empty"),
|
|
5671
|
+
z3.array(z3.string().min(1, "Filepath cannot be empty")).min(1, "Array must contain at least one filepath").max(50, "Maximum 50 files per request")
|
|
5672
|
+
]).describe(
|
|
5673
|
+
"Single filepath or array of filepaths (relative to workspace root).\n\nSingle file: 'src/components/Button.tsx'\nMultiple files: ['src/auth.ts', 'src/user.ts']\n\nMaximum 50 files per request for batch operations."
|
|
5285
5674
|
),
|
|
5286
5675
|
includeRelated: z3.boolean().default(true).describe(
|
|
5287
5676
|
"Include semantically related chunks from nearby code.\n\nDefault: true\n\nWhen enabled, also returns related code from other files that are semantically similar to the target file's contents."
|
|
@@ -5304,22 +5693,56 @@ var tools = [
|
|
|
5304
5693
|
toMCPToolSchema(
|
|
5305
5694
|
SemanticSearchSchema,
|
|
5306
5695
|
"semantic_search",
|
|
5307
|
-
|
|
5696
|
+
`Search codebase by MEANING, not text. USE THIS INSTEAD OF grep/ripgrep for finding implementations, features, or understanding how code works.
|
|
5697
|
+
|
|
5698
|
+
Examples:
|
|
5699
|
+
- "Where is authentication handled?" \u2192 semantic_search({ query: "handles user authentication" })
|
|
5700
|
+
- "How does payment work?" \u2192 semantic_search({ query: "processes payment transactions" })
|
|
5701
|
+
|
|
5702
|
+
Use natural language describing what the code DOES, not function names. For exact string matching, use grep instead.
|
|
5703
|
+
|
|
5704
|
+
Results include a relevance category (highly_relevant, relevant, loosely_related, not_relevant) for each match.`
|
|
5308
5705
|
),
|
|
5309
5706
|
toMCPToolSchema(
|
|
5310
5707
|
FindSimilarSchema,
|
|
5311
5708
|
"find_similar",
|
|
5312
|
-
|
|
5709
|
+
`Find code structurally similar to a given snippet. Use for:
|
|
5710
|
+
- Ensuring consistency when adding new code
|
|
5711
|
+
- Finding duplicate implementations
|
|
5712
|
+
- Refactoring similar patterns together
|
|
5713
|
+
|
|
5714
|
+
Provide at least 10 characters of code to match against. Results include a relevance category for each match.`
|
|
5313
5715
|
),
|
|
5314
5716
|
toMCPToolSchema(
|
|
5315
|
-
|
|
5316
|
-
"
|
|
5317
|
-
|
|
5717
|
+
GetFilesContextSchema,
|
|
5718
|
+
"get_files_context",
|
|
5719
|
+
`Get context for one or more files including dependencies and test coverage.
|
|
5720
|
+
|
|
5721
|
+
MANDATORY: Call this BEFORE editing any file. Accepts single path or array of paths.
|
|
5722
|
+
|
|
5723
|
+
Single file:
|
|
5724
|
+
get_files_context({ filepaths: "src/auth.ts" })
|
|
5725
|
+
|
|
5726
|
+
Multiple files (batch):
|
|
5727
|
+
get_files_context({ filepaths: ["src/auth.ts", "src/user.ts"] })
|
|
5728
|
+
|
|
5729
|
+
Returns for each file:
|
|
5730
|
+
- All chunks and related code
|
|
5731
|
+
- testAssociations (which tests cover this file)
|
|
5732
|
+
- Relevance scoring
|
|
5733
|
+
|
|
5734
|
+
Batch calls are more efficient than multiple single-file calls.`
|
|
5318
5735
|
),
|
|
5319
5736
|
toMCPToolSchema(
|
|
5320
5737
|
ListFunctionsSchema,
|
|
5321
5738
|
"list_functions",
|
|
5322
|
-
|
|
5739
|
+
`Fast symbol lookup by naming pattern. Use when searching by NAME, not behavior.
|
|
5740
|
+
|
|
5741
|
+
Examples:
|
|
5742
|
+
- "Show all controllers" \u2192 list_functions({ pattern: ".*Controller.*" })
|
|
5743
|
+
- "Find service classes" \u2192 list_functions({ pattern: ".*Service$" })
|
|
5744
|
+
|
|
5745
|
+
10x faster than semantic_search for structural/architectural queries. Use semantic_search instead when searching by what code DOES.`
|
|
5323
5746
|
)
|
|
5324
5747
|
];
|
|
5325
5748
|
|
|
@@ -5618,32 +6041,69 @@ async function startMCPServer(options) {
|
|
|
5618
6041
|
};
|
|
5619
6042
|
}
|
|
5620
6043
|
)(args);
|
|
5621
|
-
case "
|
|
6044
|
+
case "get_files_context":
|
|
5622
6045
|
return await wrapToolHandler(
|
|
5623
|
-
|
|
6046
|
+
GetFilesContextSchema,
|
|
5624
6047
|
async (validatedArgs) => {
|
|
5625
|
-
|
|
6048
|
+
const filepaths = Array.isArray(validatedArgs.filepaths) ? validatedArgs.filepaths : [validatedArgs.filepaths];
|
|
6049
|
+
const isSingleFile = !Array.isArray(validatedArgs.filepaths);
|
|
6050
|
+
log(`Getting context for: ${filepaths.join(", ")}`);
|
|
5626
6051
|
await checkAndReconnect();
|
|
5627
|
-
const
|
|
5628
|
-
const
|
|
5629
|
-
|
|
5630
|
-
|
|
6052
|
+
const fileEmbeddings = await Promise.all(filepaths.map((fp) => embeddings.embed(fp)));
|
|
6053
|
+
const allFileSearches = await Promise.all(
|
|
6054
|
+
fileEmbeddings.map(
|
|
6055
|
+
(embedding, i) => vectorDB.search(embedding, 50, filepaths[i])
|
|
6056
|
+
)
|
|
5631
6057
|
);
|
|
5632
|
-
|
|
5633
|
-
|
|
5634
|
-
|
|
5635
|
-
|
|
5636
|
-
const relatedOtherFiles = related.filter(
|
|
5637
|
-
(r) => !r.metadata.file.includes(validatedArgs.filepath) && !validatedArgs.filepath.includes(r.metadata.file)
|
|
6058
|
+
const fileChunksMap = filepaths.map((filepath, i) => {
|
|
6059
|
+
const allResults = allFileSearches[i];
|
|
6060
|
+
return allResults.filter(
|
|
6061
|
+
(r) => r.metadata.file.includes(filepath) || filepath.includes(r.metadata.file)
|
|
5638
6062
|
);
|
|
5639
|
-
|
|
6063
|
+
});
|
|
6064
|
+
let relatedChunksMap = [];
|
|
6065
|
+
if (validatedArgs.includeRelated) {
|
|
6066
|
+
const filesWithChunks = fileChunksMap.map((chunks, i) => ({ chunks, filepath: filepaths[i], index: i })).filter(({ chunks }) => chunks.length > 0);
|
|
6067
|
+
if (filesWithChunks.length > 0) {
|
|
6068
|
+
const relatedEmbeddings = await Promise.all(
|
|
6069
|
+
filesWithChunks.map(({ chunks }) => embeddings.embed(chunks[0].content))
|
|
6070
|
+
);
|
|
6071
|
+
const relatedSearches = await Promise.all(
|
|
6072
|
+
relatedEmbeddings.map(
|
|
6073
|
+
(embedding, i) => vectorDB.search(embedding, 5, filesWithChunks[i].chunks[0].content)
|
|
6074
|
+
)
|
|
6075
|
+
);
|
|
6076
|
+
relatedChunksMap = Array.from({ length: filepaths.length }, () => []);
|
|
6077
|
+
filesWithChunks.forEach(({ filepath, index }, i) => {
|
|
6078
|
+
const related = relatedSearches[i];
|
|
6079
|
+
relatedChunksMap[index] = related.filter(
|
|
6080
|
+
(r) => !r.metadata.file.includes(filepath) && !filepath.includes(r.metadata.file)
|
|
6081
|
+
);
|
|
6082
|
+
});
|
|
6083
|
+
}
|
|
6084
|
+
}
|
|
6085
|
+
const filesData = {};
|
|
6086
|
+
filepaths.forEach((filepath, i) => {
|
|
6087
|
+
const fileChunks = fileChunksMap[i];
|
|
6088
|
+
const relatedChunks = relatedChunksMap[i] || [];
|
|
6089
|
+
filesData[filepath] = {
|
|
6090
|
+
chunks: [...fileChunks, ...relatedChunks]
|
|
6091
|
+
};
|
|
6092
|
+
});
|
|
6093
|
+
log(`Found ${Object.values(filesData).reduce((sum, f) => sum + f.chunks.length, 0)} total chunks`);
|
|
6094
|
+
if (isSingleFile) {
|
|
6095
|
+
const filepath = filepaths[0];
|
|
6096
|
+
return {
|
|
6097
|
+
indexInfo: getIndexMetadata(),
|
|
6098
|
+
file: filepath,
|
|
6099
|
+
chunks: filesData[filepath].chunks
|
|
6100
|
+
};
|
|
6101
|
+
} else {
|
|
6102
|
+
return {
|
|
6103
|
+
indexInfo: getIndexMetadata(),
|
|
6104
|
+
files: filesData
|
|
6105
|
+
};
|
|
5640
6106
|
}
|
|
5641
|
-
log(`Found ${results.length} chunks`);
|
|
5642
|
-
return {
|
|
5643
|
-
indexInfo: getIndexMetadata(),
|
|
5644
|
-
file: validatedArgs.filepath,
|
|
5645
|
-
chunks: results
|
|
5646
|
-
};
|
|
5647
6107
|
}
|
|
5648
6108
|
)(args);
|
|
5649
6109
|
case "list_functions":
|