@liendev/lien 0.14.0 → 0.16.0

This diff shows the changes between publicly released versions of the package as they appear in the supported public registries, and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -161,7 +161,7 @@ function migrateConfig(oldConfig) {
161
161
  path: ".",
162
162
  enabled: true,
163
163
  config: {
164
- include: oldConfig.indexing.include ?? ["**/*.{ts,tsx,js,jsx,py,go,rs,java,c,cpp,cs}"],
164
+ include: oldConfig.indexing.include ?? ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,c,cpp,cs}"],
165
165
  exclude: oldConfig.indexing.exclude ?? [
166
166
  "**/node_modules/**",
167
167
  "**/dist/**",
@@ -181,7 +181,7 @@ function migrateConfig(oldConfig) {
181
181
  path: ".",
182
182
  enabled: true,
183
183
  config: {
184
- include: ["**/*.{ts,tsx,js,jsx,py,go,rs,java,c,cpp,cs}"],
184
+ include: ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,c,cpp,cs}"],
185
185
  exclude: [
186
186
  "**/node_modules/**",
187
187
  "**/dist/**",
@@ -1030,7 +1030,7 @@ async function scanCodebase(options) {
1030
1030
  ".lien/**",
1031
1031
  ...excludePatterns
1032
1032
  ]);
1033
- const patterns = includePatterns.length > 0 ? includePatterns : ["**/*.{ts,tsx,js,jsx,py,go,rs,java,cpp,c,h,md,mdx}"];
1033
+ const patterns = includePatterns.length > 0 ? includePatterns : ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,cpp,c,cs,h,md,mdx}"];
1034
1034
  const allFiles = [];
1035
1035
  for (const pattern of patterns) {
1036
1036
  const files = await glob(pattern, {
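Editor's note: the default include globs in this release add `php` (and, in the scanner fallback above, `cs`) to the matched extensions. A minimal sketch of what that brace pattern matches, assuming the same `glob` package the scanner calls here:

    import { glob } from "glob";

    // Illustrative only: the updated fallback pattern now also picks up PHP and C# sources.
    const pattern = "**/*.{ts,tsx,js,jsx,py,php,go,rs,java,cpp,c,cs,h,md,mdx}";
    const files = await glob(pattern, {
      ignore: ["**/node_modules/**", "**/dist/**", ".lien/**"],
    });
    // e.g. ["src/index.ts", "app/Http/Kernel.php", "Services/UserService.cs", ...]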
@@ -1388,6 +1388,7 @@ import Parser from "tree-sitter";
1388
1388
  import TypeScript from "tree-sitter-typescript";
1389
1389
  import JavaScript from "tree-sitter-javascript";
1390
1390
  import PHPParser from "tree-sitter-php";
1391
+ import Python from "tree-sitter-python";
1391
1392
  import { extname } from "path";
1392
1393
  function getParser(language) {
1393
1394
  if (!parserCache.has(language)) {
@@ -1414,6 +1415,8 @@ function detectLanguage2(filePath) {
1414
1415
  return "javascript";
1415
1416
  case "php":
1416
1417
  return "php";
1418
+ case "py":
1419
+ return "python";
1417
1420
  default:
1418
1421
  return null;
1419
1422
  }
@@ -1447,8 +1450,9 @@ var init_parser = __esm({
1447
1450
  languageConfig = {
1448
1451
  typescript: TypeScript.typescript,
1449
1452
  javascript: JavaScript,
1450
- php: PHPParser.php
1453
+ php: PHPParser.php,
1451
1454
  // Note: tree-sitter-php exports both 'php' (mixed HTML/PHP) and 'php_only'
1455
+ python: Python
1452
1456
  };
1453
1457
  }
1454
1458
  });
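Editor's note: with `tree-sitter-python` added to `languageConfig`, `getParser("python")` can now hand back a parser wired for Python. A standalone sketch of what that registration amounts to:

    import Parser from "tree-sitter";
    import Python from "tree-sitter-python";

    const parser = new Parser();
    parser.setLanguage(Python);              // same grammar object stored in languageConfig.python
    const tree = parser.parse("def greet(name):\n    return name\n");
    console.log(tree.rootNode.type);         // "module" — the top-level node the traverser starts from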
@@ -1524,7 +1528,35 @@ function extractInterfaceInfo(node, _content, _parentClass) {
1524
1528
  signature: `interface ${nameNode.text}`
1525
1529
  };
1526
1530
  }
1527
- function extractSymbolInfo(node, content, parentClass) {
1531
+ function extractPythonFunctionInfo(node, content, parentClass) {
1532
+ const nameNode = node.childForFieldName("name");
1533
+ if (!nameNode) return null;
1534
+ return {
1535
+ name: nameNode.text,
1536
+ type: parentClass ? "method" : "function",
1537
+ startLine: node.startPosition.row + 1,
1538
+ endLine: node.endPosition.row + 1,
1539
+ parentClass,
1540
+ signature: extractSignature(node, content),
1541
+ parameters: extractParameters(node, content),
1542
+ complexity: calculateComplexity(node)
1543
+ };
1544
+ }
1545
+ function extractPythonClassInfo(node, _content, _parentClass) {
1546
+ const nameNode = node.childForFieldName("name");
1547
+ if (!nameNode) return null;
1548
+ return {
1549
+ name: nameNode.text,
1550
+ type: "class",
1551
+ startLine: node.startPosition.row + 1,
1552
+ endLine: node.endPosition.row + 1,
1553
+ signature: `class ${nameNode.text}`
1554
+ };
1555
+ }
1556
+ function extractSymbolInfo(node, content, parentClass, language) {
1557
+ if (node.type === "function_definition" && language === "python") {
1558
+ return extractPythonFunctionInfo(node, content, parentClass);
1559
+ }
1528
1560
  const extractor = symbolExtractors[node.type];
1529
1561
  return extractor ? extractor(node, content, parentClass) : null;
1530
1562
  }
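Editor's note: the new `language` parameter lets a Python `function_definition` bypass the shared extractor table (where `function_definition` maps to the PHP extractor) and go to `extractPythonFunctionInfo` instead. For a sample input like the class below, the returned symbol is roughly the following (field values illustrative; the exact `extractSignature` output is assumed):

    # sample Python input being chunked
    class UserRepo:
        def find(self, user_id):
            return self.db.get(user_id)

    // approximate symbol for the inner def, as produced by extractPythonFunctionInfo
    {
      name: "find",
      type: "method",                          // "function" when there is no enclosing class
      startLine: 2,
      endLine: 3,
      parentClass: "UserRepo",
      signature: "def find(self, user_id):",   // assumed shape of extractSignature's output
      parameters: [...],
      complexity: 1
    }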
@@ -1563,23 +1595,39 @@ function extractReturnType(node, _content) {
1563
1595
  function calculateComplexity(node) {
1564
1596
  let complexity = 1;
1565
1597
  const decisionPoints = [
1566
- // TypeScript/JavaScript
1598
+ // Common across languages (TypeScript/JavaScript/Python/PHP)
1567
1599
  "if_statement",
1600
+ // if conditions
1568
1601
  "while_statement",
1569
- "do_statement",
1570
- // do...while loops
1602
+ // while loops
1571
1603
  "for_statement",
1572
- "for_in_statement",
1573
- "for_of_statement",
1574
- // for...of loops
1604
+ // for loops
1575
1605
  "switch_case",
1606
+ // switch/case statements
1576
1607
  "catch_clause",
1608
+ // try/catch error handling
1577
1609
  "ternary_expression",
1610
+ // Ternary operator (a ? b : c)
1578
1611
  "binary_expression",
1579
- // For && and ||
1580
- // PHP
1581
- "foreach_statement"
1612
+ // For && and || logical operators
1613
+ // TypeScript/JavaScript specific
1614
+ "do_statement",
1615
+ // do...while loops
1616
+ "for_in_statement",
1617
+ // for...in loops
1618
+ "for_of_statement",
1619
+ // for...of loops
1620
+ // PHP specific
1621
+ "foreach_statement",
1582
1622
  // PHP foreach loops
1623
+ // Python specific
1624
+ "elif_clause",
1625
+ // Python elif (adds decision point)
1626
+ // Note: 'else_clause' is NOT a decision point (it's the default path)
1627
+ "except_clause",
1628
+ // Python except (try/except)
1629
+ "conditional_expression"
1630
+ // Python ternary (x if cond else y)
1583
1631
  ];
1584
1632
  function traverse(n) {
1585
1633
  if (decisionPoints.includes(n.type)) {
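Editor's note: with the Python node types added, complexity starts at 1 and gains a point for each listed node found while traversing the function. A worked example on a small Python input (sample source only; counts follow the list above):

    def classify(n, items):                      # base complexity          = 1
        if n < 0:                                # if_statement             +1
            return "negative"
        elif n == 0:                             # elif_clause              +1
            return "zero"
        for item in items:                       # for_statement            +1
            try:
                item.check()
            except ValueError:                   # except_clause            +1
                pass
        return "big" if n > 10 else "small"      # conditional_expression   +1
                                                 # total                     6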
@@ -1608,7 +1656,13 @@ function extractImports(rootNode) {
1608
1656
  if (sourceNode) {
1609
1657
  const importPath = sourceNode.text.replace(/['"]/g, "");
1610
1658
  imports.push(importPath);
1659
+ } else {
1660
+ const importText = node.text.split("\n")[0];
1661
+ imports.push(importText);
1611
1662
  }
1663
+ } else if (node.type === "import_from_statement") {
1664
+ const importText = node.text.split("\n")[0];
1665
+ imports.push(importText);
1612
1666
  }
1613
1667
  if (node === rootNode) {
1614
1668
  for (let i = 0; i < node.namedChildCount; i++) {
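Editor's note: Python `import` nodes have no string `source` field the way JS `import ... from "mod"` does, so the new fallback records the first line of the node's text, and `from ... import ...` statements (`import_from_statement`) are recorded the same way. Illustrative input and the resulting `imports` entries:

    # sample Python input
    import os
    from pathlib import Path
    from typing import (
        List,
        Optional,
    )

    // approximate contents of the imports array afterwards
    ["import os", "from pathlib import Path", "from typing import ("]

The truncated last entry follows from taking only the first line of multi-line import nodes (`node.text.split("\n")[0]`).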
@@ -1635,9 +1689,16 @@ var init_symbols = __esm({
1635
1689
  "interface_declaration": extractInterfaceInfo,
1636
1690
  // PHP
1637
1691
  "function_definition": extractFunctionInfo,
1638
- // PHP functions
1639
- "method_declaration": extractMethodInfo
1692
+ // PHP functions (Python handled via language check in extractSymbolInfo)
1693
+ "method_declaration": extractMethodInfo,
1640
1694
  // PHP methods
1695
+ // Python
1696
+ "async_function_definition": extractPythonFunctionInfo,
1697
+ // Python async functions
1698
+ "class_definition": extractPythonClassInfo
1699
+ // Python classes
1700
+ // Note: Python regular functions use 'function_definition' (same as PHP)
1701
+ // They are dispatched to extractPythonFunctionInfo via language check in extractSymbolInfo()
1641
1702
  };
1642
1703
  }
1643
1704
  });
@@ -1795,6 +1856,69 @@ var init_php = __esm({
1795
1856
  }
1796
1857
  });
1797
1858
 
1859
+ // src/indexer/ast/traversers/python.ts
1860
+ var PythonTraverser;
1861
+ var init_python = __esm({
1862
+ "src/indexer/ast/traversers/python.ts"() {
1863
+ "use strict";
1864
+ PythonTraverser = class {
1865
+ targetNodeTypes = [
1866
+ "function_definition",
1867
+ "async_function_definition"
1868
+ ];
1869
+ containerTypes = [
1870
+ "class_definition"
1871
+ // We extract methods, not the class itself
1872
+ ];
1873
+ declarationTypes = [
1874
+ // Python doesn't have const/let/var declarations like JS/TS
1875
+ // Functions are always defined with 'def' or 'async def'
1876
+ ];
1877
+ functionTypes = [
1878
+ "function_definition",
1879
+ "async_function_definition"
1880
+ ];
1881
+ shouldExtractChildren(node) {
1882
+ return this.containerTypes.includes(node.type);
1883
+ }
1884
+ isDeclarationWithFunction(_node) {
1885
+ return false;
1886
+ }
1887
+ getContainerBody(node) {
1888
+ if (node.type === "class_definition") {
1889
+ return node.childForFieldName("body");
1890
+ }
1891
+ return null;
1892
+ }
1893
+ shouldTraverseChildren(node) {
1894
+ return node.type === "module" || // Top-level Python file
1895
+ node.type === "block";
1896
+ }
1897
+ findParentContainerName(node) {
1898
+ let current = node.parent;
1899
+ while (current) {
1900
+ if (current.type === "class_definition") {
1901
+ const nameNode = current.childForFieldName("name");
1902
+ return nameNode?.text;
1903
+ }
1904
+ current = current.parent;
1905
+ }
1906
+ return void 0;
1907
+ }
1908
+ /**
1909
+ * Python doesn't have this pattern (const x = () => {})
1910
+ * Functions are always defined with 'def' or 'async def'
1911
+ */
1912
+ findFunctionInDeclaration(_node) {
1913
+ return {
1914
+ hasFunction: false,
1915
+ functionNode: null
1916
+ };
1917
+ }
1918
+ };
1919
+ }
1920
+ });
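Editor's note: the traverser walks `module` and `block` nodes, emits `function_definition`/`async_function_definition` nodes, and treats `class_definition` only as a container whose `body` is descended into, with the class name recovered later by `findParentContainerName`. On a small sample file the effect is roughly:

    # sample.py
    def top_level():
        pass

    class Service:
        async def run(self):
            pass

    // approximate outcome: two chunks are produced — top_level (a function) and
    // Service.run (a method with parentClass "Service"); the class_definition node
    // itself is not emitted as a chunk, per the containerTypes comment above.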
1921
+
1798
1922
  // src/indexer/ast/traversers/index.ts
1799
1923
  function getTraverser(language) {
1800
1924
  const traverser = traverserRegistry[language];
@@ -1809,10 +1933,12 @@ var init_traversers = __esm({
1809
1933
  "use strict";
1810
1934
  init_typescript();
1811
1935
  init_php();
1936
+ init_python();
1812
1937
  traverserRegistry = {
1813
1938
  typescript: new TypeScriptTraverser(),
1814
1939
  javascript: new JavaScriptTraverser(),
1815
- php: new PHPTraverser()
1940
+ php: new PHPTraverser(),
1941
+ python: new PythonTraverser()
1816
1942
  };
1817
1943
  }
1818
1944
  });
@@ -1843,7 +1969,7 @@ function chunkByAST(filepath, content, options = {}) {
1843
1969
  }
1844
1970
  }
1845
1971
  const parentClassName = traverser.findParentContainerName(actualNode);
1846
- const symbolInfo = extractSymbolInfo(actualNode, content, parentClassName);
1972
+ const symbolInfo = extractSymbolInfo(actualNode, content, parentClassName, language);
1847
1973
  const nodeContent = getNodeContent(node, lines);
1848
1974
  chunks.push(createChunk(filepath, node, nodeContent, symbolInfo, fileImports, language));
1849
1975
  }
@@ -2398,26 +2524,73 @@ var init_relevance = __esm({
2398
2524
  });
2399
2525
 
2400
2526
  // src/vectordb/intent-classifier.ts
2527
+ function getSortedRules() {
2528
+ if (cachedSortedRules === null) {
2529
+ cachedSortedRules = [...INTENT_RULES].sort((a, b) => b.priority - a.priority);
2530
+ }
2531
+ return cachedSortedRules;
2532
+ }
2401
2533
  function classifyQueryIntent(query) {
2402
2534
  const lower = query.toLowerCase().trim();
2403
- if (lower.match(/where\s+(is|are|does|can\s+i\s+find)/) || lower.match(/find\s+the\s+/) || lower.match(/locate\s+/)) {
2404
- return "location" /* LOCATION */;
2405
- }
2406
- if (lower.match(/how\s+does\s+.*\s+work/) || lower.match(/what\s+(is|are|does)/) || lower.match(/explain\s+/) || lower.match(/understand\s+/) || lower.match(/\b(process|workflow|architecture)\b/)) {
2407
- return "conceptual" /* CONCEPTUAL */;
2408
- }
2409
- if (lower.match(/how\s+(is|are)\s+.*\s+(implemented|built|coded)/) || lower.match(/implementation\s+of/) || lower.match(/source\s+code\s+for/)) {
2410
- return "implementation" /* IMPLEMENTATION */;
2535
+ const sortedRules = getSortedRules();
2536
+ for (const rule of sortedRules) {
2537
+ if (rule.patterns.some((pattern) => pattern.test(lower))) {
2538
+ return rule.intent;
2539
+ }
2411
2540
  }
2412
2541
  return "implementation" /* IMPLEMENTATION */;
2413
2542
  }
2543
+ var INTENT_RULES, INITIAL_RULE_COUNT, cachedSortedRules;
2414
2544
  var init_intent_classifier = __esm({
2415
2545
  "src/vectordb/intent-classifier.ts"() {
2416
2546
  "use strict";
2547
+ INTENT_RULES = [
2548
+ // LOCATION intent (highest priority - most specific)
2549
+ {
2550
+ intent: "location" /* LOCATION */,
2551
+ priority: 3,
2552
+ patterns: [
2553
+ /where\s+(is|are|does|can\s+i\s+find)/,
2554
+ /find\s+the\s+/,
2555
+ /locate\s+/
2556
+ ]
2557
+ },
2558
+ // CONCEPTUAL intent (medium priority)
2559
+ {
2560
+ intent: "conceptual" /* CONCEPTUAL */,
2561
+ priority: 2,
2562
+ patterns: [
2563
+ /how\s+does\s+.*\s+work/,
2564
+ /what\s+(is|are|does)/,
2565
+ /explain\s+/,
2566
+ /understand\s+/,
2567
+ /\b(process|workflow|architecture)\b/
2568
+ ]
2569
+ },
2570
+ // IMPLEMENTATION intent (low priority - catches "how is X implemented")
2571
+ {
2572
+ intent: "implementation" /* IMPLEMENTATION */,
2573
+ priority: 1,
2574
+ patterns: [
2575
+ /how\s+(is|are)\s+.*\s+(implemented|built|coded)/,
2576
+ /implementation\s+of/,
2577
+ /source\s+code\s+for/
2578
+ ]
2579
+ }
2580
+ ];
2581
+ INITIAL_RULE_COUNT = INTENT_RULES.length;
2582
+ cachedSortedRules = null;
2417
2583
  }
2418
2584
  });
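Editor's note: the regex chain is replaced by a rule table that is sorted once by descending priority and cached, so location patterns are tried before conceptual and implementation ones. A hedged sketch of the resulting behaviour on a few queries:

    classifyQueryIntent("where is the session token validated");   // → "location"       (priority 3)
    classifyQueryIntent("explain the indexing workflow");          // → "conceptual"     (priority 2)
    classifyQueryIntent("implementation of the retry helper");     // → "implementation" (priority 1)
    classifyQueryIntent("refactor the parser");                    // → "implementation" (default, no rule matched)

    // Priority ordering is what matters when rules overlap:
    classifyQueryIntent("where does the indexing workflow run");   // matches a location pattern and a
                                                                    // conceptual pattern → "location" wins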
2419
2585
 
2420
- // src/vectordb/query.ts
2586
+ // src/vectordb/boosting/types.ts
2587
+ var init_types2 = __esm({
2588
+ "src/vectordb/boosting/types.ts"() {
2589
+ "use strict";
2590
+ }
2591
+ });
2592
+
2593
+ // src/vectordb/boosting/strategies.ts
2421
2594
  import path13 from "path";
2422
2595
  function isDocumentationFile(filepath) {
2423
2596
  const lower = filepath.toLowerCase();
@@ -2455,106 +2628,162 @@ function isUtilityFile(filepath) {
2455
2628
  }
2456
2629
  return false;
2457
2630
  }
2458
- function boostPathRelevance(query, filepath, baseScore) {
2459
- const queryTokens = query.toLowerCase().split(/\s+/);
2460
- const pathSegments = filepath.toLowerCase().split("/");
2461
- let boostFactor = 1;
2462
- for (const token of queryTokens) {
2463
- if (token.length <= 2) continue;
2464
- if (pathSegments.some((seg) => seg.includes(token))) {
2465
- boostFactor *= 0.9;
2466
- }
2467
- }
2468
- return baseScore * boostFactor;
2469
- }
2470
- function boostFilenameRelevance(query, filepath, baseScore) {
2471
- const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2472
- const queryTokens = query.toLowerCase().split(/\s+/);
2473
- let boostFactor = 1;
2474
- for (const token of queryTokens) {
2475
- if (token.length <= 2) continue;
2476
- if (filename === token) {
2477
- boostFactor *= 0.7;
2478
- } else if (filename.includes(token)) {
2479
- boostFactor *= 0.8;
2480
- }
2481
- }
2482
- return baseScore * boostFactor;
2483
- }
2484
- function boostForLocationIntent(query, filepath, baseScore) {
2485
- let score = baseScore;
2486
- const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2487
- const queryTokens = query.toLowerCase().split(/\s+/);
2488
- for (const token of queryTokens) {
2489
- if (token.length <= 2) continue;
2490
- if (filename === token) {
2491
- score *= 0.6;
2492
- } else if (filename.includes(token)) {
2493
- score *= 0.7;
2494
- }
2495
- }
2496
- score = boostPathRelevance(query, filepath, score);
2497
- if (isTestFile(filepath)) {
2498
- score *= 1.1;
2499
- }
2500
- return score;
2501
- }
2502
- function boostForConceptualIntent(query, filepath, baseScore) {
2503
- let score = baseScore;
2504
- if (isDocumentationFile(filepath)) {
2505
- score *= 0.65;
2506
- const lower = filepath.toLowerCase();
2507
- if (lower.includes("architecture") || lower.includes("workflow") || lower.includes("flow")) {
2508
- score *= 0.9;
2509
- }
2510
- }
2511
- if (isUtilityFile(filepath)) {
2512
- score *= 1.05;
2513
- }
2514
- const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2515
- const queryTokens = query.toLowerCase().split(/\s+/);
2516
- for (const token of queryTokens) {
2517
- if (token.length <= 2) continue;
2518
- if (filename.includes(token)) {
2519
- score *= 0.9;
2520
- }
2631
+ var PathBoostingStrategy, FilenameBoostingStrategy, FileTypeBoostingStrategy;
2632
+ var init_strategies = __esm({
2633
+ "src/vectordb/boosting/strategies.ts"() {
2634
+ "use strict";
2635
+ init_intent_classifier();
2636
+ PathBoostingStrategy = class {
2637
+ name = "path-matching";
2638
+ apply(query, filepath, baseScore) {
2639
+ const queryTokens = query.toLowerCase().split(/\s+/);
2640
+ const pathSegments = filepath.toLowerCase().split("/");
2641
+ let boostFactor = 1;
2642
+ for (const token of queryTokens) {
2643
+ if (token.length <= 2) continue;
2644
+ if (pathSegments.some((seg) => seg.includes(token))) {
2645
+ boostFactor *= 0.9;
2646
+ }
2647
+ }
2648
+ return baseScore * boostFactor;
2649
+ }
2650
+ };
2651
+ FilenameBoostingStrategy = class {
2652
+ name = "filename-matching";
2653
+ apply(query, filepath, baseScore) {
2654
+ const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2655
+ const queryTokens = query.toLowerCase().split(/\s+/);
2656
+ let boostFactor = 1;
2657
+ for (const token of queryTokens) {
2658
+ if (token.length <= 2) continue;
2659
+ if (filename === token) {
2660
+ boostFactor *= 0.7;
2661
+ } else if (filename.includes(token)) {
2662
+ boostFactor *= 0.8;
2663
+ }
2664
+ }
2665
+ return baseScore * boostFactor;
2666
+ }
2667
+ };
2668
+ FileTypeBoostingStrategy = class {
2669
+ constructor(intent) {
2670
+ this.intent = intent;
2671
+ }
2672
+ name = "file-type";
2673
+ apply(query, filepath, baseScore) {
2674
+ switch (this.intent) {
2675
+ case "location" /* LOCATION */:
2676
+ return this.applyLocationBoosting(query, filepath, baseScore);
2677
+ case "conceptual" /* CONCEPTUAL */:
2678
+ return this.applyConceptualBoosting(query, filepath, baseScore);
2679
+ case "implementation" /* IMPLEMENTATION */:
2680
+ return this.applyImplementationBoosting(query, filepath, baseScore);
2681
+ default:
2682
+ return baseScore;
2683
+ }
2684
+ }
2685
+ applyLocationBoosting(_query, filepath, score) {
2686
+ if (isTestFile(filepath)) {
2687
+ score *= 1.1;
2688
+ }
2689
+ return score;
2690
+ }
2691
+ applyConceptualBoosting(_query, filepath, score) {
2692
+ if (isDocumentationFile(filepath)) {
2693
+ score *= 0.65;
2694
+ const lower = filepath.toLowerCase();
2695
+ if (lower.includes("architecture") || lower.includes("workflow") || lower.includes("flow")) {
2696
+ score *= 0.9;
2697
+ }
2698
+ }
2699
+ if (isUtilityFile(filepath)) {
2700
+ score *= 0.95;
2701
+ }
2702
+ return score;
2703
+ }
2704
+ applyImplementationBoosting(_query, filepath, score) {
2705
+ if (isTestFile(filepath)) {
2706
+ score *= 1.1;
2707
+ }
2708
+ return score;
2709
+ }
2710
+ };
2521
2711
  }
2522
- const pathSegments = filepath.toLowerCase().split(path13.sep);
2523
- for (const token of queryTokens) {
2524
- if (token.length <= 2) continue;
2525
- for (const segment of pathSegments) {
2526
- if (segment.includes(token)) {
2527
- score *= 0.95;
2528
- break;
2712
+ });
2713
+
2714
+ // src/vectordb/boosting/composer.ts
2715
+ var BoostingComposer;
2716
+ var init_composer = __esm({
2717
+ "src/vectordb/boosting/composer.ts"() {
2718
+ "use strict";
2719
+ BoostingComposer = class {
2720
+ strategies = [];
2721
+ /**
2722
+ * Add a boosting strategy to the pipeline.
2723
+ * Strategies are applied in the order they are added.
2724
+ *
2725
+ * @param strategy - The strategy to add
2726
+ * @returns This composer for chaining
2727
+ */
2728
+ addStrategy(strategy) {
2729
+ this.strategies.push(strategy);
2730
+ return this;
2529
2731
  }
2530
- }
2732
+ /**
2733
+ * Apply all strategies to a base score.
2734
+ *
2735
+ * @param query - The search query
2736
+ * @param filepath - The file path being scored
2737
+ * @param baseScore - The initial score from vector similarity
2738
+ * @returns The final boosted score after all strategies
2739
+ */
2740
+ apply(query, filepath, baseScore) {
2741
+ let score = baseScore;
2742
+ for (const strategy of this.strategies) {
2743
+ score = strategy.apply(query, filepath, score);
2744
+ }
2745
+ return score;
2746
+ }
2747
+ /**
2748
+ * Get the names of all strategies in this composer.
2749
+ * Useful for debugging and logging.
2750
+ */
2751
+ getStrategyNames() {
2752
+ return this.strategies.map((s) => s.name);
2753
+ }
2754
+ /**
2755
+ * Get the number of strategies in this composer.
2756
+ */
2757
+ getStrategyCount() {
2758
+ return this.strategies.length;
2759
+ }
2760
+ /**
2761
+ * Clear all strategies from this composer.
2762
+ */
2763
+ clear() {
2764
+ this.strategies = [];
2765
+ }
2766
+ };
2531
2767
  }
2532
- return score;
2533
- }
2534
- function boostForImplementationIntent(query, filepath, baseScore) {
2535
- let score = baseScore;
2536
- score = boostFilenameRelevance(query, filepath, score);
2537
- score = boostPathRelevance(query, filepath, score);
2538
- if (isTestFile(filepath)) {
2539
- score *= 0.9;
2768
+ });
2769
+
2770
+ // src/vectordb/boosting/index.ts
2771
+ var init_boosting = __esm({
2772
+ "src/vectordb/boosting/index.ts"() {
2773
+ "use strict";
2774
+ init_types2();
2775
+ init_strategies();
2776
+ init_composer();
2540
2777
  }
2541
- return score;
2542
- }
2778
+ });
2779
+
2780
+ // src/vectordb/query.ts
2543
2781
  function applyRelevanceBoosting(query, filepath, baseScore) {
2544
2782
  if (!query) {
2545
2783
  return baseScore;
2546
2784
  }
2547
2785
  const intent = classifyQueryIntent(query);
2548
- switch (intent) {
2549
- case "location" /* LOCATION */:
2550
- return boostForLocationIntent(query, filepath, baseScore);
2551
- case "conceptual" /* CONCEPTUAL */:
2552
- return boostForConceptualIntent(query, filepath, baseScore);
2553
- case "implementation" /* IMPLEMENTATION */:
2554
- return boostForImplementationIntent(query, filepath, baseScore);
2555
- default:
2556
- return boostForImplementationIntent(query, filepath, baseScore);
2557
- }
2786
+ return BOOSTING_COMPOSERS[intent].apply(query, filepath, baseScore);
2558
2787
  }
2559
2788
  function dbRecordToSearchResult(r, query) {
2560
2789
  const baseScore = r._distance ?? 0;
@@ -2736,6 +2965,7 @@ async function querySymbols(table, options) {
2736
2965
  throw wrapError(error, "Failed to query symbols");
2737
2966
  }
2738
2967
  }
2968
+ var PATH_STRATEGY, FILENAME_STRATEGY, FILE_TYPE_STRATEGIES, BOOSTING_COMPOSERS;
2739
2969
  var init_query = __esm({
2740
2970
  "src/vectordb/query.ts"() {
2741
2971
  "use strict";
@@ -2743,6 +2973,19 @@ var init_query = __esm({
2743
2973
  init_errors();
2744
2974
  init_relevance();
2745
2975
  init_intent_classifier();
2976
+ init_boosting();
2977
+ PATH_STRATEGY = new PathBoostingStrategy();
2978
+ FILENAME_STRATEGY = new FilenameBoostingStrategy();
2979
+ FILE_TYPE_STRATEGIES = {
2980
+ ["location" /* LOCATION */]: new FileTypeBoostingStrategy("location" /* LOCATION */),
2981
+ ["conceptual" /* CONCEPTUAL */]: new FileTypeBoostingStrategy("conceptual" /* CONCEPTUAL */),
2982
+ ["implementation" /* IMPLEMENTATION */]: new FileTypeBoostingStrategy("implementation" /* IMPLEMENTATION */)
2983
+ };
2984
+ BOOSTING_COMPOSERS = {
2985
+ ["location" /* LOCATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["location" /* LOCATION */]),
2986
+ ["conceptual" /* CONCEPTUAL */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["conceptual" /* CONCEPTUAL */]),
2987
+ ["implementation" /* IMPLEMENTATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["implementation" /* IMPLEMENTATION */])
2988
+ };
2746
2989
  }
2747
2990
  });
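Editor's note: the per-intent boosting logic now composes three reusable strategies per intent, wired up once at module init as `BOOSTING_COMPOSERS`. A hedged sketch of the same composition done by hand (the intent enum values appear to be the string literals "location" / "conceptual" / "implementation"):

    const composer = new BoostingComposer()
      .addStrategy(new PathBoostingStrategy())
      .addStrategy(new FilenameBoostingStrategy())
      .addStrategy(new FileTypeBoostingStrategy("location"));

    // baseScore comes from r._distance, where smaller means more similar, so factors
    // below 1 (0.9, 0.7, ...) appear to pull matching files up the ranking.
    const boosted = composer.apply("where is auth handled", "src/auth/login.ts", 0.42);
    console.log(composer.getStrategyNames());   // ["path-matching", "filename-matching", "file-type"]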
2748
2991
 
@@ -3667,6 +3910,22 @@ var init_change_detector = __esm({
3667
3910
  }
3668
3911
  });
3669
3912
 
3913
+ // src/utils/result.ts
3914
+ function Ok(value) {
3915
+ return { ok: true, value };
3916
+ }
3917
+ function Err(error) {
3918
+ return { ok: false, error };
3919
+ }
3920
+ function isOk(result) {
3921
+ return result.ok;
3922
+ }
3923
+ var init_result = __esm({
3924
+ "src/utils/result.ts"() {
3925
+ "use strict";
3926
+ }
3927
+ });
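Editor's note: these `Result` helpers give file processing a typed success/failure channel instead of throwing. A minimal sketch of the pattern (the `Result` type annotation is assumed from the TypeScript source; the dist code is plain JS):

    type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };

    function parsePort(raw: string): Result<number, string> {
      const n = Number(raw);
      return Number.isInteger(n) && n > 0 ? Ok(n) : Err(`invalid port: ${raw}`);
    }

    const port = parsePort("8080");
    if (isOk(port)) {
      console.log(port.value);     // 8080
    } else {
      console.error(port.error);
    }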
3928
+
3670
3929
  // src/indexer/incremental.ts
3671
3930
  import fs16 from "fs/promises";
3672
3931
  async function processFileContent(filepath, content, embeddings, config, verbose) {
@@ -3748,36 +4007,29 @@ async function indexSingleFile(filepath, vectorDB, embeddings, config, options =
3748
4007
  console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
3749
4008
  }
3750
4009
  }
4010
+ async function processSingleFileForIndexing(filepath, embeddings, config, verbose) {
4011
+ try {
4012
+ const stats = await fs16.stat(filepath);
4013
+ const content = await fs16.readFile(filepath, "utf-8");
4014
+ const result = await processFileContent(filepath, content, embeddings, config, verbose);
4015
+ return Ok({
4016
+ filepath,
4017
+ result,
4018
+ mtime: stats.mtimeMs
4019
+ });
4020
+ } catch (error) {
4021
+ return Err(`Failed to process ${filepath}: ${error}`);
4022
+ }
4023
+ }
3751
4024
  async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, options = {}) {
3752
4025
  const { verbose } = options;
3753
4026
  let processedCount = 0;
3754
4027
  const manifestEntries = [];
3755
4028
  for (const filepath of filepaths) {
3756
- let content;
3757
- let fileMtime;
3758
- try {
3759
- const stats = await fs16.stat(filepath);
3760
- fileMtime = stats.mtimeMs;
3761
- content = await fs16.readFile(filepath, "utf-8");
3762
- } catch (error) {
3763
- if (verbose) {
3764
- console.error(`[Lien] File not readable: ${filepath}`);
3765
- }
3766
- try {
3767
- await vectorDB.deleteByFile(filepath);
3768
- const manifest = new ManifestManager(vectorDB.dbPath);
3769
- await manifest.removeFile(filepath);
3770
- } catch (error2) {
3771
- if (verbose) {
3772
- console.error(`[Lien] Note: ${filepath} not in index`);
3773
- }
3774
- }
3775
- processedCount++;
3776
- continue;
3777
- }
3778
- try {
3779
- const result = await processFileContent(filepath, content, embeddings, config, verbose || false);
3780
- if (result === null) {
4029
+ const result = await processSingleFileForIndexing(filepath, embeddings, config, verbose || false);
4030
+ if (isOk(result)) {
4031
+ const { result: processResult, mtime } = result.value;
4032
+ if (processResult === null) {
3781
4033
  try {
3782
4034
  await vectorDB.deleteByFile(filepath);
3783
4035
  } catch (error) {
@@ -3785,7 +4037,7 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
3785
4037
  const manifest = new ManifestManager(vectorDB.dbPath);
3786
4038
  await manifest.updateFile(filepath, {
3787
4039
  filepath,
3788
- lastModified: fileMtime,
4040
+ lastModified: mtime,
3789
4041
  chunkCount: 0
3790
4042
  });
3791
4043
  processedCount++;
@@ -3796,21 +4048,33 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
3796
4048
  } catch (error) {
3797
4049
  }
3798
4050
  await vectorDB.insertBatch(
3799
- result.vectors,
3800
- result.chunks.map((c) => c.metadata),
3801
- result.texts
4051
+ processResult.vectors,
4052
+ processResult.chunks.map((c) => c.metadata),
4053
+ processResult.texts
3802
4054
  );
3803
4055
  manifestEntries.push({
3804
4056
  filepath,
3805
- chunkCount: result.chunkCount,
3806
- mtime: fileMtime
4057
+ chunkCount: processResult.chunkCount,
4058
+ mtime
3807
4059
  });
3808
4060
  if (verbose) {
3809
- console.error(`[Lien] \u2713 Updated ${filepath} (${result.chunkCount} chunks)`);
4061
+ console.error(`[Lien] \u2713 Updated ${filepath} (${processResult.chunkCount} chunks)`);
4062
+ }
4063
+ processedCount++;
4064
+ } else {
4065
+ if (verbose) {
4066
+ console.error(`[Lien] ${result.error}`);
4067
+ }
4068
+ try {
4069
+ await vectorDB.deleteByFile(filepath);
4070
+ const manifest = new ManifestManager(vectorDB.dbPath);
4071
+ await manifest.removeFile(filepath);
4072
+ } catch (error) {
4073
+ if (verbose) {
4074
+ console.error(`[Lien] Note: ${filepath} not in index`);
4075
+ }
3810
4076
  }
3811
4077
  processedCount++;
3812
- } catch (error) {
3813
- console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
3814
4078
  }
3815
4079
  }
3816
4080
  if (manifestEntries.length > 0) {
@@ -3833,6 +4097,7 @@ var init_incremental = __esm({
3833
4097
  init_schema();
3834
4098
  init_manifest();
3835
4099
  init_constants();
4100
+ init_result();
3836
4101
  }
3837
4102
  });
3838
4103
 
@@ -3918,6 +4183,99 @@ var init_loading_messages = __esm({
3918
4183
  }
3919
4184
  });
3920
4185
 
4186
+ // src/indexer/progress-tracker.ts
4187
+ var IndexingProgressTracker;
4188
+ var init_progress_tracker = __esm({
4189
+ "src/indexer/progress-tracker.ts"() {
4190
+ "use strict";
4191
+ init_loading_messages();
4192
+ IndexingProgressTracker = class _IndexingProgressTracker {
4193
+ processedFiles = 0;
4194
+ totalFiles;
4195
+ wittyMessage;
4196
+ spinner;
4197
+ updateInterval;
4198
+ // Configuration constants
4199
+ static SPINNER_UPDATE_INTERVAL_MS = 200;
4200
+ // How often to update spinner
4201
+ static MESSAGE_ROTATION_INTERVAL_MS = 8e3;
4202
+ // How often to rotate message
4203
+ constructor(totalFiles, spinner) {
4204
+ this.totalFiles = totalFiles;
4205
+ this.spinner = spinner;
4206
+ this.wittyMessage = getIndexingMessage();
4207
+ }
4208
+ /**
4209
+ * Start the progress tracker.
4210
+ * Sets up periodic updates for spinner and message rotation.
4211
+ *
4212
+ * Safe to call multiple times - will not create duplicate intervals.
4213
+ */
4214
+ start() {
4215
+ if (this.updateInterval) {
4216
+ return;
4217
+ }
4218
+ const MESSAGE_ROTATION_TICKS = Math.floor(
4219
+ _IndexingProgressTracker.MESSAGE_ROTATION_INTERVAL_MS / _IndexingProgressTracker.SPINNER_UPDATE_INTERVAL_MS
4220
+ );
4221
+ let spinnerTick = 0;
4222
+ this.updateInterval = setInterval(() => {
4223
+ spinnerTick++;
4224
+ if (spinnerTick >= MESSAGE_ROTATION_TICKS) {
4225
+ this.wittyMessage = getIndexingMessage();
4226
+ spinnerTick = 0;
4227
+ }
4228
+ this.spinner.text = `${this.processedFiles}/${this.totalFiles} files | ${this.wittyMessage}`;
4229
+ }, _IndexingProgressTracker.SPINNER_UPDATE_INTERVAL_MS);
4230
+ }
4231
+ /**
4232
+ * Increment the count of processed files.
4233
+ *
4234
+ * Safe for async operations in Node.js's single-threaded event loop.
4235
+ * Note: Not thread-safe for true concurrent operations (e.g., worker threads).
4236
+ */
4237
+ incrementFiles() {
4238
+ this.processedFiles++;
4239
+ }
4240
+ /**
4241
+ * Set a custom message (e.g., for special operations like embedding generation).
4242
+ * The message will be displayed until the next automatic rotation.
4243
+ */
4244
+ setMessage(message) {
4245
+ this.wittyMessage = message;
4246
+ }
4247
+ /**
4248
+ * Stop the progress tracker and clean up intervals.
4249
+ * Must be called when indexing completes or fails.
4250
+ */
4251
+ stop() {
4252
+ if (this.updateInterval) {
4253
+ clearInterval(this.updateInterval);
4254
+ this.updateInterval = void 0;
4255
+ }
4256
+ }
4257
+ /**
4258
+ * Get the current count of processed files.
4259
+ */
4260
+ getProcessedCount() {
4261
+ return this.processedFiles;
4262
+ }
4263
+ /**
4264
+ * Get the total number of files to process.
4265
+ */
4266
+ getTotalFiles() {
4267
+ return this.totalFiles;
4268
+ }
4269
+ /**
4270
+ * Get the current message being displayed.
4271
+ */
4272
+ getCurrentMessage() {
4273
+ return this.wittyMessage;
4274
+ }
4275
+ };
4276
+ }
4277
+ });
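Editor's note: `IndexingProgressTracker` takes over the inline interval the indexer used to manage: it owns the spinner text, rotates the witty message every ~8 s, and must be stopped when indexing ends. A hedged usage sketch (the `ora` spinner matches the indexer; `files` and `indexOneFile` are hypothetical stand-ins for the per-file work):

    import ora from "ora";

    const spinner = ora("Indexing...").start();
    const tracker = new IndexingProgressTracker(files.length, spinner);
    tracker.start();                                  // 200 ms spinner updates, 8 s message rotation
    try {
      for (const file of files) {
        await indexOneFile(file);                     // hypothetical per-file work
        tracker.incrementFiles();
      }
      tracker.setMessage("Processing final chunks...");
    } finally {
      tracker.stop();                                 // always clear the interval, even on failure
    }
    spinner.succeed(`Indexed ${tracker.getProcessedCount()}/${tracker.getTotalFiles()} files`);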
4278
+
3921
4279
  // src/indexer/index.ts
3922
4280
  var indexer_exports = {};
3923
4281
  __export(indexer_exports, {
@@ -3927,162 +4285,171 @@ import fs17 from "fs/promises";
3927
4285
  import ora from "ora";
3928
4286
  import chalk5 from "chalk";
3929
4287
  import pLimit from "p-limit";
3930
- async function indexCodebase(options = {}) {
3931
- const rootDir = options.rootDir ?? process.cwd();
3932
- const spinner = ora("Starting indexing process...").start();
3933
- let updateInterval;
4288
+ async function updateGitState(rootDir, vectorDB, manifest) {
4289
+ const { isGitAvailable: isGitAvailable2, isGitRepo: isGitRepo2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
4290
+ const { GitStateTracker: GitStateTracker2 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
4291
+ const gitAvailable = await isGitAvailable2();
4292
+ const isRepo = await isGitRepo2(rootDir);
4293
+ if (!gitAvailable || !isRepo) {
4294
+ return;
4295
+ }
4296
+ const gitTracker = new GitStateTracker2(rootDir, vectorDB.dbPath);
4297
+ await gitTracker.initialize();
4298
+ const gitState = gitTracker.getState();
4299
+ if (gitState) {
4300
+ await manifest.updateGitState(gitState);
4301
+ }
4302
+ }
4303
+ async function handleDeletions(deletedFiles, vectorDB, manifest, spinner) {
4304
+ if (deletedFiles.length === 0) {
4305
+ return;
4306
+ }
4307
+ spinner.start(`Removing ${deletedFiles.length} deleted files...`);
4308
+ let removedCount = 0;
4309
+ for (const filepath of deletedFiles) {
4310
+ try {
4311
+ await vectorDB.deleteByFile(filepath);
4312
+ await manifest.removeFile(filepath);
4313
+ removedCount++;
4314
+ } catch (err) {
4315
+ spinner.warn(
4316
+ `Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`
4317
+ );
4318
+ }
4319
+ }
4320
+ spinner.succeed(`Removed ${removedCount}/${deletedFiles.length} deleted files`);
4321
+ }
4322
+ async function handleUpdates(addedFiles, modifiedFiles, vectorDB, embeddings, config, options, spinner) {
4323
+ const filesToIndex = [...addedFiles, ...modifiedFiles];
4324
+ if (filesToIndex.length === 0) {
4325
+ return;
4326
+ }
4327
+ spinner.start(`Reindexing ${filesToIndex.length} changed files...`);
4328
+ const count = await indexMultipleFiles(
4329
+ filesToIndex,
4330
+ vectorDB,
4331
+ embeddings,
4332
+ config,
4333
+ { verbose: options.verbose }
4334
+ );
4335
+ await writeVersionFile(vectorDB.dbPath);
4336
+ spinner.succeed(
4337
+ `Incremental reindex complete: ${count}/${filesToIndex.length} files indexed successfully`
4338
+ );
4339
+ }
4340
+ async function tryIncrementalIndex(rootDir, vectorDB, config, options, spinner) {
4341
+ spinner.text = "Checking for changes...";
4342
+ const manifest = new ManifestManager(vectorDB.dbPath);
4343
+ const savedManifest = await manifest.load();
4344
+ if (!savedManifest) {
4345
+ return false;
4346
+ }
4347
+ const changes = await detectChanges(rootDir, vectorDB, config);
4348
+ if (changes.reason === "full") {
4349
+ spinner.text = "Full reindex required...";
4350
+ return false;
4351
+ }
4352
+ const totalChanges = changes.added.length + changes.modified.length;
4353
+ const totalDeleted = changes.deleted.length;
4354
+ if (totalChanges === 0 && totalDeleted === 0) {
4355
+ spinner.succeed("No changes detected - index is up to date!");
4356
+ return true;
4357
+ }
4358
+ spinner.succeed(
4359
+ `Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
4360
+ );
4361
+ spinner.start(getModelLoadingMessage());
4362
+ const embeddings = new LocalEmbeddings();
4363
+ await embeddings.initialize();
4364
+ spinner.succeed("Embedding model loaded");
4365
+ await handleDeletions(changes.deleted, vectorDB, manifest, spinner);
4366
+ await handleUpdates(changes.added, changes.modified, vectorDB, embeddings, config, options, spinner);
4367
+ await updateGitState(rootDir, vectorDB, manifest);
4368
+ console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4369
+ return true;
4370
+ }
4371
+ async function performFullIndex(rootDir, vectorDB, config, options, spinner) {
4372
+ spinner.text = "Scanning codebase...";
4373
+ let files;
4374
+ if (isModernConfig(config) && config.frameworks.length > 0) {
4375
+ files = await scanCodebaseWithFrameworks(rootDir, config);
4376
+ } else if (isLegacyConfig(config)) {
4377
+ files = await scanCodebase({
4378
+ rootDir,
4379
+ includePatterns: config.indexing.include,
4380
+ excludePatterns: config.indexing.exclude
4381
+ });
4382
+ } else {
4383
+ files = await scanCodebase({
4384
+ rootDir,
4385
+ includePatterns: [],
4386
+ excludePatterns: []
4387
+ });
4388
+ }
4389
+ if (files.length === 0) {
4390
+ spinner.fail("No files found to index");
4391
+ return;
4392
+ }
4393
+ spinner.text = `Found ${files.length} files`;
4394
+ spinner.text = getModelLoadingMessage();
4395
+ const embeddings = new LocalEmbeddings();
4396
+ await embeddings.initialize();
4397
+ spinner.succeed("Embedding model loaded");
4398
+ const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
4399
+ const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
4400
+ const vectorDBBatchSize = 100;
4401
+ spinner.start(`Processing files with ${concurrency}x concurrency...`);
4402
+ const startTime = Date.now();
4403
+ let processedChunks = 0;
4404
+ const chunkAccumulator = [];
4405
+ const limit = pLimit(concurrency);
4406
+ const indexedFileEntries = [];
4407
+ const progressTracker = new IndexingProgressTracker(files.length, spinner);
4408
+ progressTracker.start();
3934
4409
  try {
3935
- spinner.text = "Loading configuration...";
3936
- const config = await configService.load(rootDir);
3937
- spinner.text = "Initializing vector database...";
3938
- const vectorDB = new VectorDB(rootDir);
3939
- await vectorDB.initialize();
3940
- if (!options.force) {
3941
- spinner.text = "Checking for changes...";
3942
- const manifest2 = new ManifestManager(vectorDB.dbPath);
3943
- const savedManifest = await manifest2.load();
3944
- if (savedManifest) {
3945
- const changes = await detectChanges(rootDir, vectorDB, config);
3946
- if (changes.reason !== "full") {
3947
- const totalChanges = changes.added.length + changes.modified.length;
3948
- const totalDeleted = changes.deleted.length;
3949
- if (totalChanges === 0 && totalDeleted === 0) {
3950
- spinner.succeed("No changes detected - index is up to date!");
3951
- return;
3952
- }
3953
- spinner.succeed(
3954
- `Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
3955
- );
3956
- spinner.start(getModelLoadingMessage());
3957
- const embeddings2 = new LocalEmbeddings();
3958
- await embeddings2.initialize();
3959
- spinner.succeed("Embedding model loaded");
3960
- if (totalDeleted > 0) {
3961
- spinner.start(`Removing ${totalDeleted} deleted files...`);
3962
- let removedCount = 0;
3963
- for (const filepath of changes.deleted) {
3964
- try {
3965
- await vectorDB.deleteByFile(filepath);
3966
- await manifest2.removeFile(filepath);
3967
- removedCount++;
3968
- } catch (err) {
3969
- spinner.warn(`Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`);
3970
- }
3971
- }
3972
- spinner.succeed(`Removed ${removedCount}/${totalDeleted} deleted files`);
3973
- }
3974
- if (totalChanges > 0) {
3975
- spinner.start(`Reindexing ${totalChanges} changed files...`);
3976
- const filesToIndex = [...changes.added, ...changes.modified];
3977
- const count = await indexMultipleFiles(
3978
- filesToIndex,
3979
- vectorDB,
3980
- embeddings2,
3981
- config,
3982
- { verbose: options.verbose }
3983
- );
3984
- await writeVersionFile(vectorDB.dbPath);
3985
- spinner.succeed(
3986
- `Incremental reindex complete: ${count}/${totalChanges} files indexed successfully`
3987
- );
3988
- }
3989
- const { isGitAvailable: isGitAvailable3, isGitRepo: isGitRepo3 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
3990
- const { GitStateTracker: GitStateTracker3 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
3991
- const gitAvailable2 = await isGitAvailable3();
3992
- const isRepo2 = await isGitRepo3(rootDir);
3993
- if (gitAvailable2 && isRepo2) {
3994
- const gitTracker = new GitStateTracker3(rootDir, vectorDB.dbPath);
3995
- await gitTracker.initialize();
3996
- const gitState = gitTracker.getState();
3997
- if (gitState) {
3998
- await manifest2.updateGitState(gitState);
3999
- }
4000
- }
4001
- console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4002
- return;
4003
- }
4004
- spinner.text = "Full reindex required...";
4410
+ let addChunksLock = null;
4411
+ let processingQueue = null;
4412
+ const processAccumulatedChunks = async () => {
4413
+ if (processingQueue) {
4414
+ processingQueue = processingQueue.then(() => doProcessChunks());
4415
+ } else {
4416
+ processingQueue = doProcessChunks();
4005
4417
  }
4006
- } else {
4007
- spinner.text = "Force flag enabled, performing full reindex...";
4008
- }
4009
- spinner.text = "Scanning codebase...";
4010
- let files;
4011
- if (isModernConfig(config) && config.frameworks.length > 0) {
4012
- files = await scanCodebaseWithFrameworks(rootDir, config);
4013
- } else if (isLegacyConfig(config)) {
4014
- files = await scanCodebase({
4015
- rootDir,
4016
- includePatterns: config.indexing.include,
4017
- excludePatterns: config.indexing.exclude
4018
- });
4019
- } else {
4020
- files = await scanCodebase({
4021
- rootDir,
4022
- includePatterns: [],
4023
- excludePatterns: []
4024
- });
4025
- }
4026
- if (files.length === 0) {
4027
- spinner.fail("No files found to index");
4028
- return;
4029
- }
4030
- spinner.text = `Found ${files.length} files`;
4031
- spinner.text = getModelLoadingMessage();
4032
- const embeddings = new LocalEmbeddings();
4033
- await embeddings.initialize();
4034
- spinner.succeed("Embedding model loaded");
4035
- const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
4036
- const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
4037
- const vectorDBBatchSize = 100;
4038
- spinner.start(`Processing files with ${concurrency}x concurrency...`);
4039
- const startTime = Date.now();
4040
- let processedFiles = 0;
4041
- let processedChunks = 0;
4042
- const chunkAccumulator = [];
4043
- const limit = pLimit(concurrency);
4044
- const indexedFileEntries = [];
4045
- const progressState = {
4046
- processedFiles: 0,
4047
- totalFiles: files.length,
4048
- wittyMessage: getIndexingMessage()
4418
+ return processingQueue;
4049
4419
  };
4050
- const SPINNER_UPDATE_INTERVAL_MS = 200;
4051
- const MESSAGE_ROTATION_INTERVAL_MS = 8e3;
4052
- const MESSAGE_ROTATION_TICKS = Math.floor(MESSAGE_ROTATION_INTERVAL_MS / SPINNER_UPDATE_INTERVAL_MS);
4053
- let spinnerTick = 0;
4054
- updateInterval = setInterval(() => {
4055
- spinnerTick++;
4056
- if (spinnerTick >= MESSAGE_ROTATION_TICKS) {
4057
- progressState.wittyMessage = getIndexingMessage();
4058
- spinnerTick = 0;
4059
- }
4060
- spinner.text = `${progressState.processedFiles}/${progressState.totalFiles} files | ${progressState.wittyMessage}`;
4061
- }, SPINNER_UPDATE_INTERVAL_MS);
4062
- const processAccumulatedChunks = async () => {
4063
- if (chunkAccumulator.length === 0) return;
4064
- const toProcess = chunkAccumulator.splice(0, chunkAccumulator.length);
4065
- for (let i = 0; i < toProcess.length; i += embeddingBatchSize) {
4066
- const batch = toProcess.slice(i, Math.min(i + embeddingBatchSize, toProcess.length));
4067
- progressState.wittyMessage = getEmbeddingMessage();
4068
- const texts = batch.map((item) => item.content);
4069
- const embeddingVectors = [];
4070
- for (let j = 0; j < texts.length; j += EMBEDDING_MICRO_BATCH_SIZE) {
4071
- const microBatch = texts.slice(j, Math.min(j + EMBEDDING_MICRO_BATCH_SIZE, texts.length));
4072
- const microResults = await embeddings.embedBatch(microBatch);
4073
- embeddingVectors.push(...microResults);
4420
+ const doProcessChunks = async () => {
4421
+ if (chunkAccumulator.length === 0) {
4422
+ return;
4423
+ }
4424
+ const currentPromise = processingQueue;
4425
+ try {
4426
+ const toProcess = chunkAccumulator.splice(0, chunkAccumulator.length);
4427
+ for (let i = 0; i < toProcess.length; i += embeddingBatchSize) {
4428
+ const batch = toProcess.slice(i, Math.min(i + embeddingBatchSize, toProcess.length));
4429
+ progressTracker.setMessage(getEmbeddingMessage());
4430
+ const texts = batch.map((item) => item.content);
4431
+ const embeddingVectors = [];
4432
+ for (let j = 0; j < texts.length; j += EMBEDDING_MICRO_BATCH_SIZE) {
4433
+ const microBatch = texts.slice(j, Math.min(j + EMBEDDING_MICRO_BATCH_SIZE, texts.length));
4434
+ const microResults = await embeddings.embedBatch(microBatch);
4435
+ embeddingVectors.push(...microResults);
4436
+ await new Promise((resolve) => setImmediate(resolve));
4437
+ }
4438
+ processedChunks += batch.length;
4439
+ progressTracker.setMessage(`Inserting ${batch.length} chunks into vector space...`);
4440
+ await vectorDB.insertBatch(
4441
+ embeddingVectors,
4442
+ batch.map((item) => item.chunk.metadata),
4443
+ texts
4444
+ );
4074
4445
  await new Promise((resolve) => setImmediate(resolve));
4075
4446
  }
4076
- processedChunks += batch.length;
4077
- progressState.wittyMessage = `Inserting ${batch.length} chunks into vector space...`;
4078
- await vectorDB.insertBatch(
4079
- embeddingVectors,
4080
- batch.map((item) => item.chunk.metadata),
4081
- texts
4082
- );
4083
- await new Promise((resolve) => setImmediate(resolve));
4447
+ progressTracker.setMessage(getIndexingMessage());
4448
+ } finally {
4449
+ if (processingQueue === currentPromise) {
4450
+ processingQueue = null;
4451
+ }
4084
4452
  }
4085
- progressState.wittyMessage = getIndexingMessage();
4086
4453
  };
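Editor's note: `processAccumulatedChunks` now funnels every flush through a single promise chain so concurrent file tasks cannot embed and insert batches at the same time, and `addChunksLock` serializes pushes into the accumulator the same way. The chaining idiom, stripped to its core (names hypothetical):

    // Serialize async jobs: each enqueued job runs only after the previous one settles.
    let queue: Promise<void> | null = null;

    function enqueue(job: () => Promise<void>): Promise<void> {
      queue = queue ? queue.then(job) : job();
      return queue;
    }

    // Two concurrent callers end up running strictly one after the other.
    await Promise.all([
      enqueue(async () => { /* embed + insert batch 1 */ }),
      enqueue(async () => { /* embed + insert batch 2 */ }),
    ]);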
4087
4454
  const filePromises = files.map(
4088
4455
  (file) => limit(async () => {
@@ -4100,73 +4467,91 @@ async function indexCodebase(options = {}) {
4100
4467
  astFallback
4101
4468
  });
4102
4469
  if (chunks.length === 0) {
4103
- processedFiles++;
4104
- progressState.processedFiles = processedFiles;
4470
+ progressTracker.incrementFiles();
4105
4471
  return;
4106
4472
  }
4107
- for (const chunk of chunks) {
4108
- chunkAccumulator.push({
4109
- chunk,
4110
- content: chunk.content
4473
+ {
4474
+ if (addChunksLock) {
4475
+ await addChunksLock;
4476
+ }
4477
+ let releaseAddLock;
4478
+ addChunksLock = new Promise((resolve) => {
4479
+ releaseAddLock = resolve;
4111
4480
  });
4112
- }
4113
- indexedFileEntries.push({
4114
- filepath: file,
4115
- chunkCount: chunks.length,
4116
- mtime: stats.mtimeMs
4117
- });
4118
- processedFiles++;
4119
- progressState.processedFiles = processedFiles;
4120
- if (chunkAccumulator.length >= vectorDBBatchSize) {
4121
- await processAccumulatedChunks();
4481
+ try {
4482
+ for (const chunk of chunks) {
4483
+ chunkAccumulator.push({
4484
+ chunk,
4485
+ content: chunk.content
4486
+ });
4487
+ }
4488
+ indexedFileEntries.push({
4489
+ filepath: file,
4490
+ chunkCount: chunks.length,
4491
+ mtime: stats.mtimeMs
4492
+ });
4493
+ progressTracker.incrementFiles();
4494
+ if (chunkAccumulator.length >= vectorDBBatchSize) {
4495
+ await processAccumulatedChunks();
4496
+ }
4497
+ } finally {
4498
+ releaseAddLock();
4499
+ addChunksLock = null;
4500
+ }
4122
4501
  }
4123
4502
  } catch (error) {
4124
4503
  if (options.verbose) {
4125
4504
  console.error(chalk5.yellow(`
4126
4505
  \u26A0\uFE0F Skipping ${file}: ${error}`));
4127
4506
  }
4128
- processedFiles++;
4129
- progressState.processedFiles = processedFiles;
4507
+ progressTracker.incrementFiles();
4130
4508
  }
4131
4509
  })
4132
4510
  );
4133
4511
  await Promise.all(filePromises);
4134
- progressState.wittyMessage = "Processing final chunks...";
4512
+ progressTracker.setMessage("Processing final chunks...");
4135
4513
  await processAccumulatedChunks();
4136
- clearInterval(updateInterval);
4137
- spinner.start("Saving index manifest...");
4138
- const manifest = new ManifestManager(vectorDB.dbPath);
4139
- await manifest.updateFiles(
4140
- indexedFileEntries.map((entry) => ({
4141
- filepath: entry.filepath,
4142
- lastModified: entry.mtime,
4143
- // Use actual file mtime for accurate change detection
4144
- chunkCount: entry.chunkCount
4145
- }))
4146
- );
4147
- const { isGitAvailable: isGitAvailable2, isGitRepo: isGitRepo2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
4148
- const { GitStateTracker: GitStateTracker2 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
4149
- const gitAvailable = await isGitAvailable2();
4150
- const isRepo = await isGitRepo2(rootDir);
4151
- if (gitAvailable && isRepo) {
4152
- const gitTracker = new GitStateTracker2(rootDir, vectorDB.dbPath);
4153
- await gitTracker.initialize();
4154
- const gitState = gitTracker.getState();
4155
- if (gitState) {
4156
- await manifest.updateGitState(gitState);
4514
+ } finally {
4515
+ progressTracker.stop();
4516
+ }
4517
+ spinner.start("Saving index manifest...");
4518
+ const manifest = new ManifestManager(vectorDB.dbPath);
4519
+ await manifest.updateFiles(
4520
+ indexedFileEntries.map((entry) => ({
4521
+ filepath: entry.filepath,
4522
+ // Use actual file mtime for accurate change detection
4523
+ lastModified: entry.mtime,
4524
+ chunkCount: entry.chunkCount
4525
+ }))
4526
+ );
4527
+ await updateGitState(rootDir, vectorDB, manifest);
4528
+ spinner.succeed("Manifest saved");
4529
+ await writeVersionFile(vectorDB.dbPath);
4530
+ const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
4531
+ spinner.succeed(
4532
+ `Indexed ${progressTracker.getProcessedCount()} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
4533
+ );
4534
+ console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4535
+ }
4536
+ async function indexCodebase(options = {}) {
4537
+ const rootDir = options.rootDir ?? process.cwd();
4538
+ const spinner = ora("Starting indexing process...").start();
4539
+ try {
4540
+ spinner.text = "Loading configuration...";
4541
+ const config = await configService.load(rootDir);
4542
+ spinner.text = "Initializing vector database...";
4543
+ const vectorDB = new VectorDB(rootDir);
4544
+ await vectorDB.initialize();
4545
+ if (!options.force) {
4546
+ const completed = await tryIncrementalIndex(rootDir, vectorDB, config, options, spinner);
4547
+ if (completed) {
4548
+ return;
4157
4549
  }
4550
+ } else {
4551
+ spinner.text = "Force flag enabled, performing full reindex...";
4158
4552
  }
4159
- spinner.succeed("Manifest saved");
4160
- await writeVersionFile(vectorDB.dbPath);
4161
- const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
4162
- spinner.succeed(
4163
- `Indexed ${processedFiles} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
4164
- );
4165
- console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4553
+ await performFullIndex(rootDir, vectorDB, config, options, spinner);
4166
4554
  } catch (error) {
4167
- if (updateInterval) {
4168
- clearInterval(updateInterval);
4169
- }
4170
4555
  spinner.fail(`Indexing failed: ${error}`);
4171
4556
  throw error;
4172
4557
  }
@@ -4186,6 +4571,7 @@ var init_indexer = __esm({
4186
4571
  init_incremental();
4187
4572
  init_loading_messages();
4188
4573
  init_constants();
4574
+ init_progress_tracker();
4189
4575
  }
4190
4576
  });
4191
4577
 
@@ -4926,7 +5312,7 @@ async function createNewConfig(rootDir, options) {
4926
5312
  path: ".",
4927
5313
  enabled: true,
4928
5314
  config: {
4929
- include: ["**/*.{ts,tsx,js,jsx,py,go,rs,java,c,cpp,cs}"],
5315
+ include: ["**/*.{ts,tsx,js,jsx,py,php,go,rs,java,c,cpp,cs}"],
4930
5316
  exclude: [
4931
5317
  "**/node_modules/**",
4932
5318
  "**/dist/**",
@@ -5279,9 +5665,12 @@ var FindSimilarSchema = z2.object({
5279
5665
 
5280
5666
  // src/mcp/schemas/file.schema.ts
5281
5667
  import { z as z3 } from "zod";
5282
- var GetFileContextSchema = z3.object({
5283
- filepath: z3.string().min(1, "Filepath cannot be empty").describe(
5284
- "Relative path to file from workspace root.\n\nExample: 'src/components/Button.tsx'"
5668
+ var GetFilesContextSchema = z3.object({
5669
+ filepaths: z3.union([
5670
+ z3.string().min(1, "Filepath cannot be empty"),
5671
+ z3.array(z3.string().min(1, "Filepath cannot be empty")).min(1, "Array must contain at least one filepath").max(50, "Maximum 50 files per request")
5672
+ ]).describe(
5673
+ "Single filepath or array of filepaths (relative to workspace root).\n\nSingle file: 'src/components/Button.tsx'\nMultiple files: ['src/auth.ts', 'src/user.ts']\n\nMaximum 50 files per request for batch operations."
5285
5674
  ),
5286
5675
  includeRelated: z3.boolean().default(true).describe(
5287
5676
  "Include semantically related chunks from nearby code.\n\nDefault: true\n\nWhen enabled, also returns related code from other files that are semantically similar to the target file's contents."
@@ -5304,22 +5693,56 @@ var tools = [
5304
5693
  toMCPToolSchema(
5305
5694
  SemanticSearchSchema,
5306
5695
  "semantic_search",
5307
- "Search the codebase semantically for relevant code using natural language. Results include a relevance category (highly_relevant, relevant, loosely_related, not_relevant) based on semantic similarity."
5696
+ `Search codebase by MEANING, not text. USE THIS INSTEAD OF grep/ripgrep for finding implementations, features, or understanding how code works.
5697
+
5698
+ Examples:
5699
+ - "Where is authentication handled?" \u2192 semantic_search({ query: "handles user authentication" })
5700
+ - "How does payment work?" \u2192 semantic_search({ query: "processes payment transactions" })
5701
+
5702
+ Use natural language describing what the code DOES, not function names. For exact string matching, use grep instead.
5703
+
5704
+ Results include a relevance category (highly_relevant, relevant, loosely_related, not_relevant) for each match.`
5308
5705
  ),
5309
5706
  toMCPToolSchema(
5310
5707
  FindSimilarSchema,
5311
5708
  "find_similar",
5312
- "Find code similar to a given code snippet. Results include a relevance category (highly_relevant, relevant, loosely_related, not_relevant) based on semantic similarity."
5709
+ `Find code structurally similar to a given snippet. Use for:
5710
+ - Ensuring consistency when adding new code
5711
+ - Finding duplicate implementations
5712
+ - Refactoring similar patterns together
5713
+
5714
+ Provide at least 10 characters of code to match against. Results include a relevance category for each match.`
5313
5715
  ),
5314
5716
  toMCPToolSchema(
5315
- GetFileContextSchema,
5316
- "get_file_context",
5317
- "Get all chunks and related context for a specific file. Results include a relevance category (highly_relevant, relevant, loosely_related, not_relevant) based on semantic similarity."
5717
+ GetFilesContextSchema,
5718
+ "get_files_context",
5719
+ `Get context for one or more files including dependencies and test coverage.
5720
+
5721
+ MANDATORY: Call this BEFORE editing any file. Accepts single path or array of paths.
5722
+
5723
+ Single file:
5724
+ get_files_context({ filepaths: "src/auth.ts" })
5725
+
5726
+ Multiple files (batch):
5727
+ get_files_context({ filepaths: ["src/auth.ts", "src/user.ts"] })
5728
+
5729
+ Returns for each file:
5730
+ - All chunks and related code
5731
+ - testAssociations (which tests cover this file)
5732
+ - Relevance scoring
5733
+
5734
+ Batch calls are more efficient than multiple single-file calls.`
5318
5735
  ),
5319
5736
  toMCPToolSchema(
5320
5737
  ListFunctionsSchema,
5321
5738
  "list_functions",
5322
- "List functions, classes, and interfaces by name pattern and language"
5739
+ `Fast symbol lookup by naming pattern. Use when searching by NAME, not behavior.
5740
+
5741
+ Examples:
5742
+ - "Show all controllers" \u2192 list_functions({ pattern: ".*Controller.*" })
5743
+ - "Find service classes" \u2192 list_functions({ pattern: ".*Service$" })
5744
+
5745
+ 10x faster than semantic_search for structural/architectural queries. Use semantic_search instead when searching by what code DOES.`
5323
5746
  )
5324
5747
  ];
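Editor's note: the renamed `get_files_context` tool keeps single-file calls working while accepting arrays, because `GetFilesContextSchema` (shown earlier) is a union of a string and a 1-to-50-element string array. A quick sketch of both request shapes (zod fills the `includeRelated` default; any other schema fields are omitted here):

    GetFilesContextSchema.parse({ filepaths: "src/auth.ts" });
    // → { filepaths: "src/auth.ts", includeRelated: true, ... }

    GetFilesContextSchema.parse({ filepaths: ["src/auth.ts", "src/user.ts"], includeRelated: false });
    // → { filepaths: ["src/auth.ts", "src/user.ts"], includeRelated: false, ... }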
5325
5748
 
@@ -5618,32 +6041,69 @@ async function startMCPServer(options) {
5618
6041
  };
5619
6042
  }
5620
6043
  )(args);
5621
- case "get_file_context":
6044
+ case "get_files_context":
5622
6045
  return await wrapToolHandler(
5623
- GetFileContextSchema,
6046
+ GetFilesContextSchema,
5624
6047
  async (validatedArgs) => {
5625
- log(`Getting context for: ${validatedArgs.filepath}`);
6048
+ const filepaths = Array.isArray(validatedArgs.filepaths) ? validatedArgs.filepaths : [validatedArgs.filepaths];
6049
+ const isSingleFile = !Array.isArray(validatedArgs.filepaths);
6050
+ log(`Getting context for: ${filepaths.join(", ")}`);
5626
6051
  await checkAndReconnect();
5627
- const fileEmbedding = await embeddings.embed(validatedArgs.filepath);
5628
- const allResults = await vectorDB.search(fileEmbedding, 50, validatedArgs.filepath);
5629
- const fileChunks = allResults.filter(
5630
- (r) => r.metadata.file.includes(validatedArgs.filepath) || validatedArgs.filepath.includes(r.metadata.file)
6052
+ const fileEmbeddings = await Promise.all(filepaths.map((fp) => embeddings.embed(fp)));
6053
+ const allFileSearches = await Promise.all(
6054
+ fileEmbeddings.map(
6055
+ (embedding, i) => vectorDB.search(embedding, 50, filepaths[i])
6056
+ )
5631
6057
  );
5632
- let results = fileChunks;
5633
- if (validatedArgs.includeRelated && fileChunks.length > 0) {
5634
- const relatedEmbedding = await embeddings.embed(fileChunks[0].content);
5635
- const related = await vectorDB.search(relatedEmbedding, 5, fileChunks[0].content);
5636
- const relatedOtherFiles = related.filter(
5637
- (r) => !r.metadata.file.includes(validatedArgs.filepath) && !validatedArgs.filepath.includes(r.metadata.file)
6058
+ const fileChunksMap = filepaths.map((filepath, i) => {
6059
+ const allResults = allFileSearches[i];
6060
+ return allResults.filter(
6061
+ (r) => r.metadata.file.includes(filepath) || filepath.includes(r.metadata.file)
5638
6062
  );
5639
- results = [...fileChunks, ...relatedOtherFiles];
6063
+ });
6064
+ let relatedChunksMap = [];
6065
+ if (validatedArgs.includeRelated) {
6066
+ const filesWithChunks = fileChunksMap.map((chunks, i) => ({ chunks, filepath: filepaths[i], index: i })).filter(({ chunks }) => chunks.length > 0);
6067
+ if (filesWithChunks.length > 0) {
6068
+ const relatedEmbeddings = await Promise.all(
6069
+ filesWithChunks.map(({ chunks }) => embeddings.embed(chunks[0].content))
6070
+ );
6071
+ const relatedSearches = await Promise.all(
6072
+ relatedEmbeddings.map(
6073
+ (embedding, i) => vectorDB.search(embedding, 5, filesWithChunks[i].chunks[0].content)
6074
+ )
6075
+ );
6076
+ relatedChunksMap = Array.from({ length: filepaths.length }, () => []);
6077
+ filesWithChunks.forEach(({ filepath, index }, i) => {
6078
+ const related = relatedSearches[i];
6079
+ relatedChunksMap[index] = related.filter(
6080
+ (r) => !r.metadata.file.includes(filepath) && !filepath.includes(r.metadata.file)
6081
+ );
6082
+ });
6083
+ }
6084
+ }
6085
+ const filesData = {};
6086
+ filepaths.forEach((filepath, i) => {
6087
+ const fileChunks = fileChunksMap[i];
6088
+ const relatedChunks = relatedChunksMap[i] || [];
6089
+ filesData[filepath] = {
6090
+ chunks: [...fileChunks, ...relatedChunks]
6091
+ };
6092
+ });
6093
+ log(`Found ${Object.values(filesData).reduce((sum, f) => sum + f.chunks.length, 0)} total chunks`);
6094
+ if (isSingleFile) {
6095
+ const filepath = filepaths[0];
6096
+ return {
6097
+ indexInfo: getIndexMetadata(),
6098
+ file: filepath,
6099
+ chunks: filesData[filepath].chunks
6100
+ };
6101
+ } else {
6102
+ return {
6103
+ indexInfo: getIndexMetadata(),
6104
+ files: filesData
6105
+ };
5640
6106
  }
5641
- log(`Found ${results.length} chunks`);
5642
- return {
5643
- indexInfo: getIndexMetadata(),
5644
- file: validatedArgs.filepath,
5645
- chunks: results
5646
- };
5647
6107
  }
5648
6108
  )(args);
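Editor's note: the handler above preserves the old single-file response shape and adds a keyed map for batch requests. Illustrative shapes only (field contents abbreviated):

    // filepaths given as a single string → backwards-compatible shape
    { indexInfo: { ... }, file: "src/auth.ts", chunks: [ ... ] }

    // filepaths given as an array → one entry per requested file
    { indexInfo: { ... }, files: { "src/auth.ts": { chunks: [ ... ] }, "src/user.ts": { chunks: [ ... ] } } }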
5649
6109
  case "list_functions":