@liendev/lien 0.15.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CURSOR_RULES_TEMPLATE.md +8 -7
- package/dist/index.js +673 -365
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2524,26 +2524,73 @@ var init_relevance = __esm({
|
|
|
2524
2524
|
});
|
|
2525
2525
|
|
|
2526
2526
|
// src/vectordb/intent-classifier.ts
|
|
2527
|
+
function getSortedRules() {
|
|
2528
|
+
if (cachedSortedRules === null) {
|
|
2529
|
+
cachedSortedRules = [...INTENT_RULES].sort((a, b) => b.priority - a.priority);
|
|
2530
|
+
}
|
|
2531
|
+
return cachedSortedRules;
|
|
2532
|
+
}
|
|
2527
2533
|
function classifyQueryIntent(query) {
|
|
2528
2534
|
const lower = query.toLowerCase().trim();
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
}
|
|
2535
|
-
if (lower.match(/how\s+(is|are)\s+.*\s+(implemented|built|coded)/) || lower.match(/implementation\s+of/) || lower.match(/source\s+code\s+for/)) {
|
|
2536
|
-
return "implementation" /* IMPLEMENTATION */;
|
|
2535
|
+
const sortedRules = getSortedRules();
|
|
2536
|
+
for (const rule of sortedRules) {
|
|
2537
|
+
if (rule.patterns.some((pattern) => pattern.test(lower))) {
|
|
2538
|
+
return rule.intent;
|
|
2539
|
+
}
|
|
2537
2540
|
}
|
|
2538
2541
|
return "implementation" /* IMPLEMENTATION */;
|
|
2539
2542
|
}
|
|
2543
|
+
var INTENT_RULES, INITIAL_RULE_COUNT, cachedSortedRules;
|
|
2540
2544
|
var init_intent_classifier = __esm({
|
|
2541
2545
|
"src/vectordb/intent-classifier.ts"() {
|
|
2542
2546
|
"use strict";
|
|
2547
|
+
INTENT_RULES = [
|
|
2548
|
+
// LOCATION intent (highest priority - most specific)
|
|
2549
|
+
{
|
|
2550
|
+
intent: "location" /* LOCATION */,
|
|
2551
|
+
priority: 3,
|
|
2552
|
+
patterns: [
|
|
2553
|
+
/where\s+(is|are|does|can\s+i\s+find)/,
|
|
2554
|
+
/find\s+the\s+/,
|
|
2555
|
+
/locate\s+/
|
|
2556
|
+
]
|
|
2557
|
+
},
|
|
2558
|
+
// CONCEPTUAL intent (medium priority)
|
|
2559
|
+
{
|
|
2560
|
+
intent: "conceptual" /* CONCEPTUAL */,
|
|
2561
|
+
priority: 2,
|
|
2562
|
+
patterns: [
|
|
2563
|
+
/how\s+does\s+.*\s+work/,
|
|
2564
|
+
/what\s+(is|are|does)/,
|
|
2565
|
+
/explain\s+/,
|
|
2566
|
+
/understand\s+/,
|
|
2567
|
+
/\b(process|workflow|architecture)\b/
|
|
2568
|
+
]
|
|
2569
|
+
},
|
|
2570
|
+
// IMPLEMENTATION intent (low priority - catches "how is X implemented")
|
|
2571
|
+
{
|
|
2572
|
+
intent: "implementation" /* IMPLEMENTATION */,
|
|
2573
|
+
priority: 1,
|
|
2574
|
+
patterns: [
|
|
2575
|
+
/how\s+(is|are)\s+.*\s+(implemented|built|coded)/,
|
|
2576
|
+
/implementation\s+of/,
|
|
2577
|
+
/source\s+code\s+for/
|
|
2578
|
+
]
|
|
2579
|
+
}
|
|
2580
|
+
];
|
|
2581
|
+
INITIAL_RULE_COUNT = INTENT_RULES.length;
|
|
2582
|
+
cachedSortedRules = null;
|
|
2543
2583
|
}
|
|
2544
2584
|
});
|
|
2545
2585
|
|
|
2546
|
-
// src/vectordb/
|
|
2586
|
+
// src/vectordb/boosting/types.ts
|
|
2587
|
+
var init_types2 = __esm({
|
|
2588
|
+
"src/vectordb/boosting/types.ts"() {
|
|
2589
|
+
"use strict";
|
|
2590
|
+
}
|
|
2591
|
+
});
|
|
2592
|
+
|
|
2593
|
+
// src/vectordb/boosting/strategies.ts
|
|
2547
2594
|
import path13 from "path";
|
|
2548
2595
|
function isDocumentationFile(filepath) {
|
|
2549
2596
|
const lower = filepath.toLowerCase();
|
|
@@ -2581,106 +2628,162 @@ function isUtilityFile(filepath) {
|
|
|
2581
2628
|
}
|
|
2582
2629
|
return false;
|
|
2583
2630
|
}
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
}
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2632
|
-
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2631
|
+
var PathBoostingStrategy, FilenameBoostingStrategy, FileTypeBoostingStrategy;
|
|
2632
|
+
var init_strategies = __esm({
|
|
2633
|
+
"src/vectordb/boosting/strategies.ts"() {
|
|
2634
|
+
"use strict";
|
|
2635
|
+
init_intent_classifier();
|
|
2636
|
+
PathBoostingStrategy = class {
|
|
2637
|
+
name = "path-matching";
|
|
2638
|
+
apply(query, filepath, baseScore) {
|
|
2639
|
+
const queryTokens = query.toLowerCase().split(/\s+/);
|
|
2640
|
+
const pathSegments = filepath.toLowerCase().split("/");
|
|
2641
|
+
let boostFactor = 1;
|
|
2642
|
+
for (const token of queryTokens) {
|
|
2643
|
+
if (token.length <= 2) continue;
|
|
2644
|
+
if (pathSegments.some((seg) => seg.includes(token))) {
|
|
2645
|
+
boostFactor *= 0.9;
|
|
2646
|
+
}
|
|
2647
|
+
}
|
|
2648
|
+
return baseScore * boostFactor;
|
|
2649
|
+
}
|
|
2650
|
+
};
|
|
2651
|
+
FilenameBoostingStrategy = class {
|
|
2652
|
+
name = "filename-matching";
|
|
2653
|
+
apply(query, filepath, baseScore) {
|
|
2654
|
+
const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
|
|
2655
|
+
const queryTokens = query.toLowerCase().split(/\s+/);
|
|
2656
|
+
let boostFactor = 1;
|
|
2657
|
+
for (const token of queryTokens) {
|
|
2658
|
+
if (token.length <= 2) continue;
|
|
2659
|
+
if (filename === token) {
|
|
2660
|
+
boostFactor *= 0.7;
|
|
2661
|
+
} else if (filename.includes(token)) {
|
|
2662
|
+
boostFactor *= 0.8;
|
|
2663
|
+
}
|
|
2664
|
+
}
|
|
2665
|
+
return baseScore * boostFactor;
|
|
2666
|
+
}
|
|
2667
|
+
};
|
|
2668
|
+
FileTypeBoostingStrategy = class {
|
|
2669
|
+
constructor(intent) {
|
|
2670
|
+
this.intent = intent;
|
|
2671
|
+
}
|
|
2672
|
+
name = "file-type";
|
|
2673
|
+
apply(query, filepath, baseScore) {
|
|
2674
|
+
switch (this.intent) {
|
|
2675
|
+
case "location" /* LOCATION */:
|
|
2676
|
+
return this.applyLocationBoosting(query, filepath, baseScore);
|
|
2677
|
+
case "conceptual" /* CONCEPTUAL */:
|
|
2678
|
+
return this.applyConceptualBoosting(query, filepath, baseScore);
|
|
2679
|
+
case "implementation" /* IMPLEMENTATION */:
|
|
2680
|
+
return this.applyImplementationBoosting(query, filepath, baseScore);
|
|
2681
|
+
default:
|
|
2682
|
+
return baseScore;
|
|
2683
|
+
}
|
|
2684
|
+
}
|
|
2685
|
+
applyLocationBoosting(_query, filepath, score) {
|
|
2686
|
+
if (isTestFile(filepath)) {
|
|
2687
|
+
score *= 1.1;
|
|
2688
|
+
}
|
|
2689
|
+
return score;
|
|
2690
|
+
}
|
|
2691
|
+
applyConceptualBoosting(_query, filepath, score) {
|
|
2692
|
+
if (isDocumentationFile(filepath)) {
|
|
2693
|
+
score *= 0.65;
|
|
2694
|
+
const lower = filepath.toLowerCase();
|
|
2695
|
+
if (lower.includes("architecture") || lower.includes("workflow") || lower.includes("flow")) {
|
|
2696
|
+
score *= 0.9;
|
|
2697
|
+
}
|
|
2698
|
+
}
|
|
2699
|
+
if (isUtilityFile(filepath)) {
|
|
2700
|
+
score *= 0.95;
|
|
2701
|
+
}
|
|
2702
|
+
return score;
|
|
2703
|
+
}
|
|
2704
|
+
applyImplementationBoosting(_query, filepath, score) {
|
|
2705
|
+
if (isTestFile(filepath)) {
|
|
2706
|
+
score *= 1.1;
|
|
2707
|
+
}
|
|
2708
|
+
return score;
|
|
2709
|
+
}
|
|
2710
|
+
};
|
|
2647
2711
|
}
|
|
2648
|
-
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2712
|
+
});
|
|
2713
|
+
|
|
2714
|
+
// src/vectordb/boosting/composer.ts
|
|
2715
|
+
var BoostingComposer;
|
|
2716
|
+
var init_composer = __esm({
|
|
2717
|
+
"src/vectordb/boosting/composer.ts"() {
|
|
2718
|
+
"use strict";
|
|
2719
|
+
BoostingComposer = class {
|
|
2720
|
+
strategies = [];
|
|
2721
|
+
/**
|
|
2722
|
+
* Add a boosting strategy to the pipeline.
|
|
2723
|
+
* Strategies are applied in the order they are added.
|
|
2724
|
+
*
|
|
2725
|
+
* @param strategy - The strategy to add
|
|
2726
|
+
* @returns This composer for chaining
|
|
2727
|
+
*/
|
|
2728
|
+
addStrategy(strategy) {
|
|
2729
|
+
this.strategies.push(strategy);
|
|
2730
|
+
return this;
|
|
2655
2731
|
}
|
|
2656
|
-
|
|
2732
|
+
/**
|
|
2733
|
+
* Apply all strategies to a base score.
|
|
2734
|
+
*
|
|
2735
|
+
* @param query - The search query
|
|
2736
|
+
* @param filepath - The file path being scored
|
|
2737
|
+
* @param baseScore - The initial score from vector similarity
|
|
2738
|
+
* @returns The final boosted score after all strategies
|
|
2739
|
+
*/
|
|
2740
|
+
apply(query, filepath, baseScore) {
|
|
2741
|
+
let score = baseScore;
|
|
2742
|
+
for (const strategy of this.strategies) {
|
|
2743
|
+
score = strategy.apply(query, filepath, score);
|
|
2744
|
+
}
|
|
2745
|
+
return score;
|
|
2746
|
+
}
|
|
2747
|
+
/**
|
|
2748
|
+
* Get the names of all strategies in this composer.
|
|
2749
|
+
* Useful for debugging and logging.
|
|
2750
|
+
*/
|
|
2751
|
+
getStrategyNames() {
|
|
2752
|
+
return this.strategies.map((s) => s.name);
|
|
2753
|
+
}
|
|
2754
|
+
/**
|
|
2755
|
+
* Get the number of strategies in this composer.
|
|
2756
|
+
*/
|
|
2757
|
+
getStrategyCount() {
|
|
2758
|
+
return this.strategies.length;
|
|
2759
|
+
}
|
|
2760
|
+
/**
|
|
2761
|
+
* Clear all strategies from this composer.
|
|
2762
|
+
*/
|
|
2763
|
+
clear() {
|
|
2764
|
+
this.strategies = [];
|
|
2765
|
+
}
|
|
2766
|
+
};
|
|
2657
2767
|
}
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2768
|
+
});
|
|
2769
|
+
|
|
2770
|
+
// src/vectordb/boosting/index.ts
|
|
2771
|
+
var init_boosting = __esm({
|
|
2772
|
+
"src/vectordb/boosting/index.ts"() {
|
|
2773
|
+
"use strict";
|
|
2774
|
+
init_types2();
|
|
2775
|
+
init_strategies();
|
|
2776
|
+
init_composer();
|
|
2666
2777
|
}
|
|
2667
|
-
|
|
2668
|
-
|
|
2778
|
+
});
|
|
2779
|
+
|
|
2780
|
+
// src/vectordb/query.ts
|
|
2669
2781
|
function applyRelevanceBoosting(query, filepath, baseScore) {
|
|
2670
2782
|
if (!query) {
|
|
2671
2783
|
return baseScore;
|
|
2672
2784
|
}
|
|
2673
2785
|
const intent = classifyQueryIntent(query);
|
|
2674
|
-
|
|
2675
|
-
case "location" /* LOCATION */:
|
|
2676
|
-
return boostForLocationIntent(query, filepath, baseScore);
|
|
2677
|
-
case "conceptual" /* CONCEPTUAL */:
|
|
2678
|
-
return boostForConceptualIntent(query, filepath, baseScore);
|
|
2679
|
-
case "implementation" /* IMPLEMENTATION */:
|
|
2680
|
-
return boostForImplementationIntent(query, filepath, baseScore);
|
|
2681
|
-
default:
|
|
2682
|
-
return boostForImplementationIntent(query, filepath, baseScore);
|
|
2683
|
-
}
|
|
2786
|
+
return BOOSTING_COMPOSERS[intent].apply(query, filepath, baseScore);
|
|
2684
2787
|
}
|
|
2685
2788
|
function dbRecordToSearchResult(r, query) {
|
|
2686
2789
|
const baseScore = r._distance ?? 0;
|
|
@@ -2862,6 +2965,7 @@ async function querySymbols(table, options) {
|
|
|
2862
2965
|
throw wrapError(error, "Failed to query symbols");
|
|
2863
2966
|
}
|
|
2864
2967
|
}
|
|
2968
|
+
var PATH_STRATEGY, FILENAME_STRATEGY, FILE_TYPE_STRATEGIES, BOOSTING_COMPOSERS;
|
|
2865
2969
|
var init_query = __esm({
|
|
2866
2970
|
"src/vectordb/query.ts"() {
|
|
2867
2971
|
"use strict";
|
|
@@ -2869,6 +2973,19 @@ var init_query = __esm({
|
|
|
2869
2973
|
init_errors();
|
|
2870
2974
|
init_relevance();
|
|
2871
2975
|
init_intent_classifier();
|
|
2976
|
+
init_boosting();
|
|
2977
|
+
PATH_STRATEGY = new PathBoostingStrategy();
|
|
2978
|
+
FILENAME_STRATEGY = new FilenameBoostingStrategy();
|
|
2979
|
+
FILE_TYPE_STRATEGIES = {
|
|
2980
|
+
["location" /* LOCATION */]: new FileTypeBoostingStrategy("location" /* LOCATION */),
|
|
2981
|
+
["conceptual" /* CONCEPTUAL */]: new FileTypeBoostingStrategy("conceptual" /* CONCEPTUAL */),
|
|
2982
|
+
["implementation" /* IMPLEMENTATION */]: new FileTypeBoostingStrategy("implementation" /* IMPLEMENTATION */)
|
|
2983
|
+
};
|
|
2984
|
+
BOOSTING_COMPOSERS = {
|
|
2985
|
+
["location" /* LOCATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["location" /* LOCATION */]),
|
|
2986
|
+
["conceptual" /* CONCEPTUAL */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["conceptual" /* CONCEPTUAL */]),
|
|
2987
|
+
["implementation" /* IMPLEMENTATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["implementation" /* IMPLEMENTATION */])
|
|
2988
|
+
};
|
|
2872
2989
|
}
|
|
2873
2990
|
});
|
|
2874
2991
|
|
|
@@ -3793,6 +3910,22 @@ var init_change_detector = __esm({
|
|
|
3793
3910
|
}
|
|
3794
3911
|
});
|
|
3795
3912
|
|
|
3913
|
+
// src/utils/result.ts
|
|
3914
|
+
function Ok(value) {
|
|
3915
|
+
return { ok: true, value };
|
|
3916
|
+
}
|
|
3917
|
+
function Err(error) {
|
|
3918
|
+
return { ok: false, error };
|
|
3919
|
+
}
|
|
3920
|
+
function isOk(result) {
|
|
3921
|
+
return result.ok;
|
|
3922
|
+
}
|
|
3923
|
+
var init_result = __esm({
|
|
3924
|
+
"src/utils/result.ts"() {
|
|
3925
|
+
"use strict";
|
|
3926
|
+
}
|
|
3927
|
+
});
|
|
3928
|
+
|
|
3796
3929
|
// src/indexer/incremental.ts
|
|
3797
3930
|
import fs16 from "fs/promises";
|
|
3798
3931
|
async function processFileContent(filepath, content, embeddings, config, verbose) {
|
|
@@ -3874,36 +4007,29 @@ async function indexSingleFile(filepath, vectorDB, embeddings, config, options =
|
|
|
3874
4007
|
console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
|
|
3875
4008
|
}
|
|
3876
4009
|
}
|
|
4010
|
+
async function processSingleFileForIndexing(filepath, embeddings, config, verbose) {
|
|
4011
|
+
try {
|
|
4012
|
+
const stats = await fs16.stat(filepath);
|
|
4013
|
+
const content = await fs16.readFile(filepath, "utf-8");
|
|
4014
|
+
const result = await processFileContent(filepath, content, embeddings, config, verbose);
|
|
4015
|
+
return Ok({
|
|
4016
|
+
filepath,
|
|
4017
|
+
result,
|
|
4018
|
+
mtime: stats.mtimeMs
|
|
4019
|
+
});
|
|
4020
|
+
} catch (error) {
|
|
4021
|
+
return Err(`Failed to process ${filepath}: ${error}`);
|
|
4022
|
+
}
|
|
4023
|
+
}
|
|
3877
4024
|
async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, options = {}) {
|
|
3878
4025
|
const { verbose } = options;
|
|
3879
4026
|
let processedCount = 0;
|
|
3880
4027
|
const manifestEntries = [];
|
|
3881
4028
|
for (const filepath of filepaths) {
|
|
3882
|
-
|
|
3883
|
-
|
|
3884
|
-
|
|
3885
|
-
|
|
3886
|
-
fileMtime = stats.mtimeMs;
|
|
3887
|
-
content = await fs16.readFile(filepath, "utf-8");
|
|
3888
|
-
} catch (error) {
|
|
3889
|
-
if (verbose) {
|
|
3890
|
-
console.error(`[Lien] File not readable: ${filepath}`);
|
|
3891
|
-
}
|
|
3892
|
-
try {
|
|
3893
|
-
await vectorDB.deleteByFile(filepath);
|
|
3894
|
-
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
3895
|
-
await manifest.removeFile(filepath);
|
|
3896
|
-
} catch (error2) {
|
|
3897
|
-
if (verbose) {
|
|
3898
|
-
console.error(`[Lien] Note: ${filepath} not in index`);
|
|
3899
|
-
}
|
|
3900
|
-
}
|
|
3901
|
-
processedCount++;
|
|
3902
|
-
continue;
|
|
3903
|
-
}
|
|
3904
|
-
try {
|
|
3905
|
-
const result = await processFileContent(filepath, content, embeddings, config, verbose || false);
|
|
3906
|
-
if (result === null) {
|
|
4029
|
+
const result = await processSingleFileForIndexing(filepath, embeddings, config, verbose || false);
|
|
4030
|
+
if (isOk(result)) {
|
|
4031
|
+
const { result: processResult, mtime } = result.value;
|
|
4032
|
+
if (processResult === null) {
|
|
3907
4033
|
try {
|
|
3908
4034
|
await vectorDB.deleteByFile(filepath);
|
|
3909
4035
|
} catch (error) {
|
|
@@ -3911,7 +4037,7 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
|
|
|
3911
4037
|
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
3912
4038
|
await manifest.updateFile(filepath, {
|
|
3913
4039
|
filepath,
|
|
3914
|
-
lastModified:
|
|
4040
|
+
lastModified: mtime,
|
|
3915
4041
|
chunkCount: 0
|
|
3916
4042
|
});
|
|
3917
4043
|
processedCount++;
|
|
@@ -3922,21 +4048,33 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
|
|
|
3922
4048
|
} catch (error) {
|
|
3923
4049
|
}
|
|
3924
4050
|
await vectorDB.insertBatch(
|
|
3925
|
-
|
|
3926
|
-
|
|
3927
|
-
|
|
4051
|
+
processResult.vectors,
|
|
4052
|
+
processResult.chunks.map((c) => c.metadata),
|
|
4053
|
+
processResult.texts
|
|
3928
4054
|
);
|
|
3929
4055
|
manifestEntries.push({
|
|
3930
4056
|
filepath,
|
|
3931
|
-
chunkCount:
|
|
3932
|
-
mtime
|
|
4057
|
+
chunkCount: processResult.chunkCount,
|
|
4058
|
+
mtime
|
|
3933
4059
|
});
|
|
3934
4060
|
if (verbose) {
|
|
3935
|
-
console.error(`[Lien] \u2713 Updated ${filepath} (${
|
|
4061
|
+
console.error(`[Lien] \u2713 Updated ${filepath} (${processResult.chunkCount} chunks)`);
|
|
4062
|
+
}
|
|
4063
|
+
processedCount++;
|
|
4064
|
+
} else {
|
|
4065
|
+
if (verbose) {
|
|
4066
|
+
console.error(`[Lien] ${result.error}`);
|
|
4067
|
+
}
|
|
4068
|
+
try {
|
|
4069
|
+
await vectorDB.deleteByFile(filepath);
|
|
4070
|
+
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
4071
|
+
await manifest.removeFile(filepath);
|
|
4072
|
+
} catch (error) {
|
|
4073
|
+
if (verbose) {
|
|
4074
|
+
console.error(`[Lien] Note: ${filepath} not in index`);
|
|
4075
|
+
}
|
|
3936
4076
|
}
|
|
3937
4077
|
processedCount++;
|
|
3938
|
-
} catch (error) {
|
|
3939
|
-
console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
|
|
3940
4078
|
}
|
|
3941
4079
|
}
|
|
3942
4080
|
if (manifestEntries.length > 0) {
|
|
@@ -3959,6 +4097,7 @@ var init_incremental = __esm({
|
|
|
3959
4097
|
init_schema();
|
|
3960
4098
|
init_manifest();
|
|
3961
4099
|
init_constants();
|
|
4100
|
+
init_result();
|
|
3962
4101
|
}
|
|
3963
4102
|
});
|
|
3964
4103
|
|
|
@@ -4044,6 +4183,99 @@ var init_loading_messages = __esm({
|
|
|
4044
4183
|
}
|
|
4045
4184
|
});
|
|
4046
4185
|
|
|
4186
|
+
// src/indexer/progress-tracker.ts
|
|
4187
|
+
var IndexingProgressTracker;
|
|
4188
|
+
var init_progress_tracker = __esm({
|
|
4189
|
+
"src/indexer/progress-tracker.ts"() {
|
|
4190
|
+
"use strict";
|
|
4191
|
+
init_loading_messages();
|
|
4192
|
+
IndexingProgressTracker = class _IndexingProgressTracker {
|
|
4193
|
+
processedFiles = 0;
|
|
4194
|
+
totalFiles;
|
|
4195
|
+
wittyMessage;
|
|
4196
|
+
spinner;
|
|
4197
|
+
updateInterval;
|
|
4198
|
+
// Configuration constants
|
|
4199
|
+
static SPINNER_UPDATE_INTERVAL_MS = 200;
|
|
4200
|
+
// How often to update spinner
|
|
4201
|
+
static MESSAGE_ROTATION_INTERVAL_MS = 8e3;
|
|
4202
|
+
// How often to rotate message
|
|
4203
|
+
constructor(totalFiles, spinner) {
|
|
4204
|
+
this.totalFiles = totalFiles;
|
|
4205
|
+
this.spinner = spinner;
|
|
4206
|
+
this.wittyMessage = getIndexingMessage();
|
|
4207
|
+
}
|
|
4208
|
+
/**
|
|
4209
|
+
* Start the progress tracker.
|
|
4210
|
+
* Sets up periodic updates for spinner and message rotation.
|
|
4211
|
+
*
|
|
4212
|
+
* Safe to call multiple times - will not create duplicate intervals.
|
|
4213
|
+
*/
|
|
4214
|
+
start() {
|
|
4215
|
+
if (this.updateInterval) {
|
|
4216
|
+
return;
|
|
4217
|
+
}
|
|
4218
|
+
const MESSAGE_ROTATION_TICKS = Math.floor(
|
|
4219
|
+
_IndexingProgressTracker.MESSAGE_ROTATION_INTERVAL_MS / _IndexingProgressTracker.SPINNER_UPDATE_INTERVAL_MS
|
|
4220
|
+
);
|
|
4221
|
+
let spinnerTick = 0;
|
|
4222
|
+
this.updateInterval = setInterval(() => {
|
|
4223
|
+
spinnerTick++;
|
|
4224
|
+
if (spinnerTick >= MESSAGE_ROTATION_TICKS) {
|
|
4225
|
+
this.wittyMessage = getIndexingMessage();
|
|
4226
|
+
spinnerTick = 0;
|
|
4227
|
+
}
|
|
4228
|
+
this.spinner.text = `${this.processedFiles}/${this.totalFiles} files | ${this.wittyMessage}`;
|
|
4229
|
+
}, _IndexingProgressTracker.SPINNER_UPDATE_INTERVAL_MS);
|
|
4230
|
+
}
|
|
4231
|
+
/**
|
|
4232
|
+
* Increment the count of processed files.
|
|
4233
|
+
*
|
|
4234
|
+
* Safe for async operations in Node.js's single-threaded event loop.
|
|
4235
|
+
* Note: Not thread-safe for true concurrent operations (e.g., worker threads).
|
|
4236
|
+
*/
|
|
4237
|
+
incrementFiles() {
|
|
4238
|
+
this.processedFiles++;
|
|
4239
|
+
}
|
|
4240
|
+
/**
|
|
4241
|
+
* Set a custom message (e.g., for special operations like embedding generation).
|
|
4242
|
+
* The message will be displayed until the next automatic rotation.
|
|
4243
|
+
*/
|
|
4244
|
+
setMessage(message) {
|
|
4245
|
+
this.wittyMessage = message;
|
|
4246
|
+
}
|
|
4247
|
+
/**
|
|
4248
|
+
* Stop the progress tracker and clean up intervals.
|
|
4249
|
+
* Must be called when indexing completes or fails.
|
|
4250
|
+
*/
|
|
4251
|
+
stop() {
|
|
4252
|
+
if (this.updateInterval) {
|
|
4253
|
+
clearInterval(this.updateInterval);
|
|
4254
|
+
this.updateInterval = void 0;
|
|
4255
|
+
}
|
|
4256
|
+
}
|
|
4257
|
+
/**
|
|
4258
|
+
* Get the current count of processed files.
|
|
4259
|
+
*/
|
|
4260
|
+
getProcessedCount() {
|
|
4261
|
+
return this.processedFiles;
|
|
4262
|
+
}
|
|
4263
|
+
/**
|
|
4264
|
+
* Get the total number of files to process.
|
|
4265
|
+
*/
|
|
4266
|
+
getTotalFiles() {
|
|
4267
|
+
return this.totalFiles;
|
|
4268
|
+
}
|
|
4269
|
+
/**
|
|
4270
|
+
* Get the current message being displayed.
|
|
4271
|
+
*/
|
|
4272
|
+
getCurrentMessage() {
|
|
4273
|
+
return this.wittyMessage;
|
|
4274
|
+
}
|
|
4275
|
+
};
|
|
4276
|
+
}
|
|
4277
|
+
});
|
|
4278
|
+
|
|
4047
4279
|
// src/indexer/index.ts
|
|
4048
4280
|
var indexer_exports = {};
|
|
4049
4281
|
__export(indexer_exports, {
|
|
@@ -4053,162 +4285,171 @@ import fs17 from "fs/promises";
|
|
|
4053
4285
|
import ora from "ora";
|
|
4054
4286
|
import chalk5 from "chalk";
|
|
4055
4287
|
import pLimit from "p-limit";
|
|
4056
|
-
async function
|
|
4057
|
-
const
|
|
4058
|
-
const
|
|
4059
|
-
|
|
4288
|
+
async function updateGitState(rootDir, vectorDB, manifest) {
|
|
4289
|
+
const { isGitAvailable: isGitAvailable2, isGitRepo: isGitRepo2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
|
|
4290
|
+
const { GitStateTracker: GitStateTracker2 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
|
|
4291
|
+
const gitAvailable = await isGitAvailable2();
|
|
4292
|
+
const isRepo = await isGitRepo2(rootDir);
|
|
4293
|
+
if (!gitAvailable || !isRepo) {
|
|
4294
|
+
return;
|
|
4295
|
+
}
|
|
4296
|
+
const gitTracker = new GitStateTracker2(rootDir, vectorDB.dbPath);
|
|
4297
|
+
await gitTracker.initialize();
|
|
4298
|
+
const gitState = gitTracker.getState();
|
|
4299
|
+
if (gitState) {
|
|
4300
|
+
await manifest.updateGitState(gitState);
|
|
4301
|
+
}
|
|
4302
|
+
}
|
|
4303
|
+
async function handleDeletions(deletedFiles, vectorDB, manifest, spinner) {
|
|
4304
|
+
if (deletedFiles.length === 0) {
|
|
4305
|
+
return;
|
|
4306
|
+
}
|
|
4307
|
+
spinner.start(`Removing ${deletedFiles.length} deleted files...`);
|
|
4308
|
+
let removedCount = 0;
|
|
4309
|
+
for (const filepath of deletedFiles) {
|
|
4310
|
+
try {
|
|
4311
|
+
await vectorDB.deleteByFile(filepath);
|
|
4312
|
+
await manifest.removeFile(filepath);
|
|
4313
|
+
removedCount++;
|
|
4314
|
+
} catch (err) {
|
|
4315
|
+
spinner.warn(
|
|
4316
|
+
`Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`
|
|
4317
|
+
);
|
|
4318
|
+
}
|
|
4319
|
+
}
|
|
4320
|
+
spinner.succeed(`Removed ${removedCount}/${deletedFiles.length} deleted files`);
|
|
4321
|
+
}
|
|
4322
|
+
async function handleUpdates(addedFiles, modifiedFiles, vectorDB, embeddings, config, options, spinner) {
|
|
4323
|
+
const filesToIndex = [...addedFiles, ...modifiedFiles];
|
|
4324
|
+
if (filesToIndex.length === 0) {
|
|
4325
|
+
return;
|
|
4326
|
+
}
|
|
4327
|
+
spinner.start(`Reindexing ${filesToIndex.length} changed files...`);
|
|
4328
|
+
const count = await indexMultipleFiles(
|
|
4329
|
+
filesToIndex,
|
|
4330
|
+
vectorDB,
|
|
4331
|
+
embeddings,
|
|
4332
|
+
config,
|
|
4333
|
+
{ verbose: options.verbose }
|
|
4334
|
+
);
|
|
4335
|
+
await writeVersionFile(vectorDB.dbPath);
|
|
4336
|
+
spinner.succeed(
|
|
4337
|
+
`Incremental reindex complete: ${count}/${filesToIndex.length} files indexed successfully`
|
|
4338
|
+
);
|
|
4339
|
+
}
|
|
4340
|
+
async function tryIncrementalIndex(rootDir, vectorDB, config, options, spinner) {
|
|
4341
|
+
spinner.text = "Checking for changes...";
|
|
4342
|
+
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
4343
|
+
const savedManifest = await manifest.load();
|
|
4344
|
+
if (!savedManifest) {
|
|
4345
|
+
return false;
|
|
4346
|
+
}
|
|
4347
|
+
const changes = await detectChanges(rootDir, vectorDB, config);
|
|
4348
|
+
if (changes.reason === "full") {
|
|
4349
|
+
spinner.text = "Full reindex required...";
|
|
4350
|
+
return false;
|
|
4351
|
+
}
|
|
4352
|
+
const totalChanges = changes.added.length + changes.modified.length;
|
|
4353
|
+
const totalDeleted = changes.deleted.length;
|
|
4354
|
+
if (totalChanges === 0 && totalDeleted === 0) {
|
|
4355
|
+
spinner.succeed("No changes detected - index is up to date!");
|
|
4356
|
+
return true;
|
|
4357
|
+
}
|
|
4358
|
+
spinner.succeed(
|
|
4359
|
+
`Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
|
|
4360
|
+
);
|
|
4361
|
+
spinner.start(getModelLoadingMessage());
|
|
4362
|
+
const embeddings = new LocalEmbeddings();
|
|
4363
|
+
await embeddings.initialize();
|
|
4364
|
+
spinner.succeed("Embedding model loaded");
|
|
4365
|
+
await handleDeletions(changes.deleted, vectorDB, manifest, spinner);
|
|
4366
|
+
await handleUpdates(changes.added, changes.modified, vectorDB, embeddings, config, options, spinner);
|
|
4367
|
+
await updateGitState(rootDir, vectorDB, manifest);
|
|
4368
|
+
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4369
|
+
return true;
|
|
4370
|
+
}
|
|
4371
|
+
async function performFullIndex(rootDir, vectorDB, config, options, spinner) {
|
|
4372
|
+
spinner.text = "Scanning codebase...";
|
|
4373
|
+
let files;
|
|
4374
|
+
if (isModernConfig(config) && config.frameworks.length > 0) {
|
|
4375
|
+
files = await scanCodebaseWithFrameworks(rootDir, config);
|
|
4376
|
+
} else if (isLegacyConfig(config)) {
|
|
4377
|
+
files = await scanCodebase({
|
|
4378
|
+
rootDir,
|
|
4379
|
+
includePatterns: config.indexing.include,
|
|
4380
|
+
excludePatterns: config.indexing.exclude
|
|
4381
|
+
});
|
|
4382
|
+
} else {
|
|
4383
|
+
files = await scanCodebase({
|
|
4384
|
+
rootDir,
|
|
4385
|
+
includePatterns: [],
|
|
4386
|
+
excludePatterns: []
|
|
4387
|
+
});
|
|
4388
|
+
}
|
|
4389
|
+
if (files.length === 0) {
|
|
4390
|
+
spinner.fail("No files found to index");
|
|
4391
|
+
return;
|
|
4392
|
+
}
|
|
4393
|
+
spinner.text = `Found ${files.length} files`;
|
|
4394
|
+
spinner.text = getModelLoadingMessage();
|
|
4395
|
+
const embeddings = new LocalEmbeddings();
|
|
4396
|
+
await embeddings.initialize();
|
|
4397
|
+
spinner.succeed("Embedding model loaded");
|
|
4398
|
+
const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
|
|
4399
|
+
const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
|
|
4400
|
+
const vectorDBBatchSize = 100;
|
|
4401
|
+
spinner.start(`Processing files with ${concurrency}x concurrency...`);
|
|
4402
|
+
const startTime = Date.now();
|
|
4403
|
+
let processedChunks = 0;
|
|
4404
|
+
const chunkAccumulator = [];
|
|
4405
|
+
const limit = pLimit(concurrency);
|
|
4406
|
+
const indexedFileEntries = [];
|
|
4407
|
+
const progressTracker = new IndexingProgressTracker(files.length, spinner);
|
|
4408
|
+
progressTracker.start();
|
|
4060
4409
|
try {
|
|
4061
|
-
|
|
4062
|
-
|
|
4063
|
-
|
|
4064
|
-
|
|
4065
|
-
|
|
4066
|
-
|
|
4067
|
-
|
|
4068
|
-
const manifest2 = new ManifestManager(vectorDB.dbPath);
|
|
4069
|
-
const savedManifest = await manifest2.load();
|
|
4070
|
-
if (savedManifest) {
|
|
4071
|
-
const changes = await detectChanges(rootDir, vectorDB, config);
|
|
4072
|
-
if (changes.reason !== "full") {
|
|
4073
|
-
const totalChanges = changes.added.length + changes.modified.length;
|
|
4074
|
-
const totalDeleted = changes.deleted.length;
|
|
4075
|
-
if (totalChanges === 0 && totalDeleted === 0) {
|
|
4076
|
-
spinner.succeed("No changes detected - index is up to date!");
|
|
4077
|
-
return;
|
|
4078
|
-
}
|
|
4079
|
-
spinner.succeed(
|
|
4080
|
-
`Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
|
|
4081
|
-
);
|
|
4082
|
-
spinner.start(getModelLoadingMessage());
|
|
4083
|
-
const embeddings2 = new LocalEmbeddings();
|
|
4084
|
-
await embeddings2.initialize();
|
|
4085
|
-
spinner.succeed("Embedding model loaded");
|
|
4086
|
-
if (totalDeleted > 0) {
|
|
4087
|
-
spinner.start(`Removing ${totalDeleted} deleted files...`);
|
|
4088
|
-
let removedCount = 0;
|
|
4089
|
-
for (const filepath of changes.deleted) {
|
|
4090
|
-
try {
|
|
4091
|
-
await vectorDB.deleteByFile(filepath);
|
|
4092
|
-
await manifest2.removeFile(filepath);
|
|
4093
|
-
removedCount++;
|
|
4094
|
-
} catch (err) {
|
|
4095
|
-
spinner.warn(`Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`);
|
|
4096
|
-
}
|
|
4097
|
-
}
|
|
4098
|
-
spinner.succeed(`Removed ${removedCount}/${totalDeleted} deleted files`);
|
|
4099
|
-
}
|
|
4100
|
-
if (totalChanges > 0) {
|
|
4101
|
-
spinner.start(`Reindexing ${totalChanges} changed files...`);
|
|
4102
|
-
const filesToIndex = [...changes.added, ...changes.modified];
|
|
4103
|
-
const count = await indexMultipleFiles(
|
|
4104
|
-
filesToIndex,
|
|
4105
|
-
vectorDB,
|
|
4106
|
-
embeddings2,
|
|
4107
|
-
config,
|
|
4108
|
-
{ verbose: options.verbose }
|
|
4109
|
-
);
|
|
4110
|
-
await writeVersionFile(vectorDB.dbPath);
|
|
4111
|
-
spinner.succeed(
|
|
4112
|
-
`Incremental reindex complete: ${count}/${totalChanges} files indexed successfully`
|
|
4113
|
-
);
|
|
4114
|
-
}
|
|
4115
|
-
const { isGitAvailable: isGitAvailable3, isGitRepo: isGitRepo3 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
|
|
4116
|
-
const { GitStateTracker: GitStateTracker3 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
|
|
4117
|
-
const gitAvailable2 = await isGitAvailable3();
|
|
4118
|
-
const isRepo2 = await isGitRepo3(rootDir);
|
|
4119
|
-
if (gitAvailable2 && isRepo2) {
|
|
4120
|
-
const gitTracker = new GitStateTracker3(rootDir, vectorDB.dbPath);
|
|
4121
|
-
await gitTracker.initialize();
|
|
4122
|
-
const gitState = gitTracker.getState();
|
|
4123
|
-
if (gitState) {
|
|
4124
|
-
await manifest2.updateGitState(gitState);
|
|
4125
|
-
}
|
|
4126
|
-
}
|
|
4127
|
-
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4128
|
-
return;
|
|
4129
|
-
}
|
|
4130
|
-
spinner.text = "Full reindex required...";
|
|
4410
|
+
let addChunksLock = null;
|
|
4411
|
+
let processingQueue = null;
|
|
4412
|
+
const processAccumulatedChunks = async () => {
|
|
4413
|
+
if (processingQueue) {
|
|
4414
|
+
processingQueue = processingQueue.then(() => doProcessChunks());
|
|
4415
|
+
} else {
|
|
4416
|
+
processingQueue = doProcessChunks();
|
|
4131
4417
|
}
|
|
4132
|
-
|
|
4133
|
-
spinner.text = "Force flag enabled, performing full reindex...";
|
|
4134
|
-
}
|
|
4135
|
-
spinner.text = "Scanning codebase...";
|
|
4136
|
-
let files;
|
|
4137
|
-
if (isModernConfig(config) && config.frameworks.length > 0) {
|
|
4138
|
-
files = await scanCodebaseWithFrameworks(rootDir, config);
|
|
4139
|
-
} else if (isLegacyConfig(config)) {
|
|
4140
|
-
files = await scanCodebase({
|
|
4141
|
-
rootDir,
|
|
4142
|
-
includePatterns: config.indexing.include,
|
|
4143
|
-
excludePatterns: config.indexing.exclude
|
|
4144
|
-
});
|
|
4145
|
-
} else {
|
|
4146
|
-
files = await scanCodebase({
|
|
4147
|
-
rootDir,
|
|
4148
|
-
includePatterns: [],
|
|
4149
|
-
excludePatterns: []
|
|
4150
|
-
});
|
|
4151
|
-
}
|
|
4152
|
-
if (files.length === 0) {
|
|
4153
|
-
spinner.fail("No files found to index");
|
|
4154
|
-
return;
|
|
4155
|
-
}
|
|
4156
|
-
spinner.text = `Found ${files.length} files`;
|
|
4157
|
-
spinner.text = getModelLoadingMessage();
|
|
4158
|
-
const embeddings = new LocalEmbeddings();
|
|
4159
|
-
await embeddings.initialize();
|
|
4160
|
-
spinner.succeed("Embedding model loaded");
|
|
4161
|
-
const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
|
|
4162
|
-
const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
|
|
4163
|
-
const vectorDBBatchSize = 100;
|
|
4164
|
-
spinner.start(`Processing files with ${concurrency}x concurrency...`);
|
|
4165
|
-
const startTime = Date.now();
|
|
4166
|
-
let processedFiles = 0;
|
|
4167
|
-
let processedChunks = 0;
|
|
4168
|
-
const chunkAccumulator = [];
|
|
4169
|
-
const limit = pLimit(concurrency);
|
|
4170
|
-
const indexedFileEntries = [];
|
|
4171
|
-
const progressState = {
|
|
4172
|
-
processedFiles: 0,
|
|
4173
|
-
totalFiles: files.length,
|
|
4174
|
-
wittyMessage: getIndexingMessage()
|
|
4418
|
+
return processingQueue;
|
|
4175
4419
|
};
|
|
4176
|
-
const
|
|
4177
|
-
|
|
4178
|
-
|
|
4179
|
-
|
|
4180
|
-
|
|
4181
|
-
|
|
4182
|
-
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
|
|
4187
|
-
|
|
4188
|
-
|
|
4189
|
-
|
|
4190
|
-
|
|
4191
|
-
|
|
4192
|
-
|
|
4193
|
-
|
|
4194
|
-
|
|
4195
|
-
|
|
4196
|
-
|
|
4197
|
-
|
|
4198
|
-
|
|
4199
|
-
|
|
4420
|
+
const doProcessChunks = async () => {
|
|
4421
|
+
if (chunkAccumulator.length === 0) {
|
|
4422
|
+
return;
|
|
4423
|
+
}
|
|
4424
|
+
const currentPromise = processingQueue;
|
|
4425
|
+
try {
|
|
4426
|
+
const toProcess = chunkAccumulator.splice(0, chunkAccumulator.length);
|
|
4427
|
+
for (let i = 0; i < toProcess.length; i += embeddingBatchSize) {
|
|
4428
|
+
const batch = toProcess.slice(i, Math.min(i + embeddingBatchSize, toProcess.length));
|
|
4429
|
+
progressTracker.setMessage(getEmbeddingMessage());
|
|
4430
|
+
const texts = batch.map((item) => item.content);
|
|
4431
|
+
const embeddingVectors = [];
|
|
4432
|
+
for (let j = 0; j < texts.length; j += EMBEDDING_MICRO_BATCH_SIZE) {
|
|
4433
|
+
const microBatch = texts.slice(j, Math.min(j + EMBEDDING_MICRO_BATCH_SIZE, texts.length));
|
|
4434
|
+
const microResults = await embeddings.embedBatch(microBatch);
|
|
4435
|
+
embeddingVectors.push(...microResults);
|
|
4436
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
4437
|
+
}
|
|
4438
|
+
processedChunks += batch.length;
|
|
4439
|
+
progressTracker.setMessage(`Inserting ${batch.length} chunks into vector space...`);
|
|
4440
|
+
await vectorDB.insertBatch(
|
|
4441
|
+
embeddingVectors,
|
|
4442
|
+
batch.map((item) => item.chunk.metadata),
|
|
4443
|
+
texts
|
|
4444
|
+
);
|
|
4200
4445
|
await new Promise((resolve) => setImmediate(resolve));
|
|
4201
4446
|
}
|
|
4202
|
-
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
|
|
4207
|
-
texts
|
|
4208
|
-
);
|
|
4209
|
-
await new Promise((resolve) => setImmediate(resolve));
|
|
4447
|
+
progressTracker.setMessage(getIndexingMessage());
|
|
4448
|
+
} finally {
|
|
4449
|
+
if (processingQueue === currentPromise) {
|
|
4450
|
+
processingQueue = null;
|
|
4451
|
+
}
|
|
4210
4452
|
}
|
|
4211
|
-
progressState.wittyMessage = getIndexingMessage();
|
|
4212
4453
|
};
|
|
4213
4454
|
const filePromises = files.map(
|
|
4214
4455
|
(file) => limit(async () => {
|
|
@@ -4226,73 +4467,91 @@ async function indexCodebase(options = {}) {
|
|
|
4226
4467
|
astFallback
|
|
4227
4468
|
});
|
|
4228
4469
|
if (chunks.length === 0) {
|
|
4229
|
-
|
|
4230
|
-
progressState.processedFiles = processedFiles;
|
|
4470
|
+
progressTracker.incrementFiles();
|
|
4231
4471
|
return;
|
|
4232
4472
|
}
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
|
|
4236
|
-
|
|
4473
|
+
{
|
|
4474
|
+
if (addChunksLock) {
|
|
4475
|
+
await addChunksLock;
|
|
4476
|
+
}
|
|
4477
|
+
let releaseAddLock;
|
|
4478
|
+
addChunksLock = new Promise((resolve) => {
|
|
4479
|
+
releaseAddLock = resolve;
|
|
4237
4480
|
});
|
|
4238
|
-
|
|
4239
|
-
|
|
4240
|
-
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4244
|
-
|
|
4245
|
-
|
|
4246
|
-
|
|
4247
|
-
|
|
4481
|
+
try {
|
|
4482
|
+
for (const chunk of chunks) {
|
|
4483
|
+
chunkAccumulator.push({
|
|
4484
|
+
chunk,
|
|
4485
|
+
content: chunk.content
|
|
4486
|
+
});
|
|
4487
|
+
}
|
|
4488
|
+
indexedFileEntries.push({
|
|
4489
|
+
filepath: file,
|
|
4490
|
+
chunkCount: chunks.length,
|
|
4491
|
+
mtime: stats.mtimeMs
|
|
4492
|
+
});
|
|
4493
|
+
progressTracker.incrementFiles();
|
|
4494
|
+
if (chunkAccumulator.length >= vectorDBBatchSize) {
|
|
4495
|
+
await processAccumulatedChunks();
|
|
4496
|
+
}
|
|
4497
|
+
} finally {
|
|
4498
|
+
releaseAddLock();
|
|
4499
|
+
addChunksLock = null;
|
|
4500
|
+
}
|
|
4248
4501
|
}
|
|
4249
4502
|
} catch (error) {
|
|
4250
4503
|
if (options.verbose) {
|
|
4251
4504
|
console.error(chalk5.yellow(`
|
|
4252
4505
|
\u26A0\uFE0F Skipping ${file}: ${error}`));
|
|
4253
4506
|
}
|
|
4254
|
-
|
|
4255
|
-
progressState.processedFiles = processedFiles;
|
|
4507
|
+
progressTracker.incrementFiles();
|
|
4256
4508
|
}
|
|
4257
4509
|
})
|
|
4258
4510
|
);
|
|
4259
4511
|
await Promise.all(filePromises);
|
|
4260
|
-
|
|
4512
|
+
progressTracker.setMessage("Processing final chunks...");
|
|
4261
4513
|
await processAccumulatedChunks();
|
|
4262
|
-
|
|
4263
|
-
|
|
4264
|
-
|
|
4265
|
-
|
|
4266
|
-
|
|
4267
|
-
|
|
4268
|
-
|
|
4269
|
-
|
|
4270
|
-
|
|
4271
|
-
|
|
4272
|
-
|
|
4273
|
-
|
|
4274
|
-
|
|
4275
|
-
|
|
4276
|
-
|
|
4277
|
-
|
|
4278
|
-
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4514
|
+
} finally {
|
|
4515
|
+
progressTracker.stop();
|
|
4516
|
+
}
|
|
4517
|
+
spinner.start("Saving index manifest...");
|
|
4518
|
+
const manifest = new ManifestManager(vectorDB.dbPath);
|
|
4519
|
+
await manifest.updateFiles(
|
|
4520
|
+
indexedFileEntries.map((entry) => ({
|
|
4521
|
+
filepath: entry.filepath,
|
|
4522
|
+
// Use actual file mtime for accurate change detection
|
|
4523
|
+
lastModified: entry.mtime,
|
|
4524
|
+
chunkCount: entry.chunkCount
|
|
4525
|
+
}))
|
|
4526
|
+
);
|
|
4527
|
+
await updateGitState(rootDir, vectorDB, manifest);
|
|
4528
|
+
spinner.succeed("Manifest saved");
|
|
4529
|
+
await writeVersionFile(vectorDB.dbPath);
|
|
4530
|
+
const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
4531
|
+
spinner.succeed(
|
|
4532
|
+
`Indexed ${progressTracker.getProcessedCount()} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
|
|
4533
|
+
);
|
|
4534
|
+
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4535
|
+
}
|
|
4536
|
+
async function indexCodebase(options = {}) {
|
|
4537
|
+
const rootDir = options.rootDir ?? process.cwd();
|
|
4538
|
+
const spinner = ora("Starting indexing process...").start();
|
|
4539
|
+
try {
|
|
4540
|
+
spinner.text = "Loading configuration...";
|
|
4541
|
+
const config = await configService.load(rootDir);
|
|
4542
|
+
spinner.text = "Initializing vector database...";
|
|
4543
|
+
const vectorDB = new VectorDB(rootDir);
|
|
4544
|
+
await vectorDB.initialize();
|
|
4545
|
+
if (!options.force) {
|
|
4546
|
+
const completed = await tryIncrementalIndex(rootDir, vectorDB, config, options, spinner);
|
|
4547
|
+
if (completed) {
|
|
4548
|
+
return;
|
|
4283
4549
|
}
|
|
4550
|
+
} else {
|
|
4551
|
+
spinner.text = "Force flag enabled, performing full reindex...";
|
|
4284
4552
|
}
|
|
4285
|
-
|
|
4286
|
-
await writeVersionFile(vectorDB.dbPath);
|
|
4287
|
-
const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
4288
|
-
spinner.succeed(
|
|
4289
|
-
`Indexed ${processedFiles} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
|
|
4290
|
-
);
|
|
4291
|
-
console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
|
|
4553
|
+
await performFullIndex(rootDir, vectorDB, config, options, spinner);
|
|
4292
4554
|
} catch (error) {
|
|
4293
|
-
if (updateInterval) {
|
|
4294
|
-
clearInterval(updateInterval);
|
|
4295
|
-
}
|
|
4296
4555
|
spinner.fail(`Indexing failed: ${error}`);
|
|
4297
4556
|
throw error;
|
|
4298
4557
|
}
|
|
@@ -4312,6 +4571,7 @@ var init_indexer = __esm({
|
|
|
4312
4571
|
init_incremental();
|
|
4313
4572
|
init_loading_messages();
|
|
4314
4573
|
init_constants();
|
|
4574
|
+
init_progress_tracker();
|
|
4315
4575
|
}
|
|
4316
4576
|
});
|
|
4317
4577
|
|
|
@@ -5405,9 +5665,12 @@ var FindSimilarSchema = z2.object({
|
|
|
5405
5665
|
|
|
5406
5666
|
// src/mcp/schemas/file.schema.ts
|
|
5407
5667
|
import { z as z3 } from "zod";
|
|
5408
|
-
var
|
|
5409
|
-
|
|
5410
|
-
"
|
|
5668
|
+
var GetFilesContextSchema = z3.object({
|
|
5669
|
+
filepaths: z3.union([
|
|
5670
|
+
z3.string().min(1, "Filepath cannot be empty"),
|
|
5671
|
+
z3.array(z3.string().min(1, "Filepath cannot be empty")).min(1, "Array must contain at least one filepath").max(50, "Maximum 50 files per request")
|
|
5672
|
+
]).describe(
|
|
5673
|
+
"Single filepath or array of filepaths (relative to workspace root).\n\nSingle file: 'src/components/Button.tsx'\nMultiple files: ['src/auth.ts', 'src/user.ts']\n\nMaximum 50 files per request for batch operations."
|
|
5411
5674
|
),
|
|
5412
5675
|
includeRelated: z3.boolean().default(true).describe(
|
|
5413
5676
|
"Include semantically related chunks from nearby code.\n\nDefault: true\n\nWhen enabled, also returns related code from other files that are semantically similar to the target file's contents."
|
|
@@ -5451,16 +5714,24 @@ Results include a relevance category (highly_relevant, relevant, loosely_related
|
|
|
5451
5714
|
Provide at least 10 characters of code to match against. Results include a relevance category for each match.`
|
|
5452
5715
|
),
|
|
5453
5716
|
toMCPToolSchema(
|
|
5454
|
-
|
|
5455
|
-
"
|
|
5456
|
-
`Get
|
|
5717
|
+
GetFilesContextSchema,
|
|
5718
|
+
"get_files_context",
|
|
5719
|
+
`Get context for one or more files including dependencies and test coverage.
|
|
5457
5720
|
|
|
5458
|
-
|
|
5459
|
-
- What the file does
|
|
5460
|
-
- What depends on it
|
|
5461
|
-
- Related test files (via testAssociations)
|
|
5721
|
+
MANDATORY: Call this BEFORE editing any file. Accepts single path or array of paths.
|
|
5462
5722
|
|
|
5463
|
-
|
|
5723
|
+
Single file:
|
|
5724
|
+
get_files_context({ filepaths: "src/auth.ts" })
|
|
5725
|
+
|
|
5726
|
+
Multiple files (batch):
|
|
5727
|
+
get_files_context({ filepaths: ["src/auth.ts", "src/user.ts"] })
|
|
5728
|
+
|
|
5729
|
+
Returns for each file:
|
|
5730
|
+
- All chunks and related code
|
|
5731
|
+
- testAssociations (which tests cover this file)
|
|
5732
|
+
- Relevance scoring
|
|
5733
|
+
|
|
5734
|
+
Batch calls are more efficient than multiple single-file calls.`
|
|
5464
5735
|
),
|
|
5465
5736
|
toMCPToolSchema(
|
|
5466
5737
|
ListFunctionsSchema,
|
|
@@ -5770,32 +6041,69 @@ async function startMCPServer(options) {
|
|
|
5770
6041
|
};
|
|
5771
6042
|
}
|
|
5772
6043
|
)(args);
|
|
5773
|
-
case "
|
|
6044
|
+
case "get_files_context":
|
|
5774
6045
|
return await wrapToolHandler(
|
|
5775
|
-
|
|
6046
|
+
GetFilesContextSchema,
|
|
5776
6047
|
async (validatedArgs) => {
|
|
5777
|
-
|
|
6048
|
+
const filepaths = Array.isArray(validatedArgs.filepaths) ? validatedArgs.filepaths : [validatedArgs.filepaths];
|
|
6049
|
+
const isSingleFile = !Array.isArray(validatedArgs.filepaths);
|
|
6050
|
+
log(`Getting context for: ${filepaths.join(", ")}`);
|
|
5778
6051
|
await checkAndReconnect();
|
|
5779
|
-
const
|
|
5780
|
-
const
|
|
5781
|
-
|
|
5782
|
-
|
|
6052
|
+
const fileEmbeddings = await Promise.all(filepaths.map((fp) => embeddings.embed(fp)));
|
|
6053
|
+
const allFileSearches = await Promise.all(
|
|
6054
|
+
fileEmbeddings.map(
|
|
6055
|
+
(embedding, i) => vectorDB.search(embedding, 50, filepaths[i])
|
|
6056
|
+
)
|
|
5783
6057
|
);
|
|
5784
|
-
|
|
5785
|
-
|
|
5786
|
-
|
|
5787
|
-
|
|
5788
|
-
const relatedOtherFiles = related.filter(
|
|
5789
|
-
(r) => !r.metadata.file.includes(validatedArgs.filepath) && !validatedArgs.filepath.includes(r.metadata.file)
|
|
6058
|
+
const fileChunksMap = filepaths.map((filepath, i) => {
|
|
6059
|
+
const allResults = allFileSearches[i];
|
|
6060
|
+
return allResults.filter(
|
|
6061
|
+
(r) => r.metadata.file.includes(filepath) || filepath.includes(r.metadata.file)
|
|
5790
6062
|
);
|
|
5791
|
-
|
|
6063
|
+
});
|
|
6064
|
+
let relatedChunksMap = [];
|
|
6065
|
+
if (validatedArgs.includeRelated) {
|
|
6066
|
+
const filesWithChunks = fileChunksMap.map((chunks, i) => ({ chunks, filepath: filepaths[i], index: i })).filter(({ chunks }) => chunks.length > 0);
|
|
6067
|
+
if (filesWithChunks.length > 0) {
|
|
6068
|
+
const relatedEmbeddings = await Promise.all(
|
|
6069
|
+
filesWithChunks.map(({ chunks }) => embeddings.embed(chunks[0].content))
|
|
6070
|
+
);
|
|
6071
|
+
const relatedSearches = await Promise.all(
|
|
6072
|
+
relatedEmbeddings.map(
|
|
6073
|
+
(embedding, i) => vectorDB.search(embedding, 5, filesWithChunks[i].chunks[0].content)
|
|
6074
|
+
)
|
|
6075
|
+
);
|
|
6076
|
+
relatedChunksMap = Array.from({ length: filepaths.length }, () => []);
|
|
6077
|
+
filesWithChunks.forEach(({ filepath, index }, i) => {
|
|
6078
|
+
const related = relatedSearches[i];
|
|
6079
|
+
relatedChunksMap[index] = related.filter(
|
|
6080
|
+
(r) => !r.metadata.file.includes(filepath) && !filepath.includes(r.metadata.file)
|
|
6081
|
+
);
|
|
6082
|
+
});
|
|
6083
|
+
}
|
|
6084
|
+
}
|
|
6085
|
+
const filesData = {};
|
|
6086
|
+
filepaths.forEach((filepath, i) => {
|
|
6087
|
+
const fileChunks = fileChunksMap[i];
|
|
6088
|
+
const relatedChunks = relatedChunksMap[i] || [];
|
|
6089
|
+
filesData[filepath] = {
|
|
6090
|
+
chunks: [...fileChunks, ...relatedChunks]
|
|
6091
|
+
};
|
|
6092
|
+
});
|
|
6093
|
+
log(`Found ${Object.values(filesData).reduce((sum, f) => sum + f.chunks.length, 0)} total chunks`);
|
|
6094
|
+
if (isSingleFile) {
|
|
6095
|
+
const filepath = filepaths[0];
|
|
6096
|
+
return {
|
|
6097
|
+
indexInfo: getIndexMetadata(),
|
|
6098
|
+
file: filepath,
|
|
6099
|
+
chunks: filesData[filepath].chunks
|
|
6100
|
+
};
|
|
6101
|
+
} else {
|
|
6102
|
+
return {
|
|
6103
|
+
indexInfo: getIndexMetadata(),
|
|
6104
|
+
files: filesData
|
|
6105
|
+
};
|
|
5792
6106
|
}
|
|
5793
|
-
log(`Found ${results.length} chunks`);
|
|
5794
|
-
return {
|
|
5795
|
-
indexInfo: getIndexMetadata(),
|
|
5796
|
-
file: validatedArgs.filepath,
|
|
5797
|
-
chunks: results
|
|
5798
|
-
};
|
|
5799
6107
|
}
|
|
5800
6108
|
)(args);
|
|
5801
6109
|
case "list_functions":
|