@liendev/lien 0.15.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2524,26 +2524,73 @@ var init_relevance = __esm({
2524
2524
  });
2525
2525
 
2526
2526
  // src/vectordb/intent-classifier.ts
/**
 * Return the intent rules ordered from highest to lowest priority.
 *
 * The ordering is computed once, from a copy of INTENT_RULES, and stored in
 * cachedSortedRules; every later call hands back that same array.
 *
 * @returns {Array} The cached rule list, sorted by descending `priority`.
 */
function getSortedRules() {
  if (cachedSortedRules !== null) {
    return cachedSortedRules;
  }
  const byPriorityDescending = (left, right) => right.priority - left.priority;
  cachedSortedRules = [...INTENT_RULES].sort(byPriorityDescending);
  return cachedSortedRules;
}
/**
 * Classify a search query into one of the known intents.
 *
 * The query is lower-cased and trimmed, then checked against the rules from
 * getSortedRules() in order; the first rule with any matching pattern wins.
 *
 * @param {string} query - Raw query text.
 * @returns {string} The matching rule's intent, or "implementation" when no
 *   rule matches.
 */
function classifyQueryIntent(query) {
  const normalized = query.toLowerCase().trim();
  const firstMatch = getSortedRules().find(
    (rule) => rule.patterns.some((pattern) => pattern.test(normalized))
  );
  if (firstMatch) {
    return firstMatch.intent;
  }
  // Nothing matched: fall back to the implementation intent.
  return "implementation" /* IMPLEMENTATION */;
}
2543
+ var INTENT_RULES, INITIAL_RULE_COUNT, cachedSortedRules;
2540
2544
  var init_intent_classifier = __esm({
2541
2545
  "src/vectordb/intent-classifier.ts"() {
2542
2546
  "use strict";
// Rule table for query-intent classification. Each entry pairs an intent with
// a numeric priority and the regular expressions that signal it. None of the
// patterns carry the `g`/`y` flag, so `.test()` on them is stateless.
INTENT_RULES = [
  // Priority 3 — "where is ...", "find the ...", "locate ..." phrasing.
  {
    intent: "location" /* LOCATION */,
    priority: 3,
    patterns: [/where\s+(is|are|does|can\s+i\s+find)/, /find\s+the\s+/, /locate\s+/]
  },
  // Priority 2 — explanatory phrasing and process/workflow/architecture terms.
  {
    intent: "conceptual" /* CONCEPTUAL */,
    priority: 2,
    patterns: [
      /how\s+does\s+.*\s+work/,
      /what\s+(is|are|does)/,
      /explain\s+/,
      /understand\s+/,
      /\b(process|workflow|architecture)\b/
    ]
  },
  // Priority 1 — explicit "how is X implemented" / "source code for" phrasing.
  {
    intent: "implementation" /* IMPLEMENTATION */,
    priority: 1,
    patterns: [
      /how\s+(is|are)\s+.*\s+(implemented|built|coded)/,
      /implementation\s+of/,
      /source\s+code\s+for/
    ]
  }
];
2581
+ INITIAL_RULE_COUNT = INTENT_RULES.length;
2582
+ cachedSortedRules = null;
2543
2583
  }
2544
2584
  });
2545
2585
 
2546
- // src/vectordb/query.ts
2586
+ // src/vectordb/boosting/types.ts
2587
+ var init_types2 = __esm({
2588
+ "src/vectordb/boosting/types.ts"() {
2589
+ "use strict";
2590
+ }
2591
+ });
2592
+
2593
+ // src/vectordb/boosting/strategies.ts
2547
2594
  import path13 from "path";
2548
2595
  function isDocumentationFile(filepath) {
2549
2596
  const lower = filepath.toLowerCase();
@@ -2581,106 +2628,162 @@ function isUtilityFile(filepath) {
2581
2628
  }
2582
2629
  return false;
2583
2630
  }
2584
- function boostPathRelevance(query, filepath, baseScore) {
2585
- const queryTokens = query.toLowerCase().split(/\s+/);
2586
- const pathSegments = filepath.toLowerCase().split("/");
2587
- let boostFactor = 1;
2588
- for (const token of queryTokens) {
2589
- if (token.length <= 2) continue;
2590
- if (pathSegments.some((seg) => seg.includes(token))) {
2591
- boostFactor *= 0.9;
2592
- }
2593
- }
2594
- return baseScore * boostFactor;
2595
- }
2596
- function boostFilenameRelevance(query, filepath, baseScore) {
2597
- const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2598
- const queryTokens = query.toLowerCase().split(/\s+/);
2599
- let boostFactor = 1;
2600
- for (const token of queryTokens) {
2601
- if (token.length <= 2) continue;
2602
- if (filename === token) {
2603
- boostFactor *= 0.7;
2604
- } else if (filename.includes(token)) {
2605
- boostFactor *= 0.8;
2606
- }
2607
- }
2608
- return baseScore * boostFactor;
2609
- }
2610
- function boostForLocationIntent(query, filepath, baseScore) {
2611
- let score = baseScore;
2612
- const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2613
- const queryTokens = query.toLowerCase().split(/\s+/);
2614
- for (const token of queryTokens) {
2615
- if (token.length <= 2) continue;
2616
- if (filename === token) {
2617
- score *= 0.6;
2618
- } else if (filename.includes(token)) {
2619
- score *= 0.7;
2620
- }
2621
- }
2622
- score = boostPathRelevance(query, filepath, score);
2623
- if (isTestFile(filepath)) {
2624
- score *= 1.1;
2625
- }
2626
- return score;
2627
- }
2628
- function boostForConceptualIntent(query, filepath, baseScore) {
2629
- let score = baseScore;
2630
- if (isDocumentationFile(filepath)) {
2631
- score *= 0.65;
2632
- const lower = filepath.toLowerCase();
2633
- if (lower.includes("architecture") || lower.includes("workflow") || lower.includes("flow")) {
2634
- score *= 0.9;
2635
- }
2636
- }
2637
- if (isUtilityFile(filepath)) {
2638
- score *= 1.05;
2639
- }
2640
- const filename = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
2641
- const queryTokens = query.toLowerCase().split(/\s+/);
2642
- for (const token of queryTokens) {
2643
- if (token.length <= 2) continue;
2644
- if (filename.includes(token)) {
2645
- score *= 0.9;
2646
- }
2631
+ var PathBoostingStrategy, FilenameBoostingStrategy, FileTypeBoostingStrategy;
2632
+ var init_strategies = __esm({
2633
+ "src/vectordb/boosting/strategies.ts"() {
2634
+ "use strict";
2635
+ init_intent_classifier();
// Strategy that scales a score down by 0.9 for each query word found in the
// file's path.
PathBoostingStrategy = class {
  name = "path-matching";
  /**
   * Scale `baseScore` by 0.9 once per qualifying query word.
   *
   * A word qualifies when it is longer than two characters and is a substring
   * of at least one "/"-separated segment of the lower-cased path.
   *
   * @param {string} query - Search query; split on whitespace.
   * @param {string} filepath - Path of the candidate file.
   * @param {number} baseScore - Score before this strategy.
   * @returns {number} The scaled score.
   */
  apply(query, filepath, baseScore) {
    const words = query.toLowerCase().split(/\s+/);
    const segments = filepath.toLowerCase().split("/");
    const appearsInPath = (word) => segments.some((segment) => segment.includes(word));
    const factor = words
      .filter((word) => word.length > 2)
      .reduce((running, word) => (appearsInPath(word) ? running * 0.9 : running), 1);
    return baseScore * factor;
  }
};
// Strategy that scales a score down when query words match the file's base
// name (extension stripped).
FilenameBoostingStrategy = class {
  name = "filename-matching";
  /**
   * Scale `baseScore` per query word: 0.7 when the word equals the lower-cased
   * base name, 0.8 when the base name merely contains it. Words of two
   * characters or fewer are ignored.
   *
   * @param {string} query - Search query; split on whitespace.
   * @param {string} filepath - Path of the candidate file.
   * @param {number} baseScore - Score before this strategy.
   * @returns {number} The scaled score.
   */
  apply(query, filepath, baseScore) {
    const stem = path13.basename(filepath, path13.extname(filepath)).toLowerCase();
    const words = query.toLowerCase().split(/\s+/);
    const weightFor = (word) => {
      if (word.length <= 2) {
        return 1;
      }
      if (stem === word) {
        return 0.7;
      }
      return stem.includes(word) ? 0.8 : 1;
    };
    let factor = 1;
    for (const word of words) {
      factor *= weightFor(word);
    }
    return baseScore * factor;
  }
};
// Strategy whose adjustment depends on the query intent given at construction
// and on the kind of file being scored (test, documentation, utility).
FileTypeBoostingStrategy = class {
  /**
   * @param {string} intent - One of "location", "conceptual", "implementation";
   *   any other value makes apply() return the score unchanged.
   */
  constructor(intent) {
    this.intent = intent;
  }
  name = "file-type";
  /**
   * Route to the intent-specific adjustment.
   *
   * @param {string} query - Search query (passed through; unused by the
   *   intent-specific methods).
   * @param {string} filepath - Path of the candidate file.
   * @param {number} baseScore - Score before this strategy.
   * @returns {number} The adjusted score.
   */
  apply(query, filepath, baseScore) {
    const { intent } = this;
    if (intent === "location" /* LOCATION */) {
      return this.applyLocationBoosting(query, filepath, baseScore);
    }
    if (intent === "conceptual" /* CONCEPTUAL */) {
      return this.applyConceptualBoosting(query, filepath, baseScore);
    }
    if (intent === "implementation" /* IMPLEMENTATION */) {
      return this.applyImplementationBoosting(query, filepath, baseScore);
    }
    return baseScore;
  }
  // Location intent: test files are multiplied by 1.1.
  applyLocationBoosting(_query, filepath, score) {
    return isTestFile(filepath) ? score * 1.1 : score;
  }
  // Conceptual intent: documentation files are multiplied by 0.65, and by a
  // further 0.9 when the path mentions architecture/workflow/flow; utility
  // files are multiplied by 0.95.
  applyConceptualBoosting(_query, filepath, score) {
    let adjusted = score;
    if (isDocumentationFile(filepath)) {
      adjusted *= 0.65;
      const lowerPath = filepath.toLowerCase();
      const topical = ["architecture", "workflow", "flow"].some((term) => lowerPath.includes(term));
      if (topical) {
        adjusted *= 0.9;
      }
    }
    if (isUtilityFile(filepath)) {
      adjusted *= 0.95;
    }
    return adjusted;
  }
  // Implementation intent: test files are multiplied by 1.1.
  applyImplementationBoosting(_query, filepath, score) {
    return isTestFile(filepath) ? score * 1.1 : score;
  }
};
2647
2711
  }
2648
- const pathSegments = filepath.toLowerCase().split(path13.sep);
2649
- for (const token of queryTokens) {
2650
- if (token.length <= 2) continue;
2651
- for (const segment of pathSegments) {
2652
- if (segment.includes(token)) {
2653
- score *= 0.95;
2654
- break;
2712
+ });
2713
+
2714
+ // src/vectordb/boosting/composer.ts
2715
+ var BoostingComposer;
2716
+ var init_composer = __esm({
2717
+ "src/vectordb/boosting/composer.ts"() {
2718
+ "use strict";
// Ordered pipeline of boosting strategies; each strategy receives the score
// produced by the one before it.
BoostingComposer = class {
  strategies = [];
  /**
   * Append a strategy to the end of the pipeline.
   *
   * @param strategy - Object exposing `name` and `apply(query, filepath, score)`.
   * @returns This composer, so calls can be chained.
   */
  addStrategy(strategy) {
    this.strategies.push(strategy);
    return this;
  }
  /**
   * Run every strategy, in insertion order, over a starting score.
   *
   * @param query - The search query
   * @param filepath - The file path being scored
   * @param baseScore - The score fed to the first strategy
   * @returns The score returned by the last strategy, or `baseScore` when the
   *   pipeline is empty
   */
  apply(query, filepath, baseScore) {
    const pipeline = this.strategies;
    let running = baseScore;
    for (let index = 0; index < pipeline.length; index += 1) {
      running = pipeline[index].apply(query, filepath, running);
    }
    return running;
  }
  /**
   * List the `name` of each strategy, in pipeline order.
   */
  getStrategyNames() {
    const names = [];
    for (const { name } of this.strategies) {
      names.push(name);
    }
    return names;
  }
  /**
   * Number of strategies currently in the pipeline.
   */
  getStrategyCount() {
    const { length } = this.strategies;
    return length;
  }
  /**
   * Drop every strategy by swapping in a fresh empty list.
   */
  clear() {
    this.strategies = [];
  }
};
2657
2767
  }
2658
- return score;
2659
- }
2660
- function boostForImplementationIntent(query, filepath, baseScore) {
2661
- let score = baseScore;
2662
- score = boostFilenameRelevance(query, filepath, score);
2663
- score = boostPathRelevance(query, filepath, score);
2664
- if (isTestFile(filepath)) {
2665
- score *= 0.9;
2768
+ });
2769
+
2770
+ // src/vectordb/boosting/index.ts
2771
+ var init_boosting = __esm({
2772
+ "src/vectordb/boosting/index.ts"() {
2773
+ "use strict";
2774
+ init_types2();
2775
+ init_strategies();
2776
+ init_composer();
2666
2777
  }
2667
- return score;
2668
- }
2778
+ });
2779
+
2780
+ // src/vectordb/query.ts
/**
 * Adjust a base score using the boosting pipeline registered for the query's
 * intent.
 *
 * @param {string} query - Search query; a falsy value skips boosting.
 * @param {string} filepath - Path of the candidate file.
 * @param {number} baseScore - Score before boosting.
 * @returns {number} `baseScore` unchanged when `query` is falsy, otherwise the
 *   result of the matching entry in BOOSTING_COMPOSERS.
 */
function applyRelevanceBoosting(query, filepath, baseScore) {
  if (query) {
    const composer = BOOSTING_COMPOSERS[classifyQueryIntent(query)];
    return composer.apply(query, filepath, baseScore);
  }
  return baseScore;
}
2685
2788
  function dbRecordToSearchResult(r, query) {
2686
2789
  const baseScore = r._distance ?? 0;
@@ -2862,6 +2965,7 @@ async function querySymbols(table, options) {
2862
2965
  throw wrapError(error, "Failed to query symbols");
2863
2966
  }
2864
2967
  }
2968
+ var PATH_STRATEGY, FILENAME_STRATEGY, FILE_TYPE_STRATEGIES, BOOSTING_COMPOSERS;
2865
2969
  var init_query = __esm({
2866
2970
  "src/vectordb/query.ts"() {
2867
2971
  "use strict";
@@ -2869,6 +2973,19 @@ var init_query = __esm({
2869
2973
  init_errors();
2870
2974
  init_relevance();
2871
2975
  init_intent_classifier();
2976
+ init_boosting();
2977
+ PATH_STRATEGY = new PathBoostingStrategy();
2978
+ FILENAME_STRATEGY = new FilenameBoostingStrategy();
2979
+ FILE_TYPE_STRATEGIES = {
2980
+ ["location" /* LOCATION */]: new FileTypeBoostingStrategy("location" /* LOCATION */),
2981
+ ["conceptual" /* CONCEPTUAL */]: new FileTypeBoostingStrategy("conceptual" /* CONCEPTUAL */),
2982
+ ["implementation" /* IMPLEMENTATION */]: new FileTypeBoostingStrategy("implementation" /* IMPLEMENTATION */)
2983
+ };
2984
+ BOOSTING_COMPOSERS = {
2985
+ ["location" /* LOCATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["location" /* LOCATION */]),
2986
+ ["conceptual" /* CONCEPTUAL */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["conceptual" /* CONCEPTUAL */]),
2987
+ ["implementation" /* IMPLEMENTATION */]: new BoostingComposer().addStrategy(PATH_STRATEGY).addStrategy(FILENAME_STRATEGY).addStrategy(FILE_TYPE_STRATEGIES["implementation" /* IMPLEMENTATION */])
2988
+ };
2872
2989
  }
2873
2990
  });
2874
2991
 
@@ -3793,6 +3910,22 @@ var init_change_detector = __esm({
3793
3910
  }
3794
3911
  });
3795
3912
 
3913
+ // src/utils/result.ts
/**
 * Wrap a value as a successful result.
 *
 * @param value - The payload to carry.
 * @returns An object of the form `{ ok: true, value }`.
 */
function Ok(value) {
  const success = { ok: true, value };
  return success;
}
/**
 * Wrap an error as a failed result.
 *
 * @param error - The error payload to carry.
 * @returns An object of the form `{ ok: false, error }`.
 */
function Err(error) {
  const failure = { ok: false, error };
  return failure;
}
/**
 * Report whether a result is the success variant.
 *
 * @param result - A result object carrying an `ok` property.
 * @returns The result's `ok` property, as stored.
 */
function isOk(result) {
  const { ok } = result;
  return ok;
}
3923
+ var init_result = __esm({
3924
+ "src/utils/result.ts"() {
3925
+ "use strict";
3926
+ }
3927
+ });
3928
+
3796
3929
  // src/indexer/incremental.ts
3797
3930
  import fs16 from "fs/promises";
3798
3931
  async function processFileContent(filepath, content, embeddings, config, verbose) {
@@ -3874,36 +4007,29 @@ async function indexSingleFile(filepath, vectorDB, embeddings, config, options =
3874
4007
  console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
3875
4008
  }
3876
4009
  }
4010
+ async function processSingleFileForIndexing(filepath, embeddings, config, verbose) {
4011
+ try {
4012
+ const stats = await fs16.stat(filepath);
4013
+ const content = await fs16.readFile(filepath, "utf-8");
4014
+ const result = await processFileContent(filepath, content, embeddings, config, verbose);
4015
+ return Ok({
4016
+ filepath,
4017
+ result,
4018
+ mtime: stats.mtimeMs
4019
+ });
4020
+ } catch (error) {
4021
+ return Err(`Failed to process ${filepath}: ${error}`);
4022
+ }
4023
+ }
3877
4024
  async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, options = {}) {
3878
4025
  const { verbose } = options;
3879
4026
  let processedCount = 0;
3880
4027
  const manifestEntries = [];
3881
4028
  for (const filepath of filepaths) {
3882
- let content;
3883
- let fileMtime;
3884
- try {
3885
- const stats = await fs16.stat(filepath);
3886
- fileMtime = stats.mtimeMs;
3887
- content = await fs16.readFile(filepath, "utf-8");
3888
- } catch (error) {
3889
- if (verbose) {
3890
- console.error(`[Lien] File not readable: ${filepath}`);
3891
- }
3892
- try {
3893
- await vectorDB.deleteByFile(filepath);
3894
- const manifest = new ManifestManager(vectorDB.dbPath);
3895
- await manifest.removeFile(filepath);
3896
- } catch (error2) {
3897
- if (verbose) {
3898
- console.error(`[Lien] Note: ${filepath} not in index`);
3899
- }
3900
- }
3901
- processedCount++;
3902
- continue;
3903
- }
3904
- try {
3905
- const result = await processFileContent(filepath, content, embeddings, config, verbose || false);
3906
- if (result === null) {
4029
+ const result = await processSingleFileForIndexing(filepath, embeddings, config, verbose || false);
4030
+ if (isOk(result)) {
4031
+ const { result: processResult, mtime } = result.value;
4032
+ if (processResult === null) {
3907
4033
  try {
3908
4034
  await vectorDB.deleteByFile(filepath);
3909
4035
  } catch (error) {
@@ -3911,7 +4037,7 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
3911
4037
  const manifest = new ManifestManager(vectorDB.dbPath);
3912
4038
  await manifest.updateFile(filepath, {
3913
4039
  filepath,
3914
- lastModified: fileMtime,
4040
+ lastModified: mtime,
3915
4041
  chunkCount: 0
3916
4042
  });
3917
4043
  processedCount++;
@@ -3922,21 +4048,33 @@ async function indexMultipleFiles(filepaths, vectorDB, embeddings, config, optio
3922
4048
  } catch (error) {
3923
4049
  }
3924
4050
  await vectorDB.insertBatch(
3925
- result.vectors,
3926
- result.chunks.map((c) => c.metadata),
3927
- result.texts
4051
+ processResult.vectors,
4052
+ processResult.chunks.map((c) => c.metadata),
4053
+ processResult.texts
3928
4054
  );
3929
4055
  manifestEntries.push({
3930
4056
  filepath,
3931
- chunkCount: result.chunkCount,
3932
- mtime: fileMtime
4057
+ chunkCount: processResult.chunkCount,
4058
+ mtime
3933
4059
  });
3934
4060
  if (verbose) {
3935
- console.error(`[Lien] \u2713 Updated ${filepath} (${result.chunkCount} chunks)`);
4061
+ console.error(`[Lien] \u2713 Updated ${filepath} (${processResult.chunkCount} chunks)`);
4062
+ }
4063
+ processedCount++;
4064
+ } else {
4065
+ if (verbose) {
4066
+ console.error(`[Lien] ${result.error}`);
4067
+ }
4068
+ try {
4069
+ await vectorDB.deleteByFile(filepath);
4070
+ const manifest = new ManifestManager(vectorDB.dbPath);
4071
+ await manifest.removeFile(filepath);
4072
+ } catch (error) {
4073
+ if (verbose) {
4074
+ console.error(`[Lien] Note: ${filepath} not in index`);
4075
+ }
3936
4076
  }
3937
4077
  processedCount++;
3938
- } catch (error) {
3939
- console.error(`[Lien] \u26A0\uFE0F Failed to index ${filepath}: ${error}`);
3940
4078
  }
3941
4079
  }
3942
4080
  if (manifestEntries.length > 0) {
@@ -3959,6 +4097,7 @@ var init_incremental = __esm({
3959
4097
  init_schema();
3960
4098
  init_manifest();
3961
4099
  init_constants();
4100
+ init_result();
3962
4101
  }
3963
4102
  });
3964
4103
 
@@ -4044,6 +4183,99 @@ var init_loading_messages = __esm({
4044
4183
  }
4045
4184
  });
4046
4185
 
4186
+ // src/indexer/progress-tracker.ts
4187
+ var IndexingProgressTracker;
4188
+ var init_progress_tracker = __esm({
4189
+ "src/indexer/progress-tracker.ts"() {
4190
+ "use strict";
4191
+ init_loading_messages();
// Keeps a spinner's text in sync with indexing progress: a processed/total
// file counter plus a status message that is rotated periodically.
IndexingProgressTracker = class _IndexingProgressTracker {
  processedFiles = 0;
  totalFiles;
  wittyMessage;
  spinner;
  updateInterval;
  // Milliseconds between spinner text refreshes.
  static SPINNER_UPDATE_INTERVAL_MS = 200;
  // Milliseconds between automatic message rotations.
  static MESSAGE_ROTATION_INTERVAL_MS = 8e3;
  /**
   * @param {number} totalFiles - Number of files expected to be processed.
   * @param spinner - Object whose `text` property is overwritten on each tick.
   */
  constructor(totalFiles, spinner) {
    this.totalFiles = totalFiles;
    this.spinner = spinner;
    this.wittyMessage = getIndexingMessage();
  }
  /**
   * Begin periodic spinner updates.
   *
   * Each tick rewrites the spinner text; after enough ticks to cover the
   * rotation interval, a new message is fetched first. Calling this while an
   * interval is already running does nothing.
   */
  start() {
    if (this.updateInterval) {
      return;
    }
    const {
      MESSAGE_ROTATION_INTERVAL_MS: rotationMs,
      SPINNER_UPDATE_INTERVAL_MS: tickMs
    } = _IndexingProgressTracker;
    const ticksPerRotation = Math.floor(rotationMs / tickMs);
    let ticksSinceRotation = 0;
    const onTick = () => {
      ticksSinceRotation += 1;
      if (ticksSinceRotation >= ticksPerRotation) {
        this.wittyMessage = getIndexingMessage();
        ticksSinceRotation = 0;
      }
      this.spinner.text = `${this.processedFiles}/${this.totalFiles} files | ${this.wittyMessage}`;
    };
    this.updateInterval = setInterval(onTick, tickMs);
  }
  /**
   * Add one to the processed-file counter.
   */
  incrementFiles() {
    this.processedFiles += 1;
  }
  /**
   * Replace the current message. It is shown from the next tick until the
   * next automatic rotation (or another call) overwrites it.
   */
  setMessage(message) {
    this.wittyMessage = message;
  }
  /**
   * Cancel the periodic updates, if any are running. Safe to call repeatedly.
   */
  stop() {
    if (!this.updateInterval) {
      return;
    }
    clearInterval(this.updateInterval);
    this.updateInterval = void 0;
  }
  /**
   * Number of files counted as processed so far.
   */
  getProcessedCount() {
    return this.processedFiles;
  }
  /**
   * Total file count supplied at construction.
   */
  getTotalFiles() {
    return this.totalFiles;
  }
  /**
   * Message that the next tick will display.
   */
  getCurrentMessage() {
    return this.wittyMessage;
  }
};
4276
+ }
4277
+ });
4278
+
4047
4279
  // src/indexer/index.ts
4048
4280
  var indexer_exports = {};
4049
4281
  __export(indexer_exports, {
@@ -4053,162 +4285,171 @@ import fs17 from "fs/promises";
4053
4285
  import ora from "ora";
4054
4286
  import chalk5 from "chalk";
4055
4287
  import pLimit from "p-limit";
4056
- async function indexCodebase(options = {}) {
4057
- const rootDir = options.rootDir ?? process.cwd();
4058
- const spinner = ora("Starting indexing process...").start();
4059
- let updateInterval;
/**
 * Record the repository's current git state in the manifest, when git applies.
 *
 * The git helpers are loaded lazily. Nothing is written when git is not
 * available, when `rootDir` is not a git repository, or when the tracker
 * reports no state.
 *
 * @param {string} rootDir - Project root to inspect.
 * @param vectorDB - Supplies `dbPath` for the tracker.
 * @param manifest - Receives the state via `updateGitState`.
 * @returns {Promise<void>}
 */
async function updateGitState(rootDir, vectorDB, manifest) {
  const { isGitAvailable: checkGitAvailable, isGitRepo: checkGitRepo } = await Promise.resolve().then(() => (init_utils(), utils_exports));
  const { GitStateTracker: TrackerClass } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
  const gitAvailable = await checkGitAvailable();
  const insideRepo = await checkGitRepo(rootDir);
  if (!(gitAvailable && insideRepo)) {
    return;
  }
  const tracker = new TrackerClass(rootDir, vectorDB.dbPath);
  await tracker.initialize();
  const currentState = tracker.getState();
  if (!currentState) {
    return;
  }
  await manifest.updateGitState(currentState);
}
/**
 * Remove deleted files from the vector database and the manifest.
 *
 * Files are handled one at a time; a failure on one file is reported through
 * `spinner.warn` and does not stop the rest. The final message reports how
 * many removals succeeded out of the total.
 *
 * @param {string[]} deletedFiles - Paths to remove; an empty list is a no-op.
 * @param vectorDB - Must expose `deleteByFile(filepath)`.
 * @param manifest - Must expose `removeFile(filepath)`.
 * @param spinner - Must expose `start`, `warn` and `succeed`.
 * @returns {Promise<void>}
 */
async function handleDeletions(deletedFiles, vectorDB, manifest, spinner) {
  const total = deletedFiles.length;
  if (total === 0) {
    return;
  }
  spinner.start(`Removing ${total} deleted files...`);
  let removed = 0;
  for (const target of deletedFiles) {
    try {
      await vectorDB.deleteByFile(target);
      await manifest.removeFile(target);
      removed += 1;
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      spinner.warn(`Failed to remove file "${target}": ${reason}`);
    }
  }
  spinner.succeed(`Removed ${removed}/${total} deleted files`);
}
/**
 * Reindex the added and modified files of an incremental run.
 *
 * Does nothing when both lists are empty. Otherwise the combined list (added
 * first, then modified) is passed to indexMultipleFiles, the version file is
 * rewritten, and the count returned by indexMultipleFiles is reported.
 *
 * @param {string[]} addedFiles - Newly detected files.
 * @param {string[]} modifiedFiles - Files whose content changed.
 * @param vectorDB - Target database; its `dbPath` locates the version file.
 * @param embeddings - Embedding provider handed to indexMultipleFiles.
 * @param config - Configuration handed to indexMultipleFiles.
 * @param options - Only `verbose` is read.
 * @param spinner - Must expose `start` and `succeed`.
 * @returns {Promise<void>}
 */
async function handleUpdates(addedFiles, modifiedFiles, vectorDB, embeddings, config, options, spinner) {
  const pending = [...addedFiles, ...modifiedFiles];
  const total = pending.length;
  if (total === 0) {
    return;
  }
  spinner.start(`Reindexing ${total} changed files...`);
  const { verbose } = options;
  const indexedCount = await indexMultipleFiles(pending, vectorDB, embeddings, config, { verbose });
  await writeVersionFile(vectorDB.dbPath);
  spinner.succeed(`Incremental reindex complete: ${indexedCount}/${total} files indexed successfully`);
}
/**
 * Try to bring the index up to date incrementally, using the saved manifest.
 *
 * Flow: load the manifest, detect changes, then remove deleted files, reindex
 * added/modified files, and refresh the stored git state.
 *
 * The embedding model is only constructed and initialized when there is at
 * least one added or modified file. A change set made up purely of deletions
 * never needs embeddings, so the model load is skipped for it.
 *
 * @param {string} rootDir - Project root.
 * @param vectorDB - Initialized vector database; `dbPath` locates the manifest.
 * @param config - Loaded configuration, passed to change detection and indexing.
 * @param options - Indexing options, forwarded to handleUpdates.
 * @param spinner - Progress spinner used for status output.
 * @returns {Promise<boolean>} `true` when the incremental path handled the run
 *   (including "nothing changed"); `false` when there is no saved manifest or
 *   change detection reports reason "full", so the caller must do a full index.
 */
async function tryIncrementalIndex(rootDir, vectorDB, config, options, spinner) {
  spinner.text = "Checking for changes...";
  const manifest = new ManifestManager(vectorDB.dbPath);
  const savedManifest = await manifest.load();
  if (!savedManifest) {
    return false;
  }
  const changes = await detectChanges(rootDir, vectorDB, config);
  if (changes.reason === "full") {
    spinner.text = "Full reindex required...";
    return false;
  }
  const totalChanges = changes.added.length + changes.modified.length;
  const totalDeleted = changes.deleted.length;
  if (totalChanges === 0 && totalDeleted === 0) {
    spinner.succeed("No changes detected - index is up to date!");
    return true;
  }
  spinner.succeed(
    `Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
  );
  // Load the model up front (before deletions, as before) but only when some
  // file will actually be embedded.
  let embeddings = null;
  if (totalChanges > 0) {
    spinner.start(getModelLoadingMessage());
    embeddings = new LocalEmbeddings();
    await embeddings.initialize();
    spinner.succeed("Embedding model loaded");
  }
  await handleDeletions(changes.deleted, vectorDB, manifest, spinner);
  if (embeddings !== null) {
    await handleUpdates(changes.added, changes.modified, vectorDB, embeddings, config, options, spinner);
  }
  await updateGitState(rootDir, vectorDB, manifest);
  console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
  return true;
}
4371
+ async function performFullIndex(rootDir, vectorDB, config, options, spinner) {
4372
+ spinner.text = "Scanning codebase...";
4373
+ let files;
4374
+ if (isModernConfig(config) && config.frameworks.length > 0) {
4375
+ files = await scanCodebaseWithFrameworks(rootDir, config);
4376
+ } else if (isLegacyConfig(config)) {
4377
+ files = await scanCodebase({
4378
+ rootDir,
4379
+ includePatterns: config.indexing.include,
4380
+ excludePatterns: config.indexing.exclude
4381
+ });
4382
+ } else {
4383
+ files = await scanCodebase({
4384
+ rootDir,
4385
+ includePatterns: [],
4386
+ excludePatterns: []
4387
+ });
4388
+ }
4389
+ if (files.length === 0) {
4390
+ spinner.fail("No files found to index");
4391
+ return;
4392
+ }
4393
+ spinner.text = `Found ${files.length} files`;
4394
+ spinner.text = getModelLoadingMessage();
4395
+ const embeddings = new LocalEmbeddings();
4396
+ await embeddings.initialize();
4397
+ spinner.succeed("Embedding model loaded");
4398
+ const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
4399
+ const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
4400
+ const vectorDBBatchSize = 100;
4401
+ spinner.start(`Processing files with ${concurrency}x concurrency...`);
4402
+ const startTime = Date.now();
4403
+ let processedChunks = 0;
4404
+ const chunkAccumulator = [];
4405
+ const limit = pLimit(concurrency);
4406
+ const indexedFileEntries = [];
4407
+ const progressTracker = new IndexingProgressTracker(files.length, spinner);
4408
+ progressTracker.start();
4060
4409
  try {
4061
- spinner.text = "Loading configuration...";
4062
- const config = await configService.load(rootDir);
4063
- spinner.text = "Initializing vector database...";
4064
- const vectorDB = new VectorDB(rootDir);
4065
- await vectorDB.initialize();
4066
- if (!options.force) {
4067
- spinner.text = "Checking for changes...";
4068
- const manifest2 = new ManifestManager(vectorDB.dbPath);
4069
- const savedManifest = await manifest2.load();
4070
- if (savedManifest) {
4071
- const changes = await detectChanges(rootDir, vectorDB, config);
4072
- if (changes.reason !== "full") {
4073
- const totalChanges = changes.added.length + changes.modified.length;
4074
- const totalDeleted = changes.deleted.length;
4075
- if (totalChanges === 0 && totalDeleted === 0) {
4076
- spinner.succeed("No changes detected - index is up to date!");
4077
- return;
4078
- }
4079
- spinner.succeed(
4080
- `Detected changes: ${totalChanges} files to index, ${totalDeleted} to remove (${changes.reason} detection)`
4081
- );
4082
- spinner.start(getModelLoadingMessage());
4083
- const embeddings2 = new LocalEmbeddings();
4084
- await embeddings2.initialize();
4085
- spinner.succeed("Embedding model loaded");
4086
- if (totalDeleted > 0) {
4087
- spinner.start(`Removing ${totalDeleted} deleted files...`);
4088
- let removedCount = 0;
4089
- for (const filepath of changes.deleted) {
4090
- try {
4091
- await vectorDB.deleteByFile(filepath);
4092
- await manifest2.removeFile(filepath);
4093
- removedCount++;
4094
- } catch (err) {
4095
- spinner.warn(`Failed to remove file "${filepath}": ${err instanceof Error ? err.message : String(err)}`);
4096
- }
4097
- }
4098
- spinner.succeed(`Removed ${removedCount}/${totalDeleted} deleted files`);
4099
- }
4100
- if (totalChanges > 0) {
4101
- spinner.start(`Reindexing ${totalChanges} changed files...`);
4102
- const filesToIndex = [...changes.added, ...changes.modified];
4103
- const count = await indexMultipleFiles(
4104
- filesToIndex,
4105
- vectorDB,
4106
- embeddings2,
4107
- config,
4108
- { verbose: options.verbose }
4109
- );
4110
- await writeVersionFile(vectorDB.dbPath);
4111
- spinner.succeed(
4112
- `Incremental reindex complete: ${count}/${totalChanges} files indexed successfully`
4113
- );
4114
- }
4115
- const { isGitAvailable: isGitAvailable3, isGitRepo: isGitRepo3 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
4116
- const { GitStateTracker: GitStateTracker3 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
4117
- const gitAvailable2 = await isGitAvailable3();
4118
- const isRepo2 = await isGitRepo3(rootDir);
4119
- if (gitAvailable2 && isRepo2) {
4120
- const gitTracker = new GitStateTracker3(rootDir, vectorDB.dbPath);
4121
- await gitTracker.initialize();
4122
- const gitState = gitTracker.getState();
4123
- if (gitState) {
4124
- await manifest2.updateGitState(gitState);
4125
- }
4126
- }
4127
- console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4128
- return;
4129
- }
4130
- spinner.text = "Full reindex required...";
4410
+ let addChunksLock = null;
4411
+ let processingQueue = null;
4412
+ const processAccumulatedChunks = async () => {
4413
+ if (processingQueue) {
4414
+ processingQueue = processingQueue.then(() => doProcessChunks());
4415
+ } else {
4416
+ processingQueue = doProcessChunks();
4131
4417
  }
4132
- } else {
4133
- spinner.text = "Force flag enabled, performing full reindex...";
4134
- }
4135
- spinner.text = "Scanning codebase...";
4136
- let files;
4137
- if (isModernConfig(config) && config.frameworks.length > 0) {
4138
- files = await scanCodebaseWithFrameworks(rootDir, config);
4139
- } else if (isLegacyConfig(config)) {
4140
- files = await scanCodebase({
4141
- rootDir,
4142
- includePatterns: config.indexing.include,
4143
- excludePatterns: config.indexing.exclude
4144
- });
4145
- } else {
4146
- files = await scanCodebase({
4147
- rootDir,
4148
- includePatterns: [],
4149
- excludePatterns: []
4150
- });
4151
- }
4152
- if (files.length === 0) {
4153
- spinner.fail("No files found to index");
4154
- return;
4155
- }
4156
- spinner.text = `Found ${files.length} files`;
4157
- spinner.text = getModelLoadingMessage();
4158
- const embeddings = new LocalEmbeddings();
4159
- await embeddings.initialize();
4160
- spinner.succeed("Embedding model loaded");
4161
- const concurrency = isModernConfig(config) ? config.core.concurrency : 4;
4162
- const embeddingBatchSize = isModernConfig(config) ? config.core.embeddingBatchSize : 50;
4163
- const vectorDBBatchSize = 100;
4164
- spinner.start(`Processing files with ${concurrency}x concurrency...`);
4165
- const startTime = Date.now();
4166
- let processedFiles = 0;
4167
- let processedChunks = 0;
4168
- const chunkAccumulator = [];
4169
- const limit = pLimit(concurrency);
4170
- const indexedFileEntries = [];
4171
- const progressState = {
4172
- processedFiles: 0,
4173
- totalFiles: files.length,
4174
- wittyMessage: getIndexingMessage()
4418
+ return processingQueue;
4175
4419
  };
4176
- const SPINNER_UPDATE_INTERVAL_MS = 200;
4177
- const MESSAGE_ROTATION_INTERVAL_MS = 8e3;
4178
- const MESSAGE_ROTATION_TICKS = Math.floor(MESSAGE_ROTATION_INTERVAL_MS / SPINNER_UPDATE_INTERVAL_MS);
4179
- let spinnerTick = 0;
4180
- updateInterval = setInterval(() => {
4181
- spinnerTick++;
4182
- if (spinnerTick >= MESSAGE_ROTATION_TICKS) {
4183
- progressState.wittyMessage = getIndexingMessage();
4184
- spinnerTick = 0;
4185
- }
4186
- spinner.text = `${progressState.processedFiles}/${progressState.totalFiles} files | ${progressState.wittyMessage}`;
4187
- }, SPINNER_UPDATE_INTERVAL_MS);
4188
- const processAccumulatedChunks = async () => {
4189
- if (chunkAccumulator.length === 0) return;
4190
- const toProcess = chunkAccumulator.splice(0, chunkAccumulator.length);
4191
- for (let i = 0; i < toProcess.length; i += embeddingBatchSize) {
4192
- const batch = toProcess.slice(i, Math.min(i + embeddingBatchSize, toProcess.length));
4193
- progressState.wittyMessage = getEmbeddingMessage();
4194
- const texts = batch.map((item) => item.content);
4195
- const embeddingVectors = [];
4196
- for (let j = 0; j < texts.length; j += EMBEDDING_MICRO_BATCH_SIZE) {
4197
- const microBatch = texts.slice(j, Math.min(j + EMBEDDING_MICRO_BATCH_SIZE, texts.length));
4198
- const microResults = await embeddings.embedBatch(microBatch);
4199
- embeddingVectors.push(...microResults);
4420
+ const doProcessChunks = async () => {
4421
+ if (chunkAccumulator.length === 0) {
4422
+ return;
4423
+ }
4424
+ const currentPromise = processingQueue;
4425
+ try {
4426
+ const toProcess = chunkAccumulator.splice(0, chunkAccumulator.length);
4427
+ for (let i = 0; i < toProcess.length; i += embeddingBatchSize) {
4428
+ const batch = toProcess.slice(i, Math.min(i + embeddingBatchSize, toProcess.length));
4429
+ progressTracker.setMessage(getEmbeddingMessage());
4430
+ const texts = batch.map((item) => item.content);
4431
+ const embeddingVectors = [];
4432
+ for (let j = 0; j < texts.length; j += EMBEDDING_MICRO_BATCH_SIZE) {
4433
+ const microBatch = texts.slice(j, Math.min(j + EMBEDDING_MICRO_BATCH_SIZE, texts.length));
4434
+ const microResults = await embeddings.embedBatch(microBatch);
4435
+ embeddingVectors.push(...microResults);
4436
+ await new Promise((resolve) => setImmediate(resolve));
4437
+ }
4438
+ processedChunks += batch.length;
4439
+ progressTracker.setMessage(`Inserting ${batch.length} chunks into vector space...`);
4440
+ await vectorDB.insertBatch(
4441
+ embeddingVectors,
4442
+ batch.map((item) => item.chunk.metadata),
4443
+ texts
4444
+ );
4200
4445
  await new Promise((resolve) => setImmediate(resolve));
4201
4446
  }
4202
- processedChunks += batch.length;
4203
- progressState.wittyMessage = `Inserting ${batch.length} chunks into vector space...`;
4204
- await vectorDB.insertBatch(
4205
- embeddingVectors,
4206
- batch.map((item) => item.chunk.metadata),
4207
- texts
4208
- );
4209
- await new Promise((resolve) => setImmediate(resolve));
4447
+ progressTracker.setMessage(getIndexingMessage());
4448
+ } finally {
4449
+ if (processingQueue === currentPromise) {
4450
+ processingQueue = null;
4451
+ }
4210
4452
  }
4211
- progressState.wittyMessage = getIndexingMessage();
4212
4453
  };
4213
4454
  const filePromises = files.map(
4214
4455
  (file) => limit(async () => {
@@ -4226,73 +4467,91 @@ async function indexCodebase(options = {}) {
4226
4467
  astFallback
4227
4468
  });
4228
4469
  if (chunks.length === 0) {
4229
- processedFiles++;
4230
- progressState.processedFiles = processedFiles;
4470
+ progressTracker.incrementFiles();
4231
4471
  return;
4232
4472
  }
4233
- for (const chunk of chunks) {
4234
- chunkAccumulator.push({
4235
- chunk,
4236
- content: chunk.content
4473
+ {
4474
+ if (addChunksLock) {
4475
+ await addChunksLock;
4476
+ }
4477
+ let releaseAddLock;
4478
+ addChunksLock = new Promise((resolve) => {
4479
+ releaseAddLock = resolve;
4237
4480
  });
4238
- }
4239
- indexedFileEntries.push({
4240
- filepath: file,
4241
- chunkCount: chunks.length,
4242
- mtime: stats.mtimeMs
4243
- });
4244
- processedFiles++;
4245
- progressState.processedFiles = processedFiles;
4246
- if (chunkAccumulator.length >= vectorDBBatchSize) {
4247
- await processAccumulatedChunks();
4481
+ try {
4482
+ for (const chunk of chunks) {
4483
+ chunkAccumulator.push({
4484
+ chunk,
4485
+ content: chunk.content
4486
+ });
4487
+ }
4488
+ indexedFileEntries.push({
4489
+ filepath: file,
4490
+ chunkCount: chunks.length,
4491
+ mtime: stats.mtimeMs
4492
+ });
4493
+ progressTracker.incrementFiles();
4494
+ if (chunkAccumulator.length >= vectorDBBatchSize) {
4495
+ await processAccumulatedChunks();
4496
+ }
4497
+ } finally {
4498
+ releaseAddLock();
4499
+ addChunksLock = null;
4500
+ }
4248
4501
  }
4249
4502
  } catch (error) {
4250
4503
  if (options.verbose) {
4251
4504
  console.error(chalk5.yellow(`
4252
4505
  \u26A0\uFE0F Skipping ${file}: ${error}`));
4253
4506
  }
4254
- processedFiles++;
4255
- progressState.processedFiles = processedFiles;
4507
+ progressTracker.incrementFiles();
4256
4508
  }
4257
4509
  })
4258
4510
  );
4259
4511
  await Promise.all(filePromises);
4260
- progressState.wittyMessage = "Processing final chunks...";
4512
+ progressTracker.setMessage("Processing final chunks...");
4261
4513
  await processAccumulatedChunks();
4262
- clearInterval(updateInterval);
4263
- spinner.start("Saving index manifest...");
4264
- const manifest = new ManifestManager(vectorDB.dbPath);
4265
- await manifest.updateFiles(
4266
- indexedFileEntries.map((entry) => ({
4267
- filepath: entry.filepath,
4268
- lastModified: entry.mtime,
4269
- // Use actual file mtime for accurate change detection
4270
- chunkCount: entry.chunkCount
4271
- }))
4272
- );
4273
- const { isGitAvailable: isGitAvailable2, isGitRepo: isGitRepo2 } = await Promise.resolve().then(() => (init_utils(), utils_exports));
4274
- const { GitStateTracker: GitStateTracker2 } = await Promise.resolve().then(() => (init_tracker(), tracker_exports));
4275
- const gitAvailable = await isGitAvailable2();
4276
- const isRepo = await isGitRepo2(rootDir);
4277
- if (gitAvailable && isRepo) {
4278
- const gitTracker = new GitStateTracker2(rootDir, vectorDB.dbPath);
4279
- await gitTracker.initialize();
4280
- const gitState = gitTracker.getState();
4281
- if (gitState) {
4282
- await manifest.updateGitState(gitState);
4514
+ } finally {
4515
+ progressTracker.stop();
4516
+ }
4517
+ spinner.start("Saving index manifest...");
4518
+ const manifest = new ManifestManager(vectorDB.dbPath);
4519
+ await manifest.updateFiles(
4520
+ indexedFileEntries.map((entry) => ({
4521
+ filepath: entry.filepath,
4522
+ // Use actual file mtime for accurate change detection
4523
+ lastModified: entry.mtime,
4524
+ chunkCount: entry.chunkCount
4525
+ }))
4526
+ );
4527
+ await updateGitState(rootDir, vectorDB, manifest);
4528
+ spinner.succeed("Manifest saved");
4529
+ await writeVersionFile(vectorDB.dbPath);
4530
+ const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
4531
+ spinner.succeed(
4532
+ `Indexed ${progressTracker.getProcessedCount()} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
4533
+ );
4534
+ console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4535
+ }
4536
+ async function indexCodebase(options = {}) {
4537
+ const rootDir = options.rootDir ?? process.cwd();
4538
+ const spinner = ora("Starting indexing process...").start();
4539
+ try {
4540
+ spinner.text = "Loading configuration...";
4541
+ const config = await configService.load(rootDir);
4542
+ spinner.text = "Initializing vector database...";
4543
+ const vectorDB = new VectorDB(rootDir);
4544
+ await vectorDB.initialize();
4545
+ if (!options.force) {
4546
+ const completed = await tryIncrementalIndex(rootDir, vectorDB, config, options, spinner);
4547
+ if (completed) {
4548
+ return;
4283
4549
  }
4550
+ } else {
4551
+ spinner.text = "Force flag enabled, performing full reindex...";
4284
4552
  }
4285
- spinner.succeed("Manifest saved");
4286
- await writeVersionFile(vectorDB.dbPath);
4287
- const totalTime = ((Date.now() - startTime) / 1e3).toFixed(1);
4288
- spinner.succeed(
4289
- `Indexed ${processedFiles} files (${processedChunks} chunks) in ${totalTime}s using ${concurrency}x concurrency`
4290
- );
4291
- console.log(chalk5.dim("\nNext step: Run"), chalk5.bold("lien serve"), chalk5.dim("to start the MCP server"));
4553
+ await performFullIndex(rootDir, vectorDB, config, options, spinner);
4292
4554
  } catch (error) {
4293
- if (updateInterval) {
4294
- clearInterval(updateInterval);
4295
- }
4296
4555
  spinner.fail(`Indexing failed: ${error}`);
4297
4556
  throw error;
4298
4557
  }
@@ -4312,6 +4571,7 @@ var init_indexer = __esm({
4312
4571
  init_incremental();
4313
4572
  init_loading_messages();
4314
4573
  init_constants();
4574
+ init_progress_tracker();
4315
4575
  }
4316
4576
  });
4317
4577
 
@@ -5405,9 +5665,12 @@ var FindSimilarSchema = z2.object({
5405
5665
 
5406
5666
  // src/mcp/schemas/file.schema.ts
5407
5667
  import { z as z3 } from "zod";
5408
- var GetFileContextSchema = z3.object({
5409
- filepath: z3.string().min(1, "Filepath cannot be empty").describe(
5410
- "Relative path to file from workspace root.\n\nExample: 'src/components/Button.tsx'"
5668
+ var GetFilesContextSchema = z3.object({
5669
+ filepaths: z3.union([
5670
+ z3.string().min(1, "Filepath cannot be empty"),
5671
+ z3.array(z3.string().min(1, "Filepath cannot be empty")).min(1, "Array must contain at least one filepath").max(50, "Maximum 50 files per request")
5672
+ ]).describe(
5673
+ "Single filepath or array of filepaths (relative to workspace root).\n\nSingle file: 'src/components/Button.tsx'\nMultiple files: ['src/auth.ts', 'src/user.ts']\n\nMaximum 50 files per request for batch operations."
5411
5674
  ),
5412
5675
  includeRelated: z3.boolean().default(true).describe(
5413
5676
  "Include semantically related chunks from nearby code.\n\nDefault: true\n\nWhen enabled, also returns related code from other files that are semantically similar to the target file's contents."
@@ -5425,6 +5688,17 @@ var ListFunctionsSchema = z4.object({
5425
5688
  )
5426
5689
  });
5427
5690
 
5691
+ // src/mcp/schemas/dependents.schema.ts
5692
+ import { z as z5 } from "zod";
// Input schema for the "get_dependents" MCP tool.
var GetDependentsSchema = z5.object({
  // Required, non-empty path of the file whose dependents are requested.
  filepath: z5
    .string()
    .min(1, "Filepath cannot be empty")
    .describe(
      "Path to file to find dependents for (relative to workspace root).\n\nExample: 'src/utils/validate.ts'\n\nReturns all files that import or depend on this file.\n\nNote: Scans up to 10,000 code chunks. For very large codebases,\nresults may be incomplete (a warning will be included if truncated)."
    ),
  // Integer constrained to exactly 1 (min 1, max 1); defaults to 1 when omitted.
  depth: z5
    .number()
    .int()
    .min(1)
    .max(1)
    .default(1)
    .describe(
      "Depth of transitive dependencies. Only depth=1 (direct dependents) is currently supported.\n\n1 = Direct dependents only"
    )
});
5701
+
5428
5702
  // src/mcp/tools.ts
5429
5703
  var tools = [
5430
5704
  toMCPToolSchema(
@@ -5451,16 +5725,48 @@ Results include a relevance category (highly_relevant, relevant, loosely_related
5451
5725
  Provide at least 10 characters of code to match against. Results include a relevance category for each match.`
5452
5726
  ),
5453
5727
  toMCPToolSchema(
5454
- GetFileContextSchema,
5455
- "get_file_context",
5456
- `Get full context for a file including related code and dependencies.
5728
+ GetFilesContextSchema,
5729
+ "get_files_context",
5730
+ `Get context for one or more files including dependencies and test coverage.
5731
+
5732
+ MANDATORY: Call this BEFORE editing any file. Accepts single path or array of paths.
5733
+
5734
+ Single file:
5735
+ get_files_context({ filepaths: "src/auth.ts" })
5736
+
5737
+ Returns:
5738
+ {
5739
+ file: "src/auth.ts",
5740
+ chunks: [...],
5741
+ testAssociations: ["src/__tests__/auth.test.ts"]
5742
+ }
5743
+
5744
+ Multiple files (batch):
5745
+ get_files_context({ filepaths: ["src/auth.ts", "src/user.ts"] })
5746
+
5747
+ Returns:
5748
+ {
5749
+ files: {
5750
+ "src/auth.ts": {
5751
+ chunks: [...],
5752
+ testAssociations: ["src/__tests__/auth.test.ts"]
5753
+ },
5754
+ "src/user.ts": {
5755
+ chunks: [...],
5756
+ testAssociations: ["src/__tests__/user.test.ts"]
5757
+ }
5758
+ }
5759
+ }
5457
5760
 
5458
- IMPORTANT: Call this BEFORE editing any file to understand:
5459
- - What the file does
5460
- - What depends on it
5461
- - Related test files (via testAssociations)
5761
+ Returns for each file:
5762
+ - All chunks and related code
5763
+ - testAssociations: Array of test files that import this file (reverse dependency lookup)
5764
+ - Relevance scoring
5462
5765
 
5463
- Results include a relevance category for each related chunk. Typical flow: semantic_search \u2192 find file \u2192 get_file_context \u2192 make changes.`
5766
+ ALWAYS check testAssociations before modifying source code.
5767
+ After changes, remind the user to run the associated tests.
5768
+
5769
+ Batch calls are more efficient than multiple single-file calls.`
5464
5770
  ),
5465
5771
  toMCPToolSchema(
5466
5772
  ListFunctionsSchema,
@@ -5472,6 +5778,20 @@ Examples:
5472
5778
  - "Find service classes" \u2192 list_functions({ pattern: ".*Service$" })
5473
5779
 
5474
5780
  10x faster than semantic_search for structural/architectural queries. Use semantic_search instead when searching by what code DOES.`
5781
+ ),
5782
+ toMCPToolSchema(
5783
+ GetDependentsSchema,
5784
+ "get_dependents",
5785
+ `Find all code that depends on a file (reverse dependency lookup). Use for impact analysis:
5786
+ - "What breaks if I change this?"
5787
+ - "Is this safe to delete?"
5788
+ - "What imports this module?"
5789
+
5790
+ Returns:
5791
+ - List of files that import the target
5792
+ - Risk level (low/medium/high/critical) based on dependent count and complexity
5793
+
5794
+ Example: get_dependents({ filepath: "src/utils/validate.ts" })`
5475
5795
  )
5476
5796
  ];
5477
5797
 
@@ -5671,6 +5991,50 @@ function wrapToolHandler(schema, handler) {
5671
5991
  };
5672
5992
  }
5673
5993
 
5994
+ // src/mcp/utils/path-matching.ts
/**
 * Normalize an import specifier or file path so the two can be compared.
 *
 * Strips quote characters, trims whitespace, converts backslashes to forward
 * slashes, drops a trailing .ts/.tsx/.js/.jsx extension, and makes the path
 * relative to `workspaceRoot` when it lies underneath it.
 *
 * @param {string} path18 - Raw import specifier or file path.
 * @param {string} workspaceRoot - Workspace root, written with forward slashes.
 * @returns {string} The normalized, extension-less path.
 */
function normalizePath(path18, workspaceRoot) {
  let normalized = path18.replace(/['"]/g, "").trim().replace(/\\/g, "/");
  normalized = normalized.replace(/\.(ts|tsx|js|jsx)$/, "");
  // Tolerate roots that already end in "/" (such as "/" or "C:/"); appending
  // another "/" to those would build a "//" prefix that never matches.
  const rootPrefix = `${workspaceRoot.replace(/\/+$/, "")}/`;
  if (normalized.startsWith(rootPrefix)) {
    normalized = normalized.substring(rootPrefix.length);
  }
  return normalized;
}
/**
 * Report whether `pattern` occurs in `str` as a run of whole path segments,
 * i.e. it begins at the start of `str` or right after a "/", and it ends at
 * the end of `str` or right before a "/".
 *
 * Every occurrence is examined, so an earlier partial hit (such as the
 * "utils" inside "myutils") does not hide a later segment-aligned one.
 * An empty pattern carries no path information and never matches.
 *
 * @param {string} str - Path to search in.
 * @param {string} pattern - Path fragment to look for.
 * @returns {boolean} True when a segment-aligned occurrence exists.
 */
function matchesAtBoundary(str, pattern) {
  if (pattern.length === 0) return false;
  let index = str.indexOf(pattern);
  while (index !== -1) {
    const endIndex = index + pattern.length;
    const startsSegment = index === 0 || str[index - 1] === "/";
    const endsSegment = endIndex === str.length || str[endIndex] === "/";
    if (startsSegment && endsSegment) return true;
    // Not aligned here; keep looking for a later occurrence.
    index = str.indexOf(pattern, index + 1);
  }
  return false;
}
/**
 * Decide whether a normalized import specifier refers to a normalized target
 * path. They match when they are equal, when either one occurs inside the
 * other on path-segment boundaries, or when the same holds after leading
 * "./" and "../" segments are removed from the import.
 *
 * @param {string} normalizedImport - Import specifier after normalization.
 * @param {string} normalizedTarget - Target file path after normalization.
 * @returns {boolean} True when the import is considered to point at the target.
 */
function matchesFile(normalizedImport, normalizedTarget) {
  // Segment-aligned containment, tried in both directions.
  const overlaps = (left, right) => matchesAtBoundary(left, right) || matchesAtBoundary(right, left);

  if (normalizedImport === normalizedTarget) return true;
  if (overlaps(normalizedImport, normalizedTarget)) return true;

  // Retry with the relative prefix ("./", "../../", ...) stripped off.
  const withoutRelativePrefix = normalizedImport.replace(/^(\.\.?\/)+/, "");
  return overlaps(withoutRelativePrefix, normalizedTarget);
}
/**
 * Produce a comparable form of a file path: backslashes become forward
 * slashes and a leading `workspaceRoot` prefix is removed. Unlike
 * `normalizePath`, the file extension is kept.
 *
 * @param {string} filepath - File path, absolute or workspace-relative.
 * @param {string} workspaceRoot - Workspace root, written with forward slashes.
 * @returns {string} The forward-slash path, relative to the root when under it.
 */
function getCanonicalPath(filepath, workspaceRoot) {
  const forwardSlashed = filepath.replace(/\\/g, "/");
  // Tolerate roots that already end in "/" (such as "/" or "C:/"); appending
  // another "/" to those would build a "//" prefix that never matches.
  const rootPrefix = `${workspaceRoot.replace(/\/+$/, "")}/`;
  return forwardSlashed.startsWith(rootPrefix)
    ? forwardSlashed.substring(rootPrefix.length)
    : forwardSlashed;
}
/**
 * Heuristically classify a path as a test file: either its file name contains
 * ".test." or ".spec." before the final extension, or one of its directories
 * is named "test", "tests" or "__tests__".
 *
 * @param {string} filepath - Path to classify ("/" or "\" separators).
 * @returns {boolean} True when the path looks like a test file.
 */
function isTestFile2(filepath) {
  const testSuffix = /\.(test|spec)\.[^/]+$/;
  const testDirectory = /(^|[/\\])(test|tests|__tests__)[/\\]/;
  if (testSuffix.test(filepath)) {
    return true;
  }
  return testDirectory.test(filepath);
}
6037
+
5674
6038
  // src/mcp/server.ts
5675
6039
  init_errors();
5676
6040
  var __filename4 = fileURLToPath4(import.meta.url);
@@ -5682,6 +6046,31 @@ try {
5682
6046
  } catch {
5683
6047
  packageJson3 = require4(join3(__dirname4, "../../package.json"));
5684
6048
  }
// Upper bounds on the number of dependent files for each risk tier used by
// the get_dependents tool; a count above HIGH is rated "critical".
var DEPENDENT_COUNT_THRESHOLDS = {
  LOW: 5, // Few dependents, safe to change
  MEDIUM: 15, // Moderate impact, review dependents
  HIGH: 30 // High impact, careful planning needed
};

// Complexity cut-offs used by the get_dependents tool to derive a risk level
// from the dependents' average and maximum complexity.
var COMPLEXITY_THRESHOLDS = {
  HIGH_COMPLEXITY_DEPENDENT: 10, // Individual file is complex
  CRITICAL_AVG: 15, // Average complexity indicates systemic complexity
  CRITICAL_MAX: 25, // Peak complexity indicates hotspot
  HIGH_AVG: 10, // Moderately complex on average
  HIGH_MAX: 20, // Some complex functions exist
  MEDIUM_AVG: 6, // Slightly above simple code
  MEDIUM_MAX: 15 // Occasional branching
};

// Maximum number of chunks requested from the vector DB in a single scan.
var SCAN_LIMIT = 10000;
5685
6074
  async function startMCPServer(options) {
5686
6075
  const { rootDir, verbose, watch } = options;
5687
6076
  const log = (message) => {
@@ -5770,32 +6159,112 @@ async function startMCPServer(options) {
5770
6159
  };
5771
6160
  }
5772
6161
  )(args);
5773
- case "get_file_context":
6162
+ case "get_files_context":
5774
6163
  return await wrapToolHandler(
5775
- GetFileContextSchema,
6164
+ GetFilesContextSchema,
5776
6165
  async (validatedArgs) => {
5777
- log(`Getting context for: ${validatedArgs.filepath}`);
6166
+ const filepaths = Array.isArray(validatedArgs.filepaths) ? validatedArgs.filepaths : [validatedArgs.filepaths];
6167
+ const isSingleFile = !Array.isArray(validatedArgs.filepaths);
6168
+ log(`Getting context for: ${filepaths.join(", ")}`);
5778
6169
  await checkAndReconnect();
5779
- const fileEmbedding = await embeddings.embed(validatedArgs.filepath);
5780
- const allResults = await vectorDB.search(fileEmbedding, 50, validatedArgs.filepath);
5781
- const fileChunks = allResults.filter(
5782
- (r) => r.metadata.file.includes(validatedArgs.filepath) || validatedArgs.filepath.includes(r.metadata.file)
6170
+ const workspaceRoot = process.cwd().replace(/\\/g, "/");
6171
+ const fileEmbeddings = await Promise.all(filepaths.map((fp) => embeddings.embed(fp)));
6172
+ const allFileSearches = await Promise.all(
6173
+ fileEmbeddings.map(
6174
+ (embedding, i) => vectorDB.search(embedding, 50, filepaths[i])
6175
+ )
5783
6176
  );
5784
- let results = fileChunks;
5785
- if (validatedArgs.includeRelated && fileChunks.length > 0) {
5786
- const relatedEmbedding = await embeddings.embed(fileChunks[0].content);
5787
- const related = await vectorDB.search(relatedEmbedding, 5, fileChunks[0].content);
5788
- const relatedOtherFiles = related.filter(
5789
- (r) => !r.metadata.file.includes(validatedArgs.filepath) && !validatedArgs.filepath.includes(r.metadata.file)
5790
- );
5791
- results = [...fileChunks, ...relatedOtherFiles];
6177
+ const fileChunksMap = filepaths.map((filepath, i) => {
6178
+ const allResults = allFileSearches[i];
6179
+ const targetCanonical = getCanonicalPath(filepath, workspaceRoot);
6180
+ return allResults.filter((r) => {
6181
+ const chunkCanonical = getCanonicalPath(r.metadata.file, workspaceRoot);
6182
+ return chunkCanonical === targetCanonical;
6183
+ });
6184
+ });
6185
+ let relatedChunksMap = [];
6186
+ if (validatedArgs.includeRelated) {
6187
+ const filesWithChunks = fileChunksMap.map((chunks, i) => ({ chunks, filepath: filepaths[i], index: i })).filter(({ chunks }) => chunks.length > 0);
6188
+ if (filesWithChunks.length > 0) {
6189
+ const relatedEmbeddings = await Promise.all(
6190
+ filesWithChunks.map(({ chunks }) => embeddings.embed(chunks[0].content))
6191
+ );
6192
+ const relatedSearches = await Promise.all(
6193
+ relatedEmbeddings.map(
6194
+ (embedding, i) => vectorDB.search(embedding, 5, filesWithChunks[i].chunks[0].content)
6195
+ )
6196
+ );
6197
+ relatedChunksMap = Array.from({ length: filepaths.length }, () => []);
6198
+ filesWithChunks.forEach(({ filepath, index }, i) => {
6199
+ const related = relatedSearches[i];
6200
+ const targetCanonical = getCanonicalPath(filepath, workspaceRoot);
6201
+ relatedChunksMap[index] = related.filter((r) => {
6202
+ const chunkCanonical = getCanonicalPath(r.metadata.file, workspaceRoot);
6203
+ return chunkCanonical !== targetCanonical;
6204
+ });
6205
+ });
6206
+ }
5792
6207
  }
5793
- log(`Found ${results.length} chunks`);
5794
- return {
5795
- indexInfo: getIndexMetadata(),
5796
- file: validatedArgs.filepath,
5797
- chunks: results
6208
+ const allChunks = await vectorDB.scanWithFilter({ limit: SCAN_LIMIT });
6209
+ if (allChunks.length === SCAN_LIMIT) {
6210
+ log(`WARNING: Scanned ${SCAN_LIMIT} chunks (limit reached). Test associations may be incomplete for large codebases.`);
6211
+ }
6212
+ const pathCache = /* @__PURE__ */ new Map();
6213
+ const normalizePathCached = (path18) => {
6214
+ if (pathCache.has(path18)) return pathCache.get(path18);
6215
+ const normalized = normalizePath(path18, workspaceRoot);
6216
+ pathCache.set(path18, normalized);
6217
+ return normalized;
5798
6218
  };
6219
+ const testAssociationsMap = filepaths.map((filepath) => {
6220
+ const normalizedTarget = normalizePathCached(filepath);
6221
+ const testFiles = /* @__PURE__ */ new Set();
6222
+ for (const chunk of allChunks) {
6223
+ const chunkFile2 = getCanonicalPath(chunk.metadata.file, workspaceRoot);
6224
+ if (!isTestFile2(chunkFile2)) continue;
6225
+ const imports = chunk.metadata.imports || [];
6226
+ for (const imp of imports) {
6227
+ const normalizedImport = normalizePathCached(imp);
6228
+ if (matchesFile(normalizedImport, normalizedTarget)) {
6229
+ testFiles.add(chunkFile2);
6230
+ break;
6231
+ }
6232
+ }
6233
+ }
6234
+ return Array.from(testFiles);
6235
+ });
6236
+ const filesData = {};
6237
+ filepaths.forEach((filepath, i) => {
6238
+ const fileChunks = fileChunksMap[i];
6239
+ const relatedChunks = relatedChunksMap[i] || [];
6240
+ const seenChunks = /* @__PURE__ */ new Set();
6241
+ const dedupedChunks = [...fileChunks, ...relatedChunks].filter((chunk) => {
6242
+ const canonicalFile = getCanonicalPath(chunk.metadata.file, workspaceRoot);
6243
+ const chunkId = `${canonicalFile}:${chunk.metadata.startLine}-${chunk.metadata.endLine}`;
6244
+ if (seenChunks.has(chunkId)) return false;
6245
+ seenChunks.add(chunkId);
6246
+ return true;
6247
+ });
6248
+ filesData[filepath] = {
6249
+ chunks: dedupedChunks,
6250
+ testAssociations: testAssociationsMap[i]
6251
+ };
6252
+ });
6253
+ log(`Found ${Object.values(filesData).reduce((sum, f) => sum + f.chunks.length, 0)} total chunks`);
6254
+ if (isSingleFile) {
6255
+ const filepath = filepaths[0];
6256
+ return {
6257
+ indexInfo: getIndexMetadata(),
6258
+ file: filepath,
6259
+ chunks: filesData[filepath].chunks,
6260
+ testAssociations: filesData[filepath].testAssociations
6261
+ };
6262
+ } else {
6263
+ return {
6264
+ indexInfo: getIndexMetadata(),
6265
+ files: filesData
6266
+ };
6267
+ }
5799
6268
  }
5800
6269
  )(args);
5801
6270
  case "list_functions":
@@ -5839,6 +6308,144 @@ async function startMCPServer(options) {
5839
6308
  };
5840
6309
  }
5841
6310
  )(args);
6311
+ case "get_dependents":
6312
+ return await wrapToolHandler(
6313
+ GetDependentsSchema,
6314
+ async (validatedArgs) => {
6315
+ log(`Finding dependents of: ${validatedArgs.filepath}`);
6316
+ await checkAndReconnect();
6317
+ const allChunks = await vectorDB.scanWithFilter({ limit: SCAN_LIMIT });
6318
+ if (allChunks.length === SCAN_LIMIT) {
6319
+ log(`WARNING: Scanned ${SCAN_LIMIT} chunks (limit reached). Results may be incomplete for large codebases.`);
6320
+ }
6321
+ log(`Scanning ${allChunks.length} chunks for imports...`);
6322
+ const workspaceRoot = process.cwd().replace(/\\/g, "/");
6323
+ const pathCache = /* @__PURE__ */ new Map();
6324
+ const normalizePathCached = (path18) => {
6325
+ if (pathCache.has(path18)) return pathCache.get(path18);
6326
+ const normalized = normalizePath(path18, workspaceRoot);
6327
+ pathCache.set(path18, normalized);
6328
+ return normalized;
6329
+ };
6330
+ const importIndex = /* @__PURE__ */ new Map();
6331
+ for (const chunk of allChunks) {
6332
+ const imports = chunk.metadata.imports || [];
6333
+ for (const imp of imports) {
6334
+ const normalizedImport = normalizePathCached(imp);
6335
+ if (!importIndex.has(normalizedImport)) {
6336
+ importIndex.set(normalizedImport, []);
6337
+ }
6338
+ importIndex.get(normalizedImport).push(chunk);
6339
+ }
6340
+ }
6341
+ const normalizedTarget = normalizePathCached(validatedArgs.filepath);
6342
+ const dependentChunks = [];
6343
+ const seenChunkIds = /* @__PURE__ */ new Set();
6344
+ if (importIndex.has(normalizedTarget)) {
6345
+ for (const chunk of importIndex.get(normalizedTarget)) {
6346
+ const chunkId = `${chunk.metadata.file}:${chunk.metadata.startLine}-${chunk.metadata.endLine}`;
6347
+ if (!seenChunkIds.has(chunkId)) {
6348
+ dependentChunks.push(chunk);
6349
+ seenChunkIds.add(chunkId);
6350
+ }
6351
+ }
6352
+ }
6353
+ for (const [normalizedImport, chunks] of importIndex.entries()) {
6354
+ if (normalizedImport !== normalizedTarget && matchesFile(normalizedImport, normalizedTarget)) {
6355
+ for (const chunk of chunks) {
6356
+ const chunkId = `${chunk.metadata.file}:${chunk.metadata.startLine}-${chunk.metadata.endLine}`;
6357
+ if (!seenChunkIds.has(chunkId)) {
6358
+ dependentChunks.push(chunk);
6359
+ seenChunkIds.add(chunkId);
6360
+ }
6361
+ }
6362
+ }
6363
+ }
6364
+ const chunksByFile = /* @__PURE__ */ new Map();
6365
+ for (const chunk of dependentChunks) {
6366
+ const canonical = getCanonicalPath(chunk.metadata.file, workspaceRoot);
6367
+ const existing = chunksByFile.get(canonical) || [];
6368
+ existing.push(chunk);
6369
+ chunksByFile.set(canonical, existing);
6370
+ }
6371
+ const fileComplexities = [];
6372
+ for (const [filepath, chunks] of chunksByFile.entries()) {
6373
+ const complexities = chunks.map((c) => c.metadata.complexity).filter((c) => typeof c === "number" && c > 0);
6374
+ if (complexities.length > 0) {
6375
+ const sum = complexities.reduce((a, b) => a + b, 0);
6376
+ const avg = sum / complexities.length;
6377
+ const max = Math.max(...complexities);
6378
+ fileComplexities.push({
6379
+ filepath,
6380
+ avgComplexity: Math.round(avg * 10) / 10,
6381
+ // Round to 1 decimal
6382
+ maxComplexity: max,
6383
+ complexityScore: sum,
6384
+ chunksWithComplexity: complexities.length
6385
+ });
6386
+ }
6387
+ }
6388
+ let complexityMetrics;
6389
+ if (fileComplexities.length > 0) {
6390
+ const allAvgs = fileComplexities.map((f) => f.avgComplexity);
6391
+ const allMaxes = fileComplexities.map((f) => f.maxComplexity);
6392
+ const totalAvg = allAvgs.reduce((a, b) => a + b, 0) / allAvgs.length;
6393
+ const globalMax = Math.max(...allMaxes);
6394
+ const highComplexityDependents = fileComplexities.filter((f) => f.maxComplexity > COMPLEXITY_THRESHOLDS.HIGH_COMPLEXITY_DEPENDENT).sort((a, b) => b.maxComplexity - a.maxComplexity).slice(0, 5).map((f) => ({
6395
+ filepath: f.filepath,
6396
+ maxComplexity: f.maxComplexity,
6397
+ avgComplexity: f.avgComplexity
6398
+ }));
6399
+ let complexityRiskBoost = "low";
6400
+ if (totalAvg > COMPLEXITY_THRESHOLDS.CRITICAL_AVG || globalMax > COMPLEXITY_THRESHOLDS.CRITICAL_MAX) {
6401
+ complexityRiskBoost = "critical";
6402
+ } else if (totalAvg > COMPLEXITY_THRESHOLDS.HIGH_AVG || globalMax > COMPLEXITY_THRESHOLDS.HIGH_MAX) {
6403
+ complexityRiskBoost = "high";
6404
+ } else if (totalAvg > COMPLEXITY_THRESHOLDS.MEDIUM_AVG || globalMax > COMPLEXITY_THRESHOLDS.MEDIUM_MAX) {
6405
+ complexityRiskBoost = "medium";
6406
+ }
6407
+ complexityMetrics = {
6408
+ averageComplexity: Math.round(totalAvg * 10) / 10,
6409
+ maxComplexity: globalMax,
6410
+ filesWithComplexityData: fileComplexities.length,
6411
+ highComplexityDependents,
6412
+ complexityRiskBoost
6413
+ };
6414
+ } else {
6415
+ complexityMetrics = {
6416
+ averageComplexity: 0,
6417
+ maxComplexity: 0,
6418
+ filesWithComplexityData: 0,
6419
+ highComplexityDependents: [],
6420
+ complexityRiskBoost: "low"
6421
+ };
6422
+ }
6423
+ const uniqueFiles = Array.from(chunksByFile.keys()).map((filepath) => ({
6424
+ filepath,
6425
+ isTestFile: isTestFile2(filepath)
6426
+ }));
6427
+ const count = uniqueFiles.length;
6428
+ let riskLevel = count === 0 ? "low" : count <= DEPENDENT_COUNT_THRESHOLDS.LOW ? "low" : count <= DEPENDENT_COUNT_THRESHOLDS.MEDIUM ? "medium" : count <= DEPENDENT_COUNT_THRESHOLDS.HIGH ? "high" : "critical";
6429
+ const RISK_ORDER = { low: 0, medium: 1, high: 2, critical: 3 };
6430
+ if (RISK_ORDER[complexityMetrics.complexityRiskBoost] > RISK_ORDER[riskLevel]) {
6431
+ riskLevel = complexityMetrics.complexityRiskBoost;
6432
+ }
6433
+ log(`Found ${count} dependent files (risk: ${riskLevel}${complexityMetrics.filesWithComplexityData > 0 ? ", complexity-boosted" : ""})`);
6434
+ let note;
6435
+ if (allChunks.length === SCAN_LIMIT) {
6436
+ note = `Warning: Scanned ${SCAN_LIMIT} chunks (limit reached). Results may be incomplete for large codebases. Some dependents might not be listed.`;
6437
+ }
6438
+ return {
6439
+ indexInfo: getIndexMetadata(),
6440
+ filepath: validatedArgs.filepath,
6441
+ dependentCount: count,
6442
+ riskLevel,
6443
+ dependents: uniqueFiles,
6444
+ complexityMetrics,
6445
+ note
6446
+ };
6447
+ }
6448
+ )(args);
5842
6449
  default:
5843
6450
  throw new LienError(
5844
6451
  `Unknown tool: ${name}`,