bluera-knowledge 0.17.2 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,11 @@
1
+ import {
2
+ createDocumentId,
3
+ createStoreId
4
+ } from "./chunk-CLIMKLTW.js";
5
+ import {
6
+ parseIgnorePatternsForScanning
7
+ } from "./chunk-HXBIIMYL.js";
8
+
1
9
  // src/analysis/adapter-registry.ts
2
10
  var AdapterRegistry = class _AdapterRegistry {
3
11
  static instance;
@@ -128,14 +136,14 @@ var ProjectRootService = class {
128
136
  if (projectRootEnv !== void 0 && projectRootEnv !== "") {
129
137
  return this.normalize(projectRootEnv);
130
138
  }
131
- const pwdEnv = process.env["PWD"];
132
- if (pwdEnv !== void 0 && pwdEnv !== "") {
133
- return this.normalize(pwdEnv);
134
- }
135
139
  const gitRoot = this.findGitRoot(process.cwd());
136
140
  if (gitRoot !== null) {
137
141
  return gitRoot;
138
142
  }
143
+ const pwdEnv = process.env["PWD"];
144
+ if (pwdEnv !== void 0 && pwdEnv !== "") {
145
+ return this.normalize(pwdEnv);
146
+ }
139
147
  return process.cwd();
140
148
  }
141
149
  /**
@@ -368,6 +376,23 @@ function err(error) {
368
376
  return { success: false, error };
369
377
  }
370
378
 
379
+ // src/utils/atomic-write.ts
380
import { writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync3, rmSync as rmAtomicTempSync } from "fs";
import { writeFile, rename, mkdir, rm as rmAtomicTemp } from "fs/promises";
382
+ import { dirname as dirname2 } from "path";
383
/**
 * Per-process counter mixed into temp file names so that two writes to the
 * same target within the same millisecond never share a temp path.
 */
let atomicWriteSequence = 0;

/**
 * Build a temp path next to `filePath` that is unique for this call.
 *
 * The temp file lives in the target's directory so the final rename stays on
 * one filesystem.
 *
 * @param filePath - Final destination path
 * @returns Sibling temp path (`<filePath>.tmp.<ms>.<pid>.<seq>`)
 */
function atomicTempPath(filePath) {
  atomicWriteSequence += 1;
  return `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}.${String(atomicWriteSequence)}`;
}

/**
 * Write `content` to `filePath` by writing a sibling temp file and renaming
 * it over the target, creating parent directories as needed.
 *
 * If the write or the rename fails, the temp file is removed (best effort)
 * and the original error is rethrown.
 *
 * @param filePath - Destination path
 * @param content - UTF-8 text to write
 */
async function atomicWriteFile(filePath, content) {
  await mkdir(dirname2(filePath), { recursive: true });
  const tempPath = atomicTempPath(filePath);
  try {
    await writeFile(tempPath, content, "utf-8");
    await rename(tempPath, filePath);
  } catch (error) {
    // Cleanup is best effort: never let it mask the real failure.
    await rmAtomicTemp(tempPath, { force: true }).catch(() => void 0);
    throw error;
  }
}

/**
 * Synchronous variant of {@link atomicWriteFile} with the same temp-file
 * naming and cleanup-on-failure behavior.
 *
 * @param filePath - Destination path
 * @param content - UTF-8 text to write
 */
function atomicWriteFileSync(filePath, content) {
  mkdirSync3(dirname2(filePath), { recursive: true });
  const tempPath = atomicTempPath(filePath);
  try {
    writeFileSync2(tempPath, content, "utf-8");
    renameSync(tempPath, filePath);
  } catch (error) {
    try {
      rmAtomicTempSync(tempPath, { force: true });
    } catch {
      // Cleanup is best effort: never let it mask the real failure.
    }
    throw error;
  }
}
395
+
371
396
  // src/services/job.service.ts
372
397
  var JobService = class {
373
398
  jobsDir;
@@ -596,13 +621,13 @@ var JobService = class {
596
621
  */
597
622
  writeJob(job) {
598
623
  const jobFile = path.join(this.jobsDir, `${job.id}.json`);
599
- fs.writeFileSync(jobFile, JSON.stringify(job, null, 2), "utf-8");
624
+ atomicWriteFileSync(jobFile, JSON.stringify(job, null, 2));
600
625
  }
601
626
  };
602
627
 
603
628
  // src/services/code-graph.service.ts
604
- import { readFile, writeFile, mkdir, rm } from "fs/promises";
605
- import { join as join4, dirname as dirname2 } from "path";
629
+ import { readFile, writeFile as writeFile2, mkdir as mkdir2, rm } from "fs/promises";
630
+ import { join as join4, dirname as dirname3 } from "path";
606
631
 
607
632
  // src/analysis/ast-parser.ts
608
633
  import { parse } from "@babel/parser";
@@ -1759,11 +1784,31 @@ var CodeGraphService = class {
1759
1784
  parser;
1760
1785
  parserFactory;
1761
1786
  graphCache;
1787
+ cacheListeners;
1762
1788
  constructor(dataDir, pythonBridge) {
1763
1789
  this.dataDir = dataDir;
1764
1790
  this.parser = new ASTParser();
1765
1791
  this.parserFactory = new ParserFactory(pythonBridge);
1766
1792
  this.graphCache = /* @__PURE__ */ new Map();
1793
+ this.cacheListeners = /* @__PURE__ */ new Set();
1794
+ }
1795
+ /**
1796
+ * Subscribe to cache invalidation events.
1797
+ * Returns an unsubscribe function.
1798
+ */
1799
+ onCacheInvalidation(listener) {
1800
+ this.cacheListeners.add(listener);
1801
+ return () => {
1802
+ this.cacheListeners.delete(listener);
1803
+ };
1804
+ }
1805
+ /**
1806
+ * Emit a cache invalidation event to all listeners.
1807
+ */
1808
+ emitCacheInvalidation(event) {
1809
+ for (const listener of this.cacheListeners) {
1810
+ listener(event);
1811
+ }
1767
1812
  }
1768
1813
  /**
1769
1814
  * Build a code graph from source files.
@@ -1819,9 +1864,10 @@ var CodeGraphService = class {
1819
1864
  */
1820
1865
  async saveGraph(storeId, graph) {
1821
1866
  const graphPath = this.getGraphPath(storeId);
1822
- await mkdir(dirname2(graphPath), { recursive: true });
1867
+ await mkdir2(dirname3(graphPath), { recursive: true });
1823
1868
  const serialized = graph.toJSON();
1824
- await writeFile(graphPath, JSON.stringify(serialized, null, 2));
1869
+ await writeFile2(graphPath, JSON.stringify(serialized, null, 2));
1870
+ this.emitCacheInvalidation({ type: "graph-updated", storeId });
1825
1871
  }
1826
1872
  /**
1827
1873
  * Delete the code graph file for a store.
@@ -1831,6 +1877,7 @@ var CodeGraphService = class {
1831
1877
  const graphPath = this.getGraphPath(storeId);
1832
1878
  await rm(graphPath, { force: true });
1833
1879
  this.graphCache.delete(storeId);
1880
+ this.emitCacheInvalidation({ type: "graph-deleted", storeId });
1834
1881
  }
1835
1882
  /**
1836
1883
  * Load a code graph for a store.
@@ -1976,9 +2023,9 @@ var CodeGraphService = class {
1976
2023
  };
1977
2024
 
1978
2025
  // src/services/config.service.ts
1979
- import { readFile as readFile2, writeFile as writeFile2, mkdir as mkdir2, access } from "fs/promises";
2026
+ import { readFile as readFile2, access } from "fs/promises";
1980
2027
  import { homedir } from "os";
1981
- import { dirname as dirname3, join as join5, resolve } from "path";
2028
+ import { isAbsolute, join as join5, resolve } from "path";
1982
2029
 
1983
2030
  // src/types/config.ts
1984
2031
  var DEFAULT_CONFIG = {
@@ -1986,8 +2033,7 @@ var DEFAULT_CONFIG = {
1986
2033
  dataDir: ".bluera/bluera-knowledge/data",
1987
2034
  embedding: {
1988
2035
  model: "Xenova/all-MiniLM-L6-v2",
1989
- batchSize: 32,
1990
- dimensions: 384
2036
+ batchSize: 32
1991
2037
  },
1992
2038
  indexing: {
1993
2039
  concurrency: 4,
@@ -1997,13 +2043,7 @@ var DEFAULT_CONFIG = {
1997
2043
  },
1998
2044
  search: {
1999
2045
  defaultMode: "hybrid",
2000
- defaultLimit: 10,
2001
- minScore: 0.5,
2002
- rrf: {
2003
- k: 40,
2004
- vectorWeight: 0.7,
2005
- ftsWeight: 0.3
2006
- }
2046
+ defaultLimit: 10
2007
2047
  },
2008
2048
  crawl: {
2009
2049
  userAgent: "BlueraKnowledge/1.0",
@@ -2016,6 +2056,34 @@ var DEFAULT_CONFIG = {
2016
2056
  }
2017
2057
  };
2018
2058
 
2059
+ // src/utils/deep-merge.ts
2060
/**
 * Report whether `value` is a non-null object that is neither an array nor a
 * Date, i.e. something the merge below should recurse into.
 *
 * @param value - Any value
 * @returns true for mergeable record-like objects
 */
function isPlainObject(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value) && !(value instanceof Date);
}

/**
 * Merge `overrides` on top of `defaults` without mutating either input.
 *
 * Nested records are merged key by key; arrays, Dates and primitives in
 * `overrides` replace the default wholesale. When `overrides` is not a
 * record-like object, a shallow copy of `defaults` is returned.
 *
 * @param defaults - Base values
 * @param overrides - Values that take precedence (typically parsed JSON)
 * @returns A new merged object
 */
function deepMerge(defaults, overrides) {
  if (!isPlainObject(overrides)) {
    return { ...defaults };
  }
  return deepMergeRecords(defaults, overrides);
}

/**
 * Recursive worker for {@link deepMerge}; both arguments must be records.
 *
 * @param defaults - Base record
 * @param overrides - Record whose defined values win
 * @returns A new record combining both
 */
function deepMergeRecords(defaults, overrides) {
  const result = { ...defaults };
  for (const key of Object.keys(overrides)) {
    // JSON.parse creates an own "__proto__" property; assigning it onto
    // `result` would replace the merged object's prototype, so drop it.
    if (key === "__proto__") {
      continue;
    }
    const overrideValue = overrides[key];
    if (overrideValue === void 0) {
      // An explicit undefined never erases a default.
      continue;
    }
    // Only own defaults participate; inherited members are not defaults.
    const defaultValue = Object.prototype.hasOwnProperty.call(defaults, key) ? defaults[key] : void 0;
    if (isPlainObject(defaultValue) && isPlainObject(overrideValue)) {
      result[key] = deepMergeRecords(defaultValue, overrideValue);
    } else {
      result[key] = overrideValue;
    }
  }
  return result;
}
2086
+
2019
2087
  // src/services/config.service.ts
2020
2088
  var DEFAULT_CONFIG_PATH = ".bluera/bluera-knowledge/config.json";
2021
2089
  async function fileExists(path4) {
@@ -2029,20 +2097,27 @@ async function fileExists(path4) {
2029
2097
  var ConfigService = class {
2030
2098
  configPath;
2031
2099
  dataDir;
2100
+ projectRoot;
2032
2101
  config = null;
2033
2102
  constructor(configPath, dataDir, projectRoot) {
2034
- const root = projectRoot ?? ProjectRootService.resolve();
2103
+ this.projectRoot = projectRoot ?? ProjectRootService.resolve();
2035
2104
  if (configPath !== void 0 && configPath !== "") {
2036
- this.configPath = configPath;
2105
+ this.configPath = this.expandPath(configPath, this.projectRoot);
2037
2106
  } else {
2038
- this.configPath = join5(root, DEFAULT_CONFIG_PATH);
2107
+ this.configPath = join5(this.projectRoot, DEFAULT_CONFIG_PATH);
2039
2108
  }
2040
2109
  if (dataDir !== void 0 && dataDir !== "") {
2041
- this.dataDir = dataDir;
2110
+ this.dataDir = this.expandPath(dataDir, this.projectRoot);
2042
2111
  } else {
2043
- this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir, root);
2112
+ this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir, this.projectRoot);
2044
2113
  }
2045
2114
  }
2115
+ /**
2116
+ * Get the resolved project root directory.
2117
+ */
2118
+ resolveProjectRoot() {
2119
+ return this.projectRoot;
2120
+ }
2046
2121
  async load() {
2047
2122
  if (this.config !== null) {
2048
2123
  return this.config;
@@ -2055,7 +2130,7 @@ var ConfigService = class {
2055
2130
  }
2056
2131
  const content = await readFile2(this.configPath, "utf-8");
2057
2132
  try {
2058
- this.config = { ...DEFAULT_CONFIG, ...JSON.parse(content) };
2133
+ this.config = deepMerge(DEFAULT_CONFIG, JSON.parse(content));
2059
2134
  } catch (error) {
2060
2135
  throw new Error(
2061
2136
  `Failed to parse config file at ${this.configPath}: ${error instanceof Error ? error.message : String(error)}`
@@ -2064,8 +2139,7 @@ var ConfigService = class {
2064
2139
  return this.config;
2065
2140
  }
2066
2141
  async save(config) {
2067
- await mkdir2(dirname3(this.configPath), { recursive: true });
2068
- await writeFile2(this.configPath, JSON.stringify(config, null, 2));
2142
+ await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
2069
2143
  this.config = config;
2070
2144
  }
2071
2145
  resolveDataDir() {
@@ -2078,7 +2152,7 @@ var ConfigService = class {
2078
2152
  if (path4.startsWith("~")) {
2079
2153
  return path4.replace("~", homedir());
2080
2154
  }
2081
- if (!path4.startsWith("/")) {
2155
+ if (!isAbsolute(path4)) {
2082
2156
  return resolve(baseDir, path4);
2083
2157
  }
2084
2158
  return path4;
@@ -2184,9 +2258,9 @@ ${REQUIRED_PATTERNS.join("\n")}
2184
2258
  };
2185
2259
 
2186
2260
  // src/services/index.service.ts
2187
- import { createHash as createHash2 } from "crypto";
2188
- import { readFile as readFile4, readdir } from "fs/promises";
2189
- import { join as join7, extname, basename } from "path";
2261
+ import { createHash as createHash3 } from "crypto";
2262
+ import { readFile as readFile5, readdir } from "fs/promises";
2263
+ import { join as join7, extname, basename, relative } from "path";
2190
2264
 
2191
2265
  // src/services/chunking.service.ts
2192
2266
  var CHUNK_PRESETS = {
@@ -2198,6 +2272,11 @@ var ChunkingService = class _ChunkingService {
2198
2272
  chunkSize;
2199
2273
  chunkOverlap;
2200
2274
  constructor(config) {
2275
+ if (config.chunkOverlap >= config.chunkSize) {
2276
+ throw new Error(
2277
+ `chunkOverlap (${String(config.chunkOverlap)}) must be less than chunkSize (${String(config.chunkSize)})`
2278
+ );
2279
+ }
2201
2280
  this.chunkSize = config.chunkSize;
2202
2281
  this.chunkOverlap = config.chunkOverlap;
2203
2282
  }
@@ -2292,7 +2371,7 @@ var ChunkingService = class _ChunkingService {
2292
2371
  * Splits on top-level declarations to keep functions/classes together.
2293
2372
  */
2294
2373
  chunkCode(text) {
2295
- const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
2374
+ const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
2296
2375
  const declarations = [];
2297
2376
  let match;
2298
2377
  while ((match = declarationRegex.exec(text)) !== null) {
@@ -2467,73 +2546,236 @@ var ChunkingService = class _ChunkingService {
2467
2546
  }
2468
2547
  };
2469
2548
 
2470
- // src/types/brands.ts
2471
- var ID_PATTERN = /^[a-zA-Z0-9_-]+$/;
2472
- function isStoreId(value) {
2473
- return value.length > 0 && ID_PATTERN.test(value);
2474
- }
2475
- function isDocumentId(value) {
2476
- return value.length > 0 && ID_PATTERN.test(value);
2477
- }
2478
- function createStoreId(value) {
2479
- if (!isStoreId(value)) {
2480
- throw new Error(`Invalid store ID: ${value}`);
2549
+ // src/services/drift.service.ts
2550
+ import { createHash as createHash2 } from "crypto";
2551
+ import { readFile as readFile4, stat } from "fs/promises";
2552
+ var DriftService = class {
2553
+ /**
2554
+ * Detect changes between current files and manifest.
2555
+ *
2556
+ * @param manifest - The stored manifest from last index
2557
+ * @param currentFiles - Current files on disk with mtime/size
2558
+ * @returns Classification of files into added, modified, deleted, unchanged
2559
+ */
2560
+ async detectChanges(manifest, currentFiles) {
2561
+ const result = {
2562
+ added: [],
2563
+ modified: [],
2564
+ deleted: [],
2565
+ unchanged: []
2566
+ };
2567
+ const currentPathSet = new Set(currentFiles.map((f) => f.path));
2568
+ const manifestPaths = new Set(Object.keys(manifest.files));
2569
+ for (const path4 of manifestPaths) {
2570
+ if (!currentPathSet.has(path4)) {
2571
+ result.deleted.push(path4);
2572
+ }
2573
+ }
2574
+ const potentiallyModified = [];
2575
+ for (const file of currentFiles) {
2576
+ const manifestState = manifest.files[file.path];
2577
+ if (manifestState === void 0) {
2578
+ result.added.push(file.path);
2579
+ } else {
2580
+ if (file.mtime === manifestState.mtime && file.size === manifestState.size) {
2581
+ result.unchanged.push(file.path);
2582
+ } else {
2583
+ potentiallyModified.push(file);
2584
+ }
2585
+ }
2586
+ }
2587
+ for (const file of potentiallyModified) {
2588
+ const manifestState = manifest.files[file.path];
2589
+ if (manifestState === void 0) {
2590
+ result.added.push(file.path);
2591
+ continue;
2592
+ }
2593
+ const currentHash = await this.computeFileHash(file.path);
2594
+ if (currentHash === manifestState.hash) {
2595
+ result.unchanged.push(file.path);
2596
+ } else {
2597
+ result.modified.push(file.path);
2598
+ }
2599
+ }
2600
+ return result;
2481
2601
  }
2482
- return value;
2483
- }
2484
- function createDocumentId(value) {
2485
- if (!isDocumentId(value)) {
2486
- throw new Error(`Invalid document ID: ${value}`);
2602
+ /**
2603
+ * Get the current state of a file on disk.
2604
+ */
2605
+ async getFileState(path4) {
2606
+ const stats = await stat(path4);
2607
+ return {
2608
+ path: path4,
2609
+ mtime: stats.mtimeMs,
2610
+ size: stats.size
2611
+ };
2487
2612
  }
2488
- return value;
2489
- }
2613
+ /**
2614
+ * Compute MD5 hash of a file.
2615
+ */
2616
+ async computeFileHash(path4) {
2617
+ const content = await readFile4(path4);
2618
+ return createHash2("md5").update(content).digest("hex");
2619
+ }
2620
+ /**
2621
+ * Create a file state entry for the manifest after indexing.
2622
+ *
2623
+ * @param path - File path
2624
+ * @param documentIds - Document IDs created from this file
2625
+ * @returns File state for manifest
2626
+ */
2627
+ async createFileState(path4, documentIds) {
2628
+ const stats = await stat(path4);
2629
+ const content = await readFile4(path4);
2630
+ const hash = createHash2("md5").update(content).digest("hex");
2631
+ const { createDocumentId: createDocumentId2 } = await import("./brands-3EYIYV6T.js");
2632
+ return {
2633
+ state: {
2634
+ mtime: stats.mtimeMs,
2635
+ size: stats.size,
2636
+ hash,
2637
+ documentIds: documentIds.map((id) => createDocumentId2(id))
2638
+ },
2639
+ hash
2640
+ };
2641
+ }
2642
+ };
2490
2643
 
2491
2644
  // src/services/index.service.ts
2492
2645
  var logger = createLogger("index-service");
2493
2646
  var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
2647
+ // Text/docs
2494
2648
  ".txt",
2495
2649
  ".md",
2650
+ ".rst",
2651
+ ".adoc",
2652
+ // JavaScript/TypeScript
2496
2653
  ".js",
2497
2654
  ".ts",
2498
2655
  ".jsx",
2499
2656
  ".tsx",
2657
+ ".mjs",
2658
+ ".cjs",
2659
+ ".mts",
2660
+ ".cts",
2661
+ // Config/data
2500
2662
  ".json",
2501
2663
  ".yaml",
2502
2664
  ".yml",
2665
+ ".toml",
2666
+ ".ini",
2667
+ ".env",
2668
+ // Web
2503
2669
  ".html",
2670
+ ".htm",
2504
2671
  ".css",
2505
2672
  ".scss",
2673
+ ".sass",
2506
2674
  ".less",
2675
+ ".vue",
2676
+ ".svelte",
2677
+ // Python
2507
2678
  ".py",
2679
+ ".pyi",
2680
+ ".pyx",
2681
+ // Ruby
2508
2682
  ".rb",
2683
+ ".erb",
2684
+ ".rake",
2685
+ // Go
2509
2686
  ".go",
2687
+ // Rust
2510
2688
  ".rs",
2689
+ // Java/JVM
2511
2690
  ".java",
2691
+ ".kt",
2692
+ ".kts",
2693
+ ".scala",
2694
+ ".groovy",
2695
+ ".gradle",
2696
+ // C/C++
2512
2697
  ".c",
2513
2698
  ".cpp",
2699
+ ".cc",
2700
+ ".cxx",
2514
2701
  ".h",
2515
2702
  ".hpp",
2703
+ ".hxx",
2704
+ // C#/.NET
2705
+ ".cs",
2706
+ ".fs",
2707
+ ".vb",
2708
+ // Swift/Objective-C
2709
+ ".swift",
2710
+ ".m",
2711
+ ".mm",
2712
+ // PHP
2713
+ ".php",
2714
+ // Shell
2516
2715
  ".sh",
2517
2716
  ".bash",
2518
2717
  ".zsh",
2718
+ ".fish",
2719
+ ".ps1",
2720
+ ".psm1",
2721
+ // SQL
2519
2722
  ".sql",
2520
- ".xml"
2723
+ // Other
2724
+ ".xml",
2725
+ ".graphql",
2726
+ ".gql",
2727
+ ".proto",
2728
+ ".lua",
2729
+ ".r",
2730
+ ".R",
2731
+ ".jl",
2732
+ ".ex",
2733
+ ".exs",
2734
+ ".erl",
2735
+ ".hrl",
2736
+ ".clj",
2737
+ ".cljs",
2738
+ ".cljc",
2739
+ ".hs",
2740
+ ".elm",
2741
+ ".dart",
2742
+ ".pl",
2743
+ ".pm",
2744
+ ".tcl",
2745
+ ".vim",
2746
+ ".zig",
2747
+ ".nim",
2748
+ ".v",
2749
+ ".tf",
2750
+ ".hcl",
2751
+ ".dockerfile",
2752
+ ".makefile",
2753
+ ".cmake"
2521
2754
  ]);
2522
2755
  var IndexService = class {
2523
2756
  lanceStore;
2524
2757
  embeddingEngine;
2525
2758
  chunker;
2526
2759
  codeGraphService;
2760
+ manifestService;
2761
+ driftService;
2527
2762
  concurrency;
2763
+ ignoreDirs;
2764
+ ignoreFilePatterns;
2528
2765
  constructor(lanceStore, embeddingEngine, options = {}) {
2529
2766
  this.lanceStore = lanceStore;
2530
2767
  this.embeddingEngine = embeddingEngine;
2531
2768
  this.chunker = new ChunkingService({
2532
- chunkSize: options.chunkSize ?? 768,
2533
- chunkOverlap: options.chunkOverlap ?? 100
2769
+ chunkSize: options.chunkSize ?? 1e3,
2770
+ chunkOverlap: options.chunkOverlap ?? 150
2534
2771
  });
2535
2772
  this.codeGraphService = options.codeGraphService;
2773
+ this.manifestService = options.manifestService;
2774
+ this.driftService = new DriftService();
2536
2775
  this.concurrency = options.concurrency ?? 4;
2776
+ const parsed = parseIgnorePatternsForScanning(options.ignorePatterns ?? []);
2777
+ this.ignoreDirs = parsed.dirs;
2778
+ this.ignoreFilePatterns = parsed.fileMatchers;
2537
2779
  }
2538
2780
  async indexStore(store, onProgress) {
2539
2781
  logger.info(
@@ -2564,8 +2806,205 @@ var IndexService = class {
2564
2806
  return err(error instanceof Error ? error : new Error(String(error)));
2565
2807
  }
2566
2808
  }
2809
+ /**
2810
+ * Incrementally index a store, only processing changed files.
2811
+ * Requires manifestService to be configured.
2812
+ *
2813
+ * @param store - The store to index
2814
+ * @param onProgress - Optional progress callback
2815
+ * @returns Result with incremental index statistics
2816
+ */
2817
+ async indexStoreIncremental(store, onProgress) {
2818
+ if (this.manifestService === void 0) {
2819
+ return err(new Error("ManifestService required for incremental indexing"));
2820
+ }
2821
+ if (store.type !== "file" && store.type !== "repo") {
2822
+ return err(new Error(`Incremental indexing not supported for store type: ${store.type}`));
2823
+ }
2824
+ logger.info(
2825
+ {
2826
+ storeId: store.id,
2827
+ storeName: store.name,
2828
+ storeType: store.type
2829
+ },
2830
+ "Starting incremental store indexing"
2831
+ );
2832
+ const startTime = Date.now();
2833
+ try {
2834
+ const manifest = await this.manifestService.load(store.id);
2835
+ const filePaths = await this.scanDirectory(store.path);
2836
+ const currentFiles = await Promise.all(
2837
+ filePaths.map((path4) => this.driftService.getFileState(path4))
2838
+ );
2839
+ const drift = await this.driftService.detectChanges(manifest, currentFiles);
2840
+ logger.debug(
2841
+ {
2842
+ storeId: store.id,
2843
+ added: drift.added.length,
2844
+ modified: drift.modified.length,
2845
+ deleted: drift.deleted.length,
2846
+ unchanged: drift.unchanged.length
2847
+ },
2848
+ "Drift detection complete"
2849
+ );
2850
+ const documentIdsToDelete = [];
2851
+ for (const path4 of [...drift.modified, ...drift.deleted]) {
2852
+ const fileState = manifest.files[path4];
2853
+ if (fileState !== void 0) {
2854
+ documentIdsToDelete.push(...fileState.documentIds);
2855
+ }
2856
+ }
2857
+ if (documentIdsToDelete.length > 0) {
2858
+ await this.lanceStore.deleteDocuments(store.id, documentIdsToDelete);
2859
+ logger.debug(
2860
+ { storeId: store.id, count: documentIdsToDelete.length },
2861
+ "Deleted old documents"
2862
+ );
2863
+ }
2864
+ const filesToProcess = [...drift.added, ...drift.modified];
2865
+ const totalFiles = filesToProcess.length;
2866
+ onProgress?.({
2867
+ type: "start",
2868
+ current: 0,
2869
+ total: totalFiles,
2870
+ message: `Processing ${String(totalFiles)} changed files`
2871
+ });
2872
+ const documents = [];
2873
+ const newManifestFiles = {};
2874
+ let filesProcessed = 0;
2875
+ for (const path4 of drift.unchanged) {
2876
+ const existingState = manifest.files[path4];
2877
+ if (existingState !== void 0) {
2878
+ newManifestFiles[path4] = existingState;
2879
+ }
2880
+ }
2881
+ for (let i = 0; i < filesToProcess.length; i += this.concurrency) {
2882
+ const batch = filesToProcess.slice(i, i + this.concurrency);
2883
+ const batchResults = await Promise.all(
2884
+ batch.map(async (filePath) => {
2885
+ try {
2886
+ const result = await this.processFile(filePath, store);
2887
+ const documentIds = result.documents.map((d) => d.id);
2888
+ const { state } = await this.driftService.createFileState(filePath, documentIds);
2889
+ return {
2890
+ filePath,
2891
+ documents: result.documents,
2892
+ fileState: state
2893
+ };
2894
+ } catch (error) {
2895
+ logger.warn(
2896
+ { filePath, error: error instanceof Error ? error.message : String(error) },
2897
+ "Failed to process file during incremental indexing, skipping"
2898
+ );
2899
+ return null;
2900
+ }
2901
+ })
2902
+ );
2903
+ for (const result of batchResults) {
2904
+ if (result !== null) {
2905
+ documents.push(...result.documents);
2906
+ newManifestFiles[result.filePath] = result.fileState;
2907
+ }
2908
+ }
2909
+ filesProcessed += batch.length;
2910
+ onProgress?.({
2911
+ type: "progress",
2912
+ current: filesProcessed,
2913
+ total: totalFiles,
2914
+ message: `Processed ${String(filesProcessed)}/${String(totalFiles)} files`
2915
+ });
2916
+ }
2917
+ if (documents.length > 0) {
2918
+ await this.lanceStore.addDocuments(store.id, documents);
2919
+ }
2920
+ if (documentIdsToDelete.length > 0 || documents.length > 0) {
2921
+ await this.lanceStore.createFtsIndex(store.id);
2922
+ }
2923
+ if (this.codeGraphService) {
2924
+ const sourceExtensions = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"];
2925
+ const hasSourceChanges = filesToProcess.some((p) => sourceExtensions.includes(extname(p).toLowerCase())) || drift.deleted.some((p) => sourceExtensions.includes(extname(p).toLowerCase()));
2926
+ if (hasSourceChanges) {
2927
+ const allSourceFiles = [];
2928
+ const allPaths = [...drift.unchanged, ...filesToProcess];
2929
+ for (const filePath of allPaths) {
2930
+ const ext = extname(filePath).toLowerCase();
2931
+ if (sourceExtensions.includes(ext)) {
2932
+ try {
2933
+ const content = await readFile5(filePath, "utf-8");
2934
+ allSourceFiles.push({ path: filePath, content });
2935
+ } catch {
2936
+ }
2937
+ }
2938
+ }
2939
+ if (allSourceFiles.length > 0) {
2940
+ const graph = await this.codeGraphService.buildGraph(allSourceFiles);
2941
+ await this.codeGraphService.saveGraph(store.id, graph);
2942
+ logger.debug(
2943
+ { storeId: store.id, sourceFiles: allSourceFiles.length },
2944
+ "Rebuilt code graph during incremental indexing"
2945
+ );
2946
+ } else {
2947
+ await this.codeGraphService.deleteGraph(store.id);
2948
+ logger.debug(
2949
+ { storeId: store.id },
2950
+ "Deleted stale code graph (no source files remain)"
2951
+ );
2952
+ }
2953
+ }
2954
+ }
2955
+ const updatedManifest = {
2956
+ version: 1,
2957
+ storeId: store.id,
2958
+ indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
2959
+ files: newManifestFiles
2960
+ };
2961
+ await this.manifestService.save(updatedManifest);
2962
+ onProgress?.({
2963
+ type: "complete",
2964
+ current: totalFiles,
2965
+ total: totalFiles,
2966
+ message: "Incremental indexing complete"
2967
+ });
2968
+ const timeMs = Date.now() - startTime;
2969
+ logger.info(
2970
+ {
2971
+ storeId: store.id,
2972
+ storeName: store.name,
2973
+ filesAdded: drift.added.length,
2974
+ filesModified: drift.modified.length,
2975
+ filesDeleted: drift.deleted.length,
2976
+ filesUnchanged: drift.unchanged.length,
2977
+ chunksCreated: documents.length,
2978
+ timeMs
2979
+ },
2980
+ "Incremental indexing complete"
2981
+ );
2982
+ return ok({
2983
+ filesIndexed: filesToProcess.length,
2984
+ chunksCreated: documents.length,
2985
+ timeMs,
2986
+ filesAdded: drift.added.length,
2987
+ filesModified: drift.modified.length,
2988
+ filesDeleted: drift.deleted.length,
2989
+ filesUnchanged: drift.unchanged.length
2990
+ });
2991
+ } catch (error) {
2992
+ logger.error(
2993
+ {
2994
+ storeId: store.id,
2995
+ error: error instanceof Error ? error.message : String(error)
2996
+ },
2997
+ "Incremental indexing failed"
2998
+ );
2999
+ return err(error instanceof Error ? error : new Error(String(error)));
3000
+ }
3001
+ }
2567
3002
  async indexFileStore(store, onProgress) {
2568
3003
  const startTime = Date.now();
3004
+ await this.lanceStore.clearAllDocuments(store.id);
3005
+ if (this.manifestService) {
3006
+ await this.manifestService.delete(store.id);
3007
+ }
2569
3008
  const files = await this.scanDirectory(store.path);
2570
3009
  const documents = [];
2571
3010
  let filesProcessed = 0;
@@ -2588,7 +3027,17 @@ var IndexService = class {
2588
3027
  for (let i = 0; i < files.length; i += this.concurrency) {
2589
3028
  const batch = files.slice(i, i + this.concurrency);
2590
3029
  const batchResults = await Promise.all(
2591
- batch.map((filePath) => this.processFile(filePath, store))
3030
+ batch.map(async (filePath) => {
3031
+ try {
3032
+ return await this.processFile(filePath, store);
3033
+ } catch (error) {
3034
+ logger.warn(
3035
+ { filePath, error: error instanceof Error ? error.message : String(error) },
3036
+ "Failed to process file, skipping"
3037
+ );
3038
+ return { documents: [], sourceFile: void 0 };
3039
+ }
3040
+ })
2592
3041
  );
2593
3042
  for (const result of batchResults) {
2594
3043
  documents.push(...result.documents);
@@ -2611,6 +3060,8 @@ var IndexService = class {
2611
3060
  if (this.codeGraphService && sourceFiles.length > 0) {
2612
3061
  const graph = await this.codeGraphService.buildGraph(sourceFiles);
2613
3062
  await this.codeGraphService.saveGraph(store.id, graph);
3063
+ } else if (this.codeGraphService) {
3064
+ await this.codeGraphService.deleteGraph(store.id);
2614
3065
  }
2615
3066
  onProgress?.({
2616
3067
  type: "complete",
@@ -2623,7 +3074,7 @@ var IndexService = class {
2623
3074
  {
2624
3075
  storeId: store.id,
2625
3076
  storeName: store.name,
2626
- documentsIndexed: filesProcessed,
3077
+ filesIndexed: filesProcessed,
2627
3078
  chunksCreated: documents.length,
2628
3079
  sourceFilesForGraph: sourceFiles.length,
2629
3080
  timeMs
@@ -2631,7 +3082,7 @@ var IndexService = class {
2631
3082
  "Store indexing complete"
2632
3083
  );
2633
3084
  return ok({
2634
- documentsIndexed: filesProcessed,
3085
+ filesIndexed: filesProcessed,
2635
3086
  chunksCreated: documents.length,
2636
3087
  timeMs
2637
3088
  });
@@ -2641,13 +3092,15 @@ var IndexService = class {
2641
3092
  * Extracted for parallel processing.
2642
3093
  */
2643
3094
  async processFile(filePath, store) {
2644
- const content = await readFile4(filePath, "utf-8");
2645
- const fileHash = createHash2("md5").update(content).digest("hex");
3095
+ const content = await readFile5(filePath, "utf-8");
3096
+ const fileHash = createHash3("md5").update(content).digest("hex");
2646
3097
  const chunks = this.chunker.chunk(content, filePath);
3098
+ const relativePath = relative(store.path, filePath);
3099
+ const pathHash = createHash3("md5").update(relativePath).digest("hex").slice(0, 8);
2647
3100
  const ext = extname(filePath).toLowerCase();
2648
3101
  const fileName = basename(filePath).toLowerCase();
2649
3102
  const fileType = this.classifyFileType(ext, fileName, filePath);
2650
- const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
3103
+ const sourceFile = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"].includes(ext) ? { path: filePath, content } : void 0;
2651
3104
  if (chunks.length === 0) {
2652
3105
  return { documents: [], sourceFile };
2653
3106
  }
@@ -2662,7 +3115,7 @@ var IndexService = class {
2662
3115
  `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
2663
3116
  );
2664
3117
  }
2665
- const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
3118
+ const chunkId = chunks.length > 1 ? `${store.id}-${pathHash}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${pathHash}-${fileHash}`;
2666
3119
  documents.push({
2667
3120
  id: createDocumentId(chunkId),
2668
3121
  content: chunk.content,
@@ -2671,7 +3124,7 @@ var IndexService = class {
2671
3124
  type: chunks.length > 1 ? "chunk" : "file",
2672
3125
  storeId: store.id,
2673
3126
  path: filePath,
2674
- indexedAt: /* @__PURE__ */ new Date(),
3127
+ indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
2675
3128
  fileHash,
2676
3129
  chunkIndex: chunk.chunkIndex,
2677
3130
  totalChunks: chunk.totalChunks,
@@ -2691,10 +3144,14 @@ var IndexService = class {
2691
3144
  for (const entry of entries) {
2692
3145
  const fullPath = join7(dir, entry.name);
2693
3146
  if (entry.isDirectory()) {
2694
- if (!["node_modules", ".git", "dist", "build"].includes(entry.name)) {
3147
+ if (!this.ignoreDirs.has(entry.name)) {
2695
3148
  files.push(...await this.scanDirectory(fullPath));
2696
3149
  }
2697
3150
  } else if (entry.isFile()) {
3151
+ const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(entry.name));
3152
+ if (shouldIgnore) {
3153
+ continue;
3154
+ }
2698
3155
  const ext = extname(entry.name).toLowerCase();
2699
3156
  if (TEXT_EXTENSIONS.has(ext)) {
2700
3157
  files.push(fullPath);
@@ -2784,6 +3241,141 @@ function classifyWebContentType(url, title) {
2784
3241
  return "documentation";
2785
3242
  }
2786
3243
 
3244
+ // src/services/manifest.service.ts
3245
+ import { readFile as readFile6, access as access3, mkdir as mkdir3 } from "fs/promises";
3246
+ import { join as join8 } from "path";
3247
+
3248
+ // src/types/manifest.ts
3249
+ import { z as z2 } from "zod";
3250
+ var FileStateSchema = z2.object({
3251
+ /** File modification time in milliseconds since epoch */
3252
+ mtime: z2.number(),
3253
+ /** File size in bytes */
3254
+ size: z2.number(),
3255
+ /** MD5 hash of file content */
3256
+ hash: z2.string(),
3257
+ /** Document IDs created from this file (for cleanup) */
3258
+ documentIds: z2.array(z2.string())
3259
+ });
3260
+ var StoreManifestSchema = z2.object({
3261
+ /** Schema version for future migrations */
3262
+ version: z2.literal(1),
3263
+ /** Store ID this manifest belongs to */
3264
+ storeId: z2.string(),
3265
+ /** When the manifest was last updated */
3266
+ indexedAt: z2.string(),
3267
+ /** Map of file paths to their state */
3268
+ files: z2.record(z2.string(), FileStateSchema)
3269
+ });
3270
/**
 * Build a fresh version-1 manifest for a store with no recorded files.
 *
 * @param storeId - ID of the store the manifest belongs to.
 * @returns A manifest whose `files` map is empty and whose `indexedAt`
 *   is the current time as an ISO 8601 string.
 */
function createEmptyManifest(storeId) {
  const createdAt = new Date();
  const manifest = {
    version: 1,
    storeId,
    indexedAt: createdAt.toISOString(),
    files: {}
  };
  return manifest;
}
3278
+
3279
+ // src/services/manifest.service.ts
3280
/**
 * Reads and writes per-store manifests, stored as JSON files named
 * `<storeId>.manifest.json` inside `<dataDir>/manifests`.
 */
var ManifestService = class {
  manifestsDir;
  /**
   * @param dataDir - Base data directory; manifests live in its `manifests` subdirectory.
   */
  constructor(dataDir) {
    this.manifestsDir = join8(dataDir, "manifests");
  }
  /**
   * Initialize the manifests directory (created recursively if missing).
   */
  async initialize() {
    await mkdir3(this.manifestsDir, { recursive: true });
  }
  /**
   * Get the file path for a store's manifest.
   */
  getManifestPath(storeId) {
    return join8(this.manifestsDir, `${storeId}.manifest.json`);
  }
  /**
   * Load a store's manifest.
   * Returns an empty manifest if the manifest file does not exist.
   * Throws on any other read error and on parse/validation errors (fail fast).
   */
  async load(storeId) {
    const manifestPath = this.getManifestPath(storeId);
    let content;
    try {
      // Read directly instead of probing with access() first: a separate
      // existence check can race with a concurrent delete, and it would hide
      // permission problems by reporting the manifest as simply missing.
      content = await readFile6(manifestPath, "utf-8");
    } catch (error) {
      if (this.isNotFoundError(error)) {
        return createEmptyManifest(storeId);
      }
      throw error;
    }
    let parsed;
    try {
      parsed = JSON.parse(content);
    } catch (error) {
      throw new Error(
        `Failed to parse manifest at ${manifestPath}: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error }
      );
    }
    const result = StoreManifestSchema.safeParse(parsed);
    if (!result.success) {
      throw new Error(`Invalid manifest at ${manifestPath}: ${result.error.message}`);
    }
    return this.toTypedManifest(result.data, storeId);
  }
  /**
   * Save a store's manifest atomically.
   * `indexedAt` is overwritten with the time of the save.
   */
  async save(manifest) {
    const manifestPath = this.getManifestPath(manifest.storeId);
    const toSave = {
      ...manifest,
      indexedAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    await atomicWriteFile(manifestPath, JSON.stringify(toSave, null, 2));
  }
  /**
   * Delete a store's manifest.
   * Called when a store is deleted or during full re-index.
   * A manifest that is already gone is not an error; other failures propagate.
   */
  async delete(storeId) {
    const manifestPath = this.getManifestPath(storeId);
    const { unlink } = await import("fs/promises");
    try {
      await unlink(manifestPath);
    } catch (error) {
      // Only "file not found" means there was nothing to delete.
      if (!this.isNotFoundError(error)) {
        throw error;
      }
    }
  }
  /**
   * Check if a file exists.
   * Any failure of the access check (not only a missing file) yields false.
   */
  async fileExists(path4) {
    try {
      await access3(path4);
      return true;
    } catch {
      return false;
    }
  }
  /**
   * Tell whether an error thrown by a filesystem call reports a missing path.
   */
  isNotFoundError(error) {
    return error instanceof Error && error.code === "ENOENT";
  }
  /**
   * Convert a parsed manifest to a typed manifest with branded types.
   * The returned manifest carries the `storeId` argument, not the one stored in the file.
   */
  toTypedManifest(data, storeId) {
    const files = {};
    for (const [path4, state] of Object.entries(data.files)) {
      files[path4] = {
        mtime: state.mtime,
        size: state.size,
        hash: state.hash,
        documentIds: state.documentIds.map((id) => createDocumentId(id))
      };
    }
    return {
      version: 1,
      storeId,
      indexedAt: data.indexedAt,
      files
    };
  }
};
3378
+
2787
3379
  // src/services/code-unit.service.ts
2788
3380
  var CodeUnitService = class {
2789
3381
  extractCodeUnit(code, symbolName, language) {
@@ -2968,6 +3560,8 @@ var INTENT_FILE_BOOSTS = {
2968
3560
  // Stronger penalty - internal code less useful
2969
3561
  test: 0.8,
2970
3562
  config: 0.7,
3563
+ changelog: 0.6,
3564
+ // Changelogs rarely answer "how to" questions
2971
3565
  other: 0.9
2972
3566
  },
2973
3567
  implementation: {
@@ -2980,6 +3574,8 @@ var INTENT_FILE_BOOSTS = {
2980
3574
  // Internal code can be relevant
2981
3575
  test: 1,
2982
3576
  config: 0.95,
3577
+ changelog: 0.8,
3578
+ // Might reference implementation changes
2983
3579
  other: 1
2984
3580
  },
2985
3581
  conceptual: {
@@ -2990,6 +3586,8 @@ var INTENT_FILE_BOOSTS = {
2990
3586
  "source-internal": 0.9,
2991
3587
  test: 0.9,
2992
3588
  config: 0.85,
3589
+ changelog: 0.7,
3590
+ // Sometimes explains concepts behind changes
2993
3591
  other: 0.95
2994
3592
  },
2995
3593
  comparison: {
@@ -3000,6 +3598,8 @@ var INTENT_FILE_BOOSTS = {
3000
3598
  "source-internal": 0.85,
3001
3599
  test: 0.9,
3002
3600
  config: 0.85,
3601
+ changelog: 0.9,
3602
+ // Version comparisons can be useful
3003
3603
  other: 0.95
3004
3604
  },
3005
3605
  debugging: {
@@ -3012,6 +3612,8 @@ var INTENT_FILE_BOOSTS = {
3012
3612
  test: 1.05,
3013
3613
  // Tests can show expected behavior
3014
3614
  config: 0.9,
3615
+ changelog: 1.1,
3616
+ // Often contains bug fixes and known issues
3015
3617
  other: 1
3016
3618
  }
3017
3619
  };
@@ -3094,6 +3696,17 @@ function classifyQueryIntents(query) {
3094
3696
  function getPrimaryIntent(intents) {
3095
3697
  return intents[0]?.intent ?? "how-to";
3096
3698
  }
3699
/**
 * Translate a caller-supplied search intent into the query-intent category
 * used for ranking.
 *
 * @param intent - One of "find-pattern", "find-implementation",
 *   "find-definition", "find-usage" or "find-documentation".
 * @returns "implementation" for the pattern/implementation/definition intents,
 *   otherwise "how-to".
 */
function mapSearchIntentToQueryIntent(intent) {
  switch (intent) {
    case "find-pattern":
    case "find-implementation":
    case "find-definition":
      return "implementation";
    case "find-usage":
    case "find-documentation":
      return "how-to";
    default:
      // An unrecognised value must not leak `undefined` into ranking; use the
      // same fallback that getPrimaryIntent applies when nothing is classified.
      return "how-to";
  }
}
3097
3710
  var RRF_PRESETS = {
3098
3711
  code: { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 },
3099
3712
  web: { k: 30, vectorWeight: 0.55, ftsWeight: 0.45 }
@@ -3108,12 +3721,27 @@ var SearchService = class {
3108
3721
  codeUnitService;
3109
3722
  codeGraphService;
3110
3723
  graphCache;
3111
- constructor(lanceStore, embeddingEngine, codeGraphService) {
3724
+ searchConfig;
3725
+ unsubscribeCacheInvalidation;
3726
+ constructor(lanceStore, embeddingEngine, codeGraphService, searchConfig) {
3112
3727
  this.lanceStore = lanceStore;
3113
3728
  this.embeddingEngine = embeddingEngine;
3114
3729
  this.codeUnitService = new CodeUnitService();
3115
3730
  this.codeGraphService = codeGraphService;
3116
3731
  this.graphCache = /* @__PURE__ */ new Map();
3732
+ this.searchConfig = searchConfig;
3733
+ if (codeGraphService) {
3734
+ this.unsubscribeCacheInvalidation = codeGraphService.onCacheInvalidation((event) => {
3735
+ this.graphCache.delete(event.storeId);
3736
+ });
3737
+ }
3738
+ }
3739
+ /**
3740
+ * Clean up resources (unsubscribe from events).
3741
+ * Call this when destroying the service.
3742
+ */
3743
+ cleanup() {
3744
+ this.unsubscribeCacheInvalidation?.();
3117
3745
  }
3118
3746
  /**
3119
3747
  * Load code graph for a store, with caching.
@@ -3141,12 +3769,12 @@ var SearchService = class {
3141
3769
  }
3142
3770
  async search(query) {
3143
3771
  const startTime = Date.now();
3144
- const mode = query.mode ?? "hybrid";
3145
- const limit = query.limit ?? 10;
3772
+ const mode = query.mode ?? this.searchConfig?.defaultMode ?? "hybrid";
3773
+ const limit = query.limit ?? this.searchConfig?.defaultLimit ?? 10;
3146
3774
  const stores = query.stores ?? [];
3147
3775
  const detail = query.detail ?? "minimal";
3148
3776
  const intents = classifyQueryIntents(query.query);
3149
- const primaryIntent = getPrimaryIntent(intents);
3777
+ const primaryIntent = query.intent !== void 0 ? mapSearchIntentToQueryIntent(query.intent) : getPrimaryIntent(intents);
3150
3778
  logger2.debug(
3151
3779
  {
3152
3780
  query: query.query,
@@ -3155,7 +3783,8 @@ var SearchService = class {
3155
3783
  stores,
3156
3784
  detail,
3157
3785
  intent: primaryIntent,
3158
- intents,
3786
+ userIntent: query.intent,
3787
+ autoClassifiedIntents: intents,
3159
3788
  minRelevance: query.minRelevance
3160
3789
  },
3161
3790
  "Search query received"
@@ -3166,7 +3795,7 @@ var SearchService = class {
3166
3795
  if (mode === "vector") {
3167
3796
  const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
3168
3797
  maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
3169
- allResults = await this.vectorSearch(query.query, stores, fetchLimit, query.threshold);
3798
+ allResults = this.normalizeAndFilterScores(rawResults, query.threshold).slice(0, fetchLimit);
3170
3799
  } else if (mode === "fts") {
3171
3800
  allResults = await this.ftsSearch(query.query, stores, fetchLimit);
3172
3801
  } else {
@@ -3179,28 +3808,35 @@ var SearchService = class {
3179
3808
  allResults = hybridResult.results;
3180
3809
  maxRawScore = hybridResult.maxRawScore;
3181
3810
  }
3182
- if (query.minRelevance !== void 0 && maxRawScore < query.minRelevance) {
3183
- const timeMs2 = Date.now() - startTime;
3184
- logger2.info(
3185
- {
3811
+ if (query.minRelevance !== void 0) {
3812
+ if (mode === "fts") {
3813
+ logger2.warn(
3814
+ { query: query.query, minRelevance: query.minRelevance },
3815
+ "minRelevance filter ignored in FTS mode (no vector scores available)"
3816
+ );
3817
+ } else if (maxRawScore < query.minRelevance) {
3818
+ const timeMs2 = Date.now() - startTime;
3819
+ logger2.info(
3820
+ {
3821
+ query: query.query,
3822
+ mode,
3823
+ maxRawScore,
3824
+ minRelevance: query.minRelevance,
3825
+ timeMs: timeMs2
3826
+ },
3827
+ "Search filtered by minRelevance - no sufficiently relevant results"
3828
+ );
3829
+ return {
3186
3830
  query: query.query,
3187
3831
  mode,
3188
- maxRawScore,
3189
- minRelevance: query.minRelevance,
3190
- timeMs: timeMs2
3191
- },
3192
- "Search filtered by minRelevance - no sufficiently relevant results"
3193
- );
3194
- return {
3195
- query: query.query,
3196
- mode,
3197
- stores,
3198
- results: [],
3199
- totalResults: 0,
3200
- timeMs: timeMs2,
3201
- confidence: this.calculateConfidence(maxRawScore),
3202
- maxRawScore
3203
- };
3832
+ stores,
3833
+ results: [],
3834
+ totalResults: 0,
3835
+ timeMs: timeMs2,
3836
+ confidence: this.calculateConfidence(maxRawScore),
3837
+ maxRawScore
3838
+ };
3839
+ }
3204
3840
  }
3205
3841
  const dedupedResults = this.deduplicateBySource(allResults, query.query);
3206
3842
  const resultsToEnhance = dedupedResults.slice(0, limit);
@@ -3249,7 +3885,9 @@ var SearchService = class {
3249
3885
  const bySource = /* @__PURE__ */ new Map();
3250
3886
  const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
3251
3887
  for (const result of results) {
3252
- const sourceKey = result.metadata.path ?? result.metadata.url ?? result.id;
3888
+ const storeId = result.metadata.storeId;
3889
+ const source = result.metadata.path ?? result.metadata.url ?? result.id;
3890
+ const sourceKey = `${storeId}:${source}`;
3253
3891
  const existing = bySource.get(sourceKey);
3254
3892
  if (!existing) {
3255
3893
  bySource.set(sourceKey, result);
@@ -3320,11 +3958,6 @@ var SearchService = class {
3320
3958
  }
3321
3959
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
3322
3960
  }
3323
- async vectorSearch(query, stores, limit, threshold) {
3324
- const results = await this.vectorSearchRaw(query, stores, limit);
3325
- const normalized = this.normalizeAndFilterScores(results, threshold);
3326
- return normalized.slice(0, limit);
3327
- }
3328
3961
  async ftsSearch(query, stores, limit) {
3329
3962
  const results = [];
3330
3963
  for (const storeId of stores) {
@@ -3484,6 +4117,9 @@ var SearchService = class {
3484
4117
  case "config":
3485
4118
  baseBoost = 0.5;
3486
4119
  break;
4120
+ case "changelog":
4121
+ baseBoost = 0.7;
4122
+ break;
3487
4123
  default:
3488
4124
  baseBoost = 1;
3489
4125
  }
@@ -3889,42 +4525,53 @@ var SearchService = class {
3889
4525
  };
3890
4526
 
3891
4527
  // src/services/store-definition.service.ts
3892
- import { readFile as readFile5, writeFile as writeFile4, mkdir as mkdir3, access as access3 } from "fs/promises";
3893
- import { dirname as dirname4, resolve as resolve2, isAbsolute, join as join8 } from "path";
4528
+ import { readFile as readFile7, access as access4 } from "fs/promises";
4529
+ import { resolve as resolve2, isAbsolute as isAbsolute2, join as join9 } from "path";
3894
4530
 
3895
4531
  // src/types/store-definition.ts
3896
- import { z as z2 } from "zod";
3897
- var BaseStoreDefinitionSchema = z2.object({
3898
- name: z2.string().min(1, "Store name is required"),
3899
- description: z2.string().optional(),
3900
- tags: z2.array(z2.string()).optional()
4532
+ import { z as z3 } from "zod";
4533
+ var BaseStoreDefinitionSchema = z3.object({
4534
+ name: z3.string().min(1, "Store name is required"),
4535
+ description: z3.string().optional(),
4536
+ tags: z3.array(z3.string()).optional()
3901
4537
  });
3902
4538
  var FileStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
3903
- type: z2.literal("file"),
3904
- path: z2.string().min(1, "Path is required for file stores")
4539
+ type: z3.literal("file"),
4540
+ path: z3.string().min(1, "Path is required for file stores")
3905
4541
  });
4542
/**
 * String schema for a repository location: accepts anything the URL
 * constructor can parse, plus scp-style SSH addresses (`git@host:path`).
 */
var GitUrlSchema = z3.string().refine(
  function isUrlOrSshAddress(candidate) {
    // scp-style addresses are not parseable as URLs, so test them separately.
    if (/^git@[\w.-]+:[\w./-]+$/.test(candidate)) {
      return true;
    }
    try {
      new URL(candidate);
    } catch {
      return false;
    }
    return true;
  },
  { message: "Must be a valid URL or SSH URL (git@host:path)" }
);
3906
4553
  var RepoStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
3907
- type: z2.literal("repo"),
3908
- url: z2.url("Valid URL is required for repo stores"),
3909
- branch: z2.string().optional(),
3910
- depth: z2.number().int().positive("Depth must be a positive integer").optional()
4554
+ type: z3.literal("repo"),
4555
+ url: GitUrlSchema,
4556
+ branch: z3.string().optional(),
4557
+ depth: z3.number().int().positive("Depth must be a positive integer").optional()
3911
4558
  });
3912
4559
  var WebStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
3913
- type: z2.literal("web"),
3914
- url: z2.url("Valid URL is required for web stores"),
3915
- depth: z2.number().int().min(0, "Depth must be non-negative").default(1),
3916
- maxPages: z2.number().int().positive("maxPages must be a positive integer").optional(),
3917
- crawlInstructions: z2.string().optional(),
3918
- extractInstructions: z2.string().optional()
4560
+ type: z3.literal("web"),
4561
+ url: z3.url("Valid URL is required for web stores"),
4562
+ depth: z3.number().int().min(0, "Depth must be non-negative").default(1),
4563
+ maxPages: z3.number().int().positive("maxPages must be a positive integer").optional(),
4564
+ crawlInstructions: z3.string().optional(),
4565
+ extractInstructions: z3.string().optional()
3919
4566
  });
3920
- var StoreDefinitionSchema = z2.discriminatedUnion("type", [
4567
+ var StoreDefinitionSchema = z3.discriminatedUnion("type", [
3921
4568
  FileStoreDefinitionSchema,
3922
4569
  RepoStoreDefinitionSchema,
3923
4570
  WebStoreDefinitionSchema
3924
4571
  ]);
3925
- var StoreDefinitionsConfigSchema = z2.object({
3926
- version: z2.literal(1),
3927
- stores: z2.array(StoreDefinitionSchema)
4572
+ var StoreDefinitionsConfigSchema = z3.object({
4573
+ version: z3.literal(1),
4574
+ stores: z3.array(StoreDefinitionSchema)
3928
4575
  });
3929
4576
  function isFileStoreDefinition(def) {
3930
4577
  return def.type === "file";
@@ -3943,7 +4590,7 @@ var DEFAULT_STORE_DEFINITIONS_CONFIG = {
3943
4590
  // src/services/store-definition.service.ts
3944
4591
  async function fileExists3(path4) {
3945
4592
  try {
3946
- await access3(path4);
4593
+ await access4(path4);
3947
4594
  return true;
3948
4595
  } catch {
3949
4596
  return false;
@@ -3955,7 +4602,7 @@ var StoreDefinitionService = class {
3955
4602
  config = null;
3956
4603
  constructor(projectRoot) {
3957
4604
  this.projectRoot = projectRoot ?? ProjectRootService.resolve();
3958
- this.configPath = join8(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
4605
+ this.configPath = join9(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
3959
4606
  }
3960
4607
  /**
3961
4608
  * Load store definitions from config file.
@@ -3974,7 +4621,7 @@ var StoreDefinitionService = class {
3974
4621
  };
3975
4622
  return this.config;
3976
4623
  }
3977
- const content = await readFile5(this.configPath, "utf-8");
4624
+ const content = await readFile7(this.configPath, "utf-8");
3978
4625
  let parsed;
3979
4626
  try {
3980
4627
  parsed = JSON.parse(content);
@@ -3994,8 +4641,7 @@ var StoreDefinitionService = class {
3994
4641
  * Save store definitions to config file.
3995
4642
  */
3996
4643
  async save(config) {
3997
- await mkdir3(dirname4(this.configPath), { recursive: true });
3998
- await writeFile4(this.configPath, JSON.stringify(config, null, 2));
4644
+ await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
3999
4645
  this.config = config;
4000
4646
  }
4001
4647
  /**
@@ -4067,7 +4713,7 @@ var StoreDefinitionService = class {
4067
4713
  * Resolve a file store path relative to project root.
4068
4714
  */
4069
4715
  resolvePath(path4) {
4070
- if (isAbsolute(path4)) {
4716
+ if (isAbsolute2(path4)) {
4071
4717
  return path4;
4072
4718
  }
4073
4719
  return resolve2(this.projectRoot, path4);
@@ -4094,8 +4740,8 @@ var StoreDefinitionService = class {
4094
4740
 
4095
4741
  // src/services/store.service.ts
4096
4742
  import { randomUUID as randomUUID2 } from "crypto";
4097
- import { readFile as readFile6, writeFile as writeFile5, mkdir as mkdir5, stat, access as access4 } from "fs/promises";
4098
- import { join as join9, resolve as resolve3 } from "path";
4743
+ import { readFile as readFile8, mkdir as mkdir5, stat as stat2, access as access5 } from "fs/promises";
4744
+ import { join as join10, resolve as resolve3 } from "path";
4099
4745
 
4100
4746
  // src/plugin/git-clone.ts
4101
4747
  import { spawn } from "child_process";
@@ -4126,6 +4772,9 @@ async function cloneRepository(options) {
4126
4772
  });
4127
4773
  });
4128
4774
  }
4775
/**
 * Tell whether a source string looks like a remote git location rather than
 * a local path.
 *
 * @param source - Candidate string.
 * @returns true when it begins with "http://", "https://" or "git@".
 */
function isGitUrl(source) {
  const remotePrefixes = ["http://", "https://", "git@"];
  return remotePrefixes.some((prefix) => source.startsWith(prefix));
}
4129
4778
  function extractRepoName(url) {
4130
4779
  const match = /\/([^/]+?)(\.git)?$/.exec(url);
4131
4780
  const name = match?.[1];
@@ -4138,7 +4787,7 @@ function extractRepoName(url) {
4138
4787
  // src/services/store.service.ts
4139
4788
  async function fileExists4(path4) {
4140
4789
  try {
4141
- await access4(path4);
4790
+ await access5(path4);
4142
4791
  return true;
4143
4792
  } catch {
4144
4793
  return false;
@@ -4148,11 +4797,13 @@ var StoreService = class {
4148
4797
  dataDir;
4149
4798
  definitionService;
4150
4799
  gitignoreService;
4800
+ projectRoot;
4151
4801
  registry = { stores: [] };
4152
4802
  constructor(dataDir, options) {
4153
4803
  this.dataDir = dataDir;
4154
4804
  this.definitionService = options?.definitionService ?? void 0;
4155
4805
  this.gitignoreService = options?.gitignoreService ?? void 0;
4806
+ this.projectRoot = options?.projectRoot ?? void 0;
4156
4807
  }
4157
4808
  async initialize() {
4158
4809
  await mkdir5(this.dataDir, { recursive: true });
@@ -4160,6 +4811,7 @@ var StoreService = class {
4160
4811
  }
4161
4812
  /**
4162
4813
  * Convert a Store and CreateStoreInput to a StoreDefinition for persistence.
4814
+ * Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
4163
4815
  */
4164
4816
  createDefinitionFromStore(store, input) {
4165
4817
  const tags = store.tags !== void 0 ? [...store.tags] : void 0;
@@ -4181,10 +4833,13 @@ var StoreService = class {
4181
4833
  }
4182
4834
  case "repo": {
4183
4835
  const repoStore = store;
4836
+ if (repoStore.url === void 0) {
4837
+ return void 0;
4838
+ }
4184
4839
  const repoDef = {
4185
4840
  ...base,
4186
4841
  type: "repo",
4187
- url: repoStore.url ?? "",
4842
+ url: repoStore.url,
4188
4843
  branch: repoStore.branch,
4189
4844
  depth: input.depth
4190
4845
  };
@@ -4196,7 +4851,58 @@ var StoreService = class {
4196
4851
  ...base,
4197
4852
  type: "web",
4198
4853
  url: webStore.url,
4199
- depth: webStore.depth
4854
+ depth: webStore.depth,
4855
+ maxPages: input.maxPages,
4856
+ crawlInstructions: input.crawlInstructions,
4857
+ extractInstructions: input.extractInstructions
4858
+ };
4859
+ return webDef;
4860
+ }
4861
+ }
4862
+ }
4863
+ /**
4864
+ * Create a StoreDefinition from an existing store (without original input).
4865
+ * Used when updating/renaming stores where we don't have the original input.
4866
+ * Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
4867
+ */
4868
+ createDefinitionFromExistingStore(store) {
4869
+ const tags = store.tags !== void 0 ? [...store.tags] : void 0;
4870
+ const base = {
4871
+ name: store.name,
4872
+ description: store.description,
4873
+ tags
4874
+ };
4875
+ switch (store.type) {
4876
+ case "file": {
4877
+ const fileDef = {
4878
+ ...base,
4879
+ type: "file",
4880
+ path: store.path
4881
+ };
4882
+ return fileDef;
4883
+ }
4884
+ case "repo": {
4885
+ if (store.url === void 0) {
4886
+ return void 0;
4887
+ }
4888
+ const repoDef = {
4889
+ ...base,
4890
+ type: "repo",
4891
+ url: store.url,
4892
+ branch: store.branch,
4893
+ depth: store.depth
4894
+ };
4895
+ return repoDef;
4896
+ }
4897
+ case "web": {
4898
+ const webDef = {
4899
+ ...base,
4900
+ type: "web",
4901
+ url: store.url,
4902
+ depth: store.depth,
4903
+ maxPages: store.maxPages,
4904
+ crawlInstructions: store.crawlInstructions,
4905
+ extractInstructions: store.extractInstructions
4200
4906
  };
4201
4907
  return webDef;
4202
4908
  }
@@ -4218,9 +4924,9 @@ var StoreService = class {
4218
4924
  if (input.path === void 0) {
4219
4925
  return err(new Error("Path is required for file stores"));
4220
4926
  }
4221
- const normalizedPath = resolve3(input.path);
4927
+ const normalizedPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, input.path) : resolve3(input.path);
4222
4928
  try {
4223
- const stats = await stat(normalizedPath);
4929
+ const stats = await stat2(normalizedPath);
4224
4930
  if (!stats.isDirectory()) {
4225
4931
  return err(new Error(`Path is not a directory: ${normalizedPath}`));
4226
4932
  }
@@ -4243,7 +4949,7 @@ var StoreService = class {
4243
4949
  case "repo": {
4244
4950
  let repoPath = input.path;
4245
4951
  if (input.url !== void 0) {
4246
- const cloneDir = join9(this.dataDir, "repos", id);
4952
+ const cloneDir = join10(this.dataDir, "repos", id);
4247
4953
  const result = await cloneRepository({
4248
4954
  url: input.url,
4249
4955
  targetDir: cloneDir,
@@ -4258,7 +4964,17 @@ var StoreService = class {
4258
4964
  if (repoPath === void 0) {
4259
4965
  return err(new Error("Path or URL required for repo stores"));
4260
4966
  }
4261
- const normalizedRepoPath = resolve3(repoPath);
4967
+ const normalizedRepoPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, repoPath) : resolve3(repoPath);
4968
+ if (input.url === void 0) {
4969
+ try {
4970
+ const stats = await stat2(normalizedRepoPath);
4971
+ if (!stats.isDirectory()) {
4972
+ return err(new Error(`Path is not a directory: ${normalizedRepoPath}`));
4973
+ }
4974
+ } catch {
4975
+ return err(new Error(`Repository path does not exist: ${normalizedRepoPath}`));
4976
+ }
4977
+ }
4262
4978
  store = {
4263
4979
  type: "repo",
4264
4980
  id,
@@ -4266,6 +4982,7 @@ var StoreService = class {
4266
4982
  path: normalizedRepoPath,
4267
4983
  url: input.url,
4268
4984
  branch: input.branch,
4985
+ depth: input.depth ?? 1,
4269
4986
  description: input.description,
4270
4987
  tags: input.tags,
4271
4988
  status: "ready",
@@ -4284,6 +5001,9 @@ var StoreService = class {
4284
5001
  name: input.name,
4285
5002
  url: input.url,
4286
5003
  depth: input.depth ?? 1,
5004
+ maxPages: input.maxPages,
5005
+ crawlInstructions: input.crawlInstructions,
5006
+ extractInstructions: input.extractInstructions,
4287
5007
  description: input.description,
4288
5008
  tags: input.tags,
4289
5009
  status: "ready",
@@ -4303,7 +5023,9 @@ var StoreService = class {
4303
5023
  }
4304
5024
  if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
4305
5025
  const definition = this.createDefinitionFromStore(store, input);
4306
- await this.definitionService.addDefinition(definition);
5026
+ if (definition !== void 0) {
5027
+ await this.definitionService.addDefinition(definition);
5028
+ }
4307
5029
  }
4308
5030
  return ok(store);
4309
5031
  }
@@ -4333,6 +5055,16 @@ var StoreService = class {
4333
5055
  if (store === void 0) {
4334
5056
  return err(new Error(`Store not found: ${id}`));
4335
5057
  }
5058
+ if (updates.name?.trim() === "") {
5059
+ return err(new Error("Store name cannot be empty"));
5060
+ }
5061
+ const isRenaming = updates.name !== void 0 && updates.name !== store.name;
5062
+ if (isRenaming) {
5063
+ const existing = this.registry.stores.find((s) => s.name === updates.name && s.id !== id);
5064
+ if (existing !== void 0) {
5065
+ return err(new Error(`Store with name '${updates.name}' already exists`));
5066
+ }
5067
+ }
4336
5068
  const updated = {
4337
5069
  ...store,
4338
5070
  ...updates,
@@ -4341,14 +5073,24 @@ var StoreService = class {
4341
5073
  this.registry.stores[index] = updated;
4342
5074
  await this.saveRegistry();
4343
5075
  if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
4344
- const defUpdates = {};
4345
- if (updates.description !== void 0) {
4346
- defUpdates.description = updates.description;
4347
- }
4348
- if (updates.tags !== void 0) {
4349
- defUpdates.tags = [...updates.tags];
5076
+ if (isRenaming) {
5077
+ await this.definitionService.removeDefinition(store.name);
5078
+ const newDefinition = this.createDefinitionFromExistingStore(updated);
5079
+ if (newDefinition !== void 0) {
5080
+ await this.definitionService.addDefinition(newDefinition);
5081
+ }
5082
+ } else {
5083
+ const defUpdates = {};
5084
+ if (updates.description !== void 0) {
5085
+ defUpdates.description = updates.description;
5086
+ }
5087
+ if (updates.tags !== void 0) {
5088
+ defUpdates.tags = [...updates.tags];
5089
+ }
5090
+ if (Object.keys(defUpdates).length > 0) {
5091
+ await this.definitionService.updateDefinition(store.name, defUpdates);
5092
+ }
4350
5093
  }
4351
- await this.definitionService.updateDefinition(store.name, defUpdates);
4352
5094
  }
4353
5095
  return ok(updated);
4354
5096
  }
@@ -4370,14 +5112,14 @@ var StoreService = class {
4370
5112
  return ok(void 0);
4371
5113
  }
4372
5114
  async loadRegistry() {
4373
- const registryPath = join9(this.dataDir, "stores.json");
5115
+ const registryPath = join10(this.dataDir, "stores.json");
4374
5116
  const exists = await fileExists4(registryPath);
4375
5117
  if (!exists) {
4376
5118
  this.registry = { stores: [] };
4377
5119
  await this.saveRegistry();
4378
5120
  return;
4379
5121
  }
4380
- const content = await readFile6(registryPath, "utf-8");
5122
+ const content = await readFile8(registryPath, "utf-8");
4381
5123
  try {
4382
5124
  const data = JSON.parse(content);
4383
5125
  this.registry = {
@@ -4395,8 +5137,8 @@ var StoreService = class {
4395
5137
  }
4396
5138
  }
4397
5139
  async saveRegistry() {
4398
- const registryPath = join9(this.dataDir, "stores.json");
4399
- await writeFile5(registryPath, JSON.stringify(this.registry, null, 2));
5140
+ const registryPath = join10(this.dataDir, "stores.json");
5141
+ await atomicWriteFile(registryPath, JSON.stringify(this.registry, null, 2));
4400
5142
  }
4401
5143
  };
4402
5144
 
@@ -4410,33 +5152,33 @@ import { fileURLToPath } from "url";
4410
5152
  import { ZodError } from "zod";
4411
5153
 
4412
5154
  // src/crawl/schemas.ts
4413
- import { z as z3 } from "zod";
4414
- var CrawledLinkSchema = z3.object({
4415
- href: z3.string(),
4416
- text: z3.string(),
4417
- title: z3.string().optional(),
4418
- base_domain: z3.string().optional(),
4419
- head_data: z3.unknown().optional(),
4420
- head_extraction_status: z3.unknown().optional(),
4421
- head_extraction_error: z3.unknown().optional(),
4422
- intrinsic_score: z3.number().optional(),
4423
- contextual_score: z3.unknown().optional(),
4424
- total_score: z3.unknown().optional()
5155
+ import { z as z4 } from "zod";
5156
+ var CrawledLinkSchema = z4.object({
5157
+ href: z4.string(),
5158
+ text: z4.string(),
5159
+ title: z4.string().optional(),
5160
+ base_domain: z4.string().optional(),
5161
+ head_data: z4.unknown().optional(),
5162
+ head_extraction_status: z4.unknown().optional(),
5163
+ head_extraction_error: z4.unknown().optional(),
5164
+ intrinsic_score: z4.number().optional(),
5165
+ contextual_score: z4.unknown().optional(),
5166
+ total_score: z4.unknown().optional()
4425
5167
  });
4426
- var CrawlPageSchema = z3.object({
4427
- url: z3.string(),
4428
- title: z3.string(),
4429
- content: z3.string(),
4430
- links: z3.array(z3.string()),
4431
- crawledAt: z3.string()
5168
+ var CrawlPageSchema = z4.object({
5169
+ url: z4.string(),
5170
+ title: z4.string(),
5171
+ content: z4.string(),
5172
+ links: z4.array(z4.string()),
5173
+ crawledAt: z4.string()
4432
5174
  });
4433
- var CrawlResultSchema = z3.object({
4434
- pages: z3.array(CrawlPageSchema)
5175
+ var CrawlResultSchema = z4.object({
5176
+ pages: z4.array(CrawlPageSchema)
4435
5177
  });
4436
- var HeadlessResultSchema = z3.object({
4437
- html: z3.string(),
4438
- markdown: z3.string(),
4439
- links: z3.array(z3.union([CrawledLinkSchema, z3.string()]))
5178
+ var HeadlessResultSchema = z4.object({
5179
+ html: z4.string(),
5180
+ markdown: z4.string(),
5181
+ links: z4.array(z4.union([CrawledLinkSchema, z4.string()]))
4440
5182
  });
4441
5183
  function validateHeadlessResult(data) {
4442
5184
  return HeadlessResultSchema.parse(data);
@@ -4444,33 +5186,33 @@ function validateHeadlessResult(data) {
4444
5186
  function validateCrawlResult(data) {
4445
5187
  return CrawlResultSchema.parse(data);
4446
5188
  }
4447
- var MethodInfoSchema = z3.object({
4448
- name: z3.string(),
4449
- async: z3.boolean(),
4450
- signature: z3.string(),
4451
- startLine: z3.number(),
4452
- endLine: z3.number(),
4453
- calls: z3.array(z3.string())
5189
+ var MethodInfoSchema = z4.object({
5190
+ name: z4.string(),
5191
+ async: z4.boolean(),
5192
+ signature: z4.string(),
5193
+ startLine: z4.number(),
5194
+ endLine: z4.number(),
5195
+ calls: z4.array(z4.string())
4454
5196
  });
4455
- var CodeNodeSchema = z3.object({
4456
- type: z3.enum(["function", "class"]),
4457
- name: z3.string(),
4458
- exported: z3.boolean(),
4459
- startLine: z3.number(),
4460
- endLine: z3.number(),
4461
- async: z3.boolean().optional(),
4462
- signature: z3.string().optional(),
4463
- calls: z3.array(z3.string()).optional(),
4464
- methods: z3.array(MethodInfoSchema).optional()
5197
+ var CodeNodeSchema = z4.object({
5198
+ type: z4.enum(["function", "class"]),
5199
+ name: z4.string(),
5200
+ exported: z4.boolean(),
5201
+ startLine: z4.number(),
5202
+ endLine: z4.number(),
5203
+ async: z4.boolean().optional(),
5204
+ signature: z4.string().optional(),
5205
+ calls: z4.array(z4.string()).optional(),
5206
+ methods: z4.array(MethodInfoSchema).optional()
4465
5207
  });
4466
- var ImportInfoSchema = z3.object({
4467
- source: z3.string(),
4468
- imported: z3.string(),
4469
- alias: z3.string().optional().nullable()
5208
+ var ImportInfoSchema = z4.object({
5209
+ source: z4.string(),
5210
+ imported: z4.string(),
5211
+ alias: z4.string().optional().nullable()
4470
5212
  });
4471
- var ParsePythonResultSchema = z3.object({
4472
- nodes: z3.array(CodeNodeSchema),
4473
- imports: z3.array(ImportInfoSchema)
5213
+ var ParsePythonResultSchema = z4.object({
5214
+ nodes: z4.array(CodeNodeSchema),
5215
+ imports: z4.array(ImportInfoSchema)
4474
5216
  });
4475
5217
  function validateParsePythonResult(data) {
4476
5218
  return ParsePythonResultSchema.parse(data);
@@ -4478,6 +5220,15 @@ function validateParsePythonResult(data) {
4478
5220
 
4479
5221
  // src/crawl/bridge.ts
4480
5222
  var logger3 = createLogger("python-bridge");
5223
+ function getPythonExecutable() {
5224
+ return process.platform === "win32" ? "python" : "python3";
5225
+ }
5226
+ function getVenvPythonPath(pluginRoot) {
5227
+ if (process.platform === "win32") {
5228
+ return path3.join(pluginRoot, ".venv", "Scripts", "python.exe");
5229
+ }
5230
+ return path3.join(pluginRoot, ".venv", "bin", "python3");
5231
+ }
4481
5232
  var PythonBridge = class {
4482
5233
  process = null;
4483
5234
  pending = /* @__PURE__ */ new Map();
@@ -4487,20 +5238,21 @@ var PythonBridge = class {
4487
5238
  start() {
4488
5239
  if (this.process) return Promise.resolve();
4489
5240
  const currentFilePath = fileURLToPath(import.meta.url);
4490
- const isProduction = currentFilePath.includes("/dist/");
5241
+ const distPattern = `${path3.sep}dist${path3.sep}`;
5242
+ const isProduction = currentFilePath.includes(distPattern);
4491
5243
  let pythonWorkerPath;
4492
5244
  let pythonPath;
4493
5245
  if (isProduction) {
4494
- const distIndex = currentFilePath.indexOf("/dist/");
5246
+ const distIndex = currentFilePath.indexOf(distPattern);
4495
5247
  const pluginRoot = currentFilePath.substring(0, distIndex);
4496
5248
  pythonWorkerPath = path3.join(pluginRoot, "python", "crawl_worker.py");
4497
- const venvPython = path3.join(pluginRoot, ".venv", "bin", "python3");
4498
- pythonPath = existsSync4(venvPython) ? venvPython : "python3";
5249
+ const venvPython = getVenvPythonPath(pluginRoot);
5250
+ pythonPath = existsSync4(venvPython) ? venvPython : getPythonExecutable();
4499
5251
  } else {
4500
5252
  const srcDir = path3.dirname(path3.dirname(currentFilePath));
4501
5253
  const projectRoot = path3.dirname(srcDir);
4502
5254
  pythonWorkerPath = path3.join(projectRoot, "python", "crawl_worker.py");
4503
- pythonPath = "python3";
5255
+ pythonPath = getPythonExecutable();
4504
5256
  }
4505
5257
  logger3.debug(
4506
5258
  { pythonWorkerPath, pythonPath, currentFilePath, isProduction },
@@ -4737,16 +5489,18 @@ var PythonBridge = class {
4737
5489
 
4738
5490
  // src/db/embeddings.ts
4739
5491
  import { homedir as homedir2 } from "os";
4740
- import { join as join10 } from "path";
5492
+ import { join as join11 } from "path";
4741
5493
  import { pipeline, env } from "@huggingface/transformers";
4742
- env.cacheDir = join10(homedir2(), ".cache", "huggingface-transformers");
5494
+ env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
4743
5495
  var EmbeddingEngine = class {
4744
5496
  extractor = null;
5497
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed()
5498
+ _dimensions = null;
4745
5499
  modelName;
4746
- dimensions;
4747
- constructor(modelName = "Xenova/all-MiniLM-L6-v2", dimensions = 384) {
5500
+ batchSize;
5501
+ constructor(modelName = "Xenova/all-MiniLM-L6-v2", batchSize = 32) {
4748
5502
  this.modelName = modelName;
4749
- this.dimensions = dimensions;
5503
+ this.batchSize = batchSize;
4750
5504
  }
4751
5505
  async initialize() {
4752
5506
  if (this.extractor !== null) return;
@@ -4766,23 +5520,43 @@ var EmbeddingEngine = class {
4766
5520
  normalize: true
4767
5521
  });
4768
5522
  const result = Array.from(output.data);
5523
+ this._dimensions ??= result.length;
4769
5524
  return result.map((v) => Number(v));
4770
5525
  }
4771
5526
  async embedBatch(texts) {
4772
- const BATCH_SIZE = 32;
4773
5527
  const results = [];
4774
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
4775
- const batch = texts.slice(i, i + BATCH_SIZE);
5528
+ for (let i = 0; i < texts.length; i += this.batchSize) {
5529
+ const batch = texts.slice(i, i + this.batchSize);
4776
5530
  const batchResults = await Promise.all(batch.map((text) => this.embed(text)));
4777
5531
  results.push(...batchResults);
4778
- if (i + BATCH_SIZE < texts.length) {
5532
+ if (i + this.batchSize < texts.length) {
4779
5533
  await new Promise((resolve4) => setTimeout(resolve4, 100));
4780
5534
  }
4781
5535
  }
4782
5536
  return results;
4783
5537
  }
5538
+ /**
5539
+ * Get cached embedding dimensions. Throws if embed() hasn't been called yet.
5540
+ * Use ensureDimensions() if you need to guarantee dimensions are available.
5541
+ */
4784
5542
  getDimensions() {
4785
- return this.dimensions;
5543
+ if (this._dimensions === null) {
5544
+ throw new Error("Cannot get dimensions before first embed() call");
5545
+ }
5546
+ return this._dimensions;
5547
+ }
5548
+ /**
5549
+ * Ensure dimensions are available, initializing the model if needed.
5550
+ * Returns the embedding dimensions for the current model.
5551
+ */
5552
+ async ensureDimensions() {
5553
+ if (this._dimensions === null) {
5554
+ await this.embed("");
5555
+ }
5556
+ if (this._dimensions === null) {
5557
+ throw new Error("Failed to determine embedding dimensions");
5558
+ }
5559
+ return this._dimensions;
4786
5560
  }
4787
5561
  /**
4788
5562
  * Dispose the embedding pipeline to free resources.
@@ -4800,17 +5574,18 @@ var EmbeddingEngine = class {
4800
5574
  import * as lancedb from "@lancedb/lancedb";
4801
5575
 
4802
5576
  // src/types/document.ts
4803
- import { z as z4 } from "zod";
4804
- var DocumentTypeSchema = z4.enum(["file", "chunk", "web"]);
4805
- var DocumentMetadataSchema = z4.object({
4806
- path: z4.string().optional(),
4807
- url: z4.string().optional(),
5577
+ import { z as z5 } from "zod";
5578
+ var DocumentTypeSchema = z5.enum(["file", "chunk", "web"]);
5579
+ var DocumentMetadataSchema = z5.object({
5580
+ path: z5.string().optional(),
5581
+ url: z5.string().optional(),
4808
5582
  type: DocumentTypeSchema,
4809
- storeId: z4.string(),
4810
- indexedAt: z4.union([z4.string(), z4.date()]),
4811
- fileHash: z4.string().optional(),
4812
- chunkIndex: z4.number().optional(),
4813
- totalChunks: z4.number().optional()
5583
+ storeId: z5.string(),
5584
+ indexedAt: z5.string(),
5585
+ // ISO 8601 string (what JSON serialization produces)
5586
+ fileHash: z5.string().optional(),
5587
+ chunkIndex: z5.number().optional(),
5588
+ totalChunks: z5.number().optional()
4814
5589
  }).loose();
4815
5590
 
4816
5591
  // src/db/lance.ts
@@ -4818,10 +5593,23 @@ var LanceStore = class {
4818
5593
  connection = null;
4819
5594
  tables = /* @__PURE__ */ new Map();
4820
5595
  dataDir;
5596
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly -- set via setDimensions()
5597
+ _dimensions = null;
4821
5598
  constructor(dataDir) {
4822
5599
  this.dataDir = dataDir;
4823
5600
  }
5601
+ /**
5602
+ * Set the embedding dimensions. Must be called before initialize().
5603
+ * This allows dimensions to be derived from the embedding model at runtime.
5604
+ * Idempotent: subsequent calls are ignored if dimensions are already set.
5605
+ */
5606
+ setDimensions(dimensions) {
5607
+ this._dimensions ??= dimensions;
5608
+ }
4824
5609
  async initialize(storeId) {
5610
+ if (this._dimensions === null) {
5611
+ throw new Error("Dimensions not set. Call setDimensions() before initialize().");
5612
+ }
4825
5613
  this.connection ??= await lancedb.connect(this.dataDir);
4826
5614
  const tableName = this.getTableName(storeId);
4827
5615
  const tableNames = await this.connection.tableNames();
@@ -4830,7 +5618,7 @@ var LanceStore = class {
4830
5618
  {
4831
5619
  id: "__init__",
4832
5620
  content: "",
4833
- vector: new Array(384).fill(0),
5621
+ vector: new Array(this._dimensions).fill(0),
4834
5622
  metadata: "{}"
4835
5623
  }
4836
5624
  ]);
@@ -4852,10 +5640,17 @@ var LanceStore = class {
4852
5640
  await table.add(lanceDocuments);
4853
5641
  }
4854
5642
  async deleteDocuments(storeId, documentIds) {
5643
+ if (documentIds.length === 0) {
5644
+ return;
5645
+ }
4855
5646
  const table = await this.getTable(storeId);
4856
5647
  const idList = documentIds.map((id) => `"${id}"`).join(", ");
4857
5648
  await table.delete(`id IN (${idList})`);
4858
5649
  }
5650
+ async clearAllDocuments(storeId) {
5651
+ const table = await this.getTable(storeId);
5652
+ await table.delete("id IS NOT NULL");
5653
+ }
4859
5654
  async search(storeId, vector, limit, _threshold) {
4860
5655
  const table = await this.getTable(storeId);
4861
5656
  const query = table.vectorSearch(vector).limit(limit).distanceType("cosine");
@@ -4895,7 +5690,9 @@ var LanceStore = class {
4895
5690
  }
4896
5691
  async deleteStore(storeId) {
4897
5692
  const tableName = this.getTableName(storeId);
4898
- if (this.connection !== null) {
5693
+ this.connection ??= await lancedb.connect(this.dataDir);
5694
+ const tableNames = await this.connection.tableNames();
5695
+ if (tableNames.includes(tableName)) {
4899
5696
  await this.connection.dropTable(tableName);
4900
5697
  this.tables.delete(tableName);
4901
5698
  }
@@ -4945,6 +5742,8 @@ var LazyServiceContainer = class {
4945
5742
  appConfig;
4946
5743
  dataDir;
4947
5744
  // Lazily initialized (heavy)
5745
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in lazy getter
5746
+ _manifest = null;
4948
5747
  _embeddings = null;
4949
5748
  _codeGraph = null;
4950
5749
  _search = null;
@@ -4966,7 +5765,7 @@ var LazyServiceContainer = class {
4966
5765
  logger4.debug("Lazy-initializing EmbeddingEngine");
4967
5766
  this._embeddings = new EmbeddingEngine(
4968
5767
  this.appConfig.embedding.model,
4969
- this.appConfig.embedding.dimensions
5768
+ this.appConfig.embedding.batchSize
4970
5769
  );
4971
5770
  }
4972
5771
  return this._embeddings;
@@ -4987,7 +5786,12 @@ var LazyServiceContainer = class {
4987
5786
  get search() {
4988
5787
  if (this._search === null) {
4989
5788
  logger4.debug("Lazy-initializing SearchService");
4990
- this._search = new SearchService(this.lance, this.embeddings, this.codeGraph);
5789
+ this._search = new SearchService(
5790
+ this.lance,
5791
+ this.embeddings,
5792
+ this.codeGraph,
5793
+ this.appConfig.search
5794
+ );
4991
5795
  }
4992
5796
  return this._search;
4993
5797
  }
@@ -4998,17 +5802,38 @@ var LazyServiceContainer = class {
4998
5802
  if (this._index === null) {
4999
5803
  logger4.debug("Lazy-initializing IndexService");
5000
5804
  this._index = new IndexService(this.lance, this.embeddings, {
5001
- codeGraphService: this.codeGraph
5805
+ codeGraphService: this.codeGraph,
5806
+ manifestService: this.manifest,
5807
+ chunkSize: this.appConfig.indexing.chunkSize,
5808
+ chunkOverlap: this.appConfig.indexing.chunkOverlap,
5809
+ concurrency: this.appConfig.indexing.concurrency,
5810
+ ignorePatterns: this.appConfig.indexing.ignorePatterns
5002
5811
  });
5003
5812
  }
5004
5813
  return this._index;
5005
5814
  }
5815
+ /**
5816
+ * ManifestService is lazily created on first access.
5817
+ */
5818
+ get manifest() {
5819
+ if (this._manifest === null) {
5820
+ logger4.debug("Lazy-initializing ManifestService");
5821
+ this._manifest = new ManifestService(this.dataDir);
5822
+ }
5823
+ return this._manifest;
5824
+ }
5006
5825
  /**
5007
5826
  * Check if embeddings have been initialized (for cleanup purposes).
5008
5827
  */
5009
5828
  get hasEmbeddings() {
5010
5829
  return this._embeddings !== null;
5011
5830
  }
5831
+ /**
5832
+ * Check if search service has been initialized (for cleanup purposes).
5833
+ */
5834
+ get hasSearch() {
5835
+ return this._search !== null;
5836
+ }
5012
5837
  };
5013
5838
  async function createLazyServices(configPath, dataDir, projectRoot) {
5014
5839
  logger4.info({ configPath, dataDir, projectRoot }, "Initializing lazy services");
@@ -5019,16 +5844,21 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
5019
5844
  const pythonBridge = new PythonBridge();
5020
5845
  await pythonBridge.start();
5021
5846
  const lance = new LanceStore(resolvedDataDir);
5022
- let storeOptions;
5023
- if (projectRoot !== void 0) {
5024
- const definitionService = new StoreDefinitionService(projectRoot);
5025
- const gitignoreService = new GitignoreService(projectRoot);
5026
- storeOptions = { definitionService, gitignoreService };
5027
- }
5847
+ const resolvedProjectRoot = config.resolveProjectRoot();
5848
+ const definitionService = new StoreDefinitionService(resolvedProjectRoot);
5849
+ const gitignoreService = new GitignoreService(resolvedProjectRoot);
5850
+ const storeOptions = {
5851
+ definitionService,
5852
+ gitignoreService,
5853
+ projectRoot: resolvedProjectRoot
5854
+ };
5028
5855
  const store = new StoreService(resolvedDataDir, storeOptions);
5029
5856
  await store.initialize();
5030
5857
  const durationMs = Date.now() - startTime;
5031
- logger4.info({ dataDir: resolvedDataDir, durationMs }, "Lazy services initialized");
5858
+ logger4.info(
5859
+ { dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot, durationMs },
5860
+ "Lazy services initialized"
5861
+ );
5032
5862
  return new LazyServiceContainer(config, appConfig, resolvedDataDir, store, lance, pythonBridge);
5033
5863
  }
5034
5864
  async function createServices(configPath, dataDir, projectRoot) {
@@ -5039,20 +5869,33 @@ async function createServices(configPath, dataDir, projectRoot) {
5039
5869
  const pythonBridge = new PythonBridge();
5040
5870
  await pythonBridge.start();
5041
5871
  const lance = new LanceStore(resolvedDataDir);
5042
- const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.dimensions);
5872
+ const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.batchSize);
5043
5873
  await embeddings.initialize();
5044
- let storeOptions;
5045
- if (projectRoot !== void 0) {
5046
- const definitionService = new StoreDefinitionService(projectRoot);
5047
- const gitignoreService = new GitignoreService(projectRoot);
5048
- storeOptions = { definitionService, gitignoreService };
5049
- }
5874
+ const resolvedProjectRoot = config.resolveProjectRoot();
5875
+ const definitionService = new StoreDefinitionService(resolvedProjectRoot);
5876
+ const gitignoreService = new GitignoreService(resolvedProjectRoot);
5877
+ const storeOptions = {
5878
+ definitionService,
5879
+ gitignoreService,
5880
+ projectRoot: resolvedProjectRoot
5881
+ };
5050
5882
  const store = new StoreService(resolvedDataDir, storeOptions);
5051
5883
  await store.initialize();
5052
5884
  const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
5053
- const search = new SearchService(lance, embeddings, codeGraph);
5054
- const index = new IndexService(lance, embeddings, { codeGraphService: codeGraph });
5055
- logger4.info({ dataDir: resolvedDataDir }, "Services initialized successfully");
5885
+ const manifest = new ManifestService(resolvedDataDir);
5886
+ const search = new SearchService(lance, embeddings, codeGraph, appConfig.search);
5887
+ const index = new IndexService(lance, embeddings, {
5888
+ codeGraphService: codeGraph,
5889
+ manifestService: manifest,
5890
+ chunkSize: appConfig.indexing.chunkSize,
5891
+ chunkOverlap: appConfig.indexing.chunkOverlap,
5892
+ concurrency: appConfig.indexing.concurrency,
5893
+ ignorePatterns: appConfig.indexing.ignorePatterns
5894
+ });
5895
+ logger4.info(
5896
+ { dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot },
5897
+ "Services initialized successfully"
5898
+ );
5056
5899
  return {
5057
5900
  config,
5058
5901
  store,
@@ -5061,12 +5904,20 @@ async function createServices(configPath, dataDir, projectRoot) {
5061
5904
  lance,
5062
5905
  embeddings,
5063
5906
  codeGraph,
5064
- pythonBridge
5907
+ pythonBridge,
5908
+ manifest
5065
5909
  };
5066
5910
  }
5067
5911
  async function destroyServices(services) {
5068
5912
  logger4.info("Shutting down services");
5069
5913
  const errors = [];
5914
+ const isLazyContainer = services instanceof LazyServiceContainer;
5915
+ const shouldCleanupSearch = !isLazyContainer || services.hasSearch;
5916
+ if (shouldCleanupSearch) {
5917
+ services.search.cleanup();
5918
+ } else {
5919
+ logger4.debug("Skipping search cleanup (not initialized)");
5920
+ }
5070
5921
  try {
5071
5922
  await services.pythonBridge.stop();
5072
5923
  } catch (e) {
@@ -5074,7 +5925,6 @@ async function destroyServices(services) {
5074
5925
  logger4.error({ error }, "Error stopping Python bridge");
5075
5926
  errors.push(error);
5076
5927
  }
5077
- const isLazyContainer = services instanceof LazyServiceContainer;
5078
5928
  const shouldDisposeEmbeddings = !isLazyContainer || services.hasEmbeddings;
5079
5929
  if (shouldDisposeEmbeddings) {
5080
5930
  try {
@@ -5104,6 +5954,7 @@ async function destroyServices(services) {
5104
5954
 
5105
5955
  export {
5106
5956
  AdapterRegistry,
5957
+ ProjectRootService,
5107
5958
  createLogger,
5108
5959
  shutdownLogger,
5109
5960
  summarizePayload,
@@ -5111,8 +5962,6 @@ export {
5111
5962
  PythonBridge,
5112
5963
  ChunkingService,
5113
5964
  ASTParser,
5114
- createStoreId,
5115
- createDocumentId,
5116
5965
  ok,
5117
5966
  err,
5118
5967
  classifyWebContentType,
@@ -5120,10 +5969,11 @@ export {
5120
5969
  isRepoStoreDefinition,
5121
5970
  isWebStoreDefinition,
5122
5971
  StoreDefinitionService,
5972
+ isGitUrl,
5123
5973
  extractRepoName,
5124
5974
  JobService,
5125
5975
  createLazyServices,
5126
5976
  createServices,
5127
5977
  destroyServices
5128
5978
  };
5129
- //# sourceMappingURL=chunk-WMALVLFW.js.map
5979
+ //# sourceMappingURL=chunk-RDDGZIDL.js.map