bluera-knowledge 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,11 @@
1
+ import {
2
+ createDocumentId,
3
+ createStoreId
4
+ } from "./chunk-CLIMKLTW.js";
5
+ import {
6
+ parseIgnorePatternsForScanning
7
+ } from "./chunk-HXBIIMYL.js";
8
+
1
9
  // src/analysis/adapter-registry.ts
2
10
  var AdapterRegistry = class _AdapterRegistry {
3
11
  static instance;
@@ -109,18 +117,94 @@ var AdapterRegistry = class _AdapterRegistry {
109
117
  };
110
118
 
111
119
  // src/logging/logger.ts
112
- import { mkdirSync, existsSync } from "fs";
113
- import { homedir } from "os";
114
- import { join } from "path";
120
+ import { mkdirSync, existsSync as existsSync2 } from "fs";
121
+ import { join as join2 } from "path";
115
122
  import pino from "pino";
123
+
124
+ // src/services/project-root.service.ts
125
+ import { existsSync, statSync, realpathSync } from "fs";
126
+ import { dirname, join, normalize, sep } from "path";
127
+ var ProjectRootService = class {
128
+ /**
129
+ * Resolve project root directory using hierarchical detection.
130
+ */
131
+ static resolve(options) {
132
+ if (options?.projectRoot !== void 0 && options.projectRoot !== "") {
133
+ return this.normalize(options.projectRoot);
134
+ }
135
+ const projectRootEnv = process.env["PROJECT_ROOT"];
136
+ if (projectRootEnv !== void 0 && projectRootEnv !== "") {
137
+ return this.normalize(projectRootEnv);
138
+ }
139
+ const gitRoot = this.findGitRoot(process.cwd());
140
+ if (gitRoot !== null) {
141
+ return gitRoot;
142
+ }
143
+ const pwdEnv = process.env["PWD"];
144
+ if (pwdEnv !== void 0 && pwdEnv !== "") {
145
+ return this.normalize(pwdEnv);
146
+ }
147
+ return process.cwd();
148
+ }
149
+ /**
150
+ * Find git repository root by walking up the directory tree looking for .git
151
+ */
152
+ static findGitRoot(startPath) {
153
+ let currentPath = normalize(startPath);
154
+ const root = normalize(sep);
155
+ while (currentPath !== root) {
156
+ const gitPath = join(currentPath, ".git");
157
+ if (existsSync(gitPath)) {
158
+ try {
159
+ const stats = statSync(gitPath);
160
+ if (stats.isDirectory() || stats.isFile()) {
161
+ return currentPath;
162
+ }
163
+ } catch {
164
+ }
165
+ }
166
+ const parentPath = dirname(currentPath);
167
+ if (parentPath === currentPath) {
168
+ break;
169
+ }
170
+ currentPath = parentPath;
171
+ }
172
+ return null;
173
+ }
174
+ /**
175
+ * Normalize path by resolving symlinks and normalizing separators
176
+ */
177
+ static normalize(path4) {
178
+ try {
179
+ const realPath = realpathSync(path4);
180
+ return normalize(realPath);
181
+ } catch {
182
+ return normalize(path4);
183
+ }
184
+ }
185
+ /**
186
+ * Validate that a path exists and is a directory
187
+ */
188
+ static validate(path4) {
189
+ try {
190
+ const stats = statSync(path4);
191
+ return stats.isDirectory();
192
+ } catch {
193
+ return false;
194
+ }
195
+ }
196
+ };
197
+
198
+ // src/logging/logger.ts
116
199
  var VALID_LEVELS = ["trace", "debug", "info", "warn", "error", "fatal"];
117
200
  var VALID_LEVELS_SET = new Set(VALID_LEVELS);
118
201
  function getLogDir() {
119
- return join(homedir(), ".bluera", "bluera-knowledge", "logs");
202
+ const projectRoot = ProjectRootService.resolve();
203
+ return join2(projectRoot, ".bluera", "bluera-knowledge", "logs");
120
204
  }
121
205
  function ensureLogDir() {
122
206
  const logDir = getLogDir();
123
- if (!existsSync(logDir)) {
207
+ if (!existsSync2(logDir)) {
124
208
  mkdirSync(logDir, { recursive: true });
125
209
  }
126
210
  return logDir;
@@ -144,7 +228,7 @@ function initializeLogger() {
144
228
  return rootLogger;
145
229
  }
146
230
  const logDir = ensureLogDir();
147
- const logFile = join(logDir, "app.log");
231
+ const logFile = join2(logDir, "app.log");
148
232
  const level = getLogLevel();
149
233
  const options = {
150
234
  level,
@@ -196,13 +280,13 @@ function shutdownLogger() {
196
280
 
197
281
  // src/logging/payload.ts
198
282
  import { createHash } from "crypto";
199
- import { writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync2 } from "fs";
200
- import { join as join2 } from "path";
283
+ import { writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "fs";
284
+ import { join as join3 } from "path";
201
285
  var MAX_PREVIEW_LENGTH = 500;
202
286
  var PAYLOAD_DUMP_THRESHOLD = 1e4;
203
287
  function getPayloadDir() {
204
- const dir = join2(getLogDirectory(), "payload");
205
- if (!existsSync2(dir)) {
288
+ const dir = join3(getLogDirectory(), "payload");
289
+ if (!existsSync3(dir)) {
206
290
  mkdirSync2(dir, { recursive: true });
207
291
  }
208
292
  return dir;
@@ -219,7 +303,7 @@ function summarizePayload(content, type, identifier, dumpFull = isLevelEnabled("
219
303
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
220
304
  const safeId = safeFilename(identifier);
221
305
  const filename = `${timestamp}-${type}-${safeId}-${hash}.json`;
222
- const filepath = join2(getPayloadDir(), filename);
306
+ const filepath = join3(getPayloadDir(), filename);
223
307
  writeFileSync(
224
308
  filepath,
225
309
  JSON.stringify(
@@ -292,6 +376,23 @@ function err(error) {
292
376
  return { success: false, error };
293
377
  }
294
378
 
379
+ // src/utils/atomic-write.ts
380
+ import { writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync3 } from "fs";
381
+ import { writeFile, rename, mkdir } from "fs/promises";
382
+ import { dirname as dirname2 } from "path";
383
+ async function atomicWriteFile(filePath, content) {
384
+ await mkdir(dirname2(filePath), { recursive: true });
385
+ const tempPath = `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}`;
386
+ await writeFile(tempPath, content, "utf-8");
387
+ await rename(tempPath, filePath);
388
+ }
389
+ function atomicWriteFileSync(filePath, content) {
390
+ mkdirSync3(dirname2(filePath), { recursive: true });
391
+ const tempPath = `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}`;
392
+ writeFileSync2(tempPath, content, "utf-8");
393
+ renameSync(tempPath, filePath);
394
+ }
395
+
295
396
  // src/services/job.service.ts
296
397
  var JobService = class {
297
398
  jobsDir;
@@ -520,13 +621,13 @@ var JobService = class {
520
621
  */
521
622
  writeJob(job) {
522
623
  const jobFile = path.join(this.jobsDir, `${job.id}.json`);
523
- fs.writeFileSync(jobFile, JSON.stringify(job, null, 2), "utf-8");
624
+ atomicWriteFileSync(jobFile, JSON.stringify(job, null, 2));
524
625
  }
525
626
  };
526
627
 
527
628
  // src/services/code-graph.service.ts
528
- import { readFile, writeFile, mkdir, rm } from "fs/promises";
529
- import { join as join3, dirname } from "path";
629
+ import { readFile, writeFile as writeFile2, mkdir as mkdir2, rm } from "fs/promises";
630
+ import { join as join4, dirname as dirname3 } from "path";
530
631
 
531
632
  // src/analysis/ast-parser.ts
532
633
  import { parse } from "@babel/parser";
@@ -1683,11 +1784,31 @@ var CodeGraphService = class {
1683
1784
  parser;
1684
1785
  parserFactory;
1685
1786
  graphCache;
1787
+ cacheListeners;
1686
1788
  constructor(dataDir, pythonBridge) {
1687
1789
  this.dataDir = dataDir;
1688
1790
  this.parser = new ASTParser();
1689
1791
  this.parserFactory = new ParserFactory(pythonBridge);
1690
1792
  this.graphCache = /* @__PURE__ */ new Map();
1793
+ this.cacheListeners = /* @__PURE__ */ new Set();
1794
+ }
1795
+ /**
1796
+ * Subscribe to cache invalidation events.
1797
+ * Returns an unsubscribe function.
1798
+ */
1799
+ onCacheInvalidation(listener) {
1800
+ this.cacheListeners.add(listener);
1801
+ return () => {
1802
+ this.cacheListeners.delete(listener);
1803
+ };
1804
+ }
1805
+ /**
1806
+ * Emit a cache invalidation event to all listeners.
1807
+ */
1808
+ emitCacheInvalidation(event) {
1809
+ for (const listener of this.cacheListeners) {
1810
+ listener(event);
1811
+ }
1691
1812
  }
1692
1813
  /**
1693
1814
  * Build a code graph from source files.
@@ -1743,9 +1864,10 @@ var CodeGraphService = class {
1743
1864
  */
1744
1865
  async saveGraph(storeId, graph) {
1745
1866
  const graphPath = this.getGraphPath(storeId);
1746
- await mkdir(dirname(graphPath), { recursive: true });
1867
+ await mkdir2(dirname3(graphPath), { recursive: true });
1747
1868
  const serialized = graph.toJSON();
1748
- await writeFile(graphPath, JSON.stringify(serialized, null, 2));
1869
+ await writeFile2(graphPath, JSON.stringify(serialized, null, 2));
1870
+ this.emitCacheInvalidation({ type: "graph-updated", storeId });
1749
1871
  }
1750
1872
  /**
1751
1873
  * Delete the code graph file for a store.
@@ -1755,6 +1877,7 @@ var CodeGraphService = class {
1755
1877
  const graphPath = this.getGraphPath(storeId);
1756
1878
  await rm(graphPath, { force: true });
1757
1879
  this.graphCache.delete(storeId);
1880
+ this.emitCacheInvalidation({ type: "graph-deleted", storeId });
1758
1881
  }
1759
1882
  /**
1760
1883
  * Load a code graph for a store.
@@ -1856,7 +1979,7 @@ var CodeGraphService = class {
1856
1979
  this.graphCache.clear();
1857
1980
  }
1858
1981
  getGraphPath(storeId) {
1859
- return join3(this.dataDir, "graphs", `${storeId}.json`);
1982
+ return join4(this.dataDir, "graphs", `${storeId}.json`);
1860
1983
  }
1861
1984
  /**
1862
1985
  * Type guard for SerializedGraph structure.
@@ -1900,83 +2023,9 @@ var CodeGraphService = class {
1900
2023
  };
1901
2024
 
1902
2025
  // src/services/config.service.ts
1903
- import { readFile as readFile2, writeFile as writeFile2, mkdir as mkdir2, access } from "fs/promises";
1904
- import { homedir as homedir2 } from "os";
1905
- import { dirname as dirname3, join as join5, resolve } from "path";
1906
-
1907
- // src/services/project-root.service.ts
1908
- import { existsSync as existsSync3, statSync, realpathSync } from "fs";
1909
- import { dirname as dirname2, join as join4, normalize, sep } from "path";
1910
- var ProjectRootService = class {
1911
- /**
1912
- * Resolve project root directory using hierarchical detection.
1913
- */
1914
- static resolve(options) {
1915
- if (options?.projectRoot !== void 0 && options.projectRoot !== "") {
1916
- return this.normalize(options.projectRoot);
1917
- }
1918
- const projectRootEnv = process.env["PROJECT_ROOT"];
1919
- if (projectRootEnv !== void 0 && projectRootEnv !== "") {
1920
- return this.normalize(projectRootEnv);
1921
- }
1922
- const pwdEnv = process.env["PWD"];
1923
- if (pwdEnv !== void 0 && pwdEnv !== "") {
1924
- return this.normalize(pwdEnv);
1925
- }
1926
- const gitRoot = this.findGitRoot(process.cwd());
1927
- if (gitRoot !== null) {
1928
- return gitRoot;
1929
- }
1930
- return process.cwd();
1931
- }
1932
- /**
1933
- * Find git repository root by walking up the directory tree looking for .git
1934
- */
1935
- static findGitRoot(startPath) {
1936
- let currentPath = normalize(startPath);
1937
- const root = normalize(sep);
1938
- while (currentPath !== root) {
1939
- const gitPath = join4(currentPath, ".git");
1940
- if (existsSync3(gitPath)) {
1941
- try {
1942
- const stats = statSync(gitPath);
1943
- if (stats.isDirectory() || stats.isFile()) {
1944
- return currentPath;
1945
- }
1946
- } catch {
1947
- }
1948
- }
1949
- const parentPath = dirname2(currentPath);
1950
- if (parentPath === currentPath) {
1951
- break;
1952
- }
1953
- currentPath = parentPath;
1954
- }
1955
- return null;
1956
- }
1957
- /**
1958
- * Normalize path by resolving symlinks and normalizing separators
1959
- */
1960
- static normalize(path4) {
1961
- try {
1962
- const realPath = realpathSync(path4);
1963
- return normalize(realPath);
1964
- } catch {
1965
- return normalize(path4);
1966
- }
1967
- }
1968
- /**
1969
- * Validate that a path exists and is a directory
1970
- */
1971
- static validate(path4) {
1972
- try {
1973
- const stats = statSync(path4);
1974
- return stats.isDirectory();
1975
- } catch {
1976
- return false;
1977
- }
1978
- }
1979
- };
2026
+ import { readFile as readFile2, access } from "fs/promises";
2027
+ import { homedir } from "os";
2028
+ import { isAbsolute, join as join5, resolve } from "path";
1980
2029
 
1981
2030
  // src/types/config.ts
1982
2031
  var DEFAULT_CONFIG = {
@@ -1984,8 +2033,7 @@ var DEFAULT_CONFIG = {
1984
2033
  dataDir: ".bluera/bluera-knowledge/data",
1985
2034
  embedding: {
1986
2035
  model: "Xenova/all-MiniLM-L6-v2",
1987
- batchSize: 32,
1988
- dimensions: 384
2036
+ batchSize: 32
1989
2037
  },
1990
2038
  indexing: {
1991
2039
  concurrency: 4,
@@ -1995,13 +2043,7 @@ var DEFAULT_CONFIG = {
1995
2043
  },
1996
2044
  search: {
1997
2045
  defaultMode: "hybrid",
1998
- defaultLimit: 10,
1999
- minScore: 0.5,
2000
- rrf: {
2001
- k: 40,
2002
- vectorWeight: 0.7,
2003
- ftsWeight: 0.3
2004
- }
2046
+ defaultLimit: 10
2005
2047
  },
2006
2048
  crawl: {
2007
2049
  userAgent: "BlueraKnowledge/1.0",
@@ -2014,6 +2056,34 @@ var DEFAULT_CONFIG = {
2014
2056
  }
2015
2057
  };
2016
2058
 
2059
+ // src/utils/deep-merge.ts
2060
+ function isPlainObject(value) {
2061
+ return typeof value === "object" && value !== null && !Array.isArray(value) && !(value instanceof Date);
2062
+ }
2063
+ function deepMerge(defaults, overrides) {
2064
+ if (!isPlainObject(overrides)) {
2065
+ return { ...defaults };
2066
+ }
2067
+ const defaultsRecord = defaults;
2068
+ return deepMergeRecords(defaultsRecord, overrides);
2069
+ }
2070
+ function deepMergeRecords(defaults, overrides) {
2071
+ const result = { ...defaults };
2072
+ for (const key of Object.keys(overrides)) {
2073
+ const defaultValue = defaults[key];
2074
+ const overrideValue = overrides[key];
2075
+ if (overrideValue === void 0) {
2076
+ continue;
2077
+ }
2078
+ if (isPlainObject(defaultValue) && isPlainObject(overrideValue)) {
2079
+ result[key] = deepMergeRecords(defaultValue, overrideValue);
2080
+ } else {
2081
+ result[key] = overrideValue;
2082
+ }
2083
+ }
2084
+ return result;
2085
+ }
2086
+
2017
2087
  // src/services/config.service.ts
2018
2088
  var DEFAULT_CONFIG_PATH = ".bluera/bluera-knowledge/config.json";
2019
2089
  async function fileExists(path4) {
@@ -2027,20 +2097,27 @@ async function fileExists(path4) {
2027
2097
  var ConfigService = class {
2028
2098
  configPath;
2029
2099
  dataDir;
2100
+ projectRoot;
2030
2101
  config = null;
2031
2102
  constructor(configPath, dataDir, projectRoot) {
2032
- const root = projectRoot ?? ProjectRootService.resolve();
2103
+ this.projectRoot = projectRoot ?? ProjectRootService.resolve();
2033
2104
  if (configPath !== void 0 && configPath !== "") {
2034
- this.configPath = configPath;
2105
+ this.configPath = this.expandPath(configPath, this.projectRoot);
2035
2106
  } else {
2036
- this.configPath = join5(root, DEFAULT_CONFIG_PATH);
2107
+ this.configPath = join5(this.projectRoot, DEFAULT_CONFIG_PATH);
2037
2108
  }
2038
2109
  if (dataDir !== void 0 && dataDir !== "") {
2039
- this.dataDir = dataDir;
2110
+ this.dataDir = this.expandPath(dataDir, this.projectRoot);
2040
2111
  } else {
2041
- this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir, root);
2112
+ this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir, this.projectRoot);
2042
2113
  }
2043
2114
  }
2115
+ /**
2116
+ * Get the resolved project root directory.
2117
+ */
2118
+ resolveProjectRoot() {
2119
+ return this.projectRoot;
2120
+ }
2044
2121
  async load() {
2045
2122
  if (this.config !== null) {
2046
2123
  return this.config;
@@ -2053,7 +2130,7 @@ var ConfigService = class {
2053
2130
  }
2054
2131
  const content = await readFile2(this.configPath, "utf-8");
2055
2132
  try {
2056
- this.config = { ...DEFAULT_CONFIG, ...JSON.parse(content) };
2133
+ this.config = deepMerge(DEFAULT_CONFIG, JSON.parse(content));
2057
2134
  } catch (error) {
2058
2135
  throw new Error(
2059
2136
  `Failed to parse config file at ${this.configPath}: ${error instanceof Error ? error.message : String(error)}`
@@ -2062,8 +2139,7 @@ var ConfigService = class {
2062
2139
  return this.config;
2063
2140
  }
2064
2141
  async save(config) {
2065
- await mkdir2(dirname3(this.configPath), { recursive: true });
2066
- await writeFile2(this.configPath, JSON.stringify(config, null, 2));
2142
+ await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
2067
2143
  this.config = config;
2068
2144
  }
2069
2145
  resolveDataDir() {
@@ -2074,9 +2150,9 @@ var ConfigService = class {
2074
2150
  }
2075
2151
  expandPath(path4, baseDir) {
2076
2152
  if (path4.startsWith("~")) {
2077
- return path4.replace("~", homedir2());
2153
+ return path4.replace("~", homedir());
2078
2154
  }
2079
- if (!path4.startsWith("/")) {
2155
+ if (!isAbsolute(path4)) {
2080
2156
  return resolve(baseDir, path4);
2081
2157
  }
2082
2158
  return path4;
@@ -2182,9 +2258,9 @@ ${REQUIRED_PATTERNS.join("\n")}
2182
2258
  };
2183
2259
 
2184
2260
  // src/services/index.service.ts
2185
- import { createHash as createHash2 } from "crypto";
2186
- import { readFile as readFile4, readdir } from "fs/promises";
2187
- import { join as join7, extname, basename } from "path";
2261
+ import { createHash as createHash3 } from "crypto";
2262
+ import { readFile as readFile5, readdir } from "fs/promises";
2263
+ import { join as join7, extname, basename, relative } from "path";
2188
2264
 
2189
2265
  // src/services/chunking.service.ts
2190
2266
  var CHUNK_PRESETS = {
@@ -2196,6 +2272,11 @@ var ChunkingService = class _ChunkingService {
2196
2272
  chunkSize;
2197
2273
  chunkOverlap;
2198
2274
  constructor(config) {
2275
+ if (config.chunkOverlap >= config.chunkSize) {
2276
+ throw new Error(
2277
+ `chunkOverlap (${String(config.chunkOverlap)}) must be less than chunkSize (${String(config.chunkSize)})`
2278
+ );
2279
+ }
2199
2280
  this.chunkSize = config.chunkSize;
2200
2281
  this.chunkOverlap = config.chunkOverlap;
2201
2282
  }
@@ -2290,7 +2371,7 @@ var ChunkingService = class _ChunkingService {
2290
2371
  * Splits on top-level declarations to keep functions/classes together.
2291
2372
  */
2292
2373
  chunkCode(text) {
2293
- const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
2374
+ const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
2294
2375
  const declarations = [];
2295
2376
  let match;
2296
2377
  while ((match = declarationRegex.exec(text)) !== null) {
@@ -2465,73 +2546,236 @@ var ChunkingService = class _ChunkingService {
2465
2546
  }
2466
2547
  };
2467
2548
 
2468
- // src/types/brands.ts
2469
- var ID_PATTERN = /^[a-zA-Z0-9_-]+$/;
2470
- function isStoreId(value) {
2471
- return value.length > 0 && ID_PATTERN.test(value);
2472
- }
2473
- function isDocumentId(value) {
2474
- return value.length > 0 && ID_PATTERN.test(value);
2475
- }
2476
- function createStoreId(value) {
2477
- if (!isStoreId(value)) {
2478
- throw new Error(`Invalid store ID: ${value}`);
2549
+ // src/services/drift.service.ts
2550
+ import { createHash as createHash2 } from "crypto";
2551
+ import { readFile as readFile4, stat } from "fs/promises";
2552
+ var DriftService = class {
2553
+ /**
2554
+ * Detect changes between current files and manifest.
2555
+ *
2556
+ * @param manifest - The stored manifest from last index
2557
+ * @param currentFiles - Current files on disk with mtime/size
2558
+ * @returns Classification of files into added, modified, deleted, unchanged
2559
+ */
2560
+ async detectChanges(manifest, currentFiles) {
2561
+ const result = {
2562
+ added: [],
2563
+ modified: [],
2564
+ deleted: [],
2565
+ unchanged: []
2566
+ };
2567
+ const currentPathSet = new Set(currentFiles.map((f) => f.path));
2568
+ const manifestPaths = new Set(Object.keys(manifest.files));
2569
+ for (const path4 of manifestPaths) {
2570
+ if (!currentPathSet.has(path4)) {
2571
+ result.deleted.push(path4);
2572
+ }
2573
+ }
2574
+ const potentiallyModified = [];
2575
+ for (const file of currentFiles) {
2576
+ const manifestState = manifest.files[file.path];
2577
+ if (manifestState === void 0) {
2578
+ result.added.push(file.path);
2579
+ } else {
2580
+ if (file.mtime === manifestState.mtime && file.size === manifestState.size) {
2581
+ result.unchanged.push(file.path);
2582
+ } else {
2583
+ potentiallyModified.push(file);
2584
+ }
2585
+ }
2586
+ }
2587
+ for (const file of potentiallyModified) {
2588
+ const manifestState = manifest.files[file.path];
2589
+ if (manifestState === void 0) {
2590
+ result.added.push(file.path);
2591
+ continue;
2592
+ }
2593
+ const currentHash = await this.computeFileHash(file.path);
2594
+ if (currentHash === manifestState.hash) {
2595
+ result.unchanged.push(file.path);
2596
+ } else {
2597
+ result.modified.push(file.path);
2598
+ }
2599
+ }
2600
+ return result;
2479
2601
  }
2480
- return value;
2481
- }
2482
- function createDocumentId(value) {
2483
- if (!isDocumentId(value)) {
2484
- throw new Error(`Invalid document ID: ${value}`);
2602
+ /**
2603
+ * Get the current state of a file on disk.
2604
+ */
2605
+ async getFileState(path4) {
2606
+ const stats = await stat(path4);
2607
+ return {
2608
+ path: path4,
2609
+ mtime: stats.mtimeMs,
2610
+ size: stats.size
2611
+ };
2485
2612
  }
2486
- return value;
2487
- }
2613
+ /**
2614
+ * Compute MD5 hash of a file.
2615
+ */
2616
+ async computeFileHash(path4) {
2617
+ const content = await readFile4(path4);
2618
+ return createHash2("md5").update(content).digest("hex");
2619
+ }
2620
+ /**
2621
+ * Create a file state entry for the manifest after indexing.
2622
+ *
2623
+ * @param path - File path
2624
+ * @param documentIds - Document IDs created from this file
2625
+ * @returns File state for manifest
2626
+ */
2627
+ async createFileState(path4, documentIds) {
2628
+ const stats = await stat(path4);
2629
+ const content = await readFile4(path4);
2630
+ const hash = createHash2("md5").update(content).digest("hex");
2631
+ const { createDocumentId: createDocumentId2 } = await import("./brands-3EYIYV6T.js");
2632
+ return {
2633
+ state: {
2634
+ mtime: stats.mtimeMs,
2635
+ size: stats.size,
2636
+ hash,
2637
+ documentIds: documentIds.map((id) => createDocumentId2(id))
2638
+ },
2639
+ hash
2640
+ };
2641
+ }
2642
+ };
2488
2643
 
2489
2644
  // src/services/index.service.ts
2490
2645
  var logger = createLogger("index-service");
2491
2646
  var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
2647
+ // Text/docs
2492
2648
  ".txt",
2493
2649
  ".md",
2650
+ ".rst",
2651
+ ".adoc",
2652
+ // JavaScript/TypeScript
2494
2653
  ".js",
2495
2654
  ".ts",
2496
2655
  ".jsx",
2497
2656
  ".tsx",
2657
+ ".mjs",
2658
+ ".cjs",
2659
+ ".mts",
2660
+ ".cts",
2661
+ // Config/data
2498
2662
  ".json",
2499
2663
  ".yaml",
2500
2664
  ".yml",
2665
+ ".toml",
2666
+ ".ini",
2667
+ ".env",
2668
+ // Web
2501
2669
  ".html",
2670
+ ".htm",
2502
2671
  ".css",
2503
2672
  ".scss",
2673
+ ".sass",
2504
2674
  ".less",
2675
+ ".vue",
2676
+ ".svelte",
2677
+ // Python
2505
2678
  ".py",
2679
+ ".pyi",
2680
+ ".pyx",
2681
+ // Ruby
2506
2682
  ".rb",
2683
+ ".erb",
2684
+ ".rake",
2685
+ // Go
2507
2686
  ".go",
2687
+ // Rust
2508
2688
  ".rs",
2689
+ // Java/JVM
2509
2690
  ".java",
2691
+ ".kt",
2692
+ ".kts",
2693
+ ".scala",
2694
+ ".groovy",
2695
+ ".gradle",
2696
+ // C/C++
2510
2697
  ".c",
2511
2698
  ".cpp",
2699
+ ".cc",
2700
+ ".cxx",
2512
2701
  ".h",
2513
2702
  ".hpp",
2703
+ ".hxx",
2704
+ // C#/.NET
2705
+ ".cs",
2706
+ ".fs",
2707
+ ".vb",
2708
+ // Swift/Objective-C
2709
+ ".swift",
2710
+ ".m",
2711
+ ".mm",
2712
+ // PHP
2713
+ ".php",
2714
+ // Shell
2514
2715
  ".sh",
2515
2716
  ".bash",
2516
2717
  ".zsh",
2718
+ ".fish",
2719
+ ".ps1",
2720
+ ".psm1",
2721
+ // SQL
2517
2722
  ".sql",
2518
- ".xml"
2723
+ // Other
2724
+ ".xml",
2725
+ ".graphql",
2726
+ ".gql",
2727
+ ".proto",
2728
+ ".lua",
2729
+ ".r",
2730
+ ".R",
2731
+ ".jl",
2732
+ ".ex",
2733
+ ".exs",
2734
+ ".erl",
2735
+ ".hrl",
2736
+ ".clj",
2737
+ ".cljs",
2738
+ ".cljc",
2739
+ ".hs",
2740
+ ".elm",
2741
+ ".dart",
2742
+ ".pl",
2743
+ ".pm",
2744
+ ".tcl",
2745
+ ".vim",
2746
+ ".zig",
2747
+ ".nim",
2748
+ ".v",
2749
+ ".tf",
2750
+ ".hcl",
2751
+ ".dockerfile",
2752
+ ".makefile",
2753
+ ".cmake"
2519
2754
  ]);
2520
2755
  var IndexService = class {
2521
2756
  lanceStore;
2522
2757
  embeddingEngine;
2523
2758
  chunker;
2524
2759
  codeGraphService;
2760
+ manifestService;
2761
+ driftService;
2525
2762
  concurrency;
2763
+ ignoreDirs;
2764
+ ignoreFilePatterns;
2526
2765
  constructor(lanceStore, embeddingEngine, options = {}) {
2527
2766
  this.lanceStore = lanceStore;
2528
2767
  this.embeddingEngine = embeddingEngine;
2529
2768
  this.chunker = new ChunkingService({
2530
- chunkSize: options.chunkSize ?? 768,
2531
- chunkOverlap: options.chunkOverlap ?? 100
2769
+ chunkSize: options.chunkSize ?? 1e3,
2770
+ chunkOverlap: options.chunkOverlap ?? 150
2532
2771
  });
2533
2772
  this.codeGraphService = options.codeGraphService;
2773
+ this.manifestService = options.manifestService;
2774
+ this.driftService = new DriftService();
2534
2775
  this.concurrency = options.concurrency ?? 4;
2776
+ const parsed = parseIgnorePatternsForScanning(options.ignorePatterns ?? []);
2777
+ this.ignoreDirs = parsed.dirs;
2778
+ this.ignoreFilePatterns = parsed.fileMatchers;
2535
2779
  }
2536
2780
  async indexStore(store, onProgress) {
2537
2781
  logger.info(
@@ -2562,8 +2806,205 @@ var IndexService = class {
2562
2806
  return err(error instanceof Error ? error : new Error(String(error)));
2563
2807
  }
2564
2808
  }
2809
+ /**
2810
+ * Incrementally index a store, only processing changed files.
2811
+ * Requires manifestService to be configured.
2812
+ *
2813
+ * @param store - The store to index
2814
+ * @param onProgress - Optional progress callback
2815
+ * @returns Result with incremental index statistics
2816
+ */
2817
+ async indexStoreIncremental(store, onProgress) {
2818
+ if (this.manifestService === void 0) {
2819
+ return err(new Error("ManifestService required for incremental indexing"));
2820
+ }
2821
+ if (store.type !== "file" && store.type !== "repo") {
2822
+ return err(new Error(`Incremental indexing not supported for store type: ${store.type}`));
2823
+ }
2824
+ logger.info(
2825
+ {
2826
+ storeId: store.id,
2827
+ storeName: store.name,
2828
+ storeType: store.type
2829
+ },
2830
+ "Starting incremental store indexing"
2831
+ );
2832
+ const startTime = Date.now();
2833
+ try {
2834
+ const manifest = await this.manifestService.load(store.id);
2835
+ const filePaths = await this.scanDirectory(store.path);
2836
+ const currentFiles = await Promise.all(
2837
+ filePaths.map((path4) => this.driftService.getFileState(path4))
2838
+ );
2839
+ const drift = await this.driftService.detectChanges(manifest, currentFiles);
2840
+ logger.debug(
2841
+ {
2842
+ storeId: store.id,
2843
+ added: drift.added.length,
2844
+ modified: drift.modified.length,
2845
+ deleted: drift.deleted.length,
2846
+ unchanged: drift.unchanged.length
2847
+ },
2848
+ "Drift detection complete"
2849
+ );
2850
+ const documentIdsToDelete = [];
2851
+ for (const path4 of [...drift.modified, ...drift.deleted]) {
2852
+ const fileState = manifest.files[path4];
2853
+ if (fileState !== void 0) {
2854
+ documentIdsToDelete.push(...fileState.documentIds);
2855
+ }
2856
+ }
2857
+ if (documentIdsToDelete.length > 0) {
2858
+ await this.lanceStore.deleteDocuments(store.id, documentIdsToDelete);
2859
+ logger.debug(
2860
+ { storeId: store.id, count: documentIdsToDelete.length },
2861
+ "Deleted old documents"
2862
+ );
2863
+ }
2864
+ const filesToProcess = [...drift.added, ...drift.modified];
2865
+ const totalFiles = filesToProcess.length;
2866
+ onProgress?.({
2867
+ type: "start",
2868
+ current: 0,
2869
+ total: totalFiles,
2870
+ message: `Processing ${String(totalFiles)} changed files`
2871
+ });
2872
+ const documents = [];
2873
+ const newManifestFiles = {};
2874
+ let filesProcessed = 0;
2875
+ for (const path4 of drift.unchanged) {
2876
+ const existingState = manifest.files[path4];
2877
+ if (existingState !== void 0) {
2878
+ newManifestFiles[path4] = existingState;
2879
+ }
2880
+ }
2881
+ for (let i = 0; i < filesToProcess.length; i += this.concurrency) {
2882
+ const batch = filesToProcess.slice(i, i + this.concurrency);
2883
+ const batchResults = await Promise.all(
2884
+ batch.map(async (filePath) => {
2885
+ try {
2886
+ const result = await this.processFile(filePath, store);
2887
+ const documentIds = result.documents.map((d) => d.id);
2888
+ const { state } = await this.driftService.createFileState(filePath, documentIds);
2889
+ return {
2890
+ filePath,
2891
+ documents: result.documents,
2892
+ fileState: state
2893
+ };
2894
+ } catch (error) {
2895
+ logger.warn(
2896
+ { filePath, error: error instanceof Error ? error.message : String(error) },
2897
+ "Failed to process file during incremental indexing, skipping"
2898
+ );
2899
+ return null;
2900
+ }
2901
+ })
2902
+ );
2903
+ for (const result of batchResults) {
2904
+ if (result !== null) {
2905
+ documents.push(...result.documents);
2906
+ newManifestFiles[result.filePath] = result.fileState;
2907
+ }
2908
+ }
2909
+ filesProcessed += batch.length;
2910
+ onProgress?.({
2911
+ type: "progress",
2912
+ current: filesProcessed,
2913
+ total: totalFiles,
2914
+ message: `Processed ${String(filesProcessed)}/${String(totalFiles)} files`
2915
+ });
2916
+ }
2917
+ if (documents.length > 0) {
2918
+ await this.lanceStore.addDocuments(store.id, documents);
2919
+ }
2920
+ if (documentIdsToDelete.length > 0 || documents.length > 0) {
2921
+ await this.lanceStore.createFtsIndex(store.id);
2922
+ }
2923
+ if (this.codeGraphService) {
2924
+ const sourceExtensions = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"];
2925
+ const hasSourceChanges = filesToProcess.some((p) => sourceExtensions.includes(extname(p).toLowerCase())) || drift.deleted.some((p) => sourceExtensions.includes(extname(p).toLowerCase()));
2926
+ if (hasSourceChanges) {
2927
+ const allSourceFiles = [];
2928
+ const allPaths = [...drift.unchanged, ...filesToProcess];
2929
+ for (const filePath of allPaths) {
2930
+ const ext = extname(filePath).toLowerCase();
2931
+ if (sourceExtensions.includes(ext)) {
2932
+ try {
2933
+ const content = await readFile5(filePath, "utf-8");
2934
+ allSourceFiles.push({ path: filePath, content });
2935
+ } catch {
2936
+ }
2937
+ }
2938
+ }
2939
+ if (allSourceFiles.length > 0) {
2940
+ const graph = await this.codeGraphService.buildGraph(allSourceFiles);
2941
+ await this.codeGraphService.saveGraph(store.id, graph);
2942
+ logger.debug(
2943
+ { storeId: store.id, sourceFiles: allSourceFiles.length },
2944
+ "Rebuilt code graph during incremental indexing"
2945
+ );
2946
+ } else {
2947
+ await this.codeGraphService.deleteGraph(store.id);
2948
+ logger.debug(
2949
+ { storeId: store.id },
2950
+ "Deleted stale code graph (no source files remain)"
2951
+ );
2952
+ }
2953
+ }
2954
+ }
2955
+ const updatedManifest = {
2956
+ version: 1,
2957
+ storeId: store.id,
2958
+ indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
2959
+ files: newManifestFiles
2960
+ };
2961
+ await this.manifestService.save(updatedManifest);
2962
+ onProgress?.({
2963
+ type: "complete",
2964
+ current: totalFiles,
2965
+ total: totalFiles,
2966
+ message: "Incremental indexing complete"
2967
+ });
2968
+ const timeMs = Date.now() - startTime;
2969
+ logger.info(
2970
+ {
2971
+ storeId: store.id,
2972
+ storeName: store.name,
2973
+ filesAdded: drift.added.length,
2974
+ filesModified: drift.modified.length,
2975
+ filesDeleted: drift.deleted.length,
2976
+ filesUnchanged: drift.unchanged.length,
2977
+ chunksCreated: documents.length,
2978
+ timeMs
2979
+ },
2980
+ "Incremental indexing complete"
2981
+ );
2982
+ return ok({
2983
+ filesIndexed: filesToProcess.length,
2984
+ chunksCreated: documents.length,
2985
+ timeMs,
2986
+ filesAdded: drift.added.length,
2987
+ filesModified: drift.modified.length,
2988
+ filesDeleted: drift.deleted.length,
2989
+ filesUnchanged: drift.unchanged.length
2990
+ });
2991
+ } catch (error) {
2992
+ logger.error(
2993
+ {
2994
+ storeId: store.id,
2995
+ error: error instanceof Error ? error.message : String(error)
2996
+ },
2997
+ "Incremental indexing failed"
2998
+ );
2999
+ return err(error instanceof Error ? error : new Error(String(error)));
3000
+ }
3001
+ }
2565
3002
  async indexFileStore(store, onProgress) {
2566
3003
  const startTime = Date.now();
3004
+ await this.lanceStore.clearAllDocuments(store.id);
3005
+ if (this.manifestService) {
3006
+ await this.manifestService.delete(store.id);
3007
+ }
2567
3008
  const files = await this.scanDirectory(store.path);
2568
3009
  const documents = [];
2569
3010
  let filesProcessed = 0;
@@ -2586,7 +3027,17 @@ var IndexService = class {
2586
3027
  for (let i = 0; i < files.length; i += this.concurrency) {
2587
3028
  const batch = files.slice(i, i + this.concurrency);
2588
3029
  const batchResults = await Promise.all(
2589
- batch.map((filePath) => this.processFile(filePath, store))
3030
+ batch.map(async (filePath) => {
3031
+ try {
3032
+ return await this.processFile(filePath, store);
3033
+ } catch (error) {
3034
+ logger.warn(
3035
+ { filePath, error: error instanceof Error ? error.message : String(error) },
3036
+ "Failed to process file, skipping"
3037
+ );
3038
+ return { documents: [], sourceFile: void 0 };
3039
+ }
3040
+ })
2590
3041
  );
2591
3042
  for (const result of batchResults) {
2592
3043
  documents.push(...result.documents);
@@ -2609,6 +3060,8 @@ var IndexService = class {
2609
3060
  if (this.codeGraphService && sourceFiles.length > 0) {
2610
3061
  const graph = await this.codeGraphService.buildGraph(sourceFiles);
2611
3062
  await this.codeGraphService.saveGraph(store.id, graph);
3063
+ } else if (this.codeGraphService) {
3064
+ await this.codeGraphService.deleteGraph(store.id);
2612
3065
  }
2613
3066
  onProgress?.({
2614
3067
  type: "complete",
@@ -2621,7 +3074,7 @@ var IndexService = class {
2621
3074
  {
2622
3075
  storeId: store.id,
2623
3076
  storeName: store.name,
2624
- documentsIndexed: filesProcessed,
3077
+ filesIndexed: filesProcessed,
2625
3078
  chunksCreated: documents.length,
2626
3079
  sourceFilesForGraph: sourceFiles.length,
2627
3080
  timeMs
@@ -2629,7 +3082,7 @@ var IndexService = class {
2629
3082
  "Store indexing complete"
2630
3083
  );
2631
3084
  return ok({
2632
- documentsIndexed: filesProcessed,
3085
+ filesIndexed: filesProcessed,
2633
3086
  chunksCreated: documents.length,
2634
3087
  timeMs
2635
3088
  });
@@ -2639,13 +3092,15 @@ var IndexService = class {
2639
3092
  * Extracted for parallel processing.
2640
3093
  */
2641
3094
  async processFile(filePath, store) {
2642
- const content = await readFile4(filePath, "utf-8");
2643
- const fileHash = createHash2("md5").update(content).digest("hex");
3095
+ const content = await readFile5(filePath, "utf-8");
3096
+ const fileHash = createHash3("md5").update(content).digest("hex");
2644
3097
  const chunks = this.chunker.chunk(content, filePath);
3098
+ const relativePath = relative(store.path, filePath);
3099
+ const pathHash = createHash3("md5").update(relativePath).digest("hex").slice(0, 8);
2645
3100
  const ext = extname(filePath).toLowerCase();
2646
3101
  const fileName = basename(filePath).toLowerCase();
2647
3102
  const fileType = this.classifyFileType(ext, fileName, filePath);
2648
- const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
3103
+ const sourceFile = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"].includes(ext) ? { path: filePath, content } : void 0;
2649
3104
  if (chunks.length === 0) {
2650
3105
  return { documents: [], sourceFile };
2651
3106
  }
@@ -2660,7 +3115,7 @@ var IndexService = class {
2660
3115
  `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
2661
3116
  );
2662
3117
  }
2663
- const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
3118
+ const chunkId = chunks.length > 1 ? `${store.id}-${pathHash}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${pathHash}-${fileHash}`;
2664
3119
  documents.push({
2665
3120
  id: createDocumentId(chunkId),
2666
3121
  content: chunk.content,
@@ -2669,7 +3124,7 @@ var IndexService = class {
2669
3124
  type: chunks.length > 1 ? "chunk" : "file",
2670
3125
  storeId: store.id,
2671
3126
  path: filePath,
2672
- indexedAt: /* @__PURE__ */ new Date(),
3127
+ indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
2673
3128
  fileHash,
2674
3129
  chunkIndex: chunk.chunkIndex,
2675
3130
  totalChunks: chunk.totalChunks,
@@ -2689,10 +3144,14 @@ var IndexService = class {
2689
3144
  for (const entry of entries) {
2690
3145
  const fullPath = join7(dir, entry.name);
2691
3146
  if (entry.isDirectory()) {
2692
- if (!["node_modules", ".git", "dist", "build"].includes(entry.name)) {
3147
+ if (!this.ignoreDirs.has(entry.name)) {
2693
3148
  files.push(...await this.scanDirectory(fullPath));
2694
3149
  }
2695
3150
  } else if (entry.isFile()) {
3151
+ const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(entry.name));
3152
+ if (shouldIgnore) {
3153
+ continue;
3154
+ }
2696
3155
  const ext = extname(entry.name).toLowerCase();
2697
3156
  if (TEXT_EXTENSIONS.has(ext)) {
2698
3157
  files.push(fullPath);
@@ -2782,6 +3241,141 @@ function classifyWebContentType(url, title) {
2782
3241
  return "documentation";
2783
3242
  }
2784
3243
 
3244
+ // src/services/manifest.service.ts
3245
+ import { readFile as readFile6, access as access3, mkdir as mkdir3 } from "fs/promises";
3246
+ import { join as join8 } from "path";
3247
+
3248
+ // src/types/manifest.ts
3249
+ import { z as z2 } from "zod";
3250
/**
 * Zod schema describing what the manifest records about one indexed file.
 */
var FileStateSchema = z2.object({
  // Last-modified timestamp, in milliseconds since the epoch.
  mtime: z2.number(),
  // Size of the file, in bytes.
  size: z2.number(),
  // MD5 digest of the file's content.
  hash: z2.string(),
  // IDs of the documents produced from this file; kept so they can be cleaned up.
  documentIds: z2.array(z2.string())
});
3260
/**
 * Zod schema for a store's on-disk manifest: a versioned envelope around a
 * map from file path to that file's recorded state.
 */
var StoreManifestSchema = z2.object({
  // Schema version; only version 1 is accepted (reserved for future migrations).
  version: z2.literal(1),
  // ID of the store that owns this manifest.
  storeId: z2.string(),
  // Timestamp string of the most recent manifest update.
  indexedAt: z2.string(),
  // File path -> recorded file state.
  files: z2.record(z2.string(), FileStateSchema)
});
3270
/**
 * Build a fresh manifest for a store that has no recorded files yet.
 *
 * @param storeId - ID of the store the manifest belongs to.
 * @returns A version-1 manifest stamped with the current time (ISO 8601)
 *   and an empty file map.
 */
function createEmptyManifest(storeId) {
  const createdAt = /* @__PURE__ */ new Date();
  const manifest = {
    version: 1,
    storeId,
    indexedAt: createdAt.toISOString(),
    files: {}
  };
  return manifest;
}
3278
+
3279
+ // src/services/manifest.service.ts
3280
/**
 * Persists one manifest per store as `<dataDir>/manifests/<storeId>.manifest.json`.
 * A manifest maps file paths to their recorded state (mtime, size, hash and
 * the document IDs created from the file).
 */
var ManifestService = class {
  manifestsDir;
  /**
   * @param dataDir - Base data directory; manifests live in its "manifests" subdirectory.
   */
  constructor(dataDir) {
    this.manifestsDir = join8(dataDir, "manifests");
  }
  /**
   * Initialize the manifests directory (created recursively if missing).
   */
  async initialize() {
    await mkdir3(this.manifestsDir, { recursive: true });
  }
  /**
   * Get the file path for a store's manifest.
   *
   * @param storeId - ID of the store.
   * @returns Path of the store's manifest file inside the manifests directory.
   */
  getManifestPath(storeId) {
    return join8(this.manifestsDir, `${storeId}.manifest.json`);
  }
  /**
   * Load a store's manifest.
   * Returns an empty manifest if one doesn't exist.
   * Throws on parse/validation errors (fail fast).
   *
   * @param storeId - ID of the store whose manifest should be loaded.
   * @returns The validated manifest, or a fresh empty one when no file exists.
   * @throws Error when the file is not valid JSON or does not match the schema.
   */
  async load(storeId) {
    const manifestPath = this.getManifestPath(storeId);
    const exists = await this.fileExists(manifestPath);
    if (!exists) {
      return createEmptyManifest(storeId);
    }
    const content = await readFile6(manifestPath, "utf-8");
    let parsed;
    try {
      parsed = JSON.parse(content);
    } catch (error) {
      throw new Error(
        `Failed to parse manifest at ${manifestPath}: ${error instanceof Error ? error.message : String(error)}`
      );
    }
    const result = StoreManifestSchema.safeParse(parsed);
    if (!result.success) {
      throw new Error(`Invalid manifest at ${manifestPath}: ${result.error.message}`);
    }
    return this.toTypedManifest(result.data, storeId);
  }
  /**
   * Save a store's manifest through `atomicWriteFile`.
   * The `indexedAt` field is always overwritten with the current time.
   *
   * @param manifest - Manifest to persist; its `storeId` selects the target file.
   */
  async save(manifest) {
    const manifestPath = this.getManifestPath(manifest.storeId);
    const toSave = {
      ...manifest,
      indexedAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    await atomicWriteFile(manifestPath, JSON.stringify(toSave, null, 2));
  }
  /**
   * Delete a store's manifest.
   * Called when a store is deleted or during full re-index.
   *
   * Deleting a manifest that does not exist is a no-op. The file is unlinked
   * directly instead of being checked for existence first, so a manifest that
   * disappears concurrently cannot turn the call into an ENOENT failure.
   *
   * @param storeId - ID of the store whose manifest should be removed.
   * @throws Any unlink error other than ENOENT (for example a permission error).
   */
  async delete(storeId) {
    const manifestPath = this.getManifestPath(storeId);
    const { unlink } = await import("fs/promises");
    try {
      await unlink(manifestPath);
    } catch (error) {
      // "Already gone" is the desired end state; every other failure is real.
      if (error?.code !== "ENOENT") {
        throw error;
      }
    }
  }
  /**
   * Check if a file exists.
   *
   * @param path4 - Path to probe.
   * @returns true when `access` succeeds, false when it fails for any reason.
   */
  async fileExists(path4) {
    try {
      await access3(path4);
      return true;
    } catch {
      return false;
    }
  }
  /**
   * Convert a parsed manifest to a typed manifest with branded types.
   * Every document ID is passed through `createDocumentId`, and the supplied
   * `storeId` is used in place of the one stored in the parsed data.
   *
   * @param data - Schema-validated manifest data.
   * @param storeId - Store ID to stamp on the returned manifest.
   * @returns A version-1 manifest with a rebuilt `files` map.
   */
  toTypedManifest(data, storeId) {
    const files = {};
    for (const [path4, state] of Object.entries(data.files)) {
      files[path4] = {
        mtime: state.mtime,
        size: state.size,
        hash: state.hash,
        documentIds: state.documentIds.map((id) => createDocumentId(id))
      };
    }
    return {
      version: 1,
      storeId,
      indexedAt: data.indexedAt,
      files
    };
  }
};
3378
+
2785
3379
  // src/services/code-unit.service.ts
2786
3380
  var CodeUnitService = class {
2787
3381
  extractCodeUnit(code, symbolName, language) {
@@ -2966,6 +3560,8 @@ var INTENT_FILE_BOOSTS = {
2966
3560
  // Stronger penalty - internal code less useful
2967
3561
  test: 0.8,
2968
3562
  config: 0.7,
3563
+ changelog: 0.6,
3564
+ // Changelogs rarely answer "how to" questions
2969
3565
  other: 0.9
2970
3566
  },
2971
3567
  implementation: {
@@ -2978,6 +3574,8 @@ var INTENT_FILE_BOOSTS = {
2978
3574
  // Internal code can be relevant
2979
3575
  test: 1,
2980
3576
  config: 0.95,
3577
+ changelog: 0.8,
3578
+ // Might reference implementation changes
2981
3579
  other: 1
2982
3580
  },
2983
3581
  conceptual: {
@@ -2988,6 +3586,8 @@ var INTENT_FILE_BOOSTS = {
2988
3586
  "source-internal": 0.9,
2989
3587
  test: 0.9,
2990
3588
  config: 0.85,
3589
+ changelog: 0.7,
3590
+ // Sometimes explains concepts behind changes
2991
3591
  other: 0.95
2992
3592
  },
2993
3593
  comparison: {
@@ -2998,6 +3598,8 @@ var INTENT_FILE_BOOSTS = {
2998
3598
  "source-internal": 0.85,
2999
3599
  test: 0.9,
3000
3600
  config: 0.85,
3601
+ changelog: 0.9,
3602
+ // Version comparisons can be useful
3001
3603
  other: 0.95
3002
3604
  },
3003
3605
  debugging: {
@@ -3010,6 +3612,8 @@ var INTENT_FILE_BOOSTS = {
3010
3612
  test: 1.05,
3011
3613
  // Tests can show expected behavior
3012
3614
  config: 0.9,
3615
+ changelog: 1.1,
3616
+ // Often contains bug fixes and known issues
3013
3617
  other: 1
3014
3618
  }
3015
3619
  };
@@ -3092,6 +3696,17 @@ function classifyQueryIntents(query) {
3092
3696
  function getPrimaryIntent(intents) {
3093
3697
  return intents[0]?.intent ?? "how-to";
3094
3698
  }
3699
/**
 * Map a caller-supplied search intent onto the query-intent categories used
 * for result boosting.
 *
 * @param intent - One of "find-pattern", "find-implementation",
 *   "find-definition", "find-usage" or "find-documentation".
 * @returns "implementation" for the pattern/implementation/definition intents,
 *   "how-to" for the usage/documentation intents. Any unrecognised value also
 *   maps to "how-to" (the same fallback `getPrimaryIntent` uses) so the result
 *   is never `undefined`.
 */
function mapSearchIntentToQueryIntent(intent) {
  switch (intent) {
    case "find-pattern":
    case "find-implementation":
    case "find-definition":
      return "implementation";
    case "find-usage":
    case "find-documentation":
      return "how-to";
    default:
      // Unvalidated input must not leak `undefined` into intent-keyed lookups.
      return "how-to";
  }
}
3095
3710
  var RRF_PRESETS = {
3096
3711
  code: { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 },
3097
3712
  web: { k: 30, vectorWeight: 0.55, ftsWeight: 0.45 }
@@ -3106,12 +3721,27 @@ var SearchService = class {
3106
3721
  codeUnitService;
3107
3722
  codeGraphService;
3108
3723
  graphCache;
3109
- constructor(lanceStore, embeddingEngine, codeGraphService) {
3724
+ searchConfig;
3725
+ unsubscribeCacheInvalidation;
3726
+ constructor(lanceStore, embeddingEngine, codeGraphService, searchConfig) {
3110
3727
  this.lanceStore = lanceStore;
3111
3728
  this.embeddingEngine = embeddingEngine;
3112
3729
  this.codeUnitService = new CodeUnitService();
3113
3730
  this.codeGraphService = codeGraphService;
3114
3731
  this.graphCache = /* @__PURE__ */ new Map();
3732
+ this.searchConfig = searchConfig;
3733
+ if (codeGraphService) {
3734
+ this.unsubscribeCacheInvalidation = codeGraphService.onCacheInvalidation((event) => {
3735
+ this.graphCache.delete(event.storeId);
3736
+ });
3737
+ }
3738
+ }
3739
+ /**
3740
+ * Clean up resources (unsubscribe from events).
3741
+ * Call this when destroying the service.
3742
+ */
3743
+ cleanup() {
3744
+ this.unsubscribeCacheInvalidation?.();
3115
3745
  }
3116
3746
  /**
3117
3747
  * Load code graph for a store, with caching.
@@ -3139,12 +3769,12 @@ var SearchService = class {
3139
3769
  }
3140
3770
  async search(query) {
3141
3771
  const startTime = Date.now();
3142
- const mode = query.mode ?? "hybrid";
3143
- const limit = query.limit ?? 10;
3772
+ const mode = query.mode ?? this.searchConfig?.defaultMode ?? "hybrid";
3773
+ const limit = query.limit ?? this.searchConfig?.defaultLimit ?? 10;
3144
3774
  const stores = query.stores ?? [];
3145
3775
  const detail = query.detail ?? "minimal";
3146
3776
  const intents = classifyQueryIntents(query.query);
3147
- const primaryIntent = getPrimaryIntent(intents);
3777
+ const primaryIntent = query.intent !== void 0 ? mapSearchIntentToQueryIntent(query.intent) : getPrimaryIntent(intents);
3148
3778
  logger2.debug(
3149
3779
  {
3150
3780
  query: query.query,
@@ -3153,7 +3783,8 @@ var SearchService = class {
3153
3783
  stores,
3154
3784
  detail,
3155
3785
  intent: primaryIntent,
3156
- intents,
3786
+ userIntent: query.intent,
3787
+ autoClassifiedIntents: intents,
3157
3788
  minRelevance: query.minRelevance
3158
3789
  },
3159
3790
  "Search query received"
@@ -3164,7 +3795,7 @@ var SearchService = class {
3164
3795
  if (mode === "vector") {
3165
3796
  const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
3166
3797
  maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
3167
- allResults = await this.vectorSearch(query.query, stores, fetchLimit, query.threshold);
3798
+ allResults = this.normalizeAndFilterScores(rawResults, query.threshold).slice(0, fetchLimit);
3168
3799
  } else if (mode === "fts") {
3169
3800
  allResults = await this.ftsSearch(query.query, stores, fetchLimit);
3170
3801
  } else {
@@ -3177,28 +3808,35 @@ var SearchService = class {
3177
3808
  allResults = hybridResult.results;
3178
3809
  maxRawScore = hybridResult.maxRawScore;
3179
3810
  }
3180
- if (query.minRelevance !== void 0 && maxRawScore < query.minRelevance) {
3181
- const timeMs2 = Date.now() - startTime;
3182
- logger2.info(
3183
- {
3811
+ if (query.minRelevance !== void 0) {
3812
+ if (mode === "fts") {
3813
+ logger2.warn(
3814
+ { query: query.query, minRelevance: query.minRelevance },
3815
+ "minRelevance filter ignored in FTS mode (no vector scores available)"
3816
+ );
3817
+ } else if (maxRawScore < query.minRelevance) {
3818
+ const timeMs2 = Date.now() - startTime;
3819
+ logger2.info(
3820
+ {
3821
+ query: query.query,
3822
+ mode,
3823
+ maxRawScore,
3824
+ minRelevance: query.minRelevance,
3825
+ timeMs: timeMs2
3826
+ },
3827
+ "Search filtered by minRelevance - no sufficiently relevant results"
3828
+ );
3829
+ return {
3184
3830
  query: query.query,
3185
3831
  mode,
3186
- maxRawScore,
3187
- minRelevance: query.minRelevance,
3188
- timeMs: timeMs2
3189
- },
3190
- "Search filtered by minRelevance - no sufficiently relevant results"
3191
- );
3192
- return {
3193
- query: query.query,
3194
- mode,
3195
- stores,
3196
- results: [],
3197
- totalResults: 0,
3198
- timeMs: timeMs2,
3199
- confidence: this.calculateConfidence(maxRawScore),
3200
- maxRawScore
3201
- };
3832
+ stores,
3833
+ results: [],
3834
+ totalResults: 0,
3835
+ timeMs: timeMs2,
3836
+ confidence: this.calculateConfidence(maxRawScore),
3837
+ maxRawScore
3838
+ };
3839
+ }
3202
3840
  }
3203
3841
  const dedupedResults = this.deduplicateBySource(allResults, query.query);
3204
3842
  const resultsToEnhance = dedupedResults.slice(0, limit);
@@ -3247,7 +3885,9 @@ var SearchService = class {
3247
3885
  const bySource = /* @__PURE__ */ new Map();
3248
3886
  const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
3249
3887
  for (const result of results) {
3250
- const sourceKey = result.metadata.path ?? result.metadata.url ?? result.id;
3888
+ const storeId = result.metadata.storeId;
3889
+ const source = result.metadata.path ?? result.metadata.url ?? result.id;
3890
+ const sourceKey = `${storeId}:${source}`;
3251
3891
  const existing = bySource.get(sourceKey);
3252
3892
  if (!existing) {
3253
3893
  bySource.set(sourceKey, result);
@@ -3318,11 +3958,6 @@ var SearchService = class {
3318
3958
  }
3319
3959
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
3320
3960
  }
3321
- async vectorSearch(query, stores, limit, threshold) {
3322
- const results = await this.vectorSearchRaw(query, stores, limit);
3323
- const normalized = this.normalizeAndFilterScores(results, threshold);
3324
- return normalized.slice(0, limit);
3325
- }
3326
3961
  async ftsSearch(query, stores, limit) {
3327
3962
  const results = [];
3328
3963
  for (const storeId of stores) {
@@ -3482,6 +4117,9 @@ var SearchService = class {
3482
4117
  case "config":
3483
4118
  baseBoost = 0.5;
3484
4119
  break;
4120
+ case "changelog":
4121
+ baseBoost = 0.7;
4122
+ break;
3485
4123
  default:
3486
4124
  baseBoost = 1;
3487
4125
  }
@@ -3887,42 +4525,53 @@ var SearchService = class {
3887
4525
  };
3888
4526
 
3889
4527
  // src/services/store-definition.service.ts
3890
- import { readFile as readFile5, writeFile as writeFile4, mkdir as mkdir3, access as access3 } from "fs/promises";
3891
- import { dirname as dirname4, resolve as resolve2, isAbsolute, join as join8 } from "path";
4528
+ import { readFile as readFile7, access as access4 } from "fs/promises";
4529
+ import { resolve as resolve2, isAbsolute as isAbsolute2, join as join9 } from "path";
3892
4530
 
3893
4531
  // src/types/store-definition.ts
3894
- import { z as z2 } from "zod";
3895
- var BaseStoreDefinitionSchema = z2.object({
3896
- name: z2.string().min(1, "Store name is required"),
3897
- description: z2.string().optional(),
3898
- tags: z2.array(z2.string()).optional()
4532
+ import { z as z3 } from "zod";
4533
+ var BaseStoreDefinitionSchema = z3.object({
4534
+ name: z3.string().min(1, "Store name is required"),
4535
+ description: z3.string().optional(),
4536
+ tags: z3.array(z3.string()).optional()
3899
4537
  });
3900
4538
  var FileStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
3901
- type: z2.literal("file"),
3902
- path: z2.string().min(1, "Path is required for file stores")
4539
+ type: z3.literal("file"),
4540
+ path: z3.string().min(1, "Path is required for file stores")
3903
4541
  });
4542
/**
 * Zod schema for a repository location string. A value is accepted when it
 * either parses with the WHATWG `URL` constructor or matches the SSH
 * shorthand form `git@host:path`.
 */
var GitUrlSchema = z3.string().refine(
  (candidate) => {
    let parsesAsUrl = true;
    try {
      new URL(candidate);
    } catch {
      parsesAsUrl = false;
    }
    if (parsesAsUrl) {
      return true;
    }
    // Not a standard URL: fall back to the SSH shorthand check.
    return /^git@[\w.-]+:[\w./-]+$/.test(candidate);
  },
  { message: "Must be a valid URL or SSH URL (git@host:path)" }
);
3904
4553
  var RepoStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
3905
- type: z2.literal("repo"),
3906
- url: z2.url("Valid URL is required for repo stores"),
3907
- branch: z2.string().optional(),
3908
- depth: z2.number().int().positive("Depth must be a positive integer").optional()
4554
+ type: z3.literal("repo"),
4555
+ url: GitUrlSchema,
4556
+ branch: z3.string().optional(),
4557
+ depth: z3.number().int().positive("Depth must be a positive integer").optional()
3909
4558
  });
3910
4559
  var WebStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
3911
- type: z2.literal("web"),
3912
- url: z2.url("Valid URL is required for web stores"),
3913
- depth: z2.number().int().min(0, "Depth must be non-negative").default(1),
3914
- maxPages: z2.number().int().positive("maxPages must be a positive integer").optional(),
3915
- crawlInstructions: z2.string().optional(),
3916
- extractInstructions: z2.string().optional()
4560
+ type: z3.literal("web"),
4561
+ url: z3.url("Valid URL is required for web stores"),
4562
+ depth: z3.number().int().min(0, "Depth must be non-negative").default(1),
4563
+ maxPages: z3.number().int().positive("maxPages must be a positive integer").optional(),
4564
+ crawlInstructions: z3.string().optional(),
4565
+ extractInstructions: z3.string().optional()
3917
4566
  });
3918
- var StoreDefinitionSchema = z2.discriminatedUnion("type", [
4567
+ var StoreDefinitionSchema = z3.discriminatedUnion("type", [
3919
4568
  FileStoreDefinitionSchema,
3920
4569
  RepoStoreDefinitionSchema,
3921
4570
  WebStoreDefinitionSchema
3922
4571
  ]);
3923
- var StoreDefinitionsConfigSchema = z2.object({
3924
- version: z2.literal(1),
3925
- stores: z2.array(StoreDefinitionSchema)
4572
+ var StoreDefinitionsConfigSchema = z3.object({
4573
+ version: z3.literal(1),
4574
+ stores: z3.array(StoreDefinitionSchema)
3926
4575
  });
3927
4576
  function isFileStoreDefinition(def) {
3928
4577
  return def.type === "file";
@@ -3941,7 +4590,7 @@ var DEFAULT_STORE_DEFINITIONS_CONFIG = {
3941
4590
  // src/services/store-definition.service.ts
3942
4591
  async function fileExists3(path4) {
3943
4592
  try {
3944
- await access3(path4);
4593
+ await access4(path4);
3945
4594
  return true;
3946
4595
  } catch {
3947
4596
  return false;
@@ -3953,7 +4602,7 @@ var StoreDefinitionService = class {
3953
4602
  config = null;
3954
4603
  constructor(projectRoot) {
3955
4604
  this.projectRoot = projectRoot ?? ProjectRootService.resolve();
3956
- this.configPath = join8(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
4605
+ this.configPath = join9(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
3957
4606
  }
3958
4607
  /**
3959
4608
  * Load store definitions from config file.
@@ -3972,7 +4621,7 @@ var StoreDefinitionService = class {
3972
4621
  };
3973
4622
  return this.config;
3974
4623
  }
3975
- const content = await readFile5(this.configPath, "utf-8");
4624
+ const content = await readFile7(this.configPath, "utf-8");
3976
4625
  let parsed;
3977
4626
  try {
3978
4627
  parsed = JSON.parse(content);
@@ -3992,8 +4641,7 @@ var StoreDefinitionService = class {
3992
4641
  * Save store definitions to config file.
3993
4642
  */
3994
4643
  async save(config) {
3995
- await mkdir3(dirname4(this.configPath), { recursive: true });
3996
- await writeFile4(this.configPath, JSON.stringify(config, null, 2));
4644
+ await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
3997
4645
  this.config = config;
3998
4646
  }
3999
4647
  /**
@@ -4065,7 +4713,7 @@ var StoreDefinitionService = class {
4065
4713
  * Resolve a file store path relative to project root.
4066
4714
  */
4067
4715
  resolvePath(path4) {
4068
- if (isAbsolute(path4)) {
4716
+ if (isAbsolute2(path4)) {
4069
4717
  return path4;
4070
4718
  }
4071
4719
  return resolve2(this.projectRoot, path4);
@@ -4092,8 +4740,8 @@ var StoreDefinitionService = class {
4092
4740
 
4093
4741
  // src/services/store.service.ts
4094
4742
  import { randomUUID as randomUUID2 } from "crypto";
4095
- import { readFile as readFile6, writeFile as writeFile5, mkdir as mkdir5, stat, access as access4 } from "fs/promises";
4096
- import { join as join9, resolve as resolve3 } from "path";
4743
+ import { readFile as readFile8, mkdir as mkdir5, stat as stat2, access as access5 } from "fs/promises";
4744
+ import { join as join10, resolve as resolve3 } from "path";
4097
4745
 
4098
4746
  // src/plugin/git-clone.ts
4099
4747
  import { spawn } from "child_process";
@@ -4124,6 +4772,9 @@ async function cloneRepository(options) {
4124
4772
  });
4125
4773
  });
4126
4774
  }
4775
/**
 * Decide whether a repository source string is a remote Git URL rather than
 * a local path.
 *
 * Recognises the `http://`, `https://`, `ssh://` and `git://` transports as
 * well as the `git@host:path` SSH shorthand. `ssh://` and `git://` are
 * included so that URL forms accepted by the repo URL schema are not
 * misclassified as local paths.
 *
 * @param source - Repository source (URL or filesystem path).
 * @returns true when `source` starts with one of the recognised URL prefixes.
 */
function isGitUrl(source) {
  const urlPrefixes = ["http://", "https://", "ssh://", "git://", "git@"];
  return urlPrefixes.some((prefix) => source.startsWith(prefix));
}
4127
4778
  function extractRepoName(url) {
4128
4779
  const match = /\/([^/]+?)(\.git)?$/.exec(url);
4129
4780
  const name = match?.[1];
@@ -4136,7 +4787,7 @@ function extractRepoName(url) {
4136
4787
  // src/services/store.service.ts
4137
4788
  async function fileExists4(path4) {
4138
4789
  try {
4139
- await access4(path4);
4790
+ await access5(path4);
4140
4791
  return true;
4141
4792
  } catch {
4142
4793
  return false;
@@ -4146,11 +4797,13 @@ var StoreService = class {
4146
4797
  dataDir;
4147
4798
  definitionService;
4148
4799
  gitignoreService;
4800
+ projectRoot;
4149
4801
  registry = { stores: [] };
4150
4802
  constructor(dataDir, options) {
4151
4803
  this.dataDir = dataDir;
4152
4804
  this.definitionService = options?.definitionService ?? void 0;
4153
4805
  this.gitignoreService = options?.gitignoreService ?? void 0;
4806
+ this.projectRoot = options?.projectRoot ?? void 0;
4154
4807
  }
4155
4808
  async initialize() {
4156
4809
  await mkdir5(this.dataDir, { recursive: true });
@@ -4158,6 +4811,7 @@ var StoreService = class {
4158
4811
  }
4159
4812
  /**
4160
4813
  * Convert a Store and CreateStoreInput to a StoreDefinition for persistence.
4814
+ * Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
4161
4815
  */
4162
4816
  createDefinitionFromStore(store, input) {
4163
4817
  const tags = store.tags !== void 0 ? [...store.tags] : void 0;
@@ -4179,10 +4833,13 @@ var StoreService = class {
4179
4833
  }
4180
4834
  case "repo": {
4181
4835
  const repoStore = store;
4836
+ if (repoStore.url === void 0) {
4837
+ return void 0;
4838
+ }
4182
4839
  const repoDef = {
4183
4840
  ...base,
4184
4841
  type: "repo",
4185
- url: repoStore.url ?? "",
4842
+ url: repoStore.url,
4186
4843
  branch: repoStore.branch,
4187
4844
  depth: input.depth
4188
4845
  };
@@ -4194,7 +4851,58 @@ var StoreService = class {
4194
4851
  ...base,
4195
4852
  type: "web",
4196
4853
  url: webStore.url,
4197
- depth: webStore.depth
4854
+ depth: webStore.depth,
4855
+ maxPages: input.maxPages,
4856
+ crawlInstructions: input.crawlInstructions,
4857
+ extractInstructions: input.extractInstructions
4858
+ };
4859
+ return webDef;
4860
+ }
4861
+ }
4862
+ }
4863
+ /**
4864
+ * Create a StoreDefinition from an existing store (without original input).
4865
+ * Used when updating/renaming stores where we don't have the original input.
4866
+ * Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
4867
+ */
4868
+ createDefinitionFromExistingStore(store) {
4869
+ const tags = store.tags !== void 0 ? [...store.tags] : void 0;
4870
+ const base = {
4871
+ name: store.name,
4872
+ description: store.description,
4873
+ tags
4874
+ };
4875
+ switch (store.type) {
4876
+ case "file": {
4877
+ const fileDef = {
4878
+ ...base,
4879
+ type: "file",
4880
+ path: store.path
4881
+ };
4882
+ return fileDef;
4883
+ }
4884
+ case "repo": {
4885
+ if (store.url === void 0) {
4886
+ return void 0;
4887
+ }
4888
+ const repoDef = {
4889
+ ...base,
4890
+ type: "repo",
4891
+ url: store.url,
4892
+ branch: store.branch,
4893
+ depth: store.depth
4894
+ };
4895
+ return repoDef;
4896
+ }
4897
+ case "web": {
4898
+ const webDef = {
4899
+ ...base,
4900
+ type: "web",
4901
+ url: store.url,
4902
+ depth: store.depth,
4903
+ maxPages: store.maxPages,
4904
+ crawlInstructions: store.crawlInstructions,
4905
+ extractInstructions: store.extractInstructions
4198
4906
  };
4199
4907
  return webDef;
4200
4908
  }
@@ -4216,9 +4924,9 @@ var StoreService = class {
4216
4924
  if (input.path === void 0) {
4217
4925
  return err(new Error("Path is required for file stores"));
4218
4926
  }
4219
- const normalizedPath = resolve3(input.path);
4927
+ const normalizedPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, input.path) : resolve3(input.path);
4220
4928
  try {
4221
- const stats = await stat(normalizedPath);
4929
+ const stats = await stat2(normalizedPath);
4222
4930
  if (!stats.isDirectory()) {
4223
4931
  return err(new Error(`Path is not a directory: ${normalizedPath}`));
4224
4932
  }
@@ -4241,7 +4949,7 @@ var StoreService = class {
4241
4949
  case "repo": {
4242
4950
  let repoPath = input.path;
4243
4951
  if (input.url !== void 0) {
4244
- const cloneDir = join9(this.dataDir, "repos", id);
4952
+ const cloneDir = join10(this.dataDir, "repos", id);
4245
4953
  const result = await cloneRepository({
4246
4954
  url: input.url,
4247
4955
  targetDir: cloneDir,
@@ -4256,7 +4964,17 @@ var StoreService = class {
4256
4964
  if (repoPath === void 0) {
4257
4965
  return err(new Error("Path or URL required for repo stores"));
4258
4966
  }
4259
- const normalizedRepoPath = resolve3(repoPath);
4967
+ const normalizedRepoPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, repoPath) : resolve3(repoPath);
4968
+ if (input.url === void 0) {
4969
+ try {
4970
+ const stats = await stat2(normalizedRepoPath);
4971
+ if (!stats.isDirectory()) {
4972
+ return err(new Error(`Path is not a directory: ${normalizedRepoPath}`));
4973
+ }
4974
+ } catch {
4975
+ return err(new Error(`Repository path does not exist: ${normalizedRepoPath}`));
4976
+ }
4977
+ }
4260
4978
  store = {
4261
4979
  type: "repo",
4262
4980
  id,
@@ -4264,6 +4982,7 @@ var StoreService = class {
4264
4982
  path: normalizedRepoPath,
4265
4983
  url: input.url,
4266
4984
  branch: input.branch,
4985
+ depth: input.depth ?? 1,
4267
4986
  description: input.description,
4268
4987
  tags: input.tags,
4269
4988
  status: "ready",
@@ -4282,6 +5001,9 @@ var StoreService = class {
4282
5001
  name: input.name,
4283
5002
  url: input.url,
4284
5003
  depth: input.depth ?? 1,
5004
+ maxPages: input.maxPages,
5005
+ crawlInstructions: input.crawlInstructions,
5006
+ extractInstructions: input.extractInstructions,
4285
5007
  description: input.description,
4286
5008
  tags: input.tags,
4287
5009
  status: "ready",
@@ -4301,7 +5023,9 @@ var StoreService = class {
4301
5023
  }
4302
5024
  if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
4303
5025
  const definition = this.createDefinitionFromStore(store, input);
4304
- await this.definitionService.addDefinition(definition);
5026
+ if (definition !== void 0) {
5027
+ await this.definitionService.addDefinition(definition);
5028
+ }
4305
5029
  }
4306
5030
  return ok(store);
4307
5031
  }
@@ -4331,6 +5055,16 @@ var StoreService = class {
4331
5055
  if (store === void 0) {
4332
5056
  return err(new Error(`Store not found: ${id}`));
4333
5057
  }
5058
+ if (updates.name?.trim() === "") {
5059
+ return err(new Error("Store name cannot be empty"));
5060
+ }
5061
+ const isRenaming = updates.name !== void 0 && updates.name !== store.name;
5062
+ if (isRenaming) {
5063
+ const existing = this.registry.stores.find((s) => s.name === updates.name && s.id !== id);
5064
+ if (existing !== void 0) {
5065
+ return err(new Error(`Store with name '${updates.name}' already exists`));
5066
+ }
5067
+ }
4334
5068
  const updated = {
4335
5069
  ...store,
4336
5070
  ...updates,
@@ -4339,14 +5073,24 @@ var StoreService = class {
4339
5073
  this.registry.stores[index] = updated;
4340
5074
  await this.saveRegistry();
4341
5075
  if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
4342
- const defUpdates = {};
4343
- if (updates.description !== void 0) {
4344
- defUpdates.description = updates.description;
4345
- }
4346
- if (updates.tags !== void 0) {
4347
- defUpdates.tags = [...updates.tags];
5076
+ if (isRenaming) {
5077
+ await this.definitionService.removeDefinition(store.name);
5078
+ const newDefinition = this.createDefinitionFromExistingStore(updated);
5079
+ if (newDefinition !== void 0) {
5080
+ await this.definitionService.addDefinition(newDefinition);
5081
+ }
5082
+ } else {
5083
+ const defUpdates = {};
5084
+ if (updates.description !== void 0) {
5085
+ defUpdates.description = updates.description;
5086
+ }
5087
+ if (updates.tags !== void 0) {
5088
+ defUpdates.tags = [...updates.tags];
5089
+ }
5090
+ if (Object.keys(defUpdates).length > 0) {
5091
+ await this.definitionService.updateDefinition(store.name, defUpdates);
5092
+ }
4348
5093
  }
4349
- await this.definitionService.updateDefinition(store.name, defUpdates);
4350
5094
  }
4351
5095
  return ok(updated);
4352
5096
  }
@@ -4368,14 +5112,14 @@ var StoreService = class {
4368
5112
  return ok(void 0);
4369
5113
  }
4370
5114
  async loadRegistry() {
4371
- const registryPath = join9(this.dataDir, "stores.json");
5115
+ const registryPath = join10(this.dataDir, "stores.json");
4372
5116
  const exists = await fileExists4(registryPath);
4373
5117
  if (!exists) {
4374
5118
  this.registry = { stores: [] };
4375
5119
  await this.saveRegistry();
4376
5120
  return;
4377
5121
  }
4378
- const content = await readFile6(registryPath, "utf-8");
5122
+ const content = await readFile8(registryPath, "utf-8");
4379
5123
  try {
4380
5124
  const data = JSON.parse(content);
4381
5125
  this.registry = {
@@ -4393,8 +5137,8 @@ var StoreService = class {
4393
5137
  }
4394
5138
  }
4395
5139
  async saveRegistry() {
4396
- const registryPath = join9(this.dataDir, "stores.json");
4397
- await writeFile5(registryPath, JSON.stringify(this.registry, null, 2));
5140
+ const registryPath = join10(this.dataDir, "stores.json");
5141
+ await atomicWriteFile(registryPath, JSON.stringify(this.registry, null, 2));
4398
5142
  }
4399
5143
  };
4400
5144
 
@@ -4408,33 +5152,33 @@ import { fileURLToPath } from "url";
4408
5152
  import { ZodError } from "zod";
4409
5153
 
4410
5154
  // src/crawl/schemas.ts
4411
- import { z as z3 } from "zod";
4412
- var CrawledLinkSchema = z3.object({
4413
- href: z3.string(),
4414
- text: z3.string(),
4415
- title: z3.string().optional(),
4416
- base_domain: z3.string().optional(),
4417
- head_data: z3.unknown().optional(),
4418
- head_extraction_status: z3.unknown().optional(),
4419
- head_extraction_error: z3.unknown().optional(),
4420
- intrinsic_score: z3.number().optional(),
4421
- contextual_score: z3.unknown().optional(),
4422
- total_score: z3.unknown().optional()
5155
+ import { z as z4 } from "zod";
5156
+ var CrawledLinkSchema = z4.object({
5157
+ href: z4.string(),
5158
+ text: z4.string(),
5159
+ title: z4.string().optional(),
5160
+ base_domain: z4.string().optional(),
5161
+ head_data: z4.unknown().optional(),
5162
+ head_extraction_status: z4.unknown().optional(),
5163
+ head_extraction_error: z4.unknown().optional(),
5164
+ intrinsic_score: z4.number().optional(),
5165
+ contextual_score: z4.unknown().optional(),
5166
+ total_score: z4.unknown().optional()
4423
5167
  });
4424
- var CrawlPageSchema = z3.object({
4425
- url: z3.string(),
4426
- title: z3.string(),
4427
- content: z3.string(),
4428
- links: z3.array(z3.string()),
4429
- crawledAt: z3.string()
5168
+ var CrawlPageSchema = z4.object({
5169
+ url: z4.string(),
5170
+ title: z4.string(),
5171
+ content: z4.string(),
5172
+ links: z4.array(z4.string()),
5173
+ crawledAt: z4.string()
4430
5174
  });
4431
- var CrawlResultSchema = z3.object({
4432
- pages: z3.array(CrawlPageSchema)
5175
+ var CrawlResultSchema = z4.object({
5176
+ pages: z4.array(CrawlPageSchema)
4433
5177
  });
4434
- var HeadlessResultSchema = z3.object({
4435
- html: z3.string(),
4436
- markdown: z3.string(),
4437
- links: z3.array(z3.union([CrawledLinkSchema, z3.string()]))
5178
+ var HeadlessResultSchema = z4.object({
5179
+ html: z4.string(),
5180
+ markdown: z4.string(),
5181
+ links: z4.array(z4.union([CrawledLinkSchema, z4.string()]))
4438
5182
  });
4439
5183
  function validateHeadlessResult(data) {
4440
5184
  return HeadlessResultSchema.parse(data);
@@ -4442,33 +5186,33 @@ function validateHeadlessResult(data) {
4442
5186
  function validateCrawlResult(data) {
4443
5187
  return CrawlResultSchema.parse(data);
4444
5188
  }
4445
- var MethodInfoSchema = z3.object({
4446
- name: z3.string(),
4447
- async: z3.boolean(),
4448
- signature: z3.string(),
4449
- startLine: z3.number(),
4450
- endLine: z3.number(),
4451
- calls: z3.array(z3.string())
5189
+ var MethodInfoSchema = z4.object({
5190
+ name: z4.string(),
5191
+ async: z4.boolean(),
5192
+ signature: z4.string(),
5193
+ startLine: z4.number(),
5194
+ endLine: z4.number(),
5195
+ calls: z4.array(z4.string())
4452
5196
  });
4453
- var CodeNodeSchema = z3.object({
4454
- type: z3.enum(["function", "class"]),
4455
- name: z3.string(),
4456
- exported: z3.boolean(),
4457
- startLine: z3.number(),
4458
- endLine: z3.number(),
4459
- async: z3.boolean().optional(),
4460
- signature: z3.string().optional(),
4461
- calls: z3.array(z3.string()).optional(),
4462
- methods: z3.array(MethodInfoSchema).optional()
5197
+ var CodeNodeSchema = z4.object({
5198
+ type: z4.enum(["function", "class"]),
5199
+ name: z4.string(),
5200
+ exported: z4.boolean(),
5201
+ startLine: z4.number(),
5202
+ endLine: z4.number(),
5203
+ async: z4.boolean().optional(),
5204
+ signature: z4.string().optional(),
5205
+ calls: z4.array(z4.string()).optional(),
5206
+ methods: z4.array(MethodInfoSchema).optional()
4463
5207
  });
4464
- var ImportInfoSchema = z3.object({
4465
- source: z3.string(),
4466
- imported: z3.string(),
4467
- alias: z3.string().optional().nullable()
5208
+ var ImportInfoSchema = z4.object({
5209
+ source: z4.string(),
5210
+ imported: z4.string(),
5211
+ alias: z4.string().optional().nullable()
4468
5212
  });
4469
- var ParsePythonResultSchema = z3.object({
4470
- nodes: z3.array(CodeNodeSchema),
4471
- imports: z3.array(ImportInfoSchema)
5213
+ var ParsePythonResultSchema = z4.object({
5214
+ nodes: z4.array(CodeNodeSchema),
5215
+ imports: z4.array(ImportInfoSchema)
4472
5216
  });
4473
5217
  function validateParsePythonResult(data) {
4474
5218
  return ParsePythonResultSchema.parse(data);
@@ -4476,6 +5220,15 @@ function validateParsePythonResult(data) {
4476
5220
 
4477
5221
  // src/crawl/bridge.ts
4478
5222
  var logger3 = createLogger("python-bridge");
5223
+ function getPythonExecutable() {
5224
+ return process.platform === "win32" ? "python" : "python3";
5225
+ }
5226
+ function getVenvPythonPath(pluginRoot) {
5227
+ if (process.platform === "win32") {
5228
+ return path3.join(pluginRoot, ".venv", "Scripts", "python.exe");
5229
+ }
5230
+ return path3.join(pluginRoot, ".venv", "bin", "python3");
5231
+ }
4479
5232
  var PythonBridge = class {
4480
5233
  process = null;
4481
5234
  pending = /* @__PURE__ */ new Map();
@@ -4485,20 +5238,21 @@ var PythonBridge = class {
4485
5238
  start() {
4486
5239
  if (this.process) return Promise.resolve();
4487
5240
  const currentFilePath = fileURLToPath(import.meta.url);
4488
- const isProduction = currentFilePath.includes("/dist/");
5241
+ const distPattern = `${path3.sep}dist${path3.sep}`;
5242
+ const isProduction = currentFilePath.includes(distPattern);
4489
5243
  let pythonWorkerPath;
4490
5244
  let pythonPath;
4491
5245
  if (isProduction) {
4492
- const distIndex = currentFilePath.indexOf("/dist/");
5246
+ const distIndex = currentFilePath.indexOf(distPattern);
4493
5247
  const pluginRoot = currentFilePath.substring(0, distIndex);
4494
5248
  pythonWorkerPath = path3.join(pluginRoot, "python", "crawl_worker.py");
4495
- const venvPython = path3.join(pluginRoot, ".venv", "bin", "python3");
4496
- pythonPath = existsSync4(venvPython) ? venvPython : "python3";
5249
+ const venvPython = getVenvPythonPath(pluginRoot);
5250
+ pythonPath = existsSync4(venvPython) ? venvPython : getPythonExecutable();
4497
5251
  } else {
4498
5252
  const srcDir = path3.dirname(path3.dirname(currentFilePath));
4499
5253
  const projectRoot = path3.dirname(srcDir);
4500
5254
  pythonWorkerPath = path3.join(projectRoot, "python", "crawl_worker.py");
4501
- pythonPath = "python3";
5255
+ pythonPath = getPythonExecutable();
4502
5256
  }
4503
5257
  logger3.debug(
4504
5258
  { pythonWorkerPath, pythonPath, currentFilePath, isProduction },
@@ -4734,17 +5488,19 @@ var PythonBridge = class {
4734
5488
  };
4735
5489
 
4736
5490
  // src/db/embeddings.ts
4737
- import { homedir as homedir3 } from "os";
4738
- import { join as join10 } from "path";
5491
+ import { homedir as homedir2 } from "os";
5492
+ import { join as join11 } from "path";
4739
5493
  import { pipeline, env } from "@huggingface/transformers";
4740
- env.cacheDir = join10(homedir3(), ".cache", "huggingface-transformers");
5494
+ env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
4741
5495
  var EmbeddingEngine = class {
4742
5496
  extractor = null;
5497
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed()
5498
+ _dimensions = null;
4743
5499
  modelName;
4744
- dimensions;
4745
- constructor(modelName = "Xenova/all-MiniLM-L6-v2", dimensions = 384) {
5500
+ batchSize;
5501
+ constructor(modelName = "Xenova/all-MiniLM-L6-v2", batchSize = 32) {
4746
5502
  this.modelName = modelName;
4747
- this.dimensions = dimensions;
5503
+ this.batchSize = batchSize;
4748
5504
  }
4749
5505
  async initialize() {
4750
5506
  if (this.extractor !== null) return;
@@ -4764,23 +5520,43 @@ var EmbeddingEngine = class {
4764
5520
  normalize: true
4765
5521
  });
4766
5522
  const result = Array.from(output.data);
5523
+ this._dimensions ??= result.length;
4767
5524
  return result.map((v) => Number(v));
4768
5525
  }
4769
5526
  async embedBatch(texts) {
4770
- const BATCH_SIZE = 32;
4771
5527
  const results = [];
4772
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
4773
- const batch = texts.slice(i, i + BATCH_SIZE);
5528
+ for (let i = 0; i < texts.length; i += this.batchSize) {
5529
+ const batch = texts.slice(i, i + this.batchSize);
4774
5530
  const batchResults = await Promise.all(batch.map((text) => this.embed(text)));
4775
5531
  results.push(...batchResults);
4776
- if (i + BATCH_SIZE < texts.length) {
5532
+ if (i + this.batchSize < texts.length) {
4777
5533
  await new Promise((resolve4) => setTimeout(resolve4, 100));
4778
5534
  }
4779
5535
  }
4780
5536
  return results;
4781
5537
  }
5538
+ /**
5539
+ * Get cached embedding dimensions. Throws if embed() hasn't been called yet.
5540
+ * Use ensureDimensions() if you need to guarantee dimensions are available.
5541
+ */
4782
5542
  getDimensions() {
4783
- return this.dimensions;
5543
+ if (this._dimensions === null) {
5544
+ throw new Error("Cannot get dimensions before first embed() call");
5545
+ }
5546
+ return this._dimensions;
5547
+ }
5548
+ /**
5549
+ * Ensure dimensions are available, initializing the model if needed.
5550
+ * Returns the embedding dimensions for the current model.
5551
+ */
5552
+ async ensureDimensions() {
5553
+ if (this._dimensions === null) {
5554
+ await this.embed("");
5555
+ }
5556
+ if (this._dimensions === null) {
5557
+ throw new Error("Failed to determine embedding dimensions");
5558
+ }
5559
+ return this._dimensions;
4784
5560
  }
4785
5561
  /**
4786
5562
  * Dispose the embedding pipeline to free resources.
@@ -4798,17 +5574,18 @@ var EmbeddingEngine = class {
4798
5574
  import * as lancedb from "@lancedb/lancedb";
4799
5575
 
4800
5576
  // src/types/document.ts
4801
- import { z as z4 } from "zod";
4802
- var DocumentTypeSchema = z4.enum(["file", "chunk", "web"]);
4803
- var DocumentMetadataSchema = z4.object({
4804
- path: z4.string().optional(),
4805
- url: z4.string().optional(),
5577
+ import { z as z5 } from "zod";
5578
+ var DocumentTypeSchema = z5.enum(["file", "chunk", "web"]);
5579
+ var DocumentMetadataSchema = z5.object({
5580
+ path: z5.string().optional(),
5581
+ url: z5.string().optional(),
4806
5582
  type: DocumentTypeSchema,
4807
- storeId: z4.string(),
4808
- indexedAt: z4.union([z4.string(), z4.date()]),
4809
- fileHash: z4.string().optional(),
4810
- chunkIndex: z4.number().optional(),
4811
- totalChunks: z4.number().optional()
5583
+ storeId: z5.string(),
5584
+ indexedAt: z5.string(),
5585
+ // ISO 8601 string (what JSON serialization produces)
5586
+ fileHash: z5.string().optional(),
5587
+ chunkIndex: z5.number().optional(),
5588
+ totalChunks: z5.number().optional()
4812
5589
  }).loose();
4813
5590
 
4814
5591
  // src/db/lance.ts
@@ -4816,10 +5593,23 @@ var LanceStore = class {
4816
5593
  connection = null;
4817
5594
  tables = /* @__PURE__ */ new Map();
4818
5595
  dataDir;
5596
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly -- set via setDimensions()
5597
+ _dimensions = null;
4819
5598
  constructor(dataDir) {
4820
5599
  this.dataDir = dataDir;
4821
5600
  }
5601
+ /**
5602
+ * Set the embedding dimensions. Must be called before initialize().
5603
+ * This allows dimensions to be derived from the embedding model at runtime.
5604
+ * Idempotent: subsequent calls are ignored if dimensions are already set.
5605
+ */
5606
+ setDimensions(dimensions) {
5607
+ this._dimensions ??= dimensions;
5608
+ }
4822
5609
  async initialize(storeId) {
5610
+ if (this._dimensions === null) {
5611
+ throw new Error("Dimensions not set. Call setDimensions() before initialize().");
5612
+ }
4823
5613
  this.connection ??= await lancedb.connect(this.dataDir);
4824
5614
  const tableName = this.getTableName(storeId);
4825
5615
  const tableNames = await this.connection.tableNames();
@@ -4828,7 +5618,7 @@ var LanceStore = class {
4828
5618
  {
4829
5619
  id: "__init__",
4830
5620
  content: "",
4831
- vector: new Array(384).fill(0),
5621
+ vector: new Array(this._dimensions).fill(0),
4832
5622
  metadata: "{}"
4833
5623
  }
4834
5624
  ]);
@@ -4850,10 +5640,17 @@ var LanceStore = class {
4850
5640
  await table.add(lanceDocuments);
4851
5641
  }
4852
5642
  async deleteDocuments(storeId, documentIds) {
5643
+ if (documentIds.length === 0) {
5644
+ return;
5645
+ }
4853
5646
  const table = await this.getTable(storeId);
4854
5647
  const idList = documentIds.map((id) => `"${id}"`).join(", ");
4855
5648
  await table.delete(`id IN (${idList})`);
4856
5649
  }
5650
+ async clearAllDocuments(storeId) {
5651
+ const table = await this.getTable(storeId);
5652
+ await table.delete("id IS NOT NULL");
5653
+ }
4857
5654
  async search(storeId, vector, limit, _threshold) {
4858
5655
  const table = await this.getTable(storeId);
4859
5656
  const query = table.vectorSearch(vector).limit(limit).distanceType("cosine");
@@ -4893,7 +5690,9 @@ var LanceStore = class {
4893
5690
  }
4894
5691
  async deleteStore(storeId) {
4895
5692
  const tableName = this.getTableName(storeId);
4896
- if (this.connection !== null) {
5693
+ this.connection ??= await lancedb.connect(this.dataDir);
5694
+ const tableNames = await this.connection.tableNames();
5695
+ if (tableNames.includes(tableName)) {
4897
5696
  await this.connection.dropTable(tableName);
4898
5697
  this.tables.delete(tableName);
4899
5698
  }
@@ -4943,6 +5742,8 @@ var LazyServiceContainer = class {
4943
5742
  appConfig;
4944
5743
  dataDir;
4945
5744
  // Lazily initialized (heavy)
5745
+ // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in lazy getter
5746
+ _manifest = null;
4946
5747
  _embeddings = null;
4947
5748
  _codeGraph = null;
4948
5749
  _search = null;
@@ -4964,7 +5765,7 @@ var LazyServiceContainer = class {
4964
5765
  logger4.debug("Lazy-initializing EmbeddingEngine");
4965
5766
  this._embeddings = new EmbeddingEngine(
4966
5767
  this.appConfig.embedding.model,
4967
- this.appConfig.embedding.dimensions
5768
+ this.appConfig.embedding.batchSize
4968
5769
  );
4969
5770
  }
4970
5771
  return this._embeddings;
@@ -4985,7 +5786,12 @@ var LazyServiceContainer = class {
4985
5786
  get search() {
4986
5787
  if (this._search === null) {
4987
5788
  logger4.debug("Lazy-initializing SearchService");
4988
- this._search = new SearchService(this.lance, this.embeddings, this.codeGraph);
5789
+ this._search = new SearchService(
5790
+ this.lance,
5791
+ this.embeddings,
5792
+ this.codeGraph,
5793
+ this.appConfig.search
5794
+ );
4989
5795
  }
4990
5796
  return this._search;
4991
5797
  }
@@ -4996,17 +5802,38 @@ var LazyServiceContainer = class {
4996
5802
  if (this._index === null) {
4997
5803
  logger4.debug("Lazy-initializing IndexService");
4998
5804
  this._index = new IndexService(this.lance, this.embeddings, {
4999
- codeGraphService: this.codeGraph
5805
+ codeGraphService: this.codeGraph,
5806
+ manifestService: this.manifest,
5807
+ chunkSize: this.appConfig.indexing.chunkSize,
5808
+ chunkOverlap: this.appConfig.indexing.chunkOverlap,
5809
+ concurrency: this.appConfig.indexing.concurrency,
5810
+ ignorePatterns: this.appConfig.indexing.ignorePatterns
5000
5811
  });
5001
5812
  }
5002
5813
  return this._index;
5003
5814
  }
5815
+ /**
5816
+ * ManifestService is lazily created on first access.
5817
+ */
5818
+ get manifest() {
5819
+ if (this._manifest === null) {
5820
+ logger4.debug("Lazy-initializing ManifestService");
5821
+ this._manifest = new ManifestService(this.dataDir);
5822
+ }
5823
+ return this._manifest;
5824
+ }
5004
5825
  /**
5005
5826
  * Check if embeddings have been initialized (for cleanup purposes).
5006
5827
  */
5007
5828
  get hasEmbeddings() {
5008
5829
  return this._embeddings !== null;
5009
5830
  }
5831
+ /**
5832
+ * Check if search service has been initialized (for cleanup purposes).
5833
+ */
5834
+ get hasSearch() {
5835
+ return this._search !== null;
5836
+ }
5010
5837
  };
5011
5838
  async function createLazyServices(configPath, dataDir, projectRoot) {
5012
5839
  logger4.info({ configPath, dataDir, projectRoot }, "Initializing lazy services");
@@ -5017,16 +5844,21 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
5017
5844
  const pythonBridge = new PythonBridge();
5018
5845
  await pythonBridge.start();
5019
5846
  const lance = new LanceStore(resolvedDataDir);
5020
- let storeOptions;
5021
- if (projectRoot !== void 0) {
5022
- const definitionService = new StoreDefinitionService(projectRoot);
5023
- const gitignoreService = new GitignoreService(projectRoot);
5024
- storeOptions = { definitionService, gitignoreService };
5025
- }
5847
+ const resolvedProjectRoot = config.resolveProjectRoot();
5848
+ const definitionService = new StoreDefinitionService(resolvedProjectRoot);
5849
+ const gitignoreService = new GitignoreService(resolvedProjectRoot);
5850
+ const storeOptions = {
5851
+ definitionService,
5852
+ gitignoreService,
5853
+ projectRoot: resolvedProjectRoot
5854
+ };
5026
5855
  const store = new StoreService(resolvedDataDir, storeOptions);
5027
5856
  await store.initialize();
5028
5857
  const durationMs = Date.now() - startTime;
5029
- logger4.info({ dataDir: resolvedDataDir, durationMs }, "Lazy services initialized");
5858
+ logger4.info(
5859
+ { dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot, durationMs },
5860
+ "Lazy services initialized"
5861
+ );
5030
5862
  return new LazyServiceContainer(config, appConfig, resolvedDataDir, store, lance, pythonBridge);
5031
5863
  }
5032
5864
  async function createServices(configPath, dataDir, projectRoot) {
@@ -5037,20 +5869,33 @@ async function createServices(configPath, dataDir, projectRoot) {
5037
5869
  const pythonBridge = new PythonBridge();
5038
5870
  await pythonBridge.start();
5039
5871
  const lance = new LanceStore(resolvedDataDir);
5040
- const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.dimensions);
5872
+ const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.batchSize);
5041
5873
  await embeddings.initialize();
5042
- let storeOptions;
5043
- if (projectRoot !== void 0) {
5044
- const definitionService = new StoreDefinitionService(projectRoot);
5045
- const gitignoreService = new GitignoreService(projectRoot);
5046
- storeOptions = { definitionService, gitignoreService };
5047
- }
5874
+ const resolvedProjectRoot = config.resolveProjectRoot();
5875
+ const definitionService = new StoreDefinitionService(resolvedProjectRoot);
5876
+ const gitignoreService = new GitignoreService(resolvedProjectRoot);
5877
+ const storeOptions = {
5878
+ definitionService,
5879
+ gitignoreService,
5880
+ projectRoot: resolvedProjectRoot
5881
+ };
5048
5882
  const store = new StoreService(resolvedDataDir, storeOptions);
5049
5883
  await store.initialize();
5050
5884
  const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
5051
- const search = new SearchService(lance, embeddings, codeGraph);
5052
- const index = new IndexService(lance, embeddings, { codeGraphService: codeGraph });
5053
- logger4.info({ dataDir: resolvedDataDir }, "Services initialized successfully");
5885
+ const manifest = new ManifestService(resolvedDataDir);
5886
+ const search = new SearchService(lance, embeddings, codeGraph, appConfig.search);
5887
+ const index = new IndexService(lance, embeddings, {
5888
+ codeGraphService: codeGraph,
5889
+ manifestService: manifest,
5890
+ chunkSize: appConfig.indexing.chunkSize,
5891
+ chunkOverlap: appConfig.indexing.chunkOverlap,
5892
+ concurrency: appConfig.indexing.concurrency,
5893
+ ignorePatterns: appConfig.indexing.ignorePatterns
5894
+ });
5895
+ logger4.info(
5896
+ { dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot },
5897
+ "Services initialized successfully"
5898
+ );
5054
5899
  return {
5055
5900
  config,
5056
5901
  store,
@@ -5059,12 +5904,20 @@ async function createServices(configPath, dataDir, projectRoot) {
5059
5904
  lance,
5060
5905
  embeddings,
5061
5906
  codeGraph,
5062
- pythonBridge
5907
+ pythonBridge,
5908
+ manifest
5063
5909
  };
5064
5910
  }
5065
5911
  async function destroyServices(services) {
5066
5912
  logger4.info("Shutting down services");
5067
5913
  const errors = [];
5914
+ const isLazyContainer = services instanceof LazyServiceContainer;
5915
+ const shouldCleanupSearch = !isLazyContainer || services.hasSearch;
5916
+ if (shouldCleanupSearch) {
5917
+ services.search.cleanup();
5918
+ } else {
5919
+ logger4.debug("Skipping search cleanup (not initialized)");
5920
+ }
5068
5921
  try {
5069
5922
  await services.pythonBridge.stop();
5070
5923
  } catch (e) {
@@ -5072,7 +5925,6 @@ async function destroyServices(services) {
5072
5925
  logger4.error({ error }, "Error stopping Python bridge");
5073
5926
  errors.push(error);
5074
5927
  }
5075
- const isLazyContainer = services instanceof LazyServiceContainer;
5076
5928
  const shouldDisposeEmbeddings = !isLazyContainer || services.hasEmbeddings;
5077
5929
  if (shouldDisposeEmbeddings) {
5078
5930
  try {
@@ -5102,6 +5954,7 @@ async function destroyServices(services) {
5102
5954
 
5103
5955
  export {
5104
5956
  AdapterRegistry,
5957
+ ProjectRootService,
5105
5958
  createLogger,
5106
5959
  shutdownLogger,
5107
5960
  summarizePayload,
@@ -5109,8 +5962,6 @@ export {
5109
5962
  PythonBridge,
5110
5963
  ChunkingService,
5111
5964
  ASTParser,
5112
- createStoreId,
5113
- createDocumentId,
5114
5965
  ok,
5115
5966
  err,
5116
5967
  classifyWebContentType,
@@ -5118,10 +5969,11 @@ export {
5118
5969
  isRepoStoreDefinition,
5119
5970
  isWebStoreDefinition,
5120
5971
  StoreDefinitionService,
5972
+ isGitUrl,
5121
5973
  extractRepoName,
5122
5974
  JobService,
5123
5975
  createLazyServices,
5124
5976
  createServices,
5125
5977
  destroyServices
5126
5978
  };
5127
- //# sourceMappingURL=chunk-VKTVMW45.js.map
5979
+ //# sourceMappingURL=chunk-RDDGZIDL.js.map