bluera-knowledge 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +98 -0
- package/README.md +3 -3
- package/dist/brands-3EYIYV6T.js +13 -0
- package/dist/chunk-CLIMKLTW.js +28 -0
- package/dist/chunk-CLIMKLTW.js.map +1 -0
- package/dist/{chunk-ZGEQCLOZ.js → chunk-EZXJ3W5X.js} +79 -32
- package/dist/chunk-EZXJ3W5X.js.map +1 -0
- package/dist/chunk-HXBIIMYL.js +140 -0
- package/dist/chunk-HXBIIMYL.js.map +1 -0
- package/dist/{chunk-VKTVMW45.js → chunk-RDDGZIDL.js} +1182 -330
- package/dist/chunk-RDDGZIDL.js.map +1 -0
- package/dist/{chunk-RAXRD23K.js → chunk-VUGQ7HAR.js} +10 -6
- package/dist/chunk-VUGQ7HAR.js.map +1 -0
- package/dist/index.js +227 -84
- package/dist/index.js.map +1 -1
- package/dist/mcp/bootstrap.js +22 -3
- package/dist/mcp/bootstrap.js.map +1 -1
- package/dist/mcp/server.d.ts +169 -18
- package/dist/mcp/server.js +4 -3
- package/dist/watch.service-VDSUQ72Z.js +7 -0
- package/dist/watch.service-VDSUQ72Z.js.map +1 -0
- package/dist/workers/background-worker-cli.js +20 -9
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +3 -3
- package/dist/chunk-HRQD3MPH.js +0 -69
- package/dist/chunk-HRQD3MPH.js.map +0 -1
- package/dist/chunk-RAXRD23K.js.map +0 -1
- package/dist/chunk-VKTVMW45.js.map +0 -1
- package/dist/chunk-ZGEQCLOZ.js.map +0 -1
- package/dist/watch.service-OPLKIDFQ.js +0 -7
- package/dist/{watch.service-OPLKIDFQ.js.map → brands-3EYIYV6T.js.map} +0 -0
@@ -1,3 +1,11 @@
+import {
+  createDocumentId,
+  createStoreId
+} from "./chunk-CLIMKLTW.js";
+import {
+  parseIgnorePatternsForScanning
+} from "./chunk-HXBIIMYL.js";
+
 // src/analysis/adapter-registry.ts
 var AdapterRegistry = class _AdapterRegistry {
   static instance;
@@ -109,18 +117,94 @@ var AdapterRegistry = class _AdapterRegistry {
 };
 
 // src/logging/logger.ts
-import { mkdirSync, existsSync } from "fs";
-import {
-import { join } from "path";
+import { mkdirSync, existsSync as existsSync2 } from "fs";
+import { join as join2 } from "path";
 import pino from "pino";
+
+// src/services/project-root.service.ts
+import { existsSync, statSync, realpathSync } from "fs";
+import { dirname, join, normalize, sep } from "path";
+var ProjectRootService = class {
+  /**
+   * Resolve project root directory using hierarchical detection.
+   */
+  static resolve(options) {
+    if (options?.projectRoot !== void 0 && options.projectRoot !== "") {
+      return this.normalize(options.projectRoot);
+    }
+    const projectRootEnv = process.env["PROJECT_ROOT"];
+    if (projectRootEnv !== void 0 && projectRootEnv !== "") {
+      return this.normalize(projectRootEnv);
+    }
+    const gitRoot = this.findGitRoot(process.cwd());
+    if (gitRoot !== null) {
+      return gitRoot;
+    }
+    const pwdEnv = process.env["PWD"];
+    if (pwdEnv !== void 0 && pwdEnv !== "") {
+      return this.normalize(pwdEnv);
+    }
+    return process.cwd();
+  }
+  /**
+   * Find git repository root by walking up the directory tree looking for .git
+   */
+  static findGitRoot(startPath) {
+    let currentPath = normalize(startPath);
+    const root = normalize(sep);
+    while (currentPath !== root) {
+      const gitPath = join(currentPath, ".git");
+      if (existsSync(gitPath)) {
+        try {
+          const stats = statSync(gitPath);
+          if (stats.isDirectory() || stats.isFile()) {
+            return currentPath;
+          }
+        } catch {
+        }
+      }
+      const parentPath = dirname(currentPath);
+      if (parentPath === currentPath) {
+        break;
+      }
+      currentPath = parentPath;
+    }
+    return null;
+  }
+  /**
+   * Normalize path by resolving symlinks and normalizing separators
+   */
+  static normalize(path4) {
+    try {
+      const realPath = realpathSync(path4);
+      return normalize(realPath);
+    } catch {
+      return normalize(path4);
+    }
+  }
+  /**
+   * Validate that a path exists and is a directory
+   */
+  static validate(path4) {
+    try {
+      const stats = statSync(path4);
+      return stats.isDirectory();
+    } catch {
+      return false;
+    }
+  }
+};
+
+// src/logging/logger.ts
 var VALID_LEVELS = ["trace", "debug", "info", "warn", "error", "fatal"];
 var VALID_LEVELS_SET = new Set(VALID_LEVELS);
 function getLogDir() {
-
+  const projectRoot = ProjectRootService.resolve();
+  return join2(projectRoot, ".bluera", "bluera-knowledge", "logs");
 }
 function ensureLogDir() {
   const logDir = getLogDir();
-  if (!
+  if (!existsSync2(logDir)) {
     mkdirSync(logDir, { recursive: true });
   }
   return logDir;
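The resolution order in the new `ProjectRootService.resolve` is: explicit `projectRoot` option, then the `PROJECT_ROOT` environment variable, then the nearest enclosing git repository, then `$PWD`, and finally `process.cwd()`. A minimal TypeScript sketch of the git-root walk (same idea, not the package's API):

```ts
import { existsSync } from "fs";
import { dirname, join } from "path";

// Climb from a start directory until a ".git" entry is found (a directory
// in normal checkouts, a file in worktrees/submodules), or give up at root.
function findGitRoot(startPath: string): string | null {
  let current = startPath;
  for (;;) {
    if (existsSync(join(current, ".git"))) return current;
    const parent = dirname(current);
    if (parent === current) return null; // dirname("/") === "/" terminates the walk
    current = parent;
  }
}
```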
@@ -144,7 +228,7 @@ function initializeLogger() {
     return rootLogger;
   }
   const logDir = ensureLogDir();
-  const logFile =
+  const logFile = join2(logDir, "app.log");
   const level = getLogLevel();
   const options = {
     level,
@@ -196,13 +280,13 @@ function shutdownLogger() {
 
 // src/logging/payload.ts
 import { createHash } from "crypto";
-import { writeFileSync, mkdirSync as mkdirSync2, existsSync as
-import { join as
+import { writeFileSync, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "fs";
+import { join as join3 } from "path";
 var MAX_PREVIEW_LENGTH = 500;
 var PAYLOAD_DUMP_THRESHOLD = 1e4;
 function getPayloadDir() {
-  const dir =
-  if (!
+  const dir = join3(getLogDirectory(), "payload");
+  if (!existsSync3(dir)) {
     mkdirSync2(dir, { recursive: true });
   }
   return dir;
@@ -219,7 +303,7 @@ function summarizePayload(content, type, identifier, dumpFull = isLevelEnabled("
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const safeId = safeFilename(identifier);
   const filename = `${timestamp}-${type}-${safeId}-${hash}.json`;
-  const filepath =
+  const filepath = join3(getPayloadDir(), filename);
   writeFileSync(
     filepath,
     JSON.stringify(
@@ -292,6 +376,23 @@ function err(error) {
   return { success: false, error };
 }
 
+// src/utils/atomic-write.ts
+import { writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync3 } from "fs";
+import { writeFile, rename, mkdir } from "fs/promises";
+import { dirname as dirname2 } from "path";
+async function atomicWriteFile(filePath, content) {
+  await mkdir(dirname2(filePath), { recursive: true });
+  const tempPath = `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}`;
+  await writeFile(tempPath, content, "utf-8");
+  await rename(tempPath, filePath);
+}
+function atomicWriteFileSync(filePath, content) {
+  mkdirSync3(dirname2(filePath), { recursive: true });
+  const tempPath = `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}`;
+  writeFileSync2(tempPath, content, "utf-8");
+  renameSync(tempPath, filePath);
+}
+
 // src/services/job.service.ts
 var JobService = class {
   jobsDir;
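The new util is the standard write-temp-then-rename pattern: the payload goes to a sibling temp file whose name mixes timestamp and PID to avoid collisions, then gets swapped into place. Because `rename(2)` is atomic within one filesystem (and the temp file lives next to the target), readers see either the old file or the complete new one, never a torn write. A sketch of the same pattern, with illustrative names:

```ts
import { writeFile, rename, mkdir } from "fs/promises";
import { dirname } from "path";

// Same idea as the bundled atomicWriteFile above.
async function writeAtomically(filePath: string, content: string): Promise<void> {
  await mkdir(dirname(filePath), { recursive: true });
  const tempPath = `${filePath}.tmp.${Date.now()}.${process.pid}`;
  await writeFile(tempPath, content, "utf-8");
  await rename(tempPath, filePath); // atomic swap on the same filesystem
}
```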
@@ -520,13 +621,13 @@ var JobService = class {
    */
   writeJob(job) {
     const jobFile = path.join(this.jobsDir, `${job.id}.json`);
-
+    atomicWriteFileSync(jobFile, JSON.stringify(job, null, 2));
   }
 };
 
 // src/services/code-graph.service.ts
-import { readFile, writeFile, mkdir, rm } from "fs/promises";
-import { join as
+import { readFile, writeFile as writeFile2, mkdir as mkdir2, rm } from "fs/promises";
+import { join as join4, dirname as dirname3 } from "path";
 
 // src/analysis/ast-parser.ts
 import { parse } from "@babel/parser";
@@ -1683,11 +1784,31 @@ var CodeGraphService = class {
   parser;
   parserFactory;
   graphCache;
+  cacheListeners;
   constructor(dataDir, pythonBridge) {
     this.dataDir = dataDir;
     this.parser = new ASTParser();
     this.parserFactory = new ParserFactory(pythonBridge);
     this.graphCache = /* @__PURE__ */ new Map();
+    this.cacheListeners = /* @__PURE__ */ new Set();
+  }
+  /**
+   * Subscribe to cache invalidation events.
+   * Returns an unsubscribe function.
+   */
+  onCacheInvalidation(listener) {
+    this.cacheListeners.add(listener);
+    return () => {
+      this.cacheListeners.delete(listener);
+    };
+  }
+  /**
+   * Emit a cache invalidation event to all listeners.
+   */
+  emitCacheInvalidation(event) {
+    for (const listener of this.cacheListeners) {
+      listener(event);
+    }
   }
   /**
    * Build a code graph from source files.
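`onCacheInvalidation` is a plain listener registry: subscribers land in a `Set`, and the return value is a disposer that removes them again. A self-contained sketch of the pattern (illustrative names, not the package's types):

```ts
type Listener<E> = (event: E) => void;

class Emitter<E> {
  private listeners = new Set<Listener<E>>();
  // Subscribe; the returned function unsubscribes.
  on(listener: Listener<E>): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }
  emit(event: E): void {
    for (const listener of this.listeners) listener(event);
  }
}

// Mirrors how SearchService uses it later in this diff: evict a cached
// graph whenever the underlying graph changes.
const events = new Emitter<{ type: string; storeId: string }>();
const graphCache = new Map<string, unknown>();
const unsubscribe = events.on((e) => graphCache.delete(e.storeId));
events.emit({ type: "graph-updated", storeId: "store-1" }); // drops "store-1"
unsubscribe();
```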
@@ -1743,9 +1864,10 @@ var CodeGraphService = class {
    */
   async saveGraph(storeId, graph) {
     const graphPath = this.getGraphPath(storeId);
-    await
+    await mkdir2(dirname3(graphPath), { recursive: true });
     const serialized = graph.toJSON();
-    await
+    await writeFile2(graphPath, JSON.stringify(serialized, null, 2));
+    this.emitCacheInvalidation({ type: "graph-updated", storeId });
   }
   /**
    * Delete the code graph file for a store.
@@ -1755,6 +1877,7 @@ var CodeGraphService = class {
     const graphPath = this.getGraphPath(storeId);
     await rm(graphPath, { force: true });
     this.graphCache.delete(storeId);
+    this.emitCacheInvalidation({ type: "graph-deleted", storeId });
   }
   /**
    * Load a code graph for a store.
@@ -1856,7 +1979,7 @@ var CodeGraphService = class {
     this.graphCache.clear();
   }
   getGraphPath(storeId) {
-    return
+    return join4(this.dataDir, "graphs", `${storeId}.json`);
   }
   /**
    * Type guard for SerializedGraph structure.
@@ -1900,83 +2023,9 @@ var CodeGraphService = class {
 };
 
 // src/services/config.service.ts
-import { readFile as readFile2,
-import { homedir
-import {
-
-// src/services/project-root.service.ts
-import { existsSync as existsSync3, statSync, realpathSync } from "fs";
-import { dirname as dirname2, join as join4, normalize, sep } from "path";
-var ProjectRootService = class {
-  /**
-   * Resolve project root directory using hierarchical detection.
-   */
-  static resolve(options) {
-    if (options?.projectRoot !== void 0 && options.projectRoot !== "") {
-      return this.normalize(options.projectRoot);
-    }
-    const projectRootEnv = process.env["PROJECT_ROOT"];
-    if (projectRootEnv !== void 0 && projectRootEnv !== "") {
-      return this.normalize(projectRootEnv);
-    }
-    const pwdEnv = process.env["PWD"];
-    if (pwdEnv !== void 0 && pwdEnv !== "") {
-      return this.normalize(pwdEnv);
-    }
-    const gitRoot = this.findGitRoot(process.cwd());
-    if (gitRoot !== null) {
-      return gitRoot;
-    }
-    return process.cwd();
-  }
-  /**
-   * Find git repository root by walking up the directory tree looking for .git
-   */
-  static findGitRoot(startPath) {
-    let currentPath = normalize(startPath);
-    const root = normalize(sep);
-    while (currentPath !== root) {
-      const gitPath = join4(currentPath, ".git");
-      if (existsSync3(gitPath)) {
-        try {
-          const stats = statSync(gitPath);
-          if (stats.isDirectory() || stats.isFile()) {
-            return currentPath;
-          }
-        } catch {
-        }
-      }
-      const parentPath = dirname2(currentPath);
-      if (parentPath === currentPath) {
-        break;
-      }
-      currentPath = parentPath;
-    }
-    return null;
-  }
-  /**
-   * Normalize path by resolving symlinks and normalizing separators
-   */
-  static normalize(path4) {
-    try {
-      const realPath = realpathSync(path4);
-      return normalize(realPath);
-    } catch {
-      return normalize(path4);
-    }
-  }
-  /**
-   * Validate that a path exists and is a directory
-   */
-  static validate(path4) {
-    try {
-      const stats = statSync(path4);
-      return stats.isDirectory();
-    } catch {
-      return false;
-    }
-  }
-};
+import { readFile as readFile2, access } from "fs/promises";
+import { homedir } from "os";
+import { isAbsolute, join as join5, resolve } from "path";
 
 // src/types/config.ts
 var DEFAULT_CONFIG = {
@@ -1984,8 +2033,7 @@ var DEFAULT_CONFIG = {
   dataDir: ".bluera/bluera-knowledge/data",
   embedding: {
     model: "Xenova/all-MiniLM-L6-v2",
-    batchSize: 32
-    dimensions: 384
+    batchSize: 32
   },
   indexing: {
     concurrency: 4,
@@ -1995,13 +2043,7 @@ var DEFAULT_CONFIG = {
   },
   search: {
     defaultMode: "hybrid",
-    defaultLimit: 10
-    minScore: 0.5,
-    rrf: {
-      k: 40,
-      vectorWeight: 0.7,
-      ftsWeight: 0.3
-    }
+    defaultLimit: 10
   },
   crawl: {
     userAgent: "BlueraKnowledge/1.0",
@@ -2014,6 +2056,34 @@ var DEFAULT_CONFIG = {
   }
 };
 
+// src/utils/deep-merge.ts
+function isPlainObject(value) {
+  return typeof value === "object" && value !== null && !Array.isArray(value) && !(value instanceof Date);
+}
+function deepMerge(defaults, overrides) {
+  if (!isPlainObject(overrides)) {
+    return { ...defaults };
+  }
+  const defaultsRecord = defaults;
+  return deepMergeRecords(defaultsRecord, overrides);
+}
+function deepMergeRecords(defaults, overrides) {
+  const result = { ...defaults };
+  for (const key of Object.keys(overrides)) {
+    const defaultValue = defaults[key];
+    const overrideValue = overrides[key];
+    if (overrideValue === void 0) {
+      continue;
+    }
+    if (isPlainObject(defaultValue) && isPlainObject(overrideValue)) {
+      result[key] = deepMergeRecords(defaultValue, overrideValue);
+    } else {
+      result[key] = overrideValue;
+    }
+  }
+  return result;
+}
+
 // src/services/config.service.ts
 var DEFAULT_CONFIG_PATH = ".bluera/bluera-knowledge/config.json";
 async function fileExists(path4) {
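`deepMerge` exists so that a partial user config no longer wipes out sibling defaults the way a shallow spread does. Worked example, with made-up override values:

```ts
// Shapes mirror DEFAULT_CONFIG above; the override values are hypothetical.
const defaults = { search: { defaultMode: "hybrid", defaultLimit: 10 } };
const userConfig = { search: { defaultLimit: 25 } };

// A shallow spread replaces the whole nested object and loses defaultMode:
//   { search: { defaultLimit: 25 } }
const shallow = { ...defaults, ...userConfig };

// deepMerge recurses into plain objects and keeps it:
//   { search: { defaultMode: "hybrid", defaultLimit: 25 } }
const merged = deepMerge(defaults, userConfig);
```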
@@ -2027,20 +2097,27 @@ async function fileExists(path4) {
 var ConfigService = class {
   configPath;
   dataDir;
+  projectRoot;
   config = null;
   constructor(configPath, dataDir, projectRoot) {
-
+    this.projectRoot = projectRoot ?? ProjectRootService.resolve();
     if (configPath !== void 0 && configPath !== "") {
-      this.configPath = configPath;
+      this.configPath = this.expandPath(configPath, this.projectRoot);
     } else {
-      this.configPath = join5(
+      this.configPath = join5(this.projectRoot, DEFAULT_CONFIG_PATH);
     }
     if (dataDir !== void 0 && dataDir !== "") {
-      this.dataDir = dataDir;
+      this.dataDir = this.expandPath(dataDir, this.projectRoot);
     } else {
-      this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir,
+      this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir, this.projectRoot);
     }
   }
+  /**
+   * Get the resolved project root directory.
+   */
+  resolveProjectRoot() {
+    return this.projectRoot;
+  }
   async load() {
     if (this.config !== null) {
       return this.config;
@@ -2053,7 +2130,7 @@ var ConfigService = class {
     }
     const content = await readFile2(this.configPath, "utf-8");
     try {
-      this.config =
+      this.config = deepMerge(DEFAULT_CONFIG, JSON.parse(content));
     } catch (error) {
       throw new Error(
         `Failed to parse config file at ${this.configPath}: ${error instanceof Error ? error.message : String(error)}`
@@ -2062,8 +2139,7 @@ var ConfigService = class {
     return this.config;
   }
   async save(config) {
-    await
-    await writeFile2(this.configPath, JSON.stringify(config, null, 2));
+    await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
     this.config = config;
   }
   resolveDataDir() {
@@ -2074,9 +2150,9 @@ var ConfigService = class {
   }
   expandPath(path4, baseDir) {
     if (path4.startsWith("~")) {
-      return path4.replace("~",
+      return path4.replace("~", homedir());
     }
-    if (!path4
+    if (!isAbsolute(path4)) {
       return resolve(baseDir, path4);
     }
     return path4;
@@ -2182,9 +2258,9 @@ ${REQUIRED_PATTERNS.join("\n")}
 };
 
 // src/services/index.service.ts
-import { createHash as
-import { readFile as
-import { join as join7, extname, basename } from "path";
+import { createHash as createHash3 } from "crypto";
+import { readFile as readFile5, readdir } from "fs/promises";
+import { join as join7, extname, basename, relative } from "path";
 
 // src/services/chunking.service.ts
 var CHUNK_PRESETS = {
@@ -2196,6 +2272,11 @@ var ChunkingService = class _ChunkingService {
   chunkSize;
   chunkOverlap;
   constructor(config) {
+    if (config.chunkOverlap >= config.chunkSize) {
+      throw new Error(
+        `chunkOverlap (${String(config.chunkOverlap)}) must be less than chunkSize (${String(config.chunkSize)})`
+      );
+    }
     this.chunkSize = config.chunkSize;
     this.chunkOverlap = config.chunkOverlap;
   }
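The new constructor guard prevents a degenerate sliding window: each chunk advances by `chunkSize - chunkOverlap`, so an overlap at or above the chunk size means a non-positive stride that never makes progress. A sketch of the arithmetic (not the package's chunker):

```ts
// Yields [start, end) spans for a sliding window over a text of given length.
function* chunkSpans(textLength: number, chunkSize: number, chunkOverlap: number) {
  const stride = chunkSize - chunkOverlap; // must be positive to terminate
  if (stride <= 0) throw new Error("chunkOverlap must be less than chunkSize");
  for (let start = 0; start < textLength; start += stride) {
    yield [start, Math.min(start + chunkSize, textLength)] as const;
  }
}

// With the defaults used later in this diff (chunkSize 1000, overlap 150):
// [...chunkSpans(2500, 1000, 150)] -> [0,1000], [850,1850], [1700,2500]
```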
@@ -2290,7 +2371,7 @@ var ChunkingService = class _ChunkingService {
    * Splits on top-level declarations to keep functions/classes together.
    */
   chunkCode(text) {
-    const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
+    const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
     const declarations = [];
     let match;
     while ((match = declarationRegex.exec(text)) !== null) {
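The only change to the regex is the optional `(?:default\s+)?` group, which lets the splitter anchor on default exports. A quick check (regex copied from the new line):

```ts
const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;

// Previously the "default" keyword blocked the match; now it is captured.
console.log(declarationRegex.test("export default async function main() {}")); // true
```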
@@ -2465,73 +2546,236 @@ var ChunkingService = class _ChunkingService {
   }
 };
 
-// src/
-
-
-
-
-
-
-
-
-
-
+// src/services/drift.service.ts
+import { createHash as createHash2 } from "crypto";
+import { readFile as readFile4, stat } from "fs/promises";
+var DriftService = class {
+  /**
+   * Detect changes between current files and manifest.
+   *
+   * @param manifest - The stored manifest from last index
+   * @param currentFiles - Current files on disk with mtime/size
+   * @returns Classification of files into added, modified, deleted, unchanged
+   */
+  async detectChanges(manifest, currentFiles) {
+    const result = {
+      added: [],
+      modified: [],
+      deleted: [],
+      unchanged: []
+    };
+    const currentPathSet = new Set(currentFiles.map((f) => f.path));
+    const manifestPaths = new Set(Object.keys(manifest.files));
+    for (const path4 of manifestPaths) {
+      if (!currentPathSet.has(path4)) {
+        result.deleted.push(path4);
+      }
+    }
+    const potentiallyModified = [];
+    for (const file of currentFiles) {
+      const manifestState = manifest.files[file.path];
+      if (manifestState === void 0) {
+        result.added.push(file.path);
+      } else {
+        if (file.mtime === manifestState.mtime && file.size === manifestState.size) {
+          result.unchanged.push(file.path);
+        } else {
+          potentiallyModified.push(file);
+        }
+      }
+    }
+    for (const file of potentiallyModified) {
+      const manifestState = manifest.files[file.path];
+      if (manifestState === void 0) {
+        result.added.push(file.path);
+        continue;
+      }
+      const currentHash = await this.computeFileHash(file.path);
+      if (currentHash === manifestState.hash) {
+        result.unchanged.push(file.path);
+      } else {
+        result.modified.push(file.path);
+      }
+    }
+    return result;
   }
-
-
-
-
-
+  /**
+   * Get the current state of a file on disk.
+   */
+  async getFileState(path4) {
+    const stats = await stat(path4);
+    return {
+      path: path4,
+      mtime: stats.mtimeMs,
+      size: stats.size
+    };
   }
-
-
+  /**
+   * Compute MD5 hash of a file.
+   */
+  async computeFileHash(path4) {
+    const content = await readFile4(path4);
+    return createHash2("md5").update(content).digest("hex");
+  }
+  /**
+   * Create a file state entry for the manifest after indexing.
+   *
+   * @param path - File path
+   * @param documentIds - Document IDs created from this file
+   * @returns File state for manifest
+   */
+  async createFileState(path4, documentIds) {
+    const stats = await stat(path4);
+    const content = await readFile4(path4);
+    const hash = createHash2("md5").update(content).digest("hex");
+    const { createDocumentId: createDocumentId2 } = await import("./brands-3EYIYV6T.js");
+    return {
+      state: {
+        mtime: stats.mtimeMs,
+        size: stats.size,
+        hash,
+        documentIds: documentIds.map((id) => createDocumentId2(id))
+      },
+      hash
+    };
+  }
+};
 
 // src/services/index.service.ts
 var logger = createLogger("index-service");
 var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
+  // Text/docs
   ".txt",
   ".md",
+  ".rst",
+  ".adoc",
+  // JavaScript/TypeScript
   ".js",
   ".ts",
   ".jsx",
   ".tsx",
+  ".mjs",
+  ".cjs",
+  ".mts",
+  ".cts",
+  // Config/data
   ".json",
   ".yaml",
   ".yml",
+  ".toml",
+  ".ini",
+  ".env",
+  // Web
   ".html",
+  ".htm",
   ".css",
   ".scss",
+  ".sass",
   ".less",
+  ".vue",
+  ".svelte",
+  // Python
   ".py",
+  ".pyi",
+  ".pyx",
+  // Ruby
   ".rb",
+  ".erb",
+  ".rake",
+  // Go
   ".go",
+  // Rust
   ".rs",
+  // Java/JVM
   ".java",
+  ".kt",
+  ".kts",
+  ".scala",
+  ".groovy",
+  ".gradle",
+  // C/C++
   ".c",
   ".cpp",
+  ".cc",
+  ".cxx",
   ".h",
   ".hpp",
+  ".hxx",
+  // C#/.NET
+  ".cs",
+  ".fs",
+  ".vb",
+  // Swift/Objective-C
+  ".swift",
+  ".m",
+  ".mm",
+  // PHP
+  ".php",
+  // Shell
   ".sh",
   ".bash",
   ".zsh",
+  ".fish",
+  ".ps1",
+  ".psm1",
+  // SQL
   ".sql",
-
+  // Other
+  ".xml",
+  ".graphql",
+  ".gql",
+  ".proto",
+  ".lua",
+  ".r",
+  ".R",
+  ".jl",
+  ".ex",
+  ".exs",
+  ".erl",
+  ".hrl",
+  ".clj",
+  ".cljs",
+  ".cljc",
+  ".hs",
+  ".elm",
+  ".dart",
+  ".pl",
+  ".pm",
+  ".tcl",
+  ".vim",
+  ".zig",
+  ".nim",
+  ".v",
+  ".tf",
+  ".hcl",
+  ".dockerfile",
+  ".makefile",
+  ".cmake"
 ]);
 var IndexService = class {
   lanceStore;
   embeddingEngine;
   chunker;
   codeGraphService;
+  manifestService;
+  driftService;
   concurrency;
+  ignoreDirs;
+  ignoreFilePatterns;
   constructor(lanceStore, embeddingEngine, options = {}) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
     this.chunker = new ChunkingService({
-      chunkSize: options.chunkSize ??
-      chunkOverlap: options.chunkOverlap ??
+      chunkSize: options.chunkSize ?? 1e3,
+      chunkOverlap: options.chunkOverlap ?? 150
     });
     this.codeGraphService = options.codeGraphService;
+    this.manifestService = options.manifestService;
+    this.driftService = new DriftService();
     this.concurrency = options.concurrency ?? 4;
+    const parsed = parseIgnorePatternsForScanning(options.ignorePatterns ?? []);
+    this.ignoreDirs = parsed.dirs;
+    this.ignoreFilePatterns = parsed.fileMatchers;
   }
   async indexStore(store, onProgress) {
     logger.info(
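`detectChanges` is deliberately two-phase: a free mtime/size comparison screens every file, and only candidates that differ pay for a full read and MD5. A sketch of the per-file decision (types simplified from the manifest shape in this diff):

```ts
interface PreviousState { mtime: number; size: number; hash: string }
interface CurrentFile { path: string; mtime: number; size: number }

async function classify(
  file: CurrentFile,
  previous: PreviousState | undefined,
  hashFile: (path: string) => Promise<string> // e.g. MD5 of file contents
): Promise<"added" | "unchanged" | "modified"> {
  if (previous === undefined) return "added";
  if (file.mtime === previous.mtime && file.size === previous.size) {
    return "unchanged"; // fast path: no file I/O at all
  }
  // mtime or size moved: confirm with a content hash, so a touch with no
  // actual edit still counts as unchanged.
  return (await hashFile(file.path)) === previous.hash ? "unchanged" : "modified";
}
```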
@@ -2562,8 +2806,205 @@ var IndexService = class {
       return err(error instanceof Error ? error : new Error(String(error)));
     }
   }
+  /**
+   * Incrementally index a store, only processing changed files.
+   * Requires manifestService to be configured.
+   *
+   * @param store - The store to index
+   * @param onProgress - Optional progress callback
+   * @returns Result with incremental index statistics
+   */
+  async indexStoreIncremental(store, onProgress) {
+    if (this.manifestService === void 0) {
+      return err(new Error("ManifestService required for incremental indexing"));
+    }
+    if (store.type !== "file" && store.type !== "repo") {
+      return err(new Error(`Incremental indexing not supported for store type: ${store.type}`));
+    }
+    logger.info(
+      {
+        storeId: store.id,
+        storeName: store.name,
+        storeType: store.type
+      },
+      "Starting incremental store indexing"
+    );
+    const startTime = Date.now();
+    try {
+      const manifest = await this.manifestService.load(store.id);
+      const filePaths = await this.scanDirectory(store.path);
+      const currentFiles = await Promise.all(
+        filePaths.map((path4) => this.driftService.getFileState(path4))
+      );
+      const drift = await this.driftService.detectChanges(manifest, currentFiles);
+      logger.debug(
+        {
+          storeId: store.id,
+          added: drift.added.length,
+          modified: drift.modified.length,
+          deleted: drift.deleted.length,
+          unchanged: drift.unchanged.length
+        },
+        "Drift detection complete"
+      );
+      const documentIdsToDelete = [];
+      for (const path4 of [...drift.modified, ...drift.deleted]) {
+        const fileState = manifest.files[path4];
+        if (fileState !== void 0) {
+          documentIdsToDelete.push(...fileState.documentIds);
+        }
+      }
+      if (documentIdsToDelete.length > 0) {
+        await this.lanceStore.deleteDocuments(store.id, documentIdsToDelete);
+        logger.debug(
+          { storeId: store.id, count: documentIdsToDelete.length },
+          "Deleted old documents"
+        );
+      }
+      const filesToProcess = [...drift.added, ...drift.modified];
+      const totalFiles = filesToProcess.length;
+      onProgress?.({
+        type: "start",
+        current: 0,
+        total: totalFiles,
+        message: `Processing ${String(totalFiles)} changed files`
+      });
+      const documents = [];
+      const newManifestFiles = {};
+      let filesProcessed = 0;
+      for (const path4 of drift.unchanged) {
+        const existingState = manifest.files[path4];
+        if (existingState !== void 0) {
+          newManifestFiles[path4] = existingState;
+        }
+      }
+      for (let i = 0; i < filesToProcess.length; i += this.concurrency) {
+        const batch = filesToProcess.slice(i, i + this.concurrency);
+        const batchResults = await Promise.all(
+          batch.map(async (filePath) => {
+            try {
+              const result = await this.processFile(filePath, store);
+              const documentIds = result.documents.map((d) => d.id);
+              const { state } = await this.driftService.createFileState(filePath, documentIds);
+              return {
+                filePath,
+                documents: result.documents,
+                fileState: state
+              };
+            } catch (error) {
+              logger.warn(
+                { filePath, error: error instanceof Error ? error.message : String(error) },
+                "Failed to process file during incremental indexing, skipping"
+              );
+              return null;
+            }
+          })
+        );
+        for (const result of batchResults) {
+          if (result !== null) {
+            documents.push(...result.documents);
+            newManifestFiles[result.filePath] = result.fileState;
+          }
+        }
+        filesProcessed += batch.length;
+        onProgress?.({
+          type: "progress",
+          current: filesProcessed,
+          total: totalFiles,
+          message: `Processed ${String(filesProcessed)}/${String(totalFiles)} files`
+        });
+      }
+      if (documents.length > 0) {
+        await this.lanceStore.addDocuments(store.id, documents);
+      }
+      if (documentIdsToDelete.length > 0 || documents.length > 0) {
+        await this.lanceStore.createFtsIndex(store.id);
+      }
+      if (this.codeGraphService) {
+        const sourceExtensions = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"];
+        const hasSourceChanges = filesToProcess.some((p) => sourceExtensions.includes(extname(p).toLowerCase())) || drift.deleted.some((p) => sourceExtensions.includes(extname(p).toLowerCase()));
+        if (hasSourceChanges) {
+          const allSourceFiles = [];
+          const allPaths = [...drift.unchanged, ...filesToProcess];
+          for (const filePath of allPaths) {
+            const ext = extname(filePath).toLowerCase();
+            if (sourceExtensions.includes(ext)) {
+              try {
+                const content = await readFile5(filePath, "utf-8");
+                allSourceFiles.push({ path: filePath, content });
+              } catch {
+              }
+            }
+          }
+          if (allSourceFiles.length > 0) {
+            const graph = await this.codeGraphService.buildGraph(allSourceFiles);
+            await this.codeGraphService.saveGraph(store.id, graph);
+            logger.debug(
+              { storeId: store.id, sourceFiles: allSourceFiles.length },
+              "Rebuilt code graph during incremental indexing"
+            );
+          } else {
+            await this.codeGraphService.deleteGraph(store.id);
+            logger.debug(
+              { storeId: store.id },
+              "Deleted stale code graph (no source files remain)"
+            );
+          }
+        }
+      }
+      const updatedManifest = {
+        version: 1,
+        storeId: store.id,
+        indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
+        files: newManifestFiles
+      };
+      await this.manifestService.save(updatedManifest);
+      onProgress?.({
+        type: "complete",
+        current: totalFiles,
+        total: totalFiles,
+        message: "Incremental indexing complete"
+      });
+      const timeMs = Date.now() - startTime;
+      logger.info(
+        {
+          storeId: store.id,
+          storeName: store.name,
+          filesAdded: drift.added.length,
+          filesModified: drift.modified.length,
+          filesDeleted: drift.deleted.length,
+          filesUnchanged: drift.unchanged.length,
+          chunksCreated: documents.length,
+          timeMs
+        },
+        "Incremental indexing complete"
+      );
+      return ok({
+        filesIndexed: filesToProcess.length,
+        chunksCreated: documents.length,
+        timeMs,
+        filesAdded: drift.added.length,
+        filesModified: drift.modified.length,
+        filesDeleted: drift.deleted.length,
+        filesUnchanged: drift.unchanged.length
+      });
+    } catch (error) {
+      logger.error(
+        {
+          storeId: store.id,
+          error: error instanceof Error ? error.message : String(error)
+        },
+        "Incremental indexing failed"
+      );
+      return err(error instanceof Error ? error : new Error(String(error)));
+    }
+  }
   async indexFileStore(store, onProgress) {
     const startTime = Date.now();
+    await this.lanceStore.clearAllDocuments(store.id);
+    if (this.manifestService) {
+      await this.manifestService.delete(store.id);
+    }
     const files = await this.scanDirectory(store.path);
     const documents = [];
     let filesProcessed = 0;
@@ -2586,7 +3027,17 @@ var IndexService = class {
     for (let i = 0; i < files.length; i += this.concurrency) {
       const batch = files.slice(i, i + this.concurrency);
       const batchResults = await Promise.all(
-        batch.map((filePath) =>
+        batch.map(async (filePath) => {
+          try {
+            return await this.processFile(filePath, store);
+          } catch (error) {
+            logger.warn(
+              { filePath, error: error instanceof Error ? error.message : String(error) },
+              "Failed to process file, skipping"
+            );
+            return { documents: [], sourceFile: void 0 };
+          }
+        })
       );
       for (const result of batchResults) {
         documents.push(...result.documents);
@@ -2609,6 +3060,8 @@ var IndexService = class {
     if (this.codeGraphService && sourceFiles.length > 0) {
       const graph = await this.codeGraphService.buildGraph(sourceFiles);
       await this.codeGraphService.saveGraph(store.id, graph);
+    } else if (this.codeGraphService) {
+      await this.codeGraphService.deleteGraph(store.id);
     }
     onProgress?.({
       type: "complete",
@@ -2621,7 +3074,7 @@ var IndexService = class {
       {
         storeId: store.id,
        storeName: store.name,
-
+        filesIndexed: filesProcessed,
        chunksCreated: documents.length,
        sourceFilesForGraph: sourceFiles.length,
        timeMs
@@ -2629,7 +3082,7 @@ var IndexService = class {
       "Store indexing complete"
     );
     return ok({
-
+      filesIndexed: filesProcessed,
       chunksCreated: documents.length,
       timeMs
     });
@@ -2639,13 +3092,15 @@ var IndexService = class {
    * Extracted for parallel processing.
    */
   async processFile(filePath, store) {
-    const content = await
-    const fileHash =
+    const content = await readFile5(filePath, "utf-8");
+    const fileHash = createHash3("md5").update(content).digest("hex");
     const chunks = this.chunker.chunk(content, filePath);
+    const relativePath = relative(store.path, filePath);
+    const pathHash = createHash3("md5").update(relativePath).digest("hex").slice(0, 8);
     const ext = extname(filePath).toLowerCase();
     const fileName = basename(filePath).toLowerCase();
     const fileType = this.classifyFileType(ext, fileName, filePath);
-    const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
+    const sourceFile = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"].includes(ext) ? { path: filePath, content } : void 0;
     if (chunks.length === 0) {
       return { documents: [], sourceFile };
     }
@@ -2660,7 +3115,7 @@ var IndexService = class {
         `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
       );
     }
-    const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
+    const chunkId = chunks.length > 1 ? `${store.id}-${pathHash}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${pathHash}-${fileHash}`;
     documents.push({
       id: createDocumentId(chunkId),
       content: chunk.content,
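The id change guards against a real collision: two files with byte-identical content share a `fileHash`, so ids built from `storeId` and `fileHash` alone clash across paths. Folding in eight hex chars of a relative-path hash keeps each file's chunks distinct:

```ts
import { createHash } from "crypto";

const md5 = (s: string) => createHash("md5").update(s).digest("hex");

// Two copies of the same file under different paths:
const content = "export const x = 1;\n";
const fileHash = md5(content);
const idFor = (relPath: string) => `store-1-${md5(relPath).slice(0, 8)}-${fileHash}`;

console.log(idFor("src/a.ts") === idFor("lib/a.ts")); // false: paths disambiguate
```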
@@ -2669,7 +3124,7 @@ var IndexService = class {
       type: chunks.length > 1 ? "chunk" : "file",
       storeId: store.id,
       path: filePath,
-      indexedAt: /* @__PURE__ */ new Date(),
+      indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
       fileHash,
       chunkIndex: chunk.chunkIndex,
       totalChunks: chunk.totalChunks,
@@ -2689,10 +3144,14 @@ var IndexService = class {
     for (const entry of entries) {
       const fullPath = join7(dir, entry.name);
       if (entry.isDirectory()) {
-        if (!
+        if (!this.ignoreDirs.has(entry.name)) {
           files.push(...await this.scanDirectory(fullPath));
         }
       } else if (entry.isFile()) {
+        const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(entry.name));
+        if (shouldIgnore) {
+          continue;
+        }
         const ext = extname(entry.name).toLowerCase();
         if (TEXT_EXTENSIONS.has(ext)) {
           files.push(fullPath);
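Filtering happens at two levels during the scan: directory names in `ignoreDirs` are pruned before recursion (so an ignored tree is never walked), and file names run through the `ignoreFilePatterns` predicates. A sketch with hypothetical patterns, assuming `parseIgnorePatternsForScanning` splits its input into exactly these two buckets:

```ts
const ignoreDirs = new Set(["node_modules", ".git", "dist"]);
const ignoreFilePatterns: Array<(name: string) => boolean> = [
  (name) => name.endsWith(".min.js"),
  (name) => name === ".DS_Store",
];

const shouldVisitDir = (name: string) => !ignoreDirs.has(name);
const shouldIndexFile = (name: string) =>
  !ignoreFilePatterns.some((matches) => matches(name));

console.log(shouldVisitDir("node_modules"));   // false: subtree skipped entirely
console.log(shouldIndexFile("bundle.min.js")); // false: file skipped by matcher
```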
@@ -2782,6 +3241,141 @@ function classifyWebContentType(url, title) {
   return "documentation";
 }
 
+// src/services/manifest.service.ts
+import { readFile as readFile6, access as access3, mkdir as mkdir3 } from "fs/promises";
+import { join as join8 } from "path";
+
+// src/types/manifest.ts
+import { z as z2 } from "zod";
+var FileStateSchema = z2.object({
+  /** File modification time in milliseconds since epoch */
+  mtime: z2.number(),
+  /** File size in bytes */
+  size: z2.number(),
+  /** MD5 hash of file content */
+  hash: z2.string(),
+  /** Document IDs created from this file (for cleanup) */
+  documentIds: z2.array(z2.string())
+});
+var StoreManifestSchema = z2.object({
+  /** Schema version for future migrations */
+  version: z2.literal(1),
+  /** Store ID this manifest belongs to */
+  storeId: z2.string(),
+  /** When the manifest was last updated */
+  indexedAt: z2.string(),
+  /** Map of file paths to their state */
+  files: z2.record(z2.string(), FileStateSchema)
+});
+function createEmptyManifest(storeId) {
+  return {
+    version: 1,
+    storeId,
+    indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
+    files: {}
+  };
+}
+
+// src/services/manifest.service.ts
+var ManifestService = class {
+  manifestsDir;
+  constructor(dataDir) {
+    this.manifestsDir = join8(dataDir, "manifests");
+  }
+  /**
+   * Initialize the manifests directory.
+   */
+  async initialize() {
+    await mkdir3(this.manifestsDir, { recursive: true });
+  }
+  /**
+   * Get the file path for a store's manifest.
+   */
+  getManifestPath(storeId) {
+    return join8(this.manifestsDir, `${storeId}.manifest.json`);
+  }
+  /**
+   * Load a store's manifest.
+   * Returns an empty manifest if one doesn't exist.
+   * Throws on parse/validation errors (fail fast).
+   */
+  async load(storeId) {
+    const manifestPath = this.getManifestPath(storeId);
+    const exists = await this.fileExists(manifestPath);
+    if (!exists) {
+      return createEmptyManifest(storeId);
+    }
+    const content = await readFile6(manifestPath, "utf-8");
+    let parsed;
+    try {
+      parsed = JSON.parse(content);
+    } catch (error) {
+      throw new Error(
+        `Failed to parse manifest at ${manifestPath}: ${error instanceof Error ? error.message : String(error)}`
+      );
+    }
+    const result = StoreManifestSchema.safeParse(parsed);
+    if (!result.success) {
+      throw new Error(`Invalid manifest at ${manifestPath}: ${result.error.message}`);
+    }
+    return this.toTypedManifest(result.data, storeId);
+  }
+  /**
+   * Save a store's manifest atomically.
+   */
+  async save(manifest) {
+    const manifestPath = this.getManifestPath(manifest.storeId);
+    const toSave = {
+      ...manifest,
+      indexedAt: (/* @__PURE__ */ new Date()).toISOString()
+    };
+    await atomicWriteFile(manifestPath, JSON.stringify(toSave, null, 2));
+  }
+  /**
+   * Delete a store's manifest.
+   * Called when a store is deleted or during full re-index.
+   */
+  async delete(storeId) {
+    const manifestPath = this.getManifestPath(storeId);
+    const { unlink } = await import("fs/promises");
+    const exists = await this.fileExists(manifestPath);
+    if (exists) {
+      await unlink(manifestPath);
+    }
+  }
+  /**
+   * Check if a file exists.
+   */
+  async fileExists(path4) {
+    try {
+      await access3(path4);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+  /**
+   * Convert a parsed manifest to a typed manifest with branded types.
+   */
+  toTypedManifest(data, storeId) {
+    const files = {};
+    for (const [path4, state] of Object.entries(data.files)) {
+      files[path4] = {
+        mtime: state.mtime,
+        size: state.size,
+        hash: state.hash,
+        documentIds: state.documentIds.map((id) => createDocumentId(id))
+      };
+    }
+    return {
+      version: 1,
+      storeId,
+      indexedAt: data.indexedAt,
+      files
+    };
+  }
+};
+
 // src/services/code-unit.service.ts
 var CodeUnitService = class {
   extractCodeUnit(code, symbolName, language) {
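`load` has asymmetric failure modes: a missing manifest is normal (first index) and yields an empty one, while a corrupt or schema-invalid manifest throws instead of being silently rebuilt. Hypothetical usage, treating store ids as plain strings:

```ts
const manifests = new ManifestService("/tmp/bluera-data"); // path is illustrative
await manifests.initialize();

const manifest = await manifests.load("store-1"); // empty manifest on first run
console.log(Object.keys(manifest.files).length);  // 0 before any indexing
// A hand-corrupted store-1.manifest.json would make load() throw here.
```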
@@ -2966,6 +3560,8 @@ var INTENT_FILE_BOOSTS = {
     // Stronger penalty - internal code less useful
     test: 0.8,
     config: 0.7,
+    changelog: 0.6,
+    // Changelogs rarely answer "how to" questions
     other: 0.9
   },
   implementation: {
@@ -2978,6 +3574,8 @@ var INTENT_FILE_BOOSTS = {
     // Internal code can be relevant
     test: 1,
     config: 0.95,
+    changelog: 0.8,
+    // Might reference implementation changes
     other: 1
   },
   conceptual: {
@@ -2988,6 +3586,8 @@ var INTENT_FILE_BOOSTS = {
     "source-internal": 0.9,
     test: 0.9,
     config: 0.85,
+    changelog: 0.7,
+    // Sometimes explains concepts behind changes
     other: 0.95
   },
   comparison: {
@@ -2998,6 +3598,8 @@ var INTENT_FILE_BOOSTS = {
     "source-internal": 0.85,
     test: 0.9,
     config: 0.85,
+    changelog: 0.9,
+    // Version comparisons can be useful
     other: 0.95
   },
   debugging: {
@@ -3010,6 +3612,8 @@ var INTENT_FILE_BOOSTS = {
     test: 1.05,
     // Tests can show expected behavior
     config: 0.9,
+    changelog: 1.1,
+    // Often contains bug fixes and known issues
     other: 1
   }
 };
@@ -3092,6 +3696,17 @@ function classifyQueryIntents(query) {
 function getPrimaryIntent(intents) {
   return intents[0]?.intent ?? "how-to";
 }
+function mapSearchIntentToQueryIntent(intent) {
+  switch (intent) {
+    case "find-pattern":
+    case "find-implementation":
+    case "find-definition":
+      return "implementation";
+    case "find-usage":
+    case "find-documentation":
+      return "how-to";
+  }
+}
 var RRF_PRESETS = {
   code: { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 },
   web: { k: 30, vectorWeight: 0.55, ftsWeight: 0.45 }
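The `RRF_PRESETS` in the context above carry the parameters of weighted reciprocal rank fusion. The usual formulation, which these `k`/weight fields presumably feed (the combination step itself is not shown in this diff), scores a document as `vectorWeight / (k + rank_vec) + ftsWeight / (k + rank_fts)`:

```ts
// Assumed weighted-RRF combination; a sketch, not the package's implementation.
function rrfScores(
  vectorRanked: string[], // doc ids, best first
  ftsRanked: string[],
  { k, vectorWeight, ftsWeight }: { k: number; vectorWeight: number; ftsWeight: number }
): Map<string, number> {
  const scores = new Map<string, number>();
  vectorRanked.forEach((id, i) => {
    scores.set(id, (scores.get(id) ?? 0) + vectorWeight / (k + i + 1));
  });
  ftsRanked.forEach((id, i) => {
    scores.set(id, (scores.get(id) ?? 0) + ftsWeight / (k + i + 1));
  });
  return scores; // sort descending for the fused ranking
}
```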
@@ -3106,12 +3721,27 @@ var SearchService = class {
   codeUnitService;
   codeGraphService;
   graphCache;
-
+  searchConfig;
+  unsubscribeCacheInvalidation;
+  constructor(lanceStore, embeddingEngine, codeGraphService, searchConfig) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
     this.codeUnitService = new CodeUnitService();
     this.codeGraphService = codeGraphService;
     this.graphCache = /* @__PURE__ */ new Map();
+    this.searchConfig = searchConfig;
+    if (codeGraphService) {
+      this.unsubscribeCacheInvalidation = codeGraphService.onCacheInvalidation((event) => {
+        this.graphCache.delete(event.storeId);
+      });
+    }
+  }
+  /**
+   * Clean up resources (unsubscribe from events).
+   * Call this when destroying the service.
+   */
+  cleanup() {
+    this.unsubscribeCacheInvalidation?.();
   }
   /**
    * Load code graph for a store, with caching.
@@ -3139,12 +3769,12 @@ var SearchService = class {
   }
   async search(query) {
     const startTime = Date.now();
-    const mode = query.mode ?? "hybrid";
-    const limit = query.limit ?? 10;
+    const mode = query.mode ?? this.searchConfig?.defaultMode ?? "hybrid";
+    const limit = query.limit ?? this.searchConfig?.defaultLimit ?? 10;
     const stores = query.stores ?? [];
     const detail = query.detail ?? "minimal";
     const intents = classifyQueryIntents(query.query);
-    const primaryIntent = getPrimaryIntent(intents);
+    const primaryIntent = query.intent !== void 0 ? mapSearchIntentToQueryIntent(query.intent) : getPrimaryIntent(intents);
     logger2.debug(
       {
         query: query.query,
@@ -3153,7 +3783,8 @@ var SearchService = class {
         stores,
         detail,
         intent: primaryIntent,
-
+        userIntent: query.intent,
+        autoClassifiedIntents: intents,
         minRelevance: query.minRelevance
       },
       "Search query received"
@@ -3164,7 +3795,7 @@ var SearchService = class {
     if (mode === "vector") {
       const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
       maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
-      allResults =
+      allResults = this.normalizeAndFilterScores(rawResults, query.threshold).slice(0, fetchLimit);
     } else if (mode === "fts") {
       allResults = await this.ftsSearch(query.query, stores, fetchLimit);
     } else {
@@ -3177,28 +3808,35 @@ var SearchService = class {
       allResults = hybridResult.results;
       maxRawScore = hybridResult.maxRawScore;
     }
-    if (query.minRelevance !== void 0
-
-
-
+    if (query.minRelevance !== void 0) {
+      if (mode === "fts") {
+        logger2.warn(
+          { query: query.query, minRelevance: query.minRelevance },
+          "minRelevance filter ignored in FTS mode (no vector scores available)"
+        );
+      } else if (maxRawScore < query.minRelevance) {
+        const timeMs2 = Date.now() - startTime;
+        logger2.info(
+          {
+            query: query.query,
+            mode,
+            maxRawScore,
+            minRelevance: query.minRelevance,
+            timeMs: timeMs2
+          },
+          "Search filtered by minRelevance - no sufficiently relevant results"
+        );
+        return {
           query: query.query,
           mode,
-
-
-
-
-
-
-
-
-          mode,
-          stores,
-          results: [],
-          totalResults: 0,
-          timeMs: timeMs2,
-          confidence: this.calculateConfidence(maxRawScore),
-          maxRawScore
-        };
+          stores,
+          results: [],
+          totalResults: 0,
+          timeMs: timeMs2,
+          confidence: this.calculateConfidence(maxRawScore),
+          maxRawScore
+        };
+      }
     }
     const dedupedResults = this.deduplicateBySource(allResults, query.query);
     const resultsToEnhance = dedupedResults.slice(0, limit);
@@ -3247,7 +3885,9 @@ var SearchService = class {
     const bySource = /* @__PURE__ */ new Map();
     const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
     for (const result of results) {
-      const
+      const storeId = result.metadata.storeId;
+      const source = result.metadata.path ?? result.metadata.url ?? result.id;
+      const sourceKey = `${storeId}:${source}`;
       const existing = bySource.get(sourceKey);
       if (!existing) {
         bySource.set(sourceKey, result);
@@ -3318,11 +3958,6 @@ var SearchService = class {
     }
     return results.sort((a, b) => b.score - a.score).slice(0, limit);
   }
-  async vectorSearch(query, stores, limit, threshold) {
-    const results = await this.vectorSearchRaw(query, stores, limit);
-    const normalized = this.normalizeAndFilterScores(results, threshold);
-    return normalized.slice(0, limit);
-  }
   async ftsSearch(query, stores, limit) {
     const results = [];
     for (const storeId of stores) {
@@ -3482,6 +4117,9 @@ var SearchService = class {
 case "config":
 baseBoost = 0.5;
 break;
+case "changelog":
+baseBoost = 0.7;
+break;
 default:
 baseBoost = 1;
 }
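The boost table gains a `changelog` content type at 0.7, sitting between `config` (0.5) and the default (1): changelog chunks are down-weighted relative to ordinary documents but rank above config files. The switch above is equivalent to a small lookup:

```ts
// Base boosts by content type after this change (mirrors the switch above).
const baseBoosts: Record<string, number> = {
  config: 0.5,
  changelog: 0.7, // new in 0.18.0
};
const boostFor = (type: string): number => baseBoosts[type] ?? 1;
```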
@@ -3887,42 +4525,53 @@ var SearchService = class {
 };

 // src/services/store-definition.service.ts
-import { readFile as
-import {
+import { readFile as readFile7, access as access4 } from "fs/promises";
+import { resolve as resolve2, isAbsolute as isAbsolute2, join as join9 } from "path";

 // src/types/store-definition.ts
-import { z as
-var BaseStoreDefinitionSchema =
-name:
-description:
-tags:
+import { z as z3 } from "zod";
+var BaseStoreDefinitionSchema = z3.object({
+name: z3.string().min(1, "Store name is required"),
+description: z3.string().optional(),
+tags: z3.array(z3.string()).optional()
 });
 var FileStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
-type:
-path:
+type: z3.literal("file"),
+path: z3.string().min(1, "Path is required for file stores")
 });
+var GitUrlSchema = z3.string().refine(
+(val) => {
+try {
+new URL(val);
+return true;
+} catch {
+return /^git@[\w.-]+:[\w./-]+$/.test(val);
+}
+},
+{ message: "Must be a valid URL or SSH URL (git@host:path)" }
+);
 var RepoStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
-type:
-url:
-branch:
-depth:
+type: z3.literal("repo"),
+url: GitUrlSchema,
+branch: z3.string().optional(),
+depth: z3.number().int().positive("Depth must be a positive integer").optional()
 });
 var WebStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
-type:
-url:
-depth:
-maxPages:
-crawlInstructions:
-extractInstructions:
+type: z3.literal("web"),
+url: z3.url("Valid URL is required for web stores"),
+depth: z3.number().int().min(0, "Depth must be non-negative").default(1),
+maxPages: z3.number().int().positive("maxPages must be a positive integer").optional(),
+crawlInstructions: z3.string().optional(),
+extractInstructions: z3.string().optional()
 });
-var StoreDefinitionSchema =
+var StoreDefinitionSchema = z3.discriminatedUnion("type", [
 FileStoreDefinitionSchema,
 RepoStoreDefinitionSchema,
 WebStoreDefinitionSchema
 ]);
-var StoreDefinitionsConfigSchema =
-version:
-stores:
+var StoreDefinitionsConfigSchema = z3.object({
+version: z3.literal(1),
+stores: z3.array(StoreDefinitionSchema)
 });
 function isFileStoreDefinition(def) {
 return def.type === "file";
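The new `GitUrlSchema` accepts anything `new URL()` can parse plus scp-style SSH remotes, so repo definitions validate against both forms. A quick standalone check with zod's `safeParse`, reusing the exact refinement from the chunk (the `z.url()` call used for web stores above suggests zod v4):

```ts
import { z } from "zod";

// Same shape as the GitUrlSchema added in this chunk.
const GitUrlSchema = z.string().refine(
  (val) => {
    try {
      new URL(val);
      return true;
    } catch {
      return /^git@[\w.-]+:[\w./-]+$/.test(val);
    }
  },
  { message: "Must be a valid URL or SSH URL (git@host:path)" }
);

console.log(GitUrlSchema.safeParse("https://github.com/org/repo.git").success); // true
console.log(GitUrlSchema.safeParse("git@github.com:org/repo.git").success);     // true
console.log(GitUrlSchema.safeParse("not a remote").success);                    // false
```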
@@ -3941,7 +4590,7 @@ var DEFAULT_STORE_DEFINITIONS_CONFIG = {
 // src/services/store-definition.service.ts
 async function fileExists3(path4) {
 try {
-await
+await access4(path4);
 return true;
 } catch {
 return false;
@@ -3953,7 +4602,7 @@ var StoreDefinitionService = class {
 config = null;
 constructor(projectRoot) {
 this.projectRoot = projectRoot ?? ProjectRootService.resolve();
-this.configPath =
+this.configPath = join9(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
 }
 /**
 * Load store definitions from config file.
@@ -3972,7 +4621,7 @@ var StoreDefinitionService = class {
 };
 return this.config;
 }
-const content = await
+const content = await readFile7(this.configPath, "utf-8");
 let parsed;
 try {
 parsed = JSON.parse(content);
@@ -3992,8 +4641,7 @@ var StoreDefinitionService = class {
 * Save store definitions to config file.
 */
 async save(config) {
-await
-await writeFile4(this.configPath, JSON.stringify(config, null, 2));
+await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
 this.config = config;
 }
 /**
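`save()` now goes through `atomicWriteFile` instead of a plain `writeFile`, which protects stores.config.json against truncation if the process dies mid-write. The helper's implementation is not part of this chunk; a typical write-temp-then-rename sketch of what such a helper usually does:

```ts
import { writeFile, rename } from "fs/promises";

// Hypothetical sketch only; the real atomicWriteFile in this package may
// differ (its source is not shown in this diff). Temp naming is an assumption.
async function atomicWriteFile(path: string, data: string): Promise<void> {
  const tmp = `${path}.${process.pid}.tmp`;
  await writeFile(tmp, data, "utf-8");
  await rename(tmp, path); // rename is atomic on POSIX within one filesystem
}
```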
@@ -4065,7 +4713,7 @@ var StoreDefinitionService = class {
 * Resolve a file store path relative to project root.
 */
 resolvePath(path4) {
-if (
+if (isAbsolute2(path4)) {
 return path4;
 }
 return resolve2(this.projectRoot, path4);
@@ -4092,8 +4740,8 @@ var StoreDefinitionService = class {

 // src/services/store.service.ts
 import { randomUUID as randomUUID2 } from "crypto";
-import { readFile as
-import { join as
+import { readFile as readFile8, mkdir as mkdir5, stat as stat2, access as access5 } from "fs/promises";
+import { join as join10, resolve as resolve3 } from "path";

 // src/plugin/git-clone.ts
 import { spawn } from "child_process";
@@ -4124,6 +4772,9 @@ async function cloneRepository(options) {
 });
 });
 }
+function isGitUrl(source) {
+return source.startsWith("http://") || source.startsWith("https://") || source.startsWith("git@");
+}
 function extractRepoName(url) {
 const match = /\/([^/]+?)(\.git)?$/.exec(url);
 const name = match?.[1];
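`isGitUrl` (now also exported; see the export list at the end of this chunk) is a cheap prefix test rather than full validation, and pairs with `extractRepoName` when deriving clone directories:

```ts
// Expected results, given the two functions defined above.
isGitUrl("https://github.com/org/repo.git"); // true
isGitUrl("git@github.com:org/repo.git");     // true
isGitUrl("./vendor/repo");                   // false -- treated as a local path
extractRepoName("https://github.com/org/repo.git"); // "repo"
```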
@@ -4136,7 +4787,7 @@ function extractRepoName(url) {
 // src/services/store.service.ts
 async function fileExists4(path4) {
 try {
-await
+await access5(path4);
 return true;
 } catch {
 return false;
@@ -4146,11 +4797,13 @@ var StoreService = class {
 dataDir;
 definitionService;
 gitignoreService;
+projectRoot;
 registry = { stores: [] };
 constructor(dataDir, options) {
 this.dataDir = dataDir;
 this.definitionService = options?.definitionService ?? void 0;
 this.gitignoreService = options?.gitignoreService ?? void 0;
+this.projectRoot = options?.projectRoot ?? void 0;
 }
 async initialize() {
 await mkdir5(this.dataDir, { recursive: true });
@@ -4158,6 +4811,7 @@ var StoreService = class {
 }
 /**
 * Convert a Store and CreateStoreInput to a StoreDefinition for persistence.
+* Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
 */
 createDefinitionFromStore(store, input) {
 const tags = store.tags !== void 0 ? [...store.tags] : void 0;
@@ -4179,10 +4833,13 @@ var StoreService = class {
 }
 case "repo": {
 const repoStore = store;
+if (repoStore.url === void 0) {
+return void 0;
+}
 const repoDef = {
 ...base,
 type: "repo",
-url: repoStore.url
+url: repoStore.url,
 branch: repoStore.branch,
 depth: input.depth
 };
@@ -4194,7 +4851,58 @@ var StoreService = class {
 ...base,
 type: "web",
 url: webStore.url,
-depth: webStore.depth
+depth: webStore.depth,
+maxPages: input.maxPages,
+crawlInstructions: input.crawlInstructions,
+extractInstructions: input.extractInstructions
+};
+return webDef;
+}
+}
+}
+/**
+* Create a StoreDefinition from an existing store (without original input).
+* Used when updating/renaming stores where we don't have the original input.
+* Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
+*/
+createDefinitionFromExistingStore(store) {
+const tags = store.tags !== void 0 ? [...store.tags] : void 0;
+const base = {
+name: store.name,
+description: store.description,
+tags
+};
+switch (store.type) {
+case "file": {
+const fileDef = {
+...base,
+type: "file",
+path: store.path
+};
+return fileDef;
+}
+case "repo": {
+if (store.url === void 0) {
+return void 0;
+}
+const repoDef = {
+...base,
+type: "repo",
+url: store.url,
+branch: store.branch,
+depth: store.depth
+};
+return repoDef;
+}
+case "web": {
+const webDef = {
+...base,
+type: "web",
+url: store.url,
+depth: store.depth,
+maxPages: store.maxPages,
+crawlInstructions: store.crawlInstructions,
+extractInstructions: store.extractInstructions
 };
 return webDef;
 }
@@ -4216,9 +4924,9 @@ var StoreService = class {
 if (input.path === void 0) {
 return err(new Error("Path is required for file stores"));
 }
-const normalizedPath = resolve3(input.path);
+const normalizedPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, input.path) : resolve3(input.path);
 try {
-const stats = await
+const stats = await stat2(normalizedPath);
 if (!stats.isDirectory()) {
 return err(new Error(`Path is not a directory: ${normalizedPath}`));
 }
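Relative file-store paths are now resolved against the injected `projectRoot` rather than the process working directory, so store creation behaves the same regardless of where the server was launched. The difference in one line (paths illustrative):

```ts
import { resolve } from "path";

// Before: dependent on wherever the process happened to start.
resolve("docs/guides");                        // e.g. /tmp/docs/guides if launched from /tmp
// After: anchored to the detected project root.
resolve("/home/me/my-project", "docs/guides"); // /home/me/my-project/docs/guides
```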
@@ -4241,7 +4949,7 @@ var StoreService = class {
 case "repo": {
 let repoPath = input.path;
 if (input.url !== void 0) {
-const cloneDir =
+const cloneDir = join10(this.dataDir, "repos", id);
 const result = await cloneRepository({
 url: input.url,
 targetDir: cloneDir,
@@ -4256,7 +4964,17 @@ var StoreService = class {
 if (repoPath === void 0) {
 return err(new Error("Path or URL required for repo stores"));
 }
-const normalizedRepoPath = resolve3(repoPath);
+const normalizedRepoPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, repoPath) : resolve3(repoPath);
+if (input.url === void 0) {
+try {
+const stats = await stat2(normalizedRepoPath);
+if (!stats.isDirectory()) {
+return err(new Error(`Path is not a directory: ${normalizedRepoPath}`));
+}
+} catch {
+return err(new Error(`Repository path does not exist: ${normalizedRepoPath}`));
+}
+}
 store = {
 type: "repo",
 id,
@@ -4264,6 +4982,7 @@ var StoreService = class {
 path: normalizedRepoPath,
 url: input.url,
 branch: input.branch,
+depth: input.depth ?? 1,
 description: input.description,
 tags: input.tags,
 status: "ready",
@@ -4282,6 +5001,9 @@ var StoreService = class {
 name: input.name,
 url: input.url,
 depth: input.depth ?? 1,
+maxPages: input.maxPages,
+crawlInstructions: input.crawlInstructions,
+extractInstructions: input.extractInstructions,
 description: input.description,
 tags: input.tags,
 status: "ready",
@@ -4301,7 +5023,9 @@ var StoreService = class {
 }
 if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
 const definition = this.createDefinitionFromStore(store, input);
-
+if (definition !== void 0) {
+await this.definitionService.addDefinition(definition);
+}
 }
 return ok(store);
 }
@@ -4331,6 +5055,16 @@ var StoreService = class {
 if (store === void 0) {
 return err(new Error(`Store not found: ${id}`));
 }
+if (updates.name?.trim() === "") {
+return err(new Error("Store name cannot be empty"));
+}
+const isRenaming = updates.name !== void 0 && updates.name !== store.name;
+if (isRenaming) {
+const existing = this.registry.stores.find((s) => s.name === updates.name && s.id !== id);
+if (existing !== void 0) {
+return err(new Error(`Store with name '${updates.name}' already exists`));
+}
+}
 const updated = {
 ...store,
 ...updates,
@@ -4339,14 +5073,24 @@ var StoreService = class {
 this.registry.stores[index] = updated;
 await this.saveRegistry();
 if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
-
-
-
-
-
-
+if (isRenaming) {
+await this.definitionService.removeDefinition(store.name);
+const newDefinition = this.createDefinitionFromExistingStore(updated);
+if (newDefinition !== void 0) {
+await this.definitionService.addDefinition(newDefinition);
+}
+} else {
+const defUpdates = {};
+if (updates.description !== void 0) {
+defUpdates.description = updates.description;
+}
+if (updates.tags !== void 0) {
+defUpdates.tags = [...updates.tags];
+}
+if (Object.keys(defUpdates).length > 0) {
+await this.definitionService.updateDefinition(store.name, defUpdates);
+}
 }
-await this.definitionService.updateDefinition(store.name, defUpdates);
 }
 return ok(updated);
 }
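Because definitions in stores.config.json are keyed by name, a rename cannot be expressed as an in-place update: the old entry is removed and a fresh definition is written via `createDefinitionFromExistingStore`, while non-rename edits still flow through `updateDefinition`. A self-contained model of the decision (types simplified):

```ts
// Minimal model of the rename-vs-update sync branch added above.
type Updates = { name?: string; description?: string; tags?: string[] };

function syncPlan(currentName: string, updates: Updates): "rename" | "update" | "noop" {
  if (updates.name !== undefined && updates.name !== currentName) return "rename"; // remove + add
  if (updates.description !== undefined || updates.tags !== undefined) return "update";
  return "noop";
}

syncPlan("docs", { name: "documentation" }); // "rename"
syncPlan("docs", { tags: ["api"] });         // "update"
```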
@@ -4368,14 +5112,14 @@ var StoreService = class {
 return ok(void 0);
 }
 async loadRegistry() {
-const registryPath =
+const registryPath = join10(this.dataDir, "stores.json");
 const exists = await fileExists4(registryPath);
 if (!exists) {
 this.registry = { stores: [] };
 await this.saveRegistry();
 return;
 }
-const content = await
+const content = await readFile8(registryPath, "utf-8");
 try {
 const data = JSON.parse(content);
 this.registry = {
@@ -4393,8 +5137,8 @@ var StoreService = class {
 }
 }
 async saveRegistry() {
-const registryPath =
-await
+const registryPath = join10(this.dataDir, "stores.json");
+await atomicWriteFile(registryPath, JSON.stringify(this.registry, null, 2));
 }
 };

@@ -4408,33 +5152,33 @@ import { fileURLToPath } from "url";
 import { ZodError } from "zod";

 // src/crawl/schemas.ts
-import { z as
-var CrawledLinkSchema =
-href:
-text:
-title:
-base_domain:
-head_data:
-head_extraction_status:
-head_extraction_error:
-intrinsic_score:
-contextual_score:
-total_score:
+import { z as z4 } from "zod";
+var CrawledLinkSchema = z4.object({
+href: z4.string(),
+text: z4.string(),
+title: z4.string().optional(),
+base_domain: z4.string().optional(),
+head_data: z4.unknown().optional(),
+head_extraction_status: z4.unknown().optional(),
+head_extraction_error: z4.unknown().optional(),
+intrinsic_score: z4.number().optional(),
+contextual_score: z4.unknown().optional(),
+total_score: z4.unknown().optional()
 });
-var CrawlPageSchema =
-url:
-title:
-content:
-links:
-crawledAt:
+var CrawlPageSchema = z4.object({
+url: z4.string(),
+title: z4.string(),
+content: z4.string(),
+links: z4.array(z4.string()),
+crawledAt: z4.string()
 });
-var CrawlResultSchema =
-pages:
+var CrawlResultSchema = z4.object({
+pages: z4.array(CrawlPageSchema)
 });
-var HeadlessResultSchema =
-html:
-markdown:
-links:
+var HeadlessResultSchema = z4.object({
+html: z4.string(),
+markdown: z4.string(),
+links: z4.array(z4.union([CrawledLinkSchema, z4.string()]))
 });
 function validateHeadlessResult(data) {
 return HeadlessResultSchema.parse(data);
@@ -4442,33 +5186,33 @@ function validateHeadlessResult(data) {
 function validateCrawlResult(data) {
 return CrawlResultSchema.parse(data);
 }
-var MethodInfoSchema =
-name:
-async:
-signature:
-startLine:
-endLine:
-calls:
+var MethodInfoSchema = z4.object({
+name: z4.string(),
+async: z4.boolean(),
+signature: z4.string(),
+startLine: z4.number(),
+endLine: z4.number(),
+calls: z4.array(z4.string())
 });
-var CodeNodeSchema =
-type:
-name:
-exported:
-startLine:
-endLine:
-async:
-signature:
-calls:
-methods:
+var CodeNodeSchema = z4.object({
+type: z4.enum(["function", "class"]),
+name: z4.string(),
+exported: z4.boolean(),
+startLine: z4.number(),
+endLine: z4.number(),
+async: z4.boolean().optional(),
+signature: z4.string().optional(),
+calls: z4.array(z4.string()).optional(),
+methods: z4.array(MethodInfoSchema).optional()
 });
-var ImportInfoSchema =
-source:
-imported:
-alias:
+var ImportInfoSchema = z4.object({
+source: z4.string(),
+imported: z4.string(),
+alias: z4.string().optional().nullable()
 });
-var ParsePythonResultSchema =
-nodes:
-imports:
+var ParsePythonResultSchema = z4.object({
+nodes: z4.array(CodeNodeSchema),
+imports: z4.array(ImportInfoSchema)
 });
 function validateParsePythonResult(data) {
 return ParsePythonResultSchema.parse(data);
@@ -4476,6 +5220,15 @@ function validateParsePythonResult(data) {

 // src/crawl/bridge.ts
 var logger3 = createLogger("python-bridge");
+function getPythonExecutable() {
+return process.platform === "win32" ? "python" : "python3";
+}
+function getVenvPythonPath(pluginRoot) {
+if (process.platform === "win32") {
+return path3.join(pluginRoot, ".venv", "Scripts", "python.exe");
+}
+return path3.join(pluginRoot, ".venv", "bin", "python3");
+}
 var PythonBridge = class {
 process = null;
 pending = /* @__PURE__ */ new Map();
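The hard-coded `python3` paths give way to two platform-aware helpers, which makes the bridge usable on Windows (`python`, `Scripts\python.exe`) as well as POSIX. In production the resolution order stays venv-first, as this standalone mirror of the logic shows (plugin root path illustrative):

```ts
import { existsSync } from "fs";
import * as path from "path";

const getPythonExecutable = () =>
  process.platform === "win32" ? "python" : "python3";

const getVenvPythonPath = (pluginRoot: string) =>
  process.platform === "win32"
    ? path.join(pluginRoot, ".venv", "Scripts", "python.exe")
    : path.join(pluginRoot, ".venv", "bin", "python3");

const venv = getVenvPythonPath("/opt/bluera-knowledge");
const pythonPath = existsSync(venv) ? venv : getPythonExecutable();
```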
@@ -4485,20 +5238,21 @@ var PythonBridge = class {
 start() {
 if (this.process) return Promise.resolve();
 const currentFilePath = fileURLToPath(import.meta.url);
-const
+const distPattern = `${path3.sep}dist${path3.sep}`;
+const isProduction = currentFilePath.includes(distPattern);
 let pythonWorkerPath;
 let pythonPath;
 if (isProduction) {
-const distIndex = currentFilePath.indexOf(
+const distIndex = currentFilePath.indexOf(distPattern);
 const pluginRoot = currentFilePath.substring(0, distIndex);
 pythonWorkerPath = path3.join(pluginRoot, "python", "crawl_worker.py");
-const venvPython =
-pythonPath = existsSync4(venvPython) ? venvPython :
+const venvPython = getVenvPythonPath(pluginRoot);
+pythonPath = existsSync4(venvPython) ? venvPython : getPythonExecutable();
 } else {
 const srcDir = path3.dirname(path3.dirname(currentFilePath));
 const projectRoot = path3.dirname(srcDir);
 pythonWorkerPath = path3.join(projectRoot, "python", "crawl_worker.py");
-pythonPath =
+pythonPath = getPythonExecutable();
 }
 logger3.debug(
 { pythonWorkerPath, pythonPath, currentFilePath, isProduction },
@@ -4734,17 +5488,19 @@ var PythonBridge = class {
 };

 // src/db/embeddings.ts
-import { homedir as
-import { join as
+import { homedir as homedir2 } from "os";
+import { join as join11 } from "path";
 import { pipeline, env } from "@huggingface/transformers";
-env.cacheDir =
+env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
 var EmbeddingEngine = class {
 extractor = null;
+// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed()
+_dimensions = null;
 modelName;
-
-constructor(modelName = "Xenova/all-MiniLM-L6-v2",
+batchSize;
+constructor(modelName = "Xenova/all-MiniLM-L6-v2", batchSize = 32) {
 this.modelName = modelName;
-this.
+this.batchSize = batchSize;
 }
 async initialize() {
 if (this.extractor !== null) return;
@@ -4764,23 +5520,43 @@ var EmbeddingEngine = class {
 normalize: true
 });
 const result = Array.from(output.data);
+this._dimensions ??= result.length;
 return result.map((v) => Number(v));
 }
 async embedBatch(texts) {
-const BATCH_SIZE = 32;
 const results = [];
-for (let i = 0; i < texts.length; i +=
-const batch = texts.slice(i, i +
+for (let i = 0; i < texts.length; i += this.batchSize) {
+const batch = texts.slice(i, i + this.batchSize);
 const batchResults = await Promise.all(batch.map((text) => this.embed(text)));
 results.push(...batchResults);
-if (i +
+if (i + this.batchSize < texts.length) {
 await new Promise((resolve4) => setTimeout(resolve4, 100));
 }
 }
 return results;
 }
+/**
+* Get cached embedding dimensions. Throws if embed() hasn't been called yet.
+* Use ensureDimensions() if you need to guarantee dimensions are available.
+*/
 getDimensions() {
-
+if (this._dimensions === null) {
+throw new Error("Cannot get dimensions before first embed() call");
+}
+return this._dimensions;
+}
+/**
+* Ensure dimensions are available, initializing the model if needed.
+* Returns the embedding dimensions for the current model.
+*/
+async ensureDimensions() {
+if (this._dimensions === null) {
+await this.embed("");
+}
+if (this._dimensions === null) {
+throw new Error("Failed to determine embedding dimensions");
+}
+return this._dimensions;
 }
 /**
 * Dispose the embedding pipeline to free resources.
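Embedding dimensions are no longer assumed; they are observed from the first `embed()` output and cached, with `getDimensions()` throwing before that point and `ensureDimensions()` forcing a warm-up embed. This feeds the new `LanceStore.setDimensions()` seen further down. Typical wiring, sketched from the APIs in this chunk (the exact call site inside the package isn't shown here, and the data path is illustrative):

```ts
// Sketch: derive vector width from the model instead of hard-coding it.
const embeddings = new EmbeddingEngine("Xenova/all-MiniLM-L6-v2");
await embeddings.initialize();
const dims = await embeddings.ensureDimensions(); // embeds "" once if needed
const lance = new LanceStore("/data/knowledge");
lance.setDimensions(dims);                        // must precede initialize()
await lance.initialize("my-store");
```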
@@ -4798,17 +5574,18 @@ var EmbeddingEngine = class {
 import * as lancedb from "@lancedb/lancedb";

 // src/types/document.ts
-import { z as
-var DocumentTypeSchema =
-var DocumentMetadataSchema =
-path:
-url:
+import { z as z5 } from "zod";
+var DocumentTypeSchema = z5.enum(["file", "chunk", "web"]);
+var DocumentMetadataSchema = z5.object({
+path: z5.string().optional(),
+url: z5.string().optional(),
 type: DocumentTypeSchema,
-storeId:
-indexedAt:
-
-
-
+storeId: z5.string(),
+indexedAt: z5.string(),
+// ISO 8601 string (what JSON serialization produces)
+fileHash: z5.string().optional(),
+chunkIndex: z5.number().optional(),
+totalChunks: z5.number().optional()
 }).loose();

 // src/db/lance.ts
@@ -4816,10 +5593,23 @@ var LanceStore = class {
 connection = null;
 tables = /* @__PURE__ */ new Map();
 dataDir;
+// eslint-disable-next-line @typescript-eslint/prefer-readonly -- set via setDimensions()
+_dimensions = null;
 constructor(dataDir) {
 this.dataDir = dataDir;
 }
+/**
+* Set the embedding dimensions. Must be called before initialize().
+* This allows dimensions to be derived from the embedding model at runtime.
+* Idempotent: subsequent calls are ignored if dimensions are already set.
+*/
+setDimensions(dimensions) {
+this._dimensions ??= dimensions;
+}
 async initialize(storeId) {
+if (this._dimensions === null) {
+throw new Error("Dimensions not set. Call setDimensions() before initialize().");
+}
 this.connection ??= await lancedb.connect(this.dataDir);
 const tableName = this.getTableName(storeId);
 const tableNames = await this.connection.tableNames();
@@ -4828,7 +5618,7 @@ var LanceStore = class {
 {
 id: "__init__",
 content: "",
-vector: new Array(
+vector: new Array(this._dimensions).fill(0),
 metadata: "{}"
 }
 ]);
@@ -4850,10 +5640,17 @@ var LanceStore = class {
 await table.add(lanceDocuments);
 }
 async deleteDocuments(storeId, documentIds) {
+if (documentIds.length === 0) {
+return;
+}
 const table = await this.getTable(storeId);
 const idList = documentIds.map((id) => `"${id}"`).join(", ");
 await table.delete(`id IN (${idList})`);
 }
+async clearAllDocuments(storeId) {
+const table = await this.getTable(storeId);
+await table.delete("id IS NOT NULL");
+}
 async search(storeId, vector, limit, _threshold) {
 const table = await this.getTable(storeId);
 const query = table.vectorSearch(vector).limit(limit).distanceType("cosine");
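`deleteDocuments` now no-ops on an empty id list, avoiding a malformed `id IN ()` predicate, and the new `clearAllDocuments` empties a table with a catch-all `id IS NOT NULL` delete instead of dropping it. In use:

```ts
await lance.deleteDocuments("my-store", []);        // returns immediately, no predicate issued
await lance.deleteDocuments("my-store", ["doc-1"]); // DELETE ... WHERE id IN ("doc-1")
await lance.clearAllDocuments("my-store");          // table survives, rows do not
```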
@@ -4893,7 +5690,9 @@ var LanceStore = class {
 }
 async deleteStore(storeId) {
 const tableName = this.getTableName(storeId);
-
+this.connection ??= await lancedb.connect(this.dataDir);
+const tableNames = await this.connection.tableNames();
+if (tableNames.includes(tableName)) {
 await this.connection.dropTable(tableName);
 this.tables.delete(tableName);
 }
@@ -4943,6 +5742,8 @@ var LazyServiceContainer = class {
 appConfig;
 dataDir;
 // Lazily initialized (heavy)
+// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in lazy getter
+_manifest = null;
 _embeddings = null;
 _codeGraph = null;
 _search = null;
@@ -4964,7 +5765,7 @@ var LazyServiceContainer = class {
 logger4.debug("Lazy-initializing EmbeddingEngine");
 this._embeddings = new EmbeddingEngine(
 this.appConfig.embedding.model,
-this.appConfig.embedding.
+this.appConfig.embedding.batchSize
 );
 }
 return this._embeddings;
@@ -4985,7 +5786,12 @@ var LazyServiceContainer = class {
 get search() {
 if (this._search === null) {
 logger4.debug("Lazy-initializing SearchService");
-this._search = new SearchService(
+this._search = new SearchService(
+this.lance,
+this.embeddings,
+this.codeGraph,
+this.appConfig.search
+);
 }
 return this._search;
 }
@@ -4996,17 +5802,38 @@ var LazyServiceContainer = class {
 if (this._index === null) {
 logger4.debug("Lazy-initializing IndexService");
 this._index = new IndexService(this.lance, this.embeddings, {
-codeGraphService: this.codeGraph
+codeGraphService: this.codeGraph,
+manifestService: this.manifest,
+chunkSize: this.appConfig.indexing.chunkSize,
+chunkOverlap: this.appConfig.indexing.chunkOverlap,
+concurrency: this.appConfig.indexing.concurrency,
+ignorePatterns: this.appConfig.indexing.ignorePatterns
 });
 }
 return this._index;
 }
+/**
+* ManifestService is lazily created on first access.
+*/
+get manifest() {
+if (this._manifest === null) {
+logger4.debug("Lazy-initializing ManifestService");
+this._manifest = new ManifestService(this.dataDir);
+}
+return this._manifest;
+}
 /**
 * Check if embeddings have been initialized (for cleanup purposes).
 */
 get hasEmbeddings() {
 return this._embeddings !== null;
 }
+/**
+* Check if search service has been initialized (for cleanup purposes).
+*/
+get hasSearch() {
+return this._search !== null;
+}
 };
 async function createLazyServices(configPath, dataDir, projectRoot) {
 logger4.info({ configPath, dataDir, projectRoot }, "Initializing lazy services");
@@ -5017,16 +5844,21 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
 const pythonBridge = new PythonBridge();
 await pythonBridge.start();
 const lance = new LanceStore(resolvedDataDir);
-
-
-
-
-
-
+const resolvedProjectRoot = config.resolveProjectRoot();
+const definitionService = new StoreDefinitionService(resolvedProjectRoot);
+const gitignoreService = new GitignoreService(resolvedProjectRoot);
+const storeOptions = {
+definitionService,
+gitignoreService,
+projectRoot: resolvedProjectRoot
+};
 const store = new StoreService(resolvedDataDir, storeOptions);
 await store.initialize();
 const durationMs = Date.now() - startTime;
-logger4.info(
+logger4.info(
+{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot, durationMs },
+"Lazy services initialized"
+);
 return new LazyServiceContainer(config, appConfig, resolvedDataDir, store, lance, pythonBridge);
 }
 async function createServices(configPath, dataDir, projectRoot) {
@@ -5037,20 +5869,33 @@ async function createServices(configPath, dataDir, projectRoot) {
 const pythonBridge = new PythonBridge();
 await pythonBridge.start();
 const lance = new LanceStore(resolvedDataDir);
-const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.
+const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.batchSize);
 await embeddings.initialize();
-
-
-
-
-
-
+const resolvedProjectRoot = config.resolveProjectRoot();
+const definitionService = new StoreDefinitionService(resolvedProjectRoot);
+const gitignoreService = new GitignoreService(resolvedProjectRoot);
+const storeOptions = {
+definitionService,
+gitignoreService,
+projectRoot: resolvedProjectRoot
+};
 const store = new StoreService(resolvedDataDir, storeOptions);
 await store.initialize();
 const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
-const
-const
-
+const manifest = new ManifestService(resolvedDataDir);
+const search = new SearchService(lance, embeddings, codeGraph, appConfig.search);
+const index = new IndexService(lance, embeddings, {
+codeGraphService: codeGraph,
+manifestService: manifest,
+chunkSize: appConfig.indexing.chunkSize,
+chunkOverlap: appConfig.indexing.chunkOverlap,
+concurrency: appConfig.indexing.concurrency,
+ignorePatterns: appConfig.indexing.ignorePatterns
+});
+logger4.info(
+{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot },
+"Services initialized successfully"
+);
 return {
 config,
 store,
@@ -5059,12 +5904,20 @@ async function createServices(configPath, dataDir, projectRoot) {
 lance,
 embeddings,
 codeGraph,
-pythonBridge
+pythonBridge,
+manifest
 };
 }
 async function destroyServices(services) {
 logger4.info("Shutting down services");
 const errors = [];
+const isLazyContainer = services instanceof LazyServiceContainer;
+const shouldCleanupSearch = !isLazyContainer || services.hasSearch;
+if (shouldCleanupSearch) {
+services.search.cleanup();
+} else {
+logger4.debug("Skipping search cleanup (not initialized)");
+}
 try {
 await services.pythonBridge.stop();
 } catch (e) {
@@ -5072,7 +5925,6 @@ async function destroyServices(services) {
 logger4.error({ error }, "Error stopping Python bridge");
 errors.push(error);
 }
-const isLazyContainer = services instanceof LazyServiceContainer;
 const shouldDisposeEmbeddings = !isLazyContainer || services.hasEmbeddings;
 if (shouldDisposeEmbeddings) {
 try {
@@ -5102,6 +5954,7 @@ async function destroyServices(services) {

 export {
 AdapterRegistry,
+ProjectRootService,
 createLogger,
 shutdownLogger,
 summarizePayload,
@@ -5109,8 +5962,6 @@ export {
 PythonBridge,
 ChunkingService,
 ASTParser,
-createStoreId,
-createDocumentId,
 ok,
 err,
 classifyWebContentType,
@@ -5118,10 +5969,11 @@ export {
 isRepoStoreDefinition,
 isWebStoreDefinition,
 StoreDefinitionService,
+isGitUrl,
 extractRepoName,
 JobService,
 createLazyServices,
 createServices,
 destroyServices
 };
-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-RDDGZIDL.js.map