bluera-knowledge 0.17.2 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +92 -0
- package/README.md +3 -3
- package/dist/brands-3EYIYV6T.js +13 -0
- package/dist/chunk-CLIMKLTW.js +28 -0
- package/dist/chunk-CLIMKLTW.js.map +1 -0
- package/dist/{chunk-YMDXPECI.js → chunk-EZXJ3W5X.js} +79 -32
- package/dist/chunk-EZXJ3W5X.js.map +1 -0
- package/dist/chunk-HXBIIMYL.js +140 -0
- package/dist/chunk-HXBIIMYL.js.map +1 -0
- package/dist/{chunk-WMALVLFW.js → chunk-RDDGZIDL.js} +1095 -245
- package/dist/chunk-RDDGZIDL.js.map +1 -0
- package/dist/{chunk-PFHK5Q22.js → chunk-VUGQ7HAR.js} +10 -6
- package/dist/chunk-VUGQ7HAR.js.map +1 -0
- package/dist/index.js +231 -84
- package/dist/index.js.map +1 -1
- package/dist/mcp/bootstrap.js +22 -3
- package/dist/mcp/bootstrap.js.map +1 -1
- package/dist/mcp/server.d.ts +169 -18
- package/dist/mcp/server.js +4 -3
- package/dist/watch.service-VDSUQ72Z.js +7 -0
- package/dist/watch.service-VDSUQ72Z.js.map +1 -0
- package/dist/workers/background-worker-cli.js +20 -9
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +3 -3
- package/dist/chunk-HRQD3MPH.js +0 -69
- package/dist/chunk-HRQD3MPH.js.map +0 -1
- package/dist/chunk-PFHK5Q22.js.map +0 -1
- package/dist/chunk-WMALVLFW.js.map +0 -1
- package/dist/chunk-YMDXPECI.js.map +0 -1
- package/dist/watch.service-OPLKIDFQ.js +0 -7
- /package/dist/{watch.service-OPLKIDFQ.js.map → brands-3EYIYV6T.js.map} +0 -0
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
import {
|
|
2
|
+
createDocumentId,
|
|
3
|
+
createStoreId
|
|
4
|
+
} from "./chunk-CLIMKLTW.js";
|
|
5
|
+
import {
|
|
6
|
+
parseIgnorePatternsForScanning
|
|
7
|
+
} from "./chunk-HXBIIMYL.js";
|
|
8
|
+
|
|
1
9
|
// src/analysis/adapter-registry.ts
|
|
2
10
|
var AdapterRegistry = class _AdapterRegistry {
|
|
3
11
|
static instance;
|
|
@@ -128,14 +136,14 @@ var ProjectRootService = class {
|
|
|
128
136
|
if (projectRootEnv !== void 0 && projectRootEnv !== "") {
|
|
129
137
|
return this.normalize(projectRootEnv);
|
|
130
138
|
}
|
|
131
|
-
const pwdEnv = process.env["PWD"];
|
|
132
|
-
if (pwdEnv !== void 0 && pwdEnv !== "") {
|
|
133
|
-
return this.normalize(pwdEnv);
|
|
134
|
-
}
|
|
135
139
|
const gitRoot = this.findGitRoot(process.cwd());
|
|
136
140
|
if (gitRoot !== null) {
|
|
137
141
|
return gitRoot;
|
|
138
142
|
}
|
|
143
|
+
const pwdEnv = process.env["PWD"];
|
|
144
|
+
if (pwdEnv !== void 0 && pwdEnv !== "") {
|
|
145
|
+
return this.normalize(pwdEnv);
|
|
146
|
+
}
|
|
139
147
|
return process.cwd();
|
|
140
148
|
}
|
|
141
149
|
/**
|
|
@@ -368,6 +376,23 @@ function err(error) {
|
|
|
368
376
|
return { success: false, error };
|
|
369
377
|
}
|
|
370
378
|
|
|
379
|
+
// src/utils/atomic-write.ts
|
|
380
|
+
import { writeFileSync as writeFileSync2, renameSync, mkdirSync as mkdirSync3, rmSync as rmSyncAtomic } from "fs";
import { writeFile, rename, mkdir, rm as rmAtomic } from "fs/promises";
import { dirname as dirname2 } from "path";
|
|
383
|
+
/**
 * Write `content` to `filePath` via a sibling temp file that is renamed over
 * the destination, so the destination is only ever replaced by a fully
 * written file.
 *
 * @param {string} filePath - Destination path; missing parent directories are created.
 * @param {string} content - Text to write, encoded as UTF-8.
 * @returns {Promise<void>} Resolves once the rename has completed.
 * @throws Rethrows the error from writeFile/rename after removing the temp file.
 */
async function atomicWriteFile(filePath, content) {
  await mkdir(dirname2(filePath), { recursive: true });
  // Timestamp + pid alone repeat when two writes to the same path start within
  // the same millisecond in one process, so a random component is appended.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10);
  const tempPath = `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}.${uniqueSuffix}`;
  try {
    await writeFile(tempPath, content, "utf-8");
    await rename(tempPath, filePath);
  } catch (error) {
    try {
      // `force` makes this a no-op when the temp file was never created.
      await rmAtomic(tempPath, { force: true });
    } catch {
      // Cleanup is best-effort; the original failure is rethrown below.
    }
    throw error;
  }
}
|
|
389
|
+
/**
 * Synchronous counterpart of the atomic write: `content` is written to a
 * sibling temp file which is then renamed over `filePath`.
 *
 * @param {string} filePath - Destination path; missing parent directories are created.
 * @param {string} content - Text to write, encoded as UTF-8.
 * @returns {void}
 * @throws Rethrows the error from writeFileSync/renameSync after removing the temp file.
 */
function atomicWriteFileSync(filePath, content) {
  mkdirSync3(dirname2(filePath), { recursive: true });
  // The random component keeps temp names distinct even when timestamp and pid
  // coincide (process.pid is shared by everything running in this process).
  const uniqueSuffix = Math.random().toString(36).slice(2, 10);
  const tempPath = `${filePath}.tmp.${String(Date.now())}.${String(process.pid)}.${uniqueSuffix}`;
  try {
    writeFileSync2(tempPath, content, "utf-8");
    renameSync(tempPath, filePath);
  } catch (error) {
    try {
      // `force` makes this a no-op when the temp file was never created.
      rmSyncAtomic(tempPath, { force: true });
    } catch {
      // Cleanup is best-effort; the original failure is rethrown below.
    }
    throw error;
  }
}
|
|
395
|
+
|
|
371
396
|
// src/services/job.service.ts
|
|
372
397
|
var JobService = class {
|
|
373
398
|
jobsDir;
|
|
@@ -596,13 +621,13 @@ var JobService = class {
|
|
|
596
621
|
*/
|
|
597
622
|
writeJob(job) {
|
|
598
623
|
const jobFile = path.join(this.jobsDir, `${job.id}.json`);
|
|
599
|
-
|
|
624
|
+
atomicWriteFileSync(jobFile, JSON.stringify(job, null, 2));
|
|
600
625
|
}
|
|
601
626
|
};
|
|
602
627
|
|
|
603
628
|
// src/services/code-graph.service.ts
|
|
604
|
-
import { readFile, writeFile, mkdir, rm } from "fs/promises";
|
|
605
|
-
import { join as join4, dirname as
|
|
629
|
+
import { readFile, writeFile as writeFile2, mkdir as mkdir2, rm } from "fs/promises";
|
|
630
|
+
import { join as join4, dirname as dirname3 } from "path";
|
|
606
631
|
|
|
607
632
|
// src/analysis/ast-parser.ts
|
|
608
633
|
import { parse } from "@babel/parser";
|
|
@@ -1759,11 +1784,31 @@ var CodeGraphService = class {
|
|
|
1759
1784
|
parser;
|
|
1760
1785
|
parserFactory;
|
|
1761
1786
|
graphCache;
|
|
1787
|
+
cacheListeners;
|
|
1762
1788
|
constructor(dataDir, pythonBridge) {
|
|
1763
1789
|
this.dataDir = dataDir;
|
|
1764
1790
|
this.parser = new ASTParser();
|
|
1765
1791
|
this.parserFactory = new ParserFactory(pythonBridge);
|
|
1766
1792
|
this.graphCache = /* @__PURE__ */ new Map();
|
|
1793
|
+
this.cacheListeners = /* @__PURE__ */ new Set();
|
|
1794
|
+
}
|
|
1795
|
+
/**
|
|
1796
|
+
* Subscribe to cache invalidation events.
|
|
1797
|
+
* Returns an unsubscribe function.
|
|
1798
|
+
*/
|
|
1799
|
+
onCacheInvalidation(listener) {
|
|
1800
|
+
this.cacheListeners.add(listener);
|
|
1801
|
+
return () => {
|
|
1802
|
+
this.cacheListeners.delete(listener);
|
|
1803
|
+
};
|
|
1804
|
+
}
|
|
1805
|
+
/**
|
|
1806
|
+
* Emit a cache invalidation event to all listeners.
|
|
1807
|
+
*/
|
|
1808
|
+
emitCacheInvalidation(event) {
|
|
1809
|
+
for (const listener of this.cacheListeners) {
|
|
1810
|
+
listener(event);
|
|
1811
|
+
}
|
|
1767
1812
|
}
|
|
1768
1813
|
/**
|
|
1769
1814
|
* Build a code graph from source files.
|
|
@@ -1819,9 +1864,10 @@ var CodeGraphService = class {
|
|
|
1819
1864
|
*/
|
|
1820
1865
|
async saveGraph(storeId, graph) {
|
|
1821
1866
|
const graphPath = this.getGraphPath(storeId);
|
|
1822
|
-
await
|
|
1867
|
+
await mkdir2(dirname3(graphPath), { recursive: true });
|
|
1823
1868
|
const serialized = graph.toJSON();
|
|
1824
|
-
await
|
|
1869
|
+
await writeFile2(graphPath, JSON.stringify(serialized, null, 2));
|
|
1870
|
+
this.emitCacheInvalidation({ type: "graph-updated", storeId });
|
|
1825
1871
|
}
|
|
1826
1872
|
/**
|
|
1827
1873
|
* Delete the code graph file for a store.
|
|
@@ -1831,6 +1877,7 @@ var CodeGraphService = class {
|
|
|
1831
1877
|
const graphPath = this.getGraphPath(storeId);
|
|
1832
1878
|
await rm(graphPath, { force: true });
|
|
1833
1879
|
this.graphCache.delete(storeId);
|
|
1880
|
+
this.emitCacheInvalidation({ type: "graph-deleted", storeId });
|
|
1834
1881
|
}
|
|
1835
1882
|
/**
|
|
1836
1883
|
* Load a code graph for a store.
|
|
@@ -1976,9 +2023,9 @@ var CodeGraphService = class {
|
|
|
1976
2023
|
};
|
|
1977
2024
|
|
|
1978
2025
|
// src/services/config.service.ts
|
|
1979
|
-
import { readFile as readFile2,
|
|
2026
|
+
import { readFile as readFile2, access } from "fs/promises";
|
|
1980
2027
|
import { homedir } from "os";
|
|
1981
|
-
import {
|
|
2028
|
+
import { isAbsolute, join as join5, resolve } from "path";
|
|
1982
2029
|
|
|
1983
2030
|
// src/types/config.ts
|
|
1984
2031
|
var DEFAULT_CONFIG = {
|
|
@@ -1986,8 +2033,7 @@ var DEFAULT_CONFIG = {
|
|
|
1986
2033
|
dataDir: ".bluera/bluera-knowledge/data",
|
|
1987
2034
|
embedding: {
|
|
1988
2035
|
model: "Xenova/all-MiniLM-L6-v2",
|
|
1989
|
-
batchSize: 32
|
|
1990
|
-
dimensions: 384
|
|
2036
|
+
batchSize: 32
|
|
1991
2037
|
},
|
|
1992
2038
|
indexing: {
|
|
1993
2039
|
concurrency: 4,
|
|
@@ -1997,13 +2043,7 @@ var DEFAULT_CONFIG = {
|
|
|
1997
2043
|
},
|
|
1998
2044
|
search: {
|
|
1999
2045
|
defaultMode: "hybrid",
|
|
2000
|
-
defaultLimit: 10
|
|
2001
|
-
minScore: 0.5,
|
|
2002
|
-
rrf: {
|
|
2003
|
-
k: 40,
|
|
2004
|
-
vectorWeight: 0.7,
|
|
2005
|
-
ftsWeight: 0.3
|
|
2006
|
-
}
|
|
2046
|
+
defaultLimit: 10
|
|
2007
2047
|
},
|
|
2008
2048
|
crawl: {
|
|
2009
2049
|
userAgent: "BlueraKnowledge/1.0",
|
|
@@ -2016,6 +2056,34 @@ var DEFAULT_CONFIG = {
|
|
|
2016
2056
|
}
|
|
2017
2057
|
};
|
|
2018
2058
|
|
|
2059
|
+
// src/utils/deep-merge.ts
|
|
2060
|
+
/**
 * Report whether `value` is an object that should be merged key-by-key:
 * non-null, not an array and not a Date.
 *
 * @param {unknown} value - Value to classify.
 * @returns {boolean} True when `value` qualifies for recursive merging.
 */
function isPlainObject(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value) && !(value instanceof Date);
}

/**
 * Merge `overrides` on top of `defaults` without mutating either input.
 *
 * @param {object} defaults - Base values.
 * @param {unknown} overrides - Values taking precedence; anything that is not
 *   a plain object is ignored and a shallow copy of `defaults` is returned.
 * @returns {object} A new object holding the merged result.
 */
function deepMerge(defaults, overrides) {
  if (!isPlainObject(overrides)) {
    return { ...defaults };
  }
  return deepMergeRecords(defaults, overrides);
}

/**
 * Recursive worker for `deepMerge`. Nested plain objects present on both
 * sides are merged; every other override value (arrays included) replaces the
 * default wholesale. Override keys whose value is `undefined` are skipped.
 *
 * @param {Record<string, unknown>} defaults - Base record.
 * @param {Record<string, unknown>} overrides - Record taking precedence.
 * @returns {Record<string, unknown>} A new merged record.
 */
function deepMergeRecords(defaults, overrides) {
  const result = { ...defaults };
  for (const key of Object.keys(overrides)) {
    // JSON.parse yields "__proto__" as an ordinary own key; assigning it via
    // result[key] would swap the merged object's prototype, so it is dropped.
    if (key === "__proto__") {
      continue;
    }
    const overrideValue = overrides[key];
    if (overrideValue === void 0) {
      continue;
    }
    // Only own properties count as defaults; inherited members such as
    // "toString" must not be picked up through the prototype chain.
    const defaultValue = Object.prototype.hasOwnProperty.call(defaults, key) ? defaults[key] : void 0;
    if (isPlainObject(defaultValue) && isPlainObject(overrideValue)) {
      result[key] = deepMergeRecords(defaultValue, overrideValue);
    } else {
      result[key] = overrideValue;
    }
  }
  return result;
}
|
|
2086
|
+
|
|
2019
2087
|
// src/services/config.service.ts
|
|
2020
2088
|
var DEFAULT_CONFIG_PATH = ".bluera/bluera-knowledge/config.json";
|
|
2021
2089
|
async function fileExists(path4) {
|
|
@@ -2029,20 +2097,27 @@ async function fileExists(path4) {
|
|
|
2029
2097
|
var ConfigService = class {
|
|
2030
2098
|
configPath;
|
|
2031
2099
|
dataDir;
|
|
2100
|
+
projectRoot;
|
|
2032
2101
|
config = null;
|
|
2033
2102
|
constructor(configPath, dataDir, projectRoot) {
|
|
2034
|
-
|
|
2103
|
+
this.projectRoot = projectRoot ?? ProjectRootService.resolve();
|
|
2035
2104
|
if (configPath !== void 0 && configPath !== "") {
|
|
2036
|
-
this.configPath = configPath;
|
|
2105
|
+
this.configPath = this.expandPath(configPath, this.projectRoot);
|
|
2037
2106
|
} else {
|
|
2038
|
-
this.configPath = join5(
|
|
2107
|
+
this.configPath = join5(this.projectRoot, DEFAULT_CONFIG_PATH);
|
|
2039
2108
|
}
|
|
2040
2109
|
if (dataDir !== void 0 && dataDir !== "") {
|
|
2041
|
-
this.dataDir = dataDir;
|
|
2110
|
+
this.dataDir = this.expandPath(dataDir, this.projectRoot);
|
|
2042
2111
|
} else {
|
|
2043
|
-
this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir,
|
|
2112
|
+
this.dataDir = this.expandPath(DEFAULT_CONFIG.dataDir, this.projectRoot);
|
|
2044
2113
|
}
|
|
2045
2114
|
}
|
|
2115
|
+
/**
|
|
2116
|
+
* Get the resolved project root directory.
|
|
2117
|
+
*/
|
|
2118
|
+
resolveProjectRoot() {
|
|
2119
|
+
return this.projectRoot;
|
|
2120
|
+
}
|
|
2046
2121
|
async load() {
|
|
2047
2122
|
if (this.config !== null) {
|
|
2048
2123
|
return this.config;
|
|
@@ -2055,7 +2130,7 @@ var ConfigService = class {
|
|
|
2055
2130
|
}
|
|
2056
2131
|
const content = await readFile2(this.configPath, "utf-8");
|
|
2057
2132
|
try {
|
|
2058
|
-
this.config =
|
|
2133
|
+
this.config = deepMerge(DEFAULT_CONFIG, JSON.parse(content));
|
|
2059
2134
|
} catch (error) {
|
|
2060
2135
|
throw new Error(
|
|
2061
2136
|
`Failed to parse config file at ${this.configPath}: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -2064,8 +2139,7 @@ var ConfigService = class {
|
|
|
2064
2139
|
return this.config;
|
|
2065
2140
|
}
|
|
2066
2141
|
async save(config) {
|
|
2067
|
-
await
|
|
2068
|
-
await writeFile2(this.configPath, JSON.stringify(config, null, 2));
|
|
2142
|
+
await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
|
|
2069
2143
|
this.config = config;
|
|
2070
2144
|
}
|
|
2071
2145
|
resolveDataDir() {
|
|
@@ -2078,7 +2152,7 @@ var ConfigService = class {
|
|
|
2078
2152
|
if (path4.startsWith("~")) {
|
|
2079
2153
|
return path4.replace("~", homedir());
|
|
2080
2154
|
}
|
|
2081
|
-
if (!path4
|
|
2155
|
+
if (!isAbsolute(path4)) {
|
|
2082
2156
|
return resolve(baseDir, path4);
|
|
2083
2157
|
}
|
|
2084
2158
|
return path4;
|
|
@@ -2184,9 +2258,9 @@ ${REQUIRED_PATTERNS.join("\n")}
|
|
|
2184
2258
|
};
|
|
2185
2259
|
|
|
2186
2260
|
// src/services/index.service.ts
|
|
2187
|
-
import { createHash as
|
|
2188
|
-
import { readFile as
|
|
2189
|
-
import { join as join7, extname, basename } from "path";
|
|
2261
|
+
import { createHash as createHash3 } from "crypto";
|
|
2262
|
+
import { readFile as readFile5, readdir } from "fs/promises";
|
|
2263
|
+
import { join as join7, extname, basename, relative } from "path";
|
|
2190
2264
|
|
|
2191
2265
|
// src/services/chunking.service.ts
|
|
2192
2266
|
var CHUNK_PRESETS = {
|
|
@@ -2198,6 +2272,11 @@ var ChunkingService = class _ChunkingService {
|
|
|
2198
2272
|
chunkSize;
|
|
2199
2273
|
chunkOverlap;
|
|
2200
2274
|
constructor(config) {
|
|
2275
|
+
if (config.chunkOverlap >= config.chunkSize) {
|
|
2276
|
+
throw new Error(
|
|
2277
|
+
`chunkOverlap (${String(config.chunkOverlap)}) must be less than chunkSize (${String(config.chunkSize)})`
|
|
2278
|
+
);
|
|
2279
|
+
}
|
|
2201
2280
|
this.chunkSize = config.chunkSize;
|
|
2202
2281
|
this.chunkOverlap = config.chunkOverlap;
|
|
2203
2282
|
}
|
|
@@ -2292,7 +2371,7 @@ var ChunkingService = class _ChunkingService {
|
|
|
2292
2371
|
* Splits on top-level declarations to keep functions/classes together.
|
|
2293
2372
|
*/
|
|
2294
2373
|
chunkCode(text) {
|
|
2295
|
-
const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
|
|
2374
|
+
const declarationRegex = /^(?:\/\*\*[\s\S]*?\*\/\s*)?(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|let|var|enum)\s+(\w+)/gm;
|
|
2296
2375
|
const declarations = [];
|
|
2297
2376
|
let match;
|
|
2298
2377
|
while ((match = declarationRegex.exec(text)) !== null) {
|
|
@@ -2467,73 +2546,236 @@ var ChunkingService = class _ChunkingService {
|
|
|
2467
2546
|
}
|
|
2468
2547
|
};
|
|
2469
2548
|
|
|
2470
|
-
// src/
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2549
|
+
// src/services/drift.service.ts
|
|
2550
|
+
import { createHash as createHash2 } from "crypto";
|
|
2551
|
+
import { readFile as readFile4, stat } from "fs/promises";
|
|
2552
|
+
var DriftService = class {
  /**
   * Classify files by comparing their on-disk state with a stored manifest.
   *
   * A file whose mtime and size both match its manifest entry is reported as
   * unchanged without being read. Otherwise its content hash decides between
   * "unchanged" and "modified".
   *
   * @param manifest - Stored manifest; `manifest.files` maps path to { mtime, size, hash, ... }
   * @param currentFiles - Current files on disk as { path, mtime, size }
   * @returns Object with `added`, `modified`, `deleted` and `unchanged` path arrays
   */
  async detectChanges(manifest, currentFiles) {
    const result = {
      added: [],
      modified: [],
      deleted: [],
      unchanged: []
    };
    const currentPathSet = new Set(currentFiles.map((f) => f.path));
    for (const path4 of Object.keys(manifest.files)) {
      if (!currentPathSet.has(path4)) {
        result.deleted.push(path4);
      }
    }
    // Keep the manifest entry next to each candidate so the hashing pass does
    // not have to look it up (and re-check its existence) a second time.
    const potentiallyModified = [];
    for (const file of currentFiles) {
      const manifestState = manifest.files[file.path];
      if (manifestState === void 0) {
        result.added.push(file.path);
      } else if (file.mtime === manifestState.mtime && file.size === manifestState.size) {
        result.unchanged.push(file.path);
      } else {
        potentiallyModified.push({ file, manifestState });
      }
    }
    // Hash one file at a time, in input order, so result ordering is stable.
    for (const { file, manifestState } of potentiallyModified) {
      const currentHash = await this.computeFileHash(file.path);
      if (currentHash === manifestState.hash) {
        result.unchanged.push(file.path);
      } else {
        result.modified.push(file.path);
      }
    }
    return result;
  }
  /**
   * Get the current state of a file on disk.
   *
   * @param path4 - File path
   * @returns { path, mtime, size } with mtime taken from `stats.mtimeMs`
   */
  async getFileState(path4) {
    const stats = await stat(path4);
    return {
      path: path4,
      mtime: stats.mtimeMs,
      size: stats.size
    };
  }
  /**
   * Compute the MD5 hash of a file's raw bytes.
   *
   * @param path4 - File path
   * @returns Hex-encoded MD5 digest
   */
  async computeFileHash(path4) {
    const content = await readFile4(path4);
    return createHash2("md5").update(content).digest("hex");
  }
  /**
   * Create a file state entry for the manifest after indexing.
   *
   * @param path4 - File path
   * @param documentIds - Document IDs created from this file
   * @returns { state, hash } where `state` is the manifest entry
   */
  async createFileState(path4, documentIds) {
    const stats = await stat(path4);
    const hash = await this.computeFileHash(path4);
    // NOTE(review): createDocumentId is also statically imported at the top of
    // this bundle from ./chunk-CLIMKLTW.js; presumably the same function -
    // confirm before replacing this dynamic import.
    const { createDocumentId: createDocumentId2 } = await import("./brands-3EYIYV6T.js");
    return {
      state: {
        mtime: stats.mtimeMs,
        size: stats.size,
        hash,
        documentIds: documentIds.map((id) => createDocumentId2(id))
      },
      hash
    };
  }
};
|
|
2490
2643
|
|
|
2491
2644
|
// src/services/index.service.ts
|
|
2492
2645
|
var logger = createLogger("index-service");
|
|
2493
2646
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
2647
|
+
// Text/docs
|
|
2494
2648
|
".txt",
|
|
2495
2649
|
".md",
|
|
2650
|
+
".rst",
|
|
2651
|
+
".adoc",
|
|
2652
|
+
// JavaScript/TypeScript
|
|
2496
2653
|
".js",
|
|
2497
2654
|
".ts",
|
|
2498
2655
|
".jsx",
|
|
2499
2656
|
".tsx",
|
|
2657
|
+
".mjs",
|
|
2658
|
+
".cjs",
|
|
2659
|
+
".mts",
|
|
2660
|
+
".cts",
|
|
2661
|
+
// Config/data
|
|
2500
2662
|
".json",
|
|
2501
2663
|
".yaml",
|
|
2502
2664
|
".yml",
|
|
2665
|
+
".toml",
|
|
2666
|
+
".ini",
|
|
2667
|
+
".env",
|
|
2668
|
+
// Web
|
|
2503
2669
|
".html",
|
|
2670
|
+
".htm",
|
|
2504
2671
|
".css",
|
|
2505
2672
|
".scss",
|
|
2673
|
+
".sass",
|
|
2506
2674
|
".less",
|
|
2675
|
+
".vue",
|
|
2676
|
+
".svelte",
|
|
2677
|
+
// Python
|
|
2507
2678
|
".py",
|
|
2679
|
+
".pyi",
|
|
2680
|
+
".pyx",
|
|
2681
|
+
// Ruby
|
|
2508
2682
|
".rb",
|
|
2683
|
+
".erb",
|
|
2684
|
+
".rake",
|
|
2685
|
+
// Go
|
|
2509
2686
|
".go",
|
|
2687
|
+
// Rust
|
|
2510
2688
|
".rs",
|
|
2689
|
+
// Java/JVM
|
|
2511
2690
|
".java",
|
|
2691
|
+
".kt",
|
|
2692
|
+
".kts",
|
|
2693
|
+
".scala",
|
|
2694
|
+
".groovy",
|
|
2695
|
+
".gradle",
|
|
2696
|
+
// C/C++
|
|
2512
2697
|
".c",
|
|
2513
2698
|
".cpp",
|
|
2699
|
+
".cc",
|
|
2700
|
+
".cxx",
|
|
2514
2701
|
".h",
|
|
2515
2702
|
".hpp",
|
|
2703
|
+
".hxx",
|
|
2704
|
+
// C#/.NET
|
|
2705
|
+
".cs",
|
|
2706
|
+
".fs",
|
|
2707
|
+
".vb",
|
|
2708
|
+
// Swift/Objective-C
|
|
2709
|
+
".swift",
|
|
2710
|
+
".m",
|
|
2711
|
+
".mm",
|
|
2712
|
+
// PHP
|
|
2713
|
+
".php",
|
|
2714
|
+
// Shell
|
|
2516
2715
|
".sh",
|
|
2517
2716
|
".bash",
|
|
2518
2717
|
".zsh",
|
|
2718
|
+
".fish",
|
|
2719
|
+
".ps1",
|
|
2720
|
+
".psm1",
|
|
2721
|
+
// SQL
|
|
2519
2722
|
".sql",
|
|
2520
|
-
|
|
2723
|
+
// Other
|
|
2724
|
+
".xml",
|
|
2725
|
+
".graphql",
|
|
2726
|
+
".gql",
|
|
2727
|
+
".proto",
|
|
2728
|
+
".lua",
|
|
2729
|
+
".r",
|
|
2730
|
+
".R",
|
|
2731
|
+
".jl",
|
|
2732
|
+
".ex",
|
|
2733
|
+
".exs",
|
|
2734
|
+
".erl",
|
|
2735
|
+
".hrl",
|
|
2736
|
+
".clj",
|
|
2737
|
+
".cljs",
|
|
2738
|
+
".cljc",
|
|
2739
|
+
".hs",
|
|
2740
|
+
".elm",
|
|
2741
|
+
".dart",
|
|
2742
|
+
".pl",
|
|
2743
|
+
".pm",
|
|
2744
|
+
".tcl",
|
|
2745
|
+
".vim",
|
|
2746
|
+
".zig",
|
|
2747
|
+
".nim",
|
|
2748
|
+
".v",
|
|
2749
|
+
".tf",
|
|
2750
|
+
".hcl",
|
|
2751
|
+
".dockerfile",
|
|
2752
|
+
".makefile",
|
|
2753
|
+
".cmake"
|
|
2521
2754
|
]);
|
|
2522
2755
|
var IndexService = class {
|
|
2523
2756
|
lanceStore;
|
|
2524
2757
|
embeddingEngine;
|
|
2525
2758
|
chunker;
|
|
2526
2759
|
codeGraphService;
|
|
2760
|
+
manifestService;
|
|
2761
|
+
driftService;
|
|
2527
2762
|
concurrency;
|
|
2763
|
+
ignoreDirs;
|
|
2764
|
+
ignoreFilePatterns;
|
|
2528
2765
|
constructor(lanceStore, embeddingEngine, options = {}) {
|
|
2529
2766
|
this.lanceStore = lanceStore;
|
|
2530
2767
|
this.embeddingEngine = embeddingEngine;
|
|
2531
2768
|
this.chunker = new ChunkingService({
|
|
2532
|
-
chunkSize: options.chunkSize ??
|
|
2533
|
-
chunkOverlap: options.chunkOverlap ??
|
|
2769
|
+
chunkSize: options.chunkSize ?? 1e3,
|
|
2770
|
+
chunkOverlap: options.chunkOverlap ?? 150
|
|
2534
2771
|
});
|
|
2535
2772
|
this.codeGraphService = options.codeGraphService;
|
|
2773
|
+
this.manifestService = options.manifestService;
|
|
2774
|
+
this.driftService = new DriftService();
|
|
2536
2775
|
this.concurrency = options.concurrency ?? 4;
|
|
2776
|
+
const parsed = parseIgnorePatternsForScanning(options.ignorePatterns ?? []);
|
|
2777
|
+
this.ignoreDirs = parsed.dirs;
|
|
2778
|
+
this.ignoreFilePatterns = parsed.fileMatchers;
|
|
2537
2779
|
}
|
|
2538
2780
|
async indexStore(store, onProgress) {
|
|
2539
2781
|
logger.info(
|
|
@@ -2564,8 +2806,205 @@ var IndexService = class {
|
|
|
2564
2806
|
return err(error instanceof Error ? error : new Error(String(error)));
|
|
2565
2807
|
}
|
|
2566
2808
|
}
|
|
2809
|
+
/**
|
|
2810
|
+
* Incrementally index a store, only processing changed files.
|
|
2811
|
+
* Requires manifestService to be configured.
|
|
2812
|
+
*
|
|
2813
|
+
* @param store - The store to index
|
|
2814
|
+
* @param onProgress - Optional progress callback
|
|
2815
|
+
* @returns Result with incremental index statistics
|
|
2816
|
+
*/
|
|
2817
|
+
async indexStoreIncremental(store, onProgress) {
|
|
2818
|
+
if (this.manifestService === void 0) {
|
|
2819
|
+
return err(new Error("ManifestService required for incremental indexing"));
|
|
2820
|
+
}
|
|
2821
|
+
if (store.type !== "file" && store.type !== "repo") {
|
|
2822
|
+
return err(new Error(`Incremental indexing not supported for store type: ${store.type}`));
|
|
2823
|
+
}
|
|
2824
|
+
logger.info(
|
|
2825
|
+
{
|
|
2826
|
+
storeId: store.id,
|
|
2827
|
+
storeName: store.name,
|
|
2828
|
+
storeType: store.type
|
|
2829
|
+
},
|
|
2830
|
+
"Starting incremental store indexing"
|
|
2831
|
+
);
|
|
2832
|
+
const startTime = Date.now();
|
|
2833
|
+
try {
|
|
2834
|
+
const manifest = await this.manifestService.load(store.id);
|
|
2835
|
+
const filePaths = await this.scanDirectory(store.path);
|
|
2836
|
+
const currentFiles = await Promise.all(
|
|
2837
|
+
filePaths.map((path4) => this.driftService.getFileState(path4))
|
|
2838
|
+
);
|
|
2839
|
+
const drift = await this.driftService.detectChanges(manifest, currentFiles);
|
|
2840
|
+
logger.debug(
|
|
2841
|
+
{
|
|
2842
|
+
storeId: store.id,
|
|
2843
|
+
added: drift.added.length,
|
|
2844
|
+
modified: drift.modified.length,
|
|
2845
|
+
deleted: drift.deleted.length,
|
|
2846
|
+
unchanged: drift.unchanged.length
|
|
2847
|
+
},
|
|
2848
|
+
"Drift detection complete"
|
|
2849
|
+
);
|
|
2850
|
+
const documentIdsToDelete = [];
|
|
2851
|
+
for (const path4 of [...drift.modified, ...drift.deleted]) {
|
|
2852
|
+
const fileState = manifest.files[path4];
|
|
2853
|
+
if (fileState !== void 0) {
|
|
2854
|
+
documentIdsToDelete.push(...fileState.documentIds);
|
|
2855
|
+
}
|
|
2856
|
+
}
|
|
2857
|
+
if (documentIdsToDelete.length > 0) {
|
|
2858
|
+
await this.lanceStore.deleteDocuments(store.id, documentIdsToDelete);
|
|
2859
|
+
logger.debug(
|
|
2860
|
+
{ storeId: store.id, count: documentIdsToDelete.length },
|
|
2861
|
+
"Deleted old documents"
|
|
2862
|
+
);
|
|
2863
|
+
}
|
|
2864
|
+
const filesToProcess = [...drift.added, ...drift.modified];
|
|
2865
|
+
const totalFiles = filesToProcess.length;
|
|
2866
|
+
onProgress?.({
|
|
2867
|
+
type: "start",
|
|
2868
|
+
current: 0,
|
|
2869
|
+
total: totalFiles,
|
|
2870
|
+
message: `Processing ${String(totalFiles)} changed files`
|
|
2871
|
+
});
|
|
2872
|
+
const documents = [];
|
|
2873
|
+
const newManifestFiles = {};
|
|
2874
|
+
let filesProcessed = 0;
|
|
2875
|
+
for (const path4 of drift.unchanged) {
|
|
2876
|
+
const existingState = manifest.files[path4];
|
|
2877
|
+
if (existingState !== void 0) {
|
|
2878
|
+
newManifestFiles[path4] = existingState;
|
|
2879
|
+
}
|
|
2880
|
+
}
|
|
2881
|
+
for (let i = 0; i < filesToProcess.length; i += this.concurrency) {
|
|
2882
|
+
const batch = filesToProcess.slice(i, i + this.concurrency);
|
|
2883
|
+
const batchResults = await Promise.all(
|
|
2884
|
+
batch.map(async (filePath) => {
|
|
2885
|
+
try {
|
|
2886
|
+
const result = await this.processFile(filePath, store);
|
|
2887
|
+
const documentIds = result.documents.map((d) => d.id);
|
|
2888
|
+
const { state } = await this.driftService.createFileState(filePath, documentIds);
|
|
2889
|
+
return {
|
|
2890
|
+
filePath,
|
|
2891
|
+
documents: result.documents,
|
|
2892
|
+
fileState: state
|
|
2893
|
+
};
|
|
2894
|
+
} catch (error) {
|
|
2895
|
+
logger.warn(
|
|
2896
|
+
{ filePath, error: error instanceof Error ? error.message : String(error) },
|
|
2897
|
+
"Failed to process file during incremental indexing, skipping"
|
|
2898
|
+
);
|
|
2899
|
+
return null;
|
|
2900
|
+
}
|
|
2901
|
+
})
|
|
2902
|
+
);
|
|
2903
|
+
for (const result of batchResults) {
|
|
2904
|
+
if (result !== null) {
|
|
2905
|
+
documents.push(...result.documents);
|
|
2906
|
+
newManifestFiles[result.filePath] = result.fileState;
|
|
2907
|
+
}
|
|
2908
|
+
}
|
|
2909
|
+
filesProcessed += batch.length;
|
|
2910
|
+
onProgress?.({
|
|
2911
|
+
type: "progress",
|
|
2912
|
+
current: filesProcessed,
|
|
2913
|
+
total: totalFiles,
|
|
2914
|
+
message: `Processed ${String(filesProcessed)}/${String(totalFiles)} files`
|
|
2915
|
+
});
|
|
2916
|
+
}
|
|
2917
|
+
if (documents.length > 0) {
|
|
2918
|
+
await this.lanceStore.addDocuments(store.id, documents);
|
|
2919
|
+
}
|
|
2920
|
+
if (documentIdsToDelete.length > 0 || documents.length > 0) {
|
|
2921
|
+
await this.lanceStore.createFtsIndex(store.id);
|
|
2922
|
+
}
|
|
2923
|
+
if (this.codeGraphService) {
|
|
2924
|
+
const sourceExtensions = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"];
|
|
2925
|
+
const hasSourceChanges = filesToProcess.some((p) => sourceExtensions.includes(extname(p).toLowerCase())) || drift.deleted.some((p) => sourceExtensions.includes(extname(p).toLowerCase()));
|
|
2926
|
+
if (hasSourceChanges) {
|
|
2927
|
+
const allSourceFiles = [];
|
|
2928
|
+
const allPaths = [...drift.unchanged, ...filesToProcess];
|
|
2929
|
+
for (const filePath of allPaths) {
|
|
2930
|
+
const ext = extname(filePath).toLowerCase();
|
|
2931
|
+
if (sourceExtensions.includes(ext)) {
|
|
2932
|
+
try {
|
|
2933
|
+
const content = await readFile5(filePath, "utf-8");
|
|
2934
|
+
allSourceFiles.push({ path: filePath, content });
|
|
2935
|
+
} catch {
|
|
2936
|
+
}
|
|
2937
|
+
}
|
|
2938
|
+
}
|
|
2939
|
+
if (allSourceFiles.length > 0) {
|
|
2940
|
+
const graph = await this.codeGraphService.buildGraph(allSourceFiles);
|
|
2941
|
+
await this.codeGraphService.saveGraph(store.id, graph);
|
|
2942
|
+
logger.debug(
|
|
2943
|
+
{ storeId: store.id, sourceFiles: allSourceFiles.length },
|
|
2944
|
+
"Rebuilt code graph during incremental indexing"
|
|
2945
|
+
);
|
|
2946
|
+
} else {
|
|
2947
|
+
await this.codeGraphService.deleteGraph(store.id);
|
|
2948
|
+
logger.debug(
|
|
2949
|
+
{ storeId: store.id },
|
|
2950
|
+
"Deleted stale code graph (no source files remain)"
|
|
2951
|
+
);
|
|
2952
|
+
}
|
|
2953
|
+
}
|
|
2954
|
+
}
|
|
2955
|
+
const updatedManifest = {
|
|
2956
|
+
version: 1,
|
|
2957
|
+
storeId: store.id,
|
|
2958
|
+
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2959
|
+
files: newManifestFiles
|
|
2960
|
+
};
|
|
2961
|
+
await this.manifestService.save(updatedManifest);
|
|
2962
|
+
onProgress?.({
|
|
2963
|
+
type: "complete",
|
|
2964
|
+
current: totalFiles,
|
|
2965
|
+
total: totalFiles,
|
|
2966
|
+
message: "Incremental indexing complete"
|
|
2967
|
+
});
|
|
2968
|
+
const timeMs = Date.now() - startTime;
|
|
2969
|
+
logger.info(
|
|
2970
|
+
{
|
|
2971
|
+
storeId: store.id,
|
|
2972
|
+
storeName: store.name,
|
|
2973
|
+
filesAdded: drift.added.length,
|
|
2974
|
+
filesModified: drift.modified.length,
|
|
2975
|
+
filesDeleted: drift.deleted.length,
|
|
2976
|
+
filesUnchanged: drift.unchanged.length,
|
|
2977
|
+
chunksCreated: documents.length,
|
|
2978
|
+
timeMs
|
|
2979
|
+
},
|
|
2980
|
+
"Incremental indexing complete"
|
|
2981
|
+
);
|
|
2982
|
+
return ok({
|
|
2983
|
+
filesIndexed: filesToProcess.length,
|
|
2984
|
+
chunksCreated: documents.length,
|
|
2985
|
+
timeMs,
|
|
2986
|
+
filesAdded: drift.added.length,
|
|
2987
|
+
filesModified: drift.modified.length,
|
|
2988
|
+
filesDeleted: drift.deleted.length,
|
|
2989
|
+
filesUnchanged: drift.unchanged.length
|
|
2990
|
+
});
|
|
2991
|
+
} catch (error) {
|
|
2992
|
+
logger.error(
|
|
2993
|
+
{
|
|
2994
|
+
storeId: store.id,
|
|
2995
|
+
error: error instanceof Error ? error.message : String(error)
|
|
2996
|
+
},
|
|
2997
|
+
"Incremental indexing failed"
|
|
2998
|
+
);
|
|
2999
|
+
return err(error instanceof Error ? error : new Error(String(error)));
|
|
3000
|
+
}
|
|
3001
|
+
}
|
|
2567
3002
|
async indexFileStore(store, onProgress) {
|
|
2568
3003
|
const startTime = Date.now();
|
|
3004
|
+
await this.lanceStore.clearAllDocuments(store.id);
|
|
3005
|
+
if (this.manifestService) {
|
|
3006
|
+
await this.manifestService.delete(store.id);
|
|
3007
|
+
}
|
|
2569
3008
|
const files = await this.scanDirectory(store.path);
|
|
2570
3009
|
const documents = [];
|
|
2571
3010
|
let filesProcessed = 0;
|
|
@@ -2588,7 +3027,17 @@ var IndexService = class {
|
|
|
2588
3027
|
for (let i = 0; i < files.length; i += this.concurrency) {
|
|
2589
3028
|
const batch = files.slice(i, i + this.concurrency);
|
|
2590
3029
|
const batchResults = await Promise.all(
|
|
2591
|
-
batch.map((filePath) =>
|
|
3030
|
+
batch.map(async (filePath) => {
|
|
3031
|
+
try {
|
|
3032
|
+
return await this.processFile(filePath, store);
|
|
3033
|
+
} catch (error) {
|
|
3034
|
+
logger.warn(
|
|
3035
|
+
{ filePath, error: error instanceof Error ? error.message : String(error) },
|
|
3036
|
+
"Failed to process file, skipping"
|
|
3037
|
+
);
|
|
3038
|
+
return { documents: [], sourceFile: void 0 };
|
|
3039
|
+
}
|
|
3040
|
+
})
|
|
2592
3041
|
);
|
|
2593
3042
|
for (const result of batchResults) {
|
|
2594
3043
|
documents.push(...result.documents);
|
|
@@ -2611,6 +3060,8 @@ var IndexService = class {
|
|
|
2611
3060
|
if (this.codeGraphService && sourceFiles.length > 0) {
|
|
2612
3061
|
const graph = await this.codeGraphService.buildGraph(sourceFiles);
|
|
2613
3062
|
await this.codeGraphService.saveGraph(store.id, graph);
|
|
3063
|
+
} else if (this.codeGraphService) {
|
|
3064
|
+
await this.codeGraphService.deleteGraph(store.id);
|
|
2614
3065
|
}
|
|
2615
3066
|
onProgress?.({
|
|
2616
3067
|
type: "complete",
|
|
@@ -2623,7 +3074,7 @@ var IndexService = class {
|
|
|
2623
3074
|
{
|
|
2624
3075
|
storeId: store.id,
|
|
2625
3076
|
storeName: store.name,
|
|
2626
|
-
|
|
3077
|
+
filesIndexed: filesProcessed,
|
|
2627
3078
|
chunksCreated: documents.length,
|
|
2628
3079
|
sourceFilesForGraph: sourceFiles.length,
|
|
2629
3080
|
timeMs
|
|
@@ -2631,7 +3082,7 @@ var IndexService = class {
|
|
|
2631
3082
|
"Store indexing complete"
|
|
2632
3083
|
);
|
|
2633
3084
|
return ok({
|
|
2634
|
-
|
|
3085
|
+
filesIndexed: filesProcessed,
|
|
2635
3086
|
chunksCreated: documents.length,
|
|
2636
3087
|
timeMs
|
|
2637
3088
|
});
|
|
@@ -2641,13 +3092,15 @@ var IndexService = class {
|
|
|
2641
3092
|
* Extracted for parallel processing.
|
|
2642
3093
|
*/
|
|
2643
3094
|
async processFile(filePath, store) {
|
|
2644
|
-
const content = await
|
|
2645
|
-
const fileHash =
|
|
3095
|
+
const content = await readFile5(filePath, "utf-8");
|
|
3096
|
+
const fileHash = createHash3("md5").update(content).digest("hex");
|
|
2646
3097
|
const chunks = this.chunker.chunk(content, filePath);
|
|
3098
|
+
const relativePath = relative(store.path, filePath);
|
|
3099
|
+
const pathHash = createHash3("md5").update(relativePath).digest("hex").slice(0, 8);
|
|
2647
3100
|
const ext = extname(filePath).toLowerCase();
|
|
2648
3101
|
const fileName = basename(filePath).toLowerCase();
|
|
2649
3102
|
const fileType = this.classifyFileType(ext, fileName, filePath);
|
|
2650
|
-
const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
|
|
3103
|
+
const sourceFile = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"].includes(ext) ? { path: filePath, content } : void 0;
|
|
2651
3104
|
if (chunks.length === 0) {
|
|
2652
3105
|
return { documents: [], sourceFile };
|
|
2653
3106
|
}
|
|
@@ -2662,7 +3115,7 @@ var IndexService = class {
|
|
|
2662
3115
|
`Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
|
|
2663
3116
|
);
|
|
2664
3117
|
}
|
|
2665
|
-
const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
|
|
3118
|
+
const chunkId = chunks.length > 1 ? `${store.id}-${pathHash}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${pathHash}-${fileHash}`;
|
|
2666
3119
|
documents.push({
|
|
2667
3120
|
id: createDocumentId(chunkId),
|
|
2668
3121
|
content: chunk.content,
|
|
@@ -2671,7 +3124,7 @@ var IndexService = class {
|
|
|
2671
3124
|
type: chunks.length > 1 ? "chunk" : "file",
|
|
2672
3125
|
storeId: store.id,
|
|
2673
3126
|
path: filePath,
|
|
2674
|
-
indexedAt: /* @__PURE__ */ new Date(),
|
|
3127
|
+
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2675
3128
|
fileHash,
|
|
2676
3129
|
chunkIndex: chunk.chunkIndex,
|
|
2677
3130
|
totalChunks: chunk.totalChunks,
|
|
@@ -2691,10 +3144,14 @@ var IndexService = class {
|
|
|
2691
3144
|
for (const entry of entries) {
|
|
2692
3145
|
const fullPath = join7(dir, entry.name);
|
|
2693
3146
|
if (entry.isDirectory()) {
|
|
2694
|
-
if (!
|
|
3147
|
+
if (!this.ignoreDirs.has(entry.name)) {
|
|
2695
3148
|
files.push(...await this.scanDirectory(fullPath));
|
|
2696
3149
|
}
|
|
2697
3150
|
} else if (entry.isFile()) {
|
|
3151
|
+
const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(entry.name));
|
|
3152
|
+
if (shouldIgnore) {
|
|
3153
|
+
continue;
|
|
3154
|
+
}
|
|
2698
3155
|
const ext = extname(entry.name).toLowerCase();
|
|
2699
3156
|
if (TEXT_EXTENSIONS.has(ext)) {
|
|
2700
3157
|
files.push(fullPath);
|
|
@@ -2784,6 +3241,141 @@ function classifyWebContentType(url, title) {
|
|
|
2784
3241
|
return "documentation";
|
|
2785
3242
|
}
|
|
2786
3243
|
|
|
3244
|
+
// src/services/manifest.service.ts
|
|
3245
|
+
import { readFile as readFile6, access as access3, mkdir as mkdir3 } from "fs/promises";
|
|
3246
|
+
import { join as join8 } from "path";
|
|
3247
|
+
|
|
3248
|
+
// src/types/manifest.ts
|
|
3249
|
+
import { z as z2 } from "zod";
|
|
3250
|
+
var FileStateSchema = z2.object({
|
|
3251
|
+
/** File modification time in milliseconds since epoch */
|
|
3252
|
+
mtime: z2.number(),
|
|
3253
|
+
/** File size in bytes */
|
|
3254
|
+
size: z2.number(),
|
|
3255
|
+
/** MD5 hash of file content */
|
|
3256
|
+
hash: z2.string(),
|
|
3257
|
+
/** Document IDs created from this file (for cleanup) */
|
|
3258
|
+
documentIds: z2.array(z2.string())
|
|
3259
|
+
});
|
|
3260
|
+
var StoreManifestSchema = z2.object({
|
|
3261
|
+
/** Schema version for future migrations */
|
|
3262
|
+
version: z2.literal(1),
|
|
3263
|
+
/** Store ID this manifest belongs to */
|
|
3264
|
+
storeId: z2.string(),
|
|
3265
|
+
/** When the manifest was last updated */
|
|
3266
|
+
indexedAt: z2.string(),
|
|
3267
|
+
/** Map of file paths to their state */
|
|
3268
|
+
files: z2.record(z2.string(), FileStateSchema)
|
|
3269
|
+
});
|
|
3270
|
+
function createEmptyManifest(storeId) {
|
|
3271
|
+
return {
|
|
3272
|
+
version: 1,
|
|
3273
|
+
storeId,
|
|
3274
|
+
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3275
|
+
files: {}
|
|
3276
|
+
};
|
|
3277
|
+
}
|
|
3278
|
+
|
|
3279
|
+
// src/services/manifest.service.ts
|
|
3280
|
+
var ManifestService = class {
|
|
3281
|
+
manifestsDir;
|
|
3282
|
+
constructor(dataDir) {
|
|
3283
|
+
this.manifestsDir = join8(dataDir, "manifests");
|
|
3284
|
+
}
|
|
3285
|
+
/**
|
|
3286
|
+
* Initialize the manifests directory.
|
|
3287
|
+
*/
|
|
3288
|
+
async initialize() {
|
|
3289
|
+
await mkdir3(this.manifestsDir, { recursive: true });
|
|
3290
|
+
}
|
|
3291
|
+
/**
|
|
3292
|
+
* Get the file path for a store's manifest.
|
|
3293
|
+
*/
|
|
3294
|
+
getManifestPath(storeId) {
|
|
3295
|
+
return join8(this.manifestsDir, `${storeId}.manifest.json`);
|
|
3296
|
+
}
|
|
3297
|
+
/**
|
|
3298
|
+
* Load a store's manifest.
|
|
3299
|
+
* Returns an empty manifest if one doesn't exist.
|
|
3300
|
+
* Throws on parse/validation errors (fail fast).
|
|
3301
|
+
*/
|
|
3302
|
+
async load(storeId) {
|
|
3303
|
+
const manifestPath = this.getManifestPath(storeId);
|
|
3304
|
+
const exists = await this.fileExists(manifestPath);
|
|
3305
|
+
if (!exists) {
|
|
3306
|
+
return createEmptyManifest(storeId);
|
|
3307
|
+
}
|
|
3308
|
+
const content = await readFile6(manifestPath, "utf-8");
|
|
3309
|
+
let parsed;
|
|
3310
|
+
try {
|
|
3311
|
+
parsed = JSON.parse(content);
|
|
3312
|
+
} catch (error) {
|
|
3313
|
+
throw new Error(
|
|
3314
|
+
`Failed to parse manifest at ${manifestPath}: ${error instanceof Error ? error.message : String(error)}`
|
|
3315
|
+
);
|
|
3316
|
+
}
|
|
3317
|
+
const result = StoreManifestSchema.safeParse(parsed);
|
|
3318
|
+
if (!result.success) {
|
|
3319
|
+
throw new Error(`Invalid manifest at ${manifestPath}: ${result.error.message}`);
|
|
3320
|
+
}
|
|
3321
|
+
return this.toTypedManifest(result.data, storeId);
|
|
3322
|
+
}
|
|
3323
|
+
/**
|
|
3324
|
+
* Save a store's manifest atomically.
|
|
3325
|
+
*/
|
|
3326
|
+
async save(manifest) {
|
|
3327
|
+
const manifestPath = this.getManifestPath(manifest.storeId);
|
|
3328
|
+
const toSave = {
|
|
3329
|
+
...manifest,
|
|
3330
|
+
indexedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
3331
|
+
};
|
|
3332
|
+
await atomicWriteFile(manifestPath, JSON.stringify(toSave, null, 2));
|
|
3333
|
+
}
|
|
3334
|
+
/**
|
|
3335
|
+
* Delete a store's manifest.
|
|
3336
|
+
* Called when a store is deleted or during full re-index.
|
|
3337
|
+
*/
|
|
3338
|
+
async delete(storeId) {
|
|
3339
|
+
const manifestPath = this.getManifestPath(storeId);
|
|
3340
|
+
const { unlink } = await import("fs/promises");
|
|
3341
|
+
const exists = await this.fileExists(manifestPath);
|
|
3342
|
+
if (exists) {
|
|
3343
|
+
await unlink(manifestPath);
|
|
3344
|
+
}
|
|
3345
|
+
}
|
|
3346
|
+
/**
|
|
3347
|
+
* Check if a file exists.
|
|
3348
|
+
*/
|
|
3349
|
+
async fileExists(path4) {
|
|
3350
|
+
try {
|
|
3351
|
+
await access3(path4);
|
|
3352
|
+
return true;
|
|
3353
|
+
} catch {
|
|
3354
|
+
return false;
|
|
3355
|
+
}
|
|
3356
|
+
}
|
|
3357
|
+
/**
|
|
3358
|
+
* Convert a parsed manifest to a typed manifest with branded types.
|
|
3359
|
+
*/
|
|
3360
|
+
toTypedManifest(data, storeId) {
|
|
3361
|
+
const files = {};
|
|
3362
|
+
for (const [path4, state] of Object.entries(data.files)) {
|
|
3363
|
+
files[path4] = {
|
|
3364
|
+
mtime: state.mtime,
|
|
3365
|
+
size: state.size,
|
|
3366
|
+
hash: state.hash,
|
|
3367
|
+
documentIds: state.documentIds.map((id) => createDocumentId(id))
|
|
3368
|
+
};
|
|
3369
|
+
}
|
|
3370
|
+
return {
|
|
3371
|
+
version: 1,
|
|
3372
|
+
storeId,
|
|
3373
|
+
indexedAt: data.indexedAt,
|
|
3374
|
+
files
|
|
3375
|
+
};
|
|
3376
|
+
}
|
|
3377
|
+
};
|
|
3378
|
+
|
|
2787
3379
|
// src/services/code-unit.service.ts
|
|
2788
3380
|
var CodeUnitService = class {
|
|
2789
3381
|
extractCodeUnit(code, symbolName, language) {
|
|
@@ -2968,6 +3560,8 @@ var INTENT_FILE_BOOSTS = {
|
|
|
2968
3560
|
// Stronger penalty - internal code less useful
|
|
2969
3561
|
test: 0.8,
|
|
2970
3562
|
config: 0.7,
|
|
3563
|
+
changelog: 0.6,
|
|
3564
|
+
// Changelogs rarely answer "how to" questions
|
|
2971
3565
|
other: 0.9
|
|
2972
3566
|
},
|
|
2973
3567
|
implementation: {
|
|
@@ -2980,6 +3574,8 @@ var INTENT_FILE_BOOSTS = {
|
|
|
2980
3574
|
// Internal code can be relevant
|
|
2981
3575
|
test: 1,
|
|
2982
3576
|
config: 0.95,
|
|
3577
|
+
changelog: 0.8,
|
|
3578
|
+
// Might reference implementation changes
|
|
2983
3579
|
other: 1
|
|
2984
3580
|
},
|
|
2985
3581
|
conceptual: {
|
|
@@ -2990,6 +3586,8 @@ var INTENT_FILE_BOOSTS = {
|
|
|
2990
3586
|
"source-internal": 0.9,
|
|
2991
3587
|
test: 0.9,
|
|
2992
3588
|
config: 0.85,
|
|
3589
|
+
changelog: 0.7,
|
|
3590
|
+
// Sometimes explains concepts behind changes
|
|
2993
3591
|
other: 0.95
|
|
2994
3592
|
},
|
|
2995
3593
|
comparison: {
|
|
@@ -3000,6 +3598,8 @@ var INTENT_FILE_BOOSTS = {
|
|
|
3000
3598
|
"source-internal": 0.85,
|
|
3001
3599
|
test: 0.9,
|
|
3002
3600
|
config: 0.85,
|
|
3601
|
+
changelog: 0.9,
|
|
3602
|
+
// Version comparisons can be useful
|
|
3003
3603
|
other: 0.95
|
|
3004
3604
|
},
|
|
3005
3605
|
debugging: {
|
|
@@ -3012,6 +3612,8 @@ var INTENT_FILE_BOOSTS = {
|
|
|
3012
3612
|
test: 1.05,
|
|
3013
3613
|
// Tests can show expected behavior
|
|
3014
3614
|
config: 0.9,
|
|
3615
|
+
changelog: 1.1,
|
|
3616
|
+
// Often contains bug fixes and known issues
|
|
3015
3617
|
other: 1
|
|
3016
3618
|
}
|
|
3017
3619
|
};
|
|
@@ -3094,6 +3696,17 @@ function classifyQueryIntents(query) {
|
|
|
3094
3696
|
function getPrimaryIntent(intents) {
|
|
3095
3697
|
return intents[0]?.intent ?? "how-to";
|
|
3096
3698
|
}
|
|
3699
|
+
function mapSearchIntentToQueryIntent(intent) {
|
|
3700
|
+
switch (intent) {
|
|
3701
|
+
case "find-pattern":
|
|
3702
|
+
case "find-implementation":
|
|
3703
|
+
case "find-definition":
|
|
3704
|
+
return "implementation";
|
|
3705
|
+
case "find-usage":
|
|
3706
|
+
case "find-documentation":
|
|
3707
|
+
return "how-to";
|
|
3708
|
+
}
|
|
3709
|
+
}
|
|
3097
3710
|
var RRF_PRESETS = {
|
|
3098
3711
|
code: { k: 20, vectorWeight: 0.6, ftsWeight: 0.4 },
|
|
3099
3712
|
web: { k: 30, vectorWeight: 0.55, ftsWeight: 0.45 }
|
|
@@ -3108,12 +3721,27 @@ var SearchService = class {
|
|
|
3108
3721
|
codeUnitService;
|
|
3109
3722
|
codeGraphService;
|
|
3110
3723
|
graphCache;
|
|
3111
|
-
|
|
3724
|
+
searchConfig;
|
|
3725
|
+
unsubscribeCacheInvalidation;
|
|
3726
|
+
constructor(lanceStore, embeddingEngine, codeGraphService, searchConfig) {
|
|
3112
3727
|
this.lanceStore = lanceStore;
|
|
3113
3728
|
this.embeddingEngine = embeddingEngine;
|
|
3114
3729
|
this.codeUnitService = new CodeUnitService();
|
|
3115
3730
|
this.codeGraphService = codeGraphService;
|
|
3116
3731
|
this.graphCache = /* @__PURE__ */ new Map();
|
|
3732
|
+
this.searchConfig = searchConfig;
|
|
3733
|
+
if (codeGraphService) {
|
|
3734
|
+
this.unsubscribeCacheInvalidation = codeGraphService.onCacheInvalidation((event) => {
|
|
3735
|
+
this.graphCache.delete(event.storeId);
|
|
3736
|
+
});
|
|
3737
|
+
}
|
|
3738
|
+
}
|
|
3739
|
+
/**
|
|
3740
|
+
* Clean up resources (unsubscribe from events).
|
|
3741
|
+
* Call this when destroying the service.
|
|
3742
|
+
*/
|
|
3743
|
+
cleanup() {
|
|
3744
|
+
this.unsubscribeCacheInvalidation?.();
|
|
3117
3745
|
}
|
|
3118
3746
|
/**
|
|
3119
3747
|
* Load code graph for a store, with caching.
|
|
@@ -3141,12 +3769,12 @@ var SearchService = class {
|
|
|
3141
3769
|
}
|
|
3142
3770
|
async search(query) {
|
|
3143
3771
|
const startTime = Date.now();
|
|
3144
|
-
const mode = query.mode ?? "hybrid";
|
|
3145
|
-
const limit = query.limit ?? 10;
|
|
3772
|
+
const mode = query.mode ?? this.searchConfig?.defaultMode ?? "hybrid";
|
|
3773
|
+
const limit = query.limit ?? this.searchConfig?.defaultLimit ?? 10;
|
|
3146
3774
|
const stores = query.stores ?? [];
|
|
3147
3775
|
const detail = query.detail ?? "minimal";
|
|
3148
3776
|
const intents = classifyQueryIntents(query.query);
|
|
3149
|
-
const primaryIntent = getPrimaryIntent(intents);
|
|
3777
|
+
const primaryIntent = query.intent !== void 0 ? mapSearchIntentToQueryIntent(query.intent) : getPrimaryIntent(intents);
|
|
3150
3778
|
logger2.debug(
|
|
3151
3779
|
{
|
|
3152
3780
|
query: query.query,
|
|
@@ -3155,7 +3783,8 @@ var SearchService = class {
|
|
|
3155
3783
|
stores,
|
|
3156
3784
|
detail,
|
|
3157
3785
|
intent: primaryIntent,
|
|
3158
|
-
|
|
3786
|
+
userIntent: query.intent,
|
|
3787
|
+
autoClassifiedIntents: intents,
|
|
3159
3788
|
minRelevance: query.minRelevance
|
|
3160
3789
|
},
|
|
3161
3790
|
"Search query received"
|
|
@@ -3166,7 +3795,7 @@ var SearchService = class {
|
|
|
3166
3795
|
if (mode === "vector") {
|
|
3167
3796
|
const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
|
|
3168
3797
|
maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
|
|
3169
|
-
allResults =
|
|
3798
|
+
allResults = this.normalizeAndFilterScores(rawResults, query.threshold).slice(0, fetchLimit);
|
|
3170
3799
|
} else if (mode === "fts") {
|
|
3171
3800
|
allResults = await this.ftsSearch(query.query, stores, fetchLimit);
|
|
3172
3801
|
} else {
|
|
@@ -3179,28 +3808,35 @@ var SearchService = class {
|
|
|
3179
3808
|
allResults = hybridResult.results;
|
|
3180
3809
|
maxRawScore = hybridResult.maxRawScore;
|
|
3181
3810
|
}
|
|
3182
|
-
if (query.minRelevance !== void 0
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3811
|
+
if (query.minRelevance !== void 0) {
|
|
3812
|
+
if (mode === "fts") {
|
|
3813
|
+
logger2.warn(
|
|
3814
|
+
{ query: query.query, minRelevance: query.minRelevance },
|
|
3815
|
+
"minRelevance filter ignored in FTS mode (no vector scores available)"
|
|
3816
|
+
);
|
|
3817
|
+
} else if (maxRawScore < query.minRelevance) {
|
|
3818
|
+
const timeMs2 = Date.now() - startTime;
|
|
3819
|
+
logger2.info(
|
|
3820
|
+
{
|
|
3821
|
+
query: query.query,
|
|
3822
|
+
mode,
|
|
3823
|
+
maxRawScore,
|
|
3824
|
+
minRelevance: query.minRelevance,
|
|
3825
|
+
timeMs: timeMs2
|
|
3826
|
+
},
|
|
3827
|
+
"Search filtered by minRelevance - no sufficiently relevant results"
|
|
3828
|
+
);
|
|
3829
|
+
return {
|
|
3186
3830
|
query: query.query,
|
|
3187
3831
|
mode,
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
mode,
|
|
3197
|
-
stores,
|
|
3198
|
-
results: [],
|
|
3199
|
-
totalResults: 0,
|
|
3200
|
-
timeMs: timeMs2,
|
|
3201
|
-
confidence: this.calculateConfidence(maxRawScore),
|
|
3202
|
-
maxRawScore
|
|
3203
|
-
};
|
|
3832
|
+
stores,
|
|
3833
|
+
results: [],
|
|
3834
|
+
totalResults: 0,
|
|
3835
|
+
timeMs: timeMs2,
|
|
3836
|
+
confidence: this.calculateConfidence(maxRawScore),
|
|
3837
|
+
maxRawScore
|
|
3838
|
+
};
|
|
3839
|
+
}
|
|
3204
3840
|
}
|
|
3205
3841
|
const dedupedResults = this.deduplicateBySource(allResults, query.query);
|
|
3206
3842
|
const resultsToEnhance = dedupedResults.slice(0, limit);
|
|
@@ -3249,7 +3885,9 @@ var SearchService = class {
|
|
|
3249
3885
|
const bySource = /* @__PURE__ */ new Map();
|
|
3250
3886
|
const queryTerms = query.toLowerCase().split(/\s+/).filter((t2) => t2.length > 2);
|
|
3251
3887
|
for (const result of results) {
|
|
3252
|
-
const
|
|
3888
|
+
const storeId = result.metadata.storeId;
|
|
3889
|
+
const source = result.metadata.path ?? result.metadata.url ?? result.id;
|
|
3890
|
+
const sourceKey = `${storeId}:${source}`;
|
|
3253
3891
|
const existing = bySource.get(sourceKey);
|
|
3254
3892
|
if (!existing) {
|
|
3255
3893
|
bySource.set(sourceKey, result);
|
|
@@ -3320,11 +3958,6 @@ var SearchService = class {
|
|
|
3320
3958
|
}
|
|
3321
3959
|
return results.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
3322
3960
|
}
|
|
3323
|
-
async vectorSearch(query, stores, limit, threshold) {
|
|
3324
|
-
const results = await this.vectorSearchRaw(query, stores, limit);
|
|
3325
|
-
const normalized = this.normalizeAndFilterScores(results, threshold);
|
|
3326
|
-
return normalized.slice(0, limit);
|
|
3327
|
-
}
|
|
3328
3961
|
async ftsSearch(query, stores, limit) {
|
|
3329
3962
|
const results = [];
|
|
3330
3963
|
for (const storeId of stores) {
|
|
@@ -3484,6 +4117,9 @@ var SearchService = class {
|
|
|
3484
4117
|
case "config":
|
|
3485
4118
|
baseBoost = 0.5;
|
|
3486
4119
|
break;
|
|
4120
|
+
case "changelog":
|
|
4121
|
+
baseBoost = 0.7;
|
|
4122
|
+
break;
|
|
3487
4123
|
default:
|
|
3488
4124
|
baseBoost = 1;
|
|
3489
4125
|
}
|
|
@@ -3889,42 +4525,53 @@ var SearchService = class {
|
|
|
3889
4525
|
};
|
|
3890
4526
|
|
|
3891
4527
|
// src/services/store-definition.service.ts
|
|
3892
|
-
import { readFile as
|
|
3893
|
-
import {
|
|
4528
|
+
import { readFile as readFile7, access as access4 } from "fs/promises";
|
|
4529
|
+
import { resolve as resolve2, isAbsolute as isAbsolute2, join as join9 } from "path";
|
|
3894
4530
|
|
|
3895
4531
|
// src/types/store-definition.ts
|
|
3896
|
-
import { z as
|
|
3897
|
-
var BaseStoreDefinitionSchema =
|
|
3898
|
-
name:
|
|
3899
|
-
description:
|
|
3900
|
-
tags:
|
|
4532
|
+
import { z as z3 } from "zod";
|
|
4533
|
+
var BaseStoreDefinitionSchema = z3.object({
|
|
4534
|
+
name: z3.string().min(1, "Store name is required"),
|
|
4535
|
+
description: z3.string().optional(),
|
|
4536
|
+
tags: z3.array(z3.string()).optional()
|
|
3901
4537
|
});
|
|
3902
4538
|
var FileStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
|
|
3903
|
-
type:
|
|
3904
|
-
path:
|
|
4539
|
+
type: z3.literal("file"),
|
|
4540
|
+
path: z3.string().min(1, "Path is required for file stores")
|
|
3905
4541
|
});
|
|
4542
|
+
var GitUrlSchema = z3.string().refine(
|
|
4543
|
+
(val) => {
|
|
4544
|
+
try {
|
|
4545
|
+
new URL(val);
|
|
4546
|
+
return true;
|
|
4547
|
+
} catch {
|
|
4548
|
+
return /^git@[\w.-]+:[\w./-]+$/.test(val);
|
|
4549
|
+
}
|
|
4550
|
+
},
|
|
4551
|
+
{ message: "Must be a valid URL or SSH URL (git@host:path)" }
|
|
4552
|
+
);
|
|
3906
4553
|
var RepoStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
|
|
3907
|
-
type:
|
|
3908
|
-
url:
|
|
3909
|
-
branch:
|
|
3910
|
-
depth:
|
|
4554
|
+
type: z3.literal("repo"),
|
|
4555
|
+
url: GitUrlSchema,
|
|
4556
|
+
branch: z3.string().optional(),
|
|
4557
|
+
depth: z3.number().int().positive("Depth must be a positive integer").optional()
|
|
3911
4558
|
});
|
|
3912
4559
|
var WebStoreDefinitionSchema = BaseStoreDefinitionSchema.extend({
|
|
3913
|
-
type:
|
|
3914
|
-
url:
|
|
3915
|
-
depth:
|
|
3916
|
-
maxPages:
|
|
3917
|
-
crawlInstructions:
|
|
3918
|
-
extractInstructions:
|
|
4560
|
+
type: z3.literal("web"),
|
|
4561
|
+
url: z3.url("Valid URL is required for web stores"),
|
|
4562
|
+
depth: z3.number().int().min(0, "Depth must be non-negative").default(1),
|
|
4563
|
+
maxPages: z3.number().int().positive("maxPages must be a positive integer").optional(),
|
|
4564
|
+
crawlInstructions: z3.string().optional(),
|
|
4565
|
+
extractInstructions: z3.string().optional()
|
|
3919
4566
|
});
|
|
3920
|
-
var StoreDefinitionSchema =
|
|
4567
|
+
var StoreDefinitionSchema = z3.discriminatedUnion("type", [
|
|
3921
4568
|
FileStoreDefinitionSchema,
|
|
3922
4569
|
RepoStoreDefinitionSchema,
|
|
3923
4570
|
WebStoreDefinitionSchema
|
|
3924
4571
|
]);
|
|
3925
|
-
var StoreDefinitionsConfigSchema =
|
|
3926
|
-
version:
|
|
3927
|
-
stores:
|
|
4572
|
+
var StoreDefinitionsConfigSchema = z3.object({
|
|
4573
|
+
version: z3.literal(1),
|
|
4574
|
+
stores: z3.array(StoreDefinitionSchema)
|
|
3928
4575
|
});
|
|
3929
4576
|
function isFileStoreDefinition(def) {
|
|
3930
4577
|
return def.type === "file";
|
|
@@ -3943,7 +4590,7 @@ var DEFAULT_STORE_DEFINITIONS_CONFIG = {
|
|
|
3943
4590
|
// src/services/store-definition.service.ts
|
|
3944
4591
|
async function fileExists3(path4) {
|
|
3945
4592
|
try {
|
|
3946
|
-
await
|
|
4593
|
+
await access4(path4);
|
|
3947
4594
|
return true;
|
|
3948
4595
|
} catch {
|
|
3949
4596
|
return false;
|
|
@@ -3955,7 +4602,7 @@ var StoreDefinitionService = class {
|
|
|
3955
4602
|
config = null;
|
|
3956
4603
|
constructor(projectRoot) {
|
|
3957
4604
|
this.projectRoot = projectRoot ?? ProjectRootService.resolve();
|
|
3958
|
-
this.configPath =
|
|
4605
|
+
this.configPath = join9(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
|
|
3959
4606
|
}
|
|
3960
4607
|
/**
|
|
3961
4608
|
* Load store definitions from config file.
|
|
@@ -3974,7 +4621,7 @@ var StoreDefinitionService = class {
|
|
|
3974
4621
|
};
|
|
3975
4622
|
return this.config;
|
|
3976
4623
|
}
|
|
3977
|
-
const content = await
|
|
4624
|
+
const content = await readFile7(this.configPath, "utf-8");
|
|
3978
4625
|
let parsed;
|
|
3979
4626
|
try {
|
|
3980
4627
|
parsed = JSON.parse(content);
|
|
@@ -3994,8 +4641,7 @@ var StoreDefinitionService = class {
|
|
|
3994
4641
|
* Save store definitions to config file.
|
|
3995
4642
|
*/
|
|
3996
4643
|
async save(config) {
|
|
3997
|
-
await
|
|
3998
|
-
await writeFile4(this.configPath, JSON.stringify(config, null, 2));
|
|
4644
|
+
await atomicWriteFile(this.configPath, JSON.stringify(config, null, 2));
|
|
3999
4645
|
this.config = config;
|
|
4000
4646
|
}
|
|
4001
4647
|
/**
|
|
@@ -4067,7 +4713,7 @@ var StoreDefinitionService = class {
|
|
|
4067
4713
|
* Resolve a file store path relative to project root.
|
|
4068
4714
|
*/
|
|
4069
4715
|
resolvePath(path4) {
|
|
4070
|
-
if (
|
|
4716
|
+
if (isAbsolute2(path4)) {
|
|
4071
4717
|
return path4;
|
|
4072
4718
|
}
|
|
4073
4719
|
return resolve2(this.projectRoot, path4);
|
|
@@ -4094,8 +4740,8 @@ var StoreDefinitionService = class {
|
|
|
4094
4740
|
|
|
4095
4741
|
// src/services/store.service.ts
|
|
4096
4742
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
4097
|
-
import { readFile as
|
|
4098
|
-
import { join as
|
|
4743
|
+
import { readFile as readFile8, mkdir as mkdir5, stat as stat2, access as access5 } from "fs/promises";
|
|
4744
|
+
import { join as join10, resolve as resolve3 } from "path";
|
|
4099
4745
|
|
|
4100
4746
|
// src/plugin/git-clone.ts
|
|
4101
4747
|
import { spawn } from "child_process";
|
|
@@ -4126,6 +4772,9 @@ async function cloneRepository(options) {
|
|
|
4126
4772
|
});
|
|
4127
4773
|
});
|
|
4128
4774
|
}
|
|
4775
|
+
function isGitUrl(source) {
|
|
4776
|
+
return source.startsWith("http://") || source.startsWith("https://") || source.startsWith("git@");
|
|
4777
|
+
}
|
|
4129
4778
|
function extractRepoName(url) {
|
|
4130
4779
|
const match = /\/([^/]+?)(\.git)?$/.exec(url);
|
|
4131
4780
|
const name = match?.[1];
|
|
@@ -4138,7 +4787,7 @@ function extractRepoName(url) {
|
|
|
4138
4787
|
// src/services/store.service.ts
|
|
4139
4788
|
async function fileExists4(path4) {
|
|
4140
4789
|
try {
|
|
4141
|
-
await
|
|
4790
|
+
await access5(path4);
|
|
4142
4791
|
return true;
|
|
4143
4792
|
} catch {
|
|
4144
4793
|
return false;
|
|
@@ -4148,11 +4797,13 @@ var StoreService = class {
|
|
|
4148
4797
|
dataDir;
|
|
4149
4798
|
definitionService;
|
|
4150
4799
|
gitignoreService;
|
|
4800
|
+
projectRoot;
|
|
4151
4801
|
registry = { stores: [] };
|
|
4152
4802
|
constructor(dataDir, options) {
|
|
4153
4803
|
this.dataDir = dataDir;
|
|
4154
4804
|
this.definitionService = options?.definitionService ?? void 0;
|
|
4155
4805
|
this.gitignoreService = options?.gitignoreService ?? void 0;
|
|
4806
|
+
this.projectRoot = options?.projectRoot ?? void 0;
|
|
4156
4807
|
}
|
|
4157
4808
|
async initialize() {
|
|
4158
4809
|
await mkdir5(this.dataDir, { recursive: true });
|
|
@@ -4160,6 +4811,7 @@ var StoreService = class {
|
|
|
4160
4811
|
}
|
|
4161
4812
|
/**
|
|
4162
4813
|
* Convert a Store and CreateStoreInput to a StoreDefinition for persistence.
|
|
4814
|
+
* Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
|
|
4163
4815
|
*/
|
|
4164
4816
|
createDefinitionFromStore(store, input) {
|
|
4165
4817
|
const tags = store.tags !== void 0 ? [...store.tags] : void 0;
|
|
@@ -4181,10 +4833,13 @@ var StoreService = class {
|
|
|
4181
4833
|
}
|
|
4182
4834
|
case "repo": {
|
|
4183
4835
|
const repoStore = store;
|
|
4836
|
+
if (repoStore.url === void 0) {
|
|
4837
|
+
return void 0;
|
|
4838
|
+
}
|
|
4184
4839
|
const repoDef = {
|
|
4185
4840
|
...base,
|
|
4186
4841
|
type: "repo",
|
|
4187
|
-
url: repoStore.url
|
|
4842
|
+
url: repoStore.url,
|
|
4188
4843
|
branch: repoStore.branch,
|
|
4189
4844
|
depth: input.depth
|
|
4190
4845
|
};
|
|
@@ -4196,7 +4851,58 @@ var StoreService = class {
|
|
|
4196
4851
|
...base,
|
|
4197
4852
|
type: "web",
|
|
4198
4853
|
url: webStore.url,
|
|
4199
|
-
depth: webStore.depth
|
|
4854
|
+
depth: webStore.depth,
|
|
4855
|
+
maxPages: input.maxPages,
|
|
4856
|
+
crawlInstructions: input.crawlInstructions,
|
|
4857
|
+
extractInstructions: input.extractInstructions
|
|
4858
|
+
};
|
|
4859
|
+
return webDef;
|
|
4860
|
+
}
|
|
4861
|
+
}
|
|
4862
|
+
}
|
|
4863
|
+
/**
|
|
4864
|
+
* Create a StoreDefinition from an existing store (without original input).
|
|
4865
|
+
* Used when updating/renaming stores where we don't have the original input.
|
|
4866
|
+
* Returns undefined for stores that shouldn't be persisted (e.g., local repo stores).
|
|
4867
|
+
*/
|
|
4868
|
+
createDefinitionFromExistingStore(store) {
|
|
4869
|
+
const tags = store.tags !== void 0 ? [...store.tags] : void 0;
|
|
4870
|
+
const base = {
|
|
4871
|
+
name: store.name,
|
|
4872
|
+
description: store.description,
|
|
4873
|
+
tags
|
|
4874
|
+
};
|
|
4875
|
+
switch (store.type) {
|
|
4876
|
+
case "file": {
|
|
4877
|
+
const fileDef = {
|
|
4878
|
+
...base,
|
|
4879
|
+
type: "file",
|
|
4880
|
+
path: store.path
|
|
4881
|
+
};
|
|
4882
|
+
return fileDef;
|
|
4883
|
+
}
|
|
4884
|
+
case "repo": {
|
|
4885
|
+
if (store.url === void 0) {
|
|
4886
|
+
return void 0;
|
|
4887
|
+
}
|
|
4888
|
+
const repoDef = {
|
|
4889
|
+
...base,
|
|
4890
|
+
type: "repo",
|
|
4891
|
+
url: store.url,
|
|
4892
|
+
branch: store.branch,
|
|
4893
|
+
depth: store.depth
|
|
4894
|
+
};
|
|
4895
|
+
return repoDef;
|
|
4896
|
+
}
|
|
4897
|
+
case "web": {
|
|
4898
|
+
const webDef = {
|
|
4899
|
+
...base,
|
|
4900
|
+
type: "web",
|
|
4901
|
+
url: store.url,
|
|
4902
|
+
depth: store.depth,
|
|
4903
|
+
maxPages: store.maxPages,
|
|
4904
|
+
crawlInstructions: store.crawlInstructions,
|
|
4905
|
+
extractInstructions: store.extractInstructions
|
|
4200
4906
|
};
|
|
4201
4907
|
return webDef;
|
|
4202
4908
|
}
|
|
@@ -4218,9 +4924,9 @@ var StoreService = class {
|
|
|
4218
4924
|
if (input.path === void 0) {
|
|
4219
4925
|
return err(new Error("Path is required for file stores"));
|
|
4220
4926
|
}
|
|
4221
|
-
const normalizedPath = resolve3(input.path);
|
|
4927
|
+
const normalizedPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, input.path) : resolve3(input.path);
|
|
4222
4928
|
try {
|
|
4223
|
-
const stats = await
|
|
4929
|
+
const stats = await stat2(normalizedPath);
|
|
4224
4930
|
if (!stats.isDirectory()) {
|
|
4225
4931
|
return err(new Error(`Path is not a directory: ${normalizedPath}`));
|
|
4226
4932
|
}
|
|
@@ -4243,7 +4949,7 @@ var StoreService = class {
|
|
|
4243
4949
|
case "repo": {
|
|
4244
4950
|
let repoPath = input.path;
|
|
4245
4951
|
if (input.url !== void 0) {
|
|
4246
|
-
const cloneDir =
|
|
4952
|
+
const cloneDir = join10(this.dataDir, "repos", id);
|
|
4247
4953
|
const result = await cloneRepository({
|
|
4248
4954
|
url: input.url,
|
|
4249
4955
|
targetDir: cloneDir,
|
|
@@ -4258,7 +4964,17 @@ var StoreService = class {
|
|
|
4258
4964
|
if (repoPath === void 0) {
|
|
4259
4965
|
return err(new Error("Path or URL required for repo stores"));
|
|
4260
4966
|
}
|
|
4261
|
-
const normalizedRepoPath = resolve3(repoPath);
|
|
4967
|
+
const normalizedRepoPath = this.projectRoot !== void 0 ? resolve3(this.projectRoot, repoPath) : resolve3(repoPath);
|
|
4968
|
+
if (input.url === void 0) {
|
|
4969
|
+
try {
|
|
4970
|
+
const stats = await stat2(normalizedRepoPath);
|
|
4971
|
+
if (!stats.isDirectory()) {
|
|
4972
|
+
return err(new Error(`Path is not a directory: ${normalizedRepoPath}`));
|
|
4973
|
+
}
|
|
4974
|
+
} catch {
|
|
4975
|
+
return err(new Error(`Repository path does not exist: ${normalizedRepoPath}`));
|
|
4976
|
+
}
|
|
4977
|
+
}
|
|
4262
4978
|
store = {
|
|
4263
4979
|
type: "repo",
|
|
4264
4980
|
id,
|
|
@@ -4266,6 +4982,7 @@ var StoreService = class {
|
|
|
4266
4982
|
path: normalizedRepoPath,
|
|
4267
4983
|
url: input.url,
|
|
4268
4984
|
branch: input.branch,
|
|
4985
|
+
depth: input.depth ?? 1,
|
|
4269
4986
|
description: input.description,
|
|
4270
4987
|
tags: input.tags,
|
|
4271
4988
|
status: "ready",
|
|
@@ -4284,6 +5001,9 @@ var StoreService = class {
|
|
|
4284
5001
|
name: input.name,
|
|
4285
5002
|
url: input.url,
|
|
4286
5003
|
depth: input.depth ?? 1,
|
|
5004
|
+
maxPages: input.maxPages,
|
|
5005
|
+
crawlInstructions: input.crawlInstructions,
|
|
5006
|
+
extractInstructions: input.extractInstructions,
|
|
4287
5007
|
description: input.description,
|
|
4288
5008
|
tags: input.tags,
|
|
4289
5009
|
status: "ready",
|
|
@@ -4303,7 +5023,9 @@ var StoreService = class {
|
|
|
4303
5023
|
}
|
|
4304
5024
|
if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
|
|
4305
5025
|
const definition = this.createDefinitionFromStore(store, input);
|
|
4306
|
-
|
|
5026
|
+
if (definition !== void 0) {
|
|
5027
|
+
await this.definitionService.addDefinition(definition);
|
|
5028
|
+
}
|
|
4307
5029
|
}
|
|
4308
5030
|
return ok(store);
|
|
4309
5031
|
}
|
|
@@ -4333,6 +5055,16 @@ var StoreService = class {
|
|
|
4333
5055
|
if (store === void 0) {
|
|
4334
5056
|
return err(new Error(`Store not found: ${id}`));
|
|
4335
5057
|
}
|
|
5058
|
+
if (updates.name?.trim() === "") {
|
|
5059
|
+
return err(new Error("Store name cannot be empty"));
|
|
5060
|
+
}
|
|
5061
|
+
const isRenaming = updates.name !== void 0 && updates.name !== store.name;
|
|
5062
|
+
if (isRenaming) {
|
|
5063
|
+
const existing = this.registry.stores.find((s) => s.name === updates.name && s.id !== id);
|
|
5064
|
+
if (existing !== void 0) {
|
|
5065
|
+
return err(new Error(`Store with name '${updates.name}' already exists`));
|
|
5066
|
+
}
|
|
5067
|
+
}
|
|
4336
5068
|
const updated = {
|
|
4337
5069
|
...store,
|
|
4338
5070
|
...updates,
|
|
@@ -4341,14 +5073,24 @@ var StoreService = class {
|
|
|
4341
5073
|
this.registry.stores[index] = updated;
|
|
4342
5074
|
await this.saveRegistry();
|
|
4343
5075
|
if (this.definitionService !== void 0 && options?.skipDefinitionSync !== true) {
|
|
4344
|
-
|
|
4345
|
-
|
|
4346
|
-
|
|
4347
|
-
|
|
4348
|
-
|
|
4349
|
-
|
|
5076
|
+
if (isRenaming) {
|
|
5077
|
+
await this.definitionService.removeDefinition(store.name);
|
|
5078
|
+
const newDefinition = this.createDefinitionFromExistingStore(updated);
|
|
5079
|
+
if (newDefinition !== void 0) {
|
|
5080
|
+
await this.definitionService.addDefinition(newDefinition);
|
|
5081
|
+
}
|
|
5082
|
+
} else {
|
|
5083
|
+
const defUpdates = {};
|
|
5084
|
+
if (updates.description !== void 0) {
|
|
5085
|
+
defUpdates.description = updates.description;
|
|
5086
|
+
}
|
|
5087
|
+
if (updates.tags !== void 0) {
|
|
5088
|
+
defUpdates.tags = [...updates.tags];
|
|
5089
|
+
}
|
|
5090
|
+
if (Object.keys(defUpdates).length > 0) {
|
|
5091
|
+
await this.definitionService.updateDefinition(store.name, defUpdates);
|
|
5092
|
+
}
|
|
4350
5093
|
}
|
|
4351
|
-
await this.definitionService.updateDefinition(store.name, defUpdates);
|
|
4352
5094
|
}
|
|
4353
5095
|
return ok(updated);
|
|
4354
5096
|
}
|
|
@@ -4370,14 +5112,14 @@ var StoreService = class {
|
|
|
4370
5112
|
return ok(void 0);
|
|
4371
5113
|
}
|
|
4372
5114
|
async loadRegistry() {
|
|
4373
|
-
const registryPath =
|
|
5115
|
+
const registryPath = join10(this.dataDir, "stores.json");
|
|
4374
5116
|
const exists = await fileExists4(registryPath);
|
|
4375
5117
|
if (!exists) {
|
|
4376
5118
|
this.registry = { stores: [] };
|
|
4377
5119
|
await this.saveRegistry();
|
|
4378
5120
|
return;
|
|
4379
5121
|
}
|
|
4380
|
-
const content = await
|
|
5122
|
+
const content = await readFile8(registryPath, "utf-8");
|
|
4381
5123
|
try {
|
|
4382
5124
|
const data = JSON.parse(content);
|
|
4383
5125
|
this.registry = {
|
|
@@ -4395,8 +5137,8 @@ var StoreService = class {
|
|
|
4395
5137
|
}
|
|
4396
5138
|
}
|
|
4397
5139
|
async saveRegistry() {
|
|
4398
|
-
const registryPath =
|
|
4399
|
-
await
|
|
5140
|
+
const registryPath = join10(this.dataDir, "stores.json");
|
|
5141
|
+
await atomicWriteFile(registryPath, JSON.stringify(this.registry, null, 2));
|
|
4400
5142
|
}
|
|
4401
5143
|
};
|
|
4402
5144
|
|
|
@@ -4410,33 +5152,33 @@ import { fileURLToPath } from "url";
|
|
|
4410
5152
|
import { ZodError } from "zod";
|
|
4411
5153
|
|
|
4412
5154
|
// src/crawl/schemas.ts
|
|
4413
|
-
import { z as
|
|
4414
|
-
var CrawledLinkSchema =
|
|
4415
|
-
href:
|
|
4416
|
-
text:
|
|
4417
|
-
title:
|
|
4418
|
-
base_domain:
|
|
4419
|
-
head_data:
|
|
4420
|
-
head_extraction_status:
|
|
4421
|
-
head_extraction_error:
|
|
4422
|
-
intrinsic_score:
|
|
4423
|
-
contextual_score:
|
|
4424
|
-
total_score:
|
|
5155
|
+
import { z as z4 } from "zod";
|
|
5156
|
+
var CrawledLinkSchema = z4.object({
|
|
5157
|
+
href: z4.string(),
|
|
5158
|
+
text: z4.string(),
|
|
5159
|
+
title: z4.string().optional(),
|
|
5160
|
+
base_domain: z4.string().optional(),
|
|
5161
|
+
head_data: z4.unknown().optional(),
|
|
5162
|
+
head_extraction_status: z4.unknown().optional(),
|
|
5163
|
+
head_extraction_error: z4.unknown().optional(),
|
|
5164
|
+
intrinsic_score: z4.number().optional(),
|
|
5165
|
+
contextual_score: z4.unknown().optional(),
|
|
5166
|
+
total_score: z4.unknown().optional()
|
|
4425
5167
|
});
|
|
4426
|
-
var CrawlPageSchema =
|
|
4427
|
-
url:
|
|
4428
|
-
title:
|
|
4429
|
-
content:
|
|
4430
|
-
links:
|
|
4431
|
-
crawledAt:
|
|
5168
|
+
var CrawlPageSchema = z4.object({
|
|
5169
|
+
url: z4.string(),
|
|
5170
|
+
title: z4.string(),
|
|
5171
|
+
content: z4.string(),
|
|
5172
|
+
links: z4.array(z4.string()),
|
|
5173
|
+
crawledAt: z4.string()
|
|
4432
5174
|
});
|
|
4433
|
-
var CrawlResultSchema =
|
|
4434
|
-
pages:
|
|
5175
|
+
var CrawlResultSchema = z4.object({
|
|
5176
|
+
pages: z4.array(CrawlPageSchema)
|
|
4435
5177
|
});
|
|
4436
|
-
var HeadlessResultSchema =
|
|
4437
|
-
html:
|
|
4438
|
-
markdown:
|
|
4439
|
-
links:
|
|
5178
|
+
var HeadlessResultSchema = z4.object({
|
|
5179
|
+
html: z4.string(),
|
|
5180
|
+
markdown: z4.string(),
|
|
5181
|
+
links: z4.array(z4.union([CrawledLinkSchema, z4.string()]))
|
|
4440
5182
|
});
|
|
4441
5183
|
function validateHeadlessResult(data) {
|
|
4442
5184
|
return HeadlessResultSchema.parse(data);
|
|
@@ -4444,33 +5186,33 @@ function validateHeadlessResult(data) {
|
|
|
4444
5186
|
function validateCrawlResult(data) {
|
|
4445
5187
|
return CrawlResultSchema.parse(data);
|
|
4446
5188
|
}
|
|
4447
|
-
var MethodInfoSchema =
|
|
4448
|
-
name:
|
|
4449
|
-
async:
|
|
4450
|
-
signature:
|
|
4451
|
-
startLine:
|
|
4452
|
-
endLine:
|
|
4453
|
-
calls:
|
|
5189
|
+
var MethodInfoSchema = z4.object({
|
|
5190
|
+
name: z4.string(),
|
|
5191
|
+
async: z4.boolean(),
|
|
5192
|
+
signature: z4.string(),
|
|
5193
|
+
startLine: z4.number(),
|
|
5194
|
+
endLine: z4.number(),
|
|
5195
|
+
calls: z4.array(z4.string())
|
|
4454
5196
|
});
|
|
4455
|
-
var CodeNodeSchema =
|
|
4456
|
-
type:
|
|
4457
|
-
name:
|
|
4458
|
-
exported:
|
|
4459
|
-
startLine:
|
|
4460
|
-
endLine:
|
|
4461
|
-
async:
|
|
4462
|
-
signature:
|
|
4463
|
-
calls:
|
|
4464
|
-
methods:
|
|
5197
|
+
var CodeNodeSchema = z4.object({
|
|
5198
|
+
type: z4.enum(["function", "class"]),
|
|
5199
|
+
name: z4.string(),
|
|
5200
|
+
exported: z4.boolean(),
|
|
5201
|
+
startLine: z4.number(),
|
|
5202
|
+
endLine: z4.number(),
|
|
5203
|
+
async: z4.boolean().optional(),
|
|
5204
|
+
signature: z4.string().optional(),
|
|
5205
|
+
calls: z4.array(z4.string()).optional(),
|
|
5206
|
+
methods: z4.array(MethodInfoSchema).optional()
|
|
4465
5207
|
});
|
|
4466
|
-
var ImportInfoSchema =
|
|
4467
|
-
source:
|
|
4468
|
-
imported:
|
|
4469
|
-
alias:
|
|
5208
|
+
var ImportInfoSchema = z4.object({
|
|
5209
|
+
source: z4.string(),
|
|
5210
|
+
imported: z4.string(),
|
|
5211
|
+
alias: z4.string().optional().nullable()
|
|
4470
5212
|
});
|
|
4471
|
-
var ParsePythonResultSchema =
|
|
4472
|
-
nodes:
|
|
4473
|
-
imports:
|
|
5213
|
+
var ParsePythonResultSchema = z4.object({
|
|
5214
|
+
nodes: z4.array(CodeNodeSchema),
|
|
5215
|
+
imports: z4.array(ImportInfoSchema)
|
|
4474
5216
|
});
|
|
4475
5217
|
function validateParsePythonResult(data) {
|
|
4476
5218
|
return ParsePythonResultSchema.parse(data);
|
|
@@ -4478,6 +5220,15 @@ function validateParsePythonResult(data) {
|
|
|
4478
5220
|
|
|
4479
5221
|
// src/crawl/bridge.ts
|
|
4480
5222
|
var logger3 = createLogger("python-bridge");
|
|
5223
|
+
function getPythonExecutable() {
|
|
5224
|
+
return process.platform === "win32" ? "python" : "python3";
|
|
5225
|
+
}
|
|
5226
|
+
function getVenvPythonPath(pluginRoot) {
|
|
5227
|
+
if (process.platform === "win32") {
|
|
5228
|
+
return path3.join(pluginRoot, ".venv", "Scripts", "python.exe");
|
|
5229
|
+
}
|
|
5230
|
+
return path3.join(pluginRoot, ".venv", "bin", "python3");
|
|
5231
|
+
}
|
|
4481
5232
|
var PythonBridge = class {
|
|
4482
5233
|
process = null;
|
|
4483
5234
|
pending = /* @__PURE__ */ new Map();
|
|
@@ -4487,20 +5238,21 @@ var PythonBridge = class {
|
|
|
4487
5238
|
start() {
|
|
4488
5239
|
if (this.process) return Promise.resolve();
|
|
4489
5240
|
const currentFilePath = fileURLToPath(import.meta.url);
|
|
4490
|
-
const
|
|
5241
|
+
const distPattern = `${path3.sep}dist${path3.sep}`;
|
|
5242
|
+
const isProduction = currentFilePath.includes(distPattern);
|
|
4491
5243
|
let pythonWorkerPath;
|
|
4492
5244
|
let pythonPath;
|
|
4493
5245
|
if (isProduction) {
|
|
4494
|
-
const distIndex = currentFilePath.indexOf(
|
|
5246
|
+
const distIndex = currentFilePath.indexOf(distPattern);
|
|
4495
5247
|
const pluginRoot = currentFilePath.substring(0, distIndex);
|
|
4496
5248
|
pythonWorkerPath = path3.join(pluginRoot, "python", "crawl_worker.py");
|
|
4497
|
-
const venvPython =
|
|
4498
|
-
pythonPath = existsSync4(venvPython) ? venvPython :
|
|
5249
|
+
const venvPython = getVenvPythonPath(pluginRoot);
|
|
5250
|
+
pythonPath = existsSync4(venvPython) ? venvPython : getPythonExecutable();
|
|
4499
5251
|
} else {
|
|
4500
5252
|
const srcDir = path3.dirname(path3.dirname(currentFilePath));
|
|
4501
5253
|
const projectRoot = path3.dirname(srcDir);
|
|
4502
5254
|
pythonWorkerPath = path3.join(projectRoot, "python", "crawl_worker.py");
|
|
4503
|
-
pythonPath =
|
|
5255
|
+
pythonPath = getPythonExecutable();
|
|
4504
5256
|
}
|
|
4505
5257
|
logger3.debug(
|
|
4506
5258
|
{ pythonWorkerPath, pythonPath, currentFilePath, isProduction },
|
|
@@ -4737,16 +5489,18 @@ var PythonBridge = class {
|
|
|
4737
5489
|
|
|
4738
5490
|
// src/db/embeddings.ts
|
|
4739
5491
|
import { homedir as homedir2 } from "os";
|
|
4740
|
-
import { join as
|
|
5492
|
+
import { join as join11 } from "path";
|
|
4741
5493
|
import { pipeline, env } from "@huggingface/transformers";
|
|
4742
|
-
env.cacheDir =
|
|
5494
|
+
env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
|
|
4743
5495
|
var EmbeddingEngine = class {
|
|
4744
5496
|
extractor = null;
|
|
5497
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed()
|
|
5498
|
+
_dimensions = null;
|
|
4745
5499
|
modelName;
|
|
4746
|
-
|
|
4747
|
-
constructor(modelName = "Xenova/all-MiniLM-L6-v2",
|
|
5500
|
+
batchSize;
|
|
5501
|
+
constructor(modelName = "Xenova/all-MiniLM-L6-v2", batchSize = 32) {
|
|
4748
5502
|
this.modelName = modelName;
|
|
4749
|
-
this.
|
|
5503
|
+
this.batchSize = batchSize;
|
|
4750
5504
|
}
|
|
4751
5505
|
async initialize() {
|
|
4752
5506
|
if (this.extractor !== null) return;
|
|
@@ -4766,23 +5520,43 @@ var EmbeddingEngine = class {
|
|
|
4766
5520
|
normalize: true
|
|
4767
5521
|
});
|
|
4768
5522
|
const result = Array.from(output.data);
|
|
5523
|
+
this._dimensions ??= result.length;
|
|
4769
5524
|
return result.map((v) => Number(v));
|
|
4770
5525
|
}
|
|
4771
5526
|
async embedBatch(texts) {
|
|
4772
|
-
const BATCH_SIZE = 32;
|
|
4773
5527
|
const results = [];
|
|
4774
|
-
for (let i = 0; i < texts.length; i +=
|
|
4775
|
-
const batch = texts.slice(i, i +
|
|
5528
|
+
for (let i = 0; i < texts.length; i += this.batchSize) {
|
|
5529
|
+
const batch = texts.slice(i, i + this.batchSize);
|
|
4776
5530
|
const batchResults = await Promise.all(batch.map((text) => this.embed(text)));
|
|
4777
5531
|
results.push(...batchResults);
|
|
4778
|
-
if (i +
|
|
5532
|
+
if (i + this.batchSize < texts.length) {
|
|
4779
5533
|
await new Promise((resolve4) => setTimeout(resolve4, 100));
|
|
4780
5534
|
}
|
|
4781
5535
|
}
|
|
4782
5536
|
return results;
|
|
4783
5537
|
}
|
|
5538
|
+
/**
|
|
5539
|
+
* Get cached embedding dimensions. Throws if embed() hasn't been called yet.
|
|
5540
|
+
* Use ensureDimensions() if you need to guarantee dimensions are available.
|
|
5541
|
+
*/
|
|
4784
5542
|
getDimensions() {
|
|
4785
|
-
|
|
5543
|
+
if (this._dimensions === null) {
|
|
5544
|
+
throw new Error("Cannot get dimensions before first embed() call");
|
|
5545
|
+
}
|
|
5546
|
+
return this._dimensions;
|
|
5547
|
+
}
|
|
5548
|
+
/**
|
|
5549
|
+
* Ensure dimensions are available, initializing the model if needed.
|
|
5550
|
+
* Returns the embedding dimensions for the current model.
|
|
5551
|
+
*/
|
|
5552
|
+
async ensureDimensions() {
|
|
5553
|
+
if (this._dimensions === null) {
|
|
5554
|
+
await this.embed("");
|
|
5555
|
+
}
|
|
5556
|
+
if (this._dimensions === null) {
|
|
5557
|
+
throw new Error("Failed to determine embedding dimensions");
|
|
5558
|
+
}
|
|
5559
|
+
return this._dimensions;
|
|
4786
5560
|
}
|
|
4787
5561
|
/**
|
|
4788
5562
|
* Dispose the embedding pipeline to free resources.
|
|
@@ -4800,17 +5574,18 @@ var EmbeddingEngine = class {
|
|
|
4800
5574
|
import * as lancedb from "@lancedb/lancedb";
|
|
4801
5575
|
|
|
4802
5576
|
// src/types/document.ts
|
|
4803
|
-
import { z as
|
|
4804
|
-
var DocumentTypeSchema =
|
|
4805
|
-
var DocumentMetadataSchema =
|
|
4806
|
-
path:
|
|
4807
|
-
url:
|
|
5577
|
+
import { z as z5 } from "zod";
|
|
5578
|
+
var DocumentTypeSchema = z5.enum(["file", "chunk", "web"]);
|
|
5579
|
+
var DocumentMetadataSchema = z5.object({
|
|
5580
|
+
path: z5.string().optional(),
|
|
5581
|
+
url: z5.string().optional(),
|
|
4808
5582
|
type: DocumentTypeSchema,
|
|
4809
|
-
storeId:
|
|
4810
|
-
indexedAt:
|
|
4811
|
-
|
|
4812
|
-
|
|
4813
|
-
|
|
5583
|
+
storeId: z5.string(),
|
|
5584
|
+
indexedAt: z5.string(),
|
|
5585
|
+
// ISO 8601 string (what JSON serialization produces)
|
|
5586
|
+
fileHash: z5.string().optional(),
|
|
5587
|
+
chunkIndex: z5.number().optional(),
|
|
5588
|
+
totalChunks: z5.number().optional()
|
|
4814
5589
|
}).loose();
|
|
4815
5590
|
|
|
4816
5591
|
// src/db/lance.ts
|
|
@@ -4818,10 +5593,23 @@ var LanceStore = class {
|
|
|
4818
5593
|
connection = null;
|
|
4819
5594
|
tables = /* @__PURE__ */ new Map();
|
|
4820
5595
|
dataDir;
|
|
5596
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- set via setDimensions()
|
|
5597
|
+
_dimensions = null;
|
|
4821
5598
|
constructor(dataDir) {
|
|
4822
5599
|
this.dataDir = dataDir;
|
|
4823
5600
|
}
|
|
5601
|
+
/**
|
|
5602
|
+
* Set the embedding dimensions. Must be called before initialize().
|
|
5603
|
+
* This allows dimensions to be derived from the embedding model at runtime.
|
|
5604
|
+
* Idempotent: subsequent calls are ignored if dimensions are already set.
|
|
5605
|
+
*/
|
|
5606
|
+
setDimensions(dimensions) {
|
|
5607
|
+
this._dimensions ??= dimensions;
|
|
5608
|
+
}
|
|
4824
5609
|
async initialize(storeId) {
|
|
5610
|
+
if (this._dimensions === null) {
|
|
5611
|
+
throw new Error("Dimensions not set. Call setDimensions() before initialize().");
|
|
5612
|
+
}
|
|
4825
5613
|
this.connection ??= await lancedb.connect(this.dataDir);
|
|
4826
5614
|
const tableName = this.getTableName(storeId);
|
|
4827
5615
|
const tableNames = await this.connection.tableNames();
|
|
@@ -4830,7 +5618,7 @@ var LanceStore = class {
|
|
|
4830
5618
|
{
|
|
4831
5619
|
id: "__init__",
|
|
4832
5620
|
content: "",
|
|
4833
|
-
vector: new Array(
|
|
5621
|
+
vector: new Array(this._dimensions).fill(0),
|
|
4834
5622
|
metadata: "{}"
|
|
4835
5623
|
}
|
|
4836
5624
|
]);
|
|
@@ -4852,10 +5640,17 @@ var LanceStore = class {
|
|
|
4852
5640
|
await table.add(lanceDocuments);
|
|
4853
5641
|
}
|
|
4854
5642
|
async deleteDocuments(storeId, documentIds) {
|
|
5643
|
+
if (documentIds.length === 0) {
|
|
5644
|
+
return;
|
|
5645
|
+
}
|
|
4855
5646
|
const table = await this.getTable(storeId);
|
|
4856
5647
|
const idList = documentIds.map((id) => `"${id}"`).join(", ");
|
|
4857
5648
|
await table.delete(`id IN (${idList})`);
|
|
4858
5649
|
}
|
|
5650
|
+
async clearAllDocuments(storeId) {
|
|
5651
|
+
const table = await this.getTable(storeId);
|
|
5652
|
+
await table.delete("id IS NOT NULL");
|
|
5653
|
+
}
|
|
4859
5654
|
async search(storeId, vector, limit, _threshold) {
|
|
4860
5655
|
const table = await this.getTable(storeId);
|
|
4861
5656
|
const query = table.vectorSearch(vector).limit(limit).distanceType("cosine");
|
|
@@ -4895,7 +5690,9 @@ var LanceStore = class {
|
|
|
4895
5690
|
}
|
|
4896
5691
|
async deleteStore(storeId) {
|
|
4897
5692
|
const tableName = this.getTableName(storeId);
|
|
4898
|
-
|
|
5693
|
+
this.connection ??= await lancedb.connect(this.dataDir);
|
|
5694
|
+
const tableNames = await this.connection.tableNames();
|
|
5695
|
+
if (tableNames.includes(tableName)) {
|
|
4899
5696
|
await this.connection.dropTable(tableName);
|
|
4900
5697
|
this.tables.delete(tableName);
|
|
4901
5698
|
}
|
|
@@ -4945,6 +5742,8 @@ var LazyServiceContainer = class {
|
|
|
4945
5742
|
appConfig;
|
|
4946
5743
|
dataDir;
|
|
4947
5744
|
// Lazily initialized (heavy)
|
|
5745
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in lazy getter
|
|
5746
|
+
_manifest = null;
|
|
4948
5747
|
_embeddings = null;
|
|
4949
5748
|
_codeGraph = null;
|
|
4950
5749
|
_search = null;
|
|
@@ -4966,7 +5765,7 @@ var LazyServiceContainer = class {
|
|
|
4966
5765
|
logger4.debug("Lazy-initializing EmbeddingEngine");
|
|
4967
5766
|
this._embeddings = new EmbeddingEngine(
|
|
4968
5767
|
this.appConfig.embedding.model,
|
|
4969
|
-
this.appConfig.embedding.
|
|
5768
|
+
this.appConfig.embedding.batchSize
|
|
4970
5769
|
);
|
|
4971
5770
|
}
|
|
4972
5771
|
return this._embeddings;
|
|
@@ -4987,7 +5786,12 @@ var LazyServiceContainer = class {
|
|
|
4987
5786
|
get search() {
|
|
4988
5787
|
if (this._search === null) {
|
|
4989
5788
|
logger4.debug("Lazy-initializing SearchService");
|
|
4990
|
-
this._search = new SearchService(
|
|
5789
|
+
this._search = new SearchService(
|
|
5790
|
+
this.lance,
|
|
5791
|
+
this.embeddings,
|
|
5792
|
+
this.codeGraph,
|
|
5793
|
+
this.appConfig.search
|
|
5794
|
+
);
|
|
4991
5795
|
}
|
|
4992
5796
|
return this._search;
|
|
4993
5797
|
}
|
|
@@ -4998,17 +5802,38 @@ var LazyServiceContainer = class {
|
|
|
4998
5802
|
if (this._index === null) {
|
|
4999
5803
|
logger4.debug("Lazy-initializing IndexService");
|
|
5000
5804
|
this._index = new IndexService(this.lance, this.embeddings, {
|
|
5001
|
-
codeGraphService: this.codeGraph
|
|
5805
|
+
codeGraphService: this.codeGraph,
|
|
5806
|
+
manifestService: this.manifest,
|
|
5807
|
+
chunkSize: this.appConfig.indexing.chunkSize,
|
|
5808
|
+
chunkOverlap: this.appConfig.indexing.chunkOverlap,
|
|
5809
|
+
concurrency: this.appConfig.indexing.concurrency,
|
|
5810
|
+
ignorePatterns: this.appConfig.indexing.ignorePatterns
|
|
5002
5811
|
});
|
|
5003
5812
|
}
|
|
5004
5813
|
return this._index;
|
|
5005
5814
|
}
|
|
5815
|
+
/**
|
|
5816
|
+
* ManifestService is lazily created on first access.
|
|
5817
|
+
*/
|
|
5818
|
+
get manifest() {
|
|
5819
|
+
if (this._manifest === null) {
|
|
5820
|
+
logger4.debug("Lazy-initializing ManifestService");
|
|
5821
|
+
this._manifest = new ManifestService(this.dataDir);
|
|
5822
|
+
}
|
|
5823
|
+
return this._manifest;
|
|
5824
|
+
}
|
|
5006
5825
|
/**
|
|
5007
5826
|
* Check if embeddings have been initialized (for cleanup purposes).
|
|
5008
5827
|
*/
|
|
5009
5828
|
get hasEmbeddings() {
|
|
5010
5829
|
return this._embeddings !== null;
|
|
5011
5830
|
}
|
|
5831
|
+
/**
|
|
5832
|
+
* Check if search service has been initialized (for cleanup purposes).
|
|
5833
|
+
*/
|
|
5834
|
+
get hasSearch() {
|
|
5835
|
+
return this._search !== null;
|
|
5836
|
+
}
|
|
5012
5837
|
};
|
|
5013
5838
|
async function createLazyServices(configPath, dataDir, projectRoot) {
|
|
5014
5839
|
logger4.info({ configPath, dataDir, projectRoot }, "Initializing lazy services");
|
|
@@ -5019,16 +5844,21 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
|
|
|
5019
5844
|
const pythonBridge = new PythonBridge();
|
|
5020
5845
|
await pythonBridge.start();
|
|
5021
5846
|
const lance = new LanceStore(resolvedDataDir);
|
|
5022
|
-
|
|
5023
|
-
|
|
5024
|
-
|
|
5025
|
-
|
|
5026
|
-
|
|
5027
|
-
|
|
5847
|
+
const resolvedProjectRoot = config.resolveProjectRoot();
|
|
5848
|
+
const definitionService = new StoreDefinitionService(resolvedProjectRoot);
|
|
5849
|
+
const gitignoreService = new GitignoreService(resolvedProjectRoot);
|
|
5850
|
+
const storeOptions = {
|
|
5851
|
+
definitionService,
|
|
5852
|
+
gitignoreService,
|
|
5853
|
+
projectRoot: resolvedProjectRoot
|
|
5854
|
+
};
|
|
5028
5855
|
const store = new StoreService(resolvedDataDir, storeOptions);
|
|
5029
5856
|
await store.initialize();
|
|
5030
5857
|
const durationMs = Date.now() - startTime;
|
|
5031
|
-
logger4.info(
|
|
5858
|
+
logger4.info(
|
|
5859
|
+
{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot, durationMs },
|
|
5860
|
+
"Lazy services initialized"
|
|
5861
|
+
);
|
|
5032
5862
|
return new LazyServiceContainer(config, appConfig, resolvedDataDir, store, lance, pythonBridge);
|
|
5033
5863
|
}
|
|
5034
5864
|
async function createServices(configPath, dataDir, projectRoot) {
|
|
@@ -5039,20 +5869,33 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
5039
5869
|
const pythonBridge = new PythonBridge();
|
|
5040
5870
|
await pythonBridge.start();
|
|
5041
5871
|
const lance = new LanceStore(resolvedDataDir);
|
|
5042
|
-
const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.
|
|
5872
|
+
const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.batchSize);
|
|
5043
5873
|
await embeddings.initialize();
|
|
5044
|
-
|
|
5045
|
-
|
|
5046
|
-
|
|
5047
|
-
|
|
5048
|
-
|
|
5049
|
-
|
|
5874
|
+
const resolvedProjectRoot = config.resolveProjectRoot();
|
|
5875
|
+
const definitionService = new StoreDefinitionService(resolvedProjectRoot);
|
|
5876
|
+
const gitignoreService = new GitignoreService(resolvedProjectRoot);
|
|
5877
|
+
const storeOptions = {
|
|
5878
|
+
definitionService,
|
|
5879
|
+
gitignoreService,
|
|
5880
|
+
projectRoot: resolvedProjectRoot
|
|
5881
|
+
};
|
|
5050
5882
|
const store = new StoreService(resolvedDataDir, storeOptions);
|
|
5051
5883
|
await store.initialize();
|
|
5052
5884
|
const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
|
|
5053
|
-
const
|
|
5054
|
-
const
|
|
5055
|
-
|
|
5885
|
+
const manifest = new ManifestService(resolvedDataDir);
|
|
5886
|
+
const search = new SearchService(lance, embeddings, codeGraph, appConfig.search);
|
|
5887
|
+
const index = new IndexService(lance, embeddings, {
|
|
5888
|
+
codeGraphService: codeGraph,
|
|
5889
|
+
manifestService: manifest,
|
|
5890
|
+
chunkSize: appConfig.indexing.chunkSize,
|
|
5891
|
+
chunkOverlap: appConfig.indexing.chunkOverlap,
|
|
5892
|
+
concurrency: appConfig.indexing.concurrency,
|
|
5893
|
+
ignorePatterns: appConfig.indexing.ignorePatterns
|
|
5894
|
+
});
|
|
5895
|
+
logger4.info(
|
|
5896
|
+
{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot },
|
|
5897
|
+
"Services initialized successfully"
|
|
5898
|
+
);
|
|
5056
5899
|
return {
|
|
5057
5900
|
config,
|
|
5058
5901
|
store,
|
|
@@ -5061,12 +5904,20 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
5061
5904
|
lance,
|
|
5062
5905
|
embeddings,
|
|
5063
5906
|
codeGraph,
|
|
5064
|
-
pythonBridge
|
|
5907
|
+
pythonBridge,
|
|
5908
|
+
manifest
|
|
5065
5909
|
};
|
|
5066
5910
|
}
|
|
5067
5911
|
async function destroyServices(services) {
|
|
5068
5912
|
logger4.info("Shutting down services");
|
|
5069
5913
|
const errors = [];
|
|
5914
|
+
const isLazyContainer = services instanceof LazyServiceContainer;
|
|
5915
|
+
const shouldCleanupSearch = !isLazyContainer || services.hasSearch;
|
|
5916
|
+
if (shouldCleanupSearch) {
|
|
5917
|
+
services.search.cleanup();
|
|
5918
|
+
} else {
|
|
5919
|
+
logger4.debug("Skipping search cleanup (not initialized)");
|
|
5920
|
+
}
|
|
5070
5921
|
try {
|
|
5071
5922
|
await services.pythonBridge.stop();
|
|
5072
5923
|
} catch (e) {
|
|
@@ -5074,7 +5925,6 @@ async function destroyServices(services) {
|
|
|
5074
5925
|
logger4.error({ error }, "Error stopping Python bridge");
|
|
5075
5926
|
errors.push(error);
|
|
5076
5927
|
}
|
|
5077
|
-
const isLazyContainer = services instanceof LazyServiceContainer;
|
|
5078
5928
|
const shouldDisposeEmbeddings = !isLazyContainer || services.hasEmbeddings;
|
|
5079
5929
|
if (shouldDisposeEmbeddings) {
|
|
5080
5930
|
try {
|
|
@@ -5104,6 +5954,7 @@ async function destroyServices(services) {
|
|
|
5104
5954
|
|
|
5105
5955
|
export {
|
|
5106
5956
|
AdapterRegistry,
|
|
5957
|
+
ProjectRootService,
|
|
5107
5958
|
createLogger,
|
|
5108
5959
|
shutdownLogger,
|
|
5109
5960
|
summarizePayload,
|
|
@@ -5111,8 +5962,6 @@ export {
|
|
|
5111
5962
|
PythonBridge,
|
|
5112
5963
|
ChunkingService,
|
|
5113
5964
|
ASTParser,
|
|
5114
|
-
createStoreId,
|
|
5115
|
-
createDocumentId,
|
|
5116
5965
|
ok,
|
|
5117
5966
|
err,
|
|
5118
5967
|
classifyWebContentType,
|
|
@@ -5120,10 +5969,11 @@ export {
|
|
|
5120
5969
|
isRepoStoreDefinition,
|
|
5121
5970
|
isWebStoreDefinition,
|
|
5122
5971
|
StoreDefinitionService,
|
|
5972
|
+
isGitUrl,
|
|
5123
5973
|
extractRepoName,
|
|
5124
5974
|
JobService,
|
|
5125
5975
|
createLazyServices,
|
|
5126
5976
|
createServices,
|
|
5127
5977
|
destroyServices
|
|
5128
5978
|
};
|
|
5129
|
-
//# sourceMappingURL=chunk-
|
|
5979
|
+
//# sourceMappingURL=chunk-RDDGZIDL.js.map
|