bluera-knowledge 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +194 -13
- package/dist/{chunk-H25AEF47.js → chunk-7JTPAQFO.js} +80 -2
- package/dist/chunk-7JTPAQFO.js.map +1 -0
- package/dist/{chunk-BYLIDCWD.js → chunk-H465AZXC.js} +2 -2
- package/dist/{chunk-UXT3BCAH.js → chunk-T7J5RB6F.js} +21 -16
- package/dist/{chunk-UXT3BCAH.js.map → chunk-T7J5RB6F.js.map} +1 -1
- package/dist/{chunk-WP2GERAJ.js → chunk-U27UECDZ.js} +1323 -465
- package/dist/chunk-U27UECDZ.js.map +1 -0
- package/dist/index.js +8 -5
- package/dist/index.js.map +1 -1
- package/dist/mcp/bootstrap.js +9 -1
- package/dist/mcp/bootstrap.js.map +1 -1
- package/dist/mcp/server.d.ts +135 -10
- package/dist/mcp/server.js +3 -3
- package/dist/{watch.service-THP6X5ZZ.js → watch.service-3ZP35WTM.js} +2 -2
- package/dist/workers/background-worker-cli.js +3 -3
- package/package.json +12 -3
- package/dist/chunk-H25AEF47.js.map +0 -1
- package/dist/chunk-WP2GERAJ.js.map +0 -1
- /package/dist/{chunk-BYLIDCWD.js.map → chunk-H465AZXC.js.map} +0 -0
- /package/dist/{watch.service-THP6X5ZZ.js.map → watch.service-3ZP35WTM.js.map} +0 -0
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
} from "./chunk-CLIMKLTW.js";
|
|
5
5
|
import {
|
|
6
6
|
parseIgnorePatternsForScanning
|
|
7
|
-
} from "./chunk-H25AEF47.js";
|
|
7
|
+
} from "./chunk-7JTPAQFO.js";
|
|
8
8
|
import {
|
|
9
9
|
__require
|
|
10
10
|
} from "./chunk-DGUM43GV.js";
|
|
@@ -2060,8 +2060,601 @@ var CodeGraphService = class {
|
|
|
2060
2060
|
|
|
2061
2061
|
// src/services/config.service.ts
|
|
2062
2062
|
import { readFile as readFile2, access } from "fs/promises";
|
|
2063
|
+
import { homedir as homedir2 } from "os";
|
|
2064
|
+
import { isAbsolute, join as join6, resolve } from "path";
|
|
2065
|
+
|
|
2066
|
+
// src/services/reranker-env.ts
|
|
2067
|
+
// Logger for the reranker env parsing below; parseEnabled/parseTopK call logger.warn on invalid values in non-strict mode.
var logger = createLogger("reranker-env");
|
|
2068
|
+
/**
 * Read the reranker overrides from the environment.
 *
 * @param {boolean} strict - When true, an invalid value throws; otherwise it is
 *   logged as a warning and reported as `undefined`.
 * @returns {{ enabled: (boolean|undefined), topK: (number|undefined) }}
 *   Parsed BK_RERANKER_ENABLED / BK_RERANKER_TOPK; `undefined` means "not set".
 */
function parseRerankerEnvOverrides(strict) {
  const rawEnabled = process.env["BK_RERANKER_ENABLED"];
  const rawTopK = process.env["BK_RERANKER_TOPK"];
  // Parse in the same order as the returned keys so a strict failure on
  // `enabled` surfaces before `topK` is examined.
  const enabled = parseEnabled(rawEnabled, strict);
  const topK = parseTopK(rawTopK, strict);
  return { enabled, topK };
}
|
|
2074
|
+
/**
 * Parse the BK_RERANKER_ENABLED flag.
 *
 * @param {string|undefined} raw - Raw environment value.
 * @param {boolean} strict - Throw on an invalid value instead of warning.
 * @returns {boolean|undefined} `true` for "1", `false` for "0", `undefined`
 *   when unset, empty, or (non-strict) invalid.
 * @throws {Error} When `strict` is true and the value is neither "0" nor "1".
 */
function parseEnabled(raw, strict) {
  switch (raw) {
    case void 0:
    case "":
      // Unset and empty both mean "no override".
      return void 0;
    case "1":
      return true;
    case "0":
      return false;
    default: {
      const problem = `BK_RERANKER_ENABLED must be '0' or '1', got: "${raw}"`;
      if (strict) {
        throw new Error(problem);
      }
      logger.warn(problem);
      return void 0;
    }
  }
}
|
|
2083
|
+
/**
 * Parse the BK_RERANKER_TOPK value as a positive integer.
 *
 * The whole value must be a decimal integer (surrounding whitespace and a
 * leading "+" are tolerated). `Number.parseInt` alone would accept inputs such
 * as "5abc", "3.7" or "1e3" by silently truncating them, which contradicts the
 * "must be a positive integer" contract stated in the error message.
 *
 * @param {string|undefined} raw - Raw environment value.
 * @param {boolean} strict - Throw on an invalid value instead of warning.
 * @returns {number|undefined} The parsed integer (>= 1), or `undefined` when
 *   unset, empty, or (non-strict) invalid.
 * @throws {Error} When `strict` is true and the value is not a positive integer.
 */
function parseTopK(raw, strict) {
  if (raw === void 0 || raw === "") return void 0;
  const trimmed = raw.trim();
  // Only parse when the entire string is digits; anything else is invalid.
  const parsed = /^\+?\d+$/.test(trimmed) ? Number.parseInt(trimmed, 10) : Number.NaN;
  // isSafeInteger also rejects NaN and digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    const msg = `BK_RERANKER_TOPK must be a positive integer, got: "${raw}"`;
    if (strict) throw new Error(msg);
    logger.warn(msg);
    return void 0;
  }
  return parsed;
}
|
|
2094
|
+
|
|
2095
|
+
// src/db/embeddings.ts
|
|
2063
2096
|
import { homedir } from "os";
|
|
2064
|
-
import {
|
|
2097
|
+
import { join as join5 } from "path";
|
|
2098
|
+
import { pipeline, env } from "@huggingface/transformers";
|
|
2099
|
+
|
|
2100
|
+
// src/models/registry.ts
|
|
2101
|
+
/**
 * Known embedding models, keyed by short name.
 *
 * Each entry carries the model identifier passed to the pipeline (`id`), a
 * display `name`, the embedding `dimensions`, the `pooling` / `normalize`
 * settings, the `queryPrefix` / `docPrefix` strings prepended before
 * embedding, and descriptive `category` / `sizeCategory` / `notes` fields.
 */
var MODEL_REGISTRY = {
  // ============================================================
  // BGE Models (BAAI) - Best for retrieval tasks
  // ============================================================
  "bge-small-en-v1.5": {
    id: "Xenova/bge-small-en-v1.5",
    name: "BGE Small English v1.5",
    dimensions: 384,
    pooling: "cls",
    normalize: true,
    queryPrefix: "Represent this sentence for searching relevant passages: ",
    docPrefix: "",
    category: "bge",
    sizeCategory: "small",
    notes: "Default model. Best balance of speed and quality for code search."
  },
  "bge-base-en-v1.5": {
    id: "Xenova/bge-base-en-v1.5",
    name: "BGE Base English v1.5",
    dimensions: 768,
    pooling: "cls",
    normalize: true,
    queryPrefix: "Represent this sentence for searching relevant passages: ",
    docPrefix: "",
    category: "bge",
    sizeCategory: "base",
    notes: "53% slower than small. Tested: regression on code search."
  },
  "bge-large-en-v1.5": {
    id: "Xenova/bge-large-en-v1.5",
    name: "BGE Large English v1.5",
    dimensions: 1024,
    pooling: "cls",
    normalize: true,
    queryPrefix: "Represent this sentence for searching relevant passages: ",
    docPrefix: "",
    category: "bge",
    sizeCategory: "large",
    notes: "Highest quality BGE but slow. Use for accuracy-critical tasks."
  },
  "bge-small-en": {
    id: "Xenova/bge-small-en",
    name: "BGE Small English v1.0",
    dimensions: 384,
    pooling: "cls",
    normalize: true,
    queryPrefix: "Represent this sentence for searching relevant passages: ",
    docPrefix: "",
    category: "bge",
    sizeCategory: "small",
    notes: "Older version. Use v1.5 instead."
  },
  "bge-base-en": {
    id: "Xenova/bge-base-en",
    name: "BGE Base English v1.0",
    dimensions: 768,
    pooling: "cls",
    normalize: true,
    queryPrefix: "Represent this sentence for searching relevant passages: ",
    docPrefix: "",
    category: "bge",
    sizeCategory: "base",
    notes: "Older version. Use v1.5 instead."
  },
  // ============================================================
  // E5 Models (Microsoft) - Asymmetric retrieval
  // ============================================================
  "e5-small-v2": {
    id: "Xenova/e5-small-v2",
    name: "E5 Small v2",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "query: ",
    docPrefix: "passage: ",
    category: "e5",
    sizeCategory: "small",
    notes: "Tested: underperformed BGE on code search. Better for general text."
  },
  "e5-base-v2": {
    id: "Xenova/e5-base-v2",
    name: "E5 Base v2",
    dimensions: 768,
    pooling: "mean",
    normalize: true,
    queryPrefix: "query: ",
    docPrefix: "passage: ",
    category: "e5",
    sizeCategory: "base",
    notes: 'Larger E5 variant. Requires "query:" and "passage:" prefixes.'
  },
  "e5-large-v2": {
    id: "Xenova/e5-large-v2",
    name: "E5 Large v2",
    dimensions: 1024,
    pooling: "mean",
    normalize: true,
    queryPrefix: "query: ",
    docPrefix: "passage: ",
    category: "e5",
    sizeCategory: "large",
    notes: "Highest quality E5. Slow but accurate for general retrieval."
  },
  "multilingual-e5-small": {
    id: "Xenova/multilingual-e5-small",
    name: "Multilingual E5 Small",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "query: ",
    docPrefix: "passage: ",
    category: "e5",
    sizeCategory: "small",
    notes: "Supports 100+ languages. Good for multilingual codebases."
  },
  "multilingual-e5-base": {
    id: "Xenova/multilingual-e5-base",
    name: "Multilingual E5 Base",
    dimensions: 768,
    pooling: "mean",
    normalize: true,
    queryPrefix: "query: ",
    docPrefix: "passage: ",
    category: "e5",
    sizeCategory: "base",
    notes: "Supports 100+ languages. Larger multilingual variant."
  },
  // ============================================================
  // MiniLM Models (Sentence Transformers) - Fast general-purpose
  // ============================================================
  "all-MiniLM-L6-v2": {
    id: "Xenova/all-MiniLM-L6-v2",
    name: "all-MiniLM-L6-v2",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "minilm",
    sizeCategory: "small",
    notes: "Popular universal model. No prefixes needed. Very fast."
  },
  "all-MiniLM-L12-v2": {
    id: "Xenova/all-MiniLM-L12-v2",
    name: "all-MiniLM-L12-v2",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "minilm",
    sizeCategory: "small",
    notes: "Deeper MiniLM. Slightly better quality than L6."
  },
  "paraphrase-MiniLM-L6-v2": {
    id: "Xenova/paraphrase-MiniLM-L6-v2",
    name: "paraphrase-MiniLM-L6-v2",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "minilm",
    sizeCategory: "small",
    notes: "Optimized for paraphrase detection. Good for similarity."
  },
  "multi-qa-MiniLM-L6-cos-v1": {
    id: "Xenova/multi-qa-MiniLM-L6-cos-v1",
    name: "multi-qa-MiniLM-L6-cos-v1",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "minilm",
    sizeCategory: "small",
    notes: "Trained on 215M QA pairs. Good for question answering."
  },
  // ============================================================
  // GTE Models (Alibaba) - State-of-the-art small models
  // ============================================================
  "gte-small": {
    id: "Xenova/gte-small",
    name: "GTE Small",
    dimensions: 384,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "gte",
    sizeCategory: "small",
    notes: "Competitive with larger models. No prefixes needed."
  },
  "gte-base": {
    id: "Xenova/gte-base",
    name: "GTE Base",
    dimensions: 768,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "gte",
    sizeCategory: "base",
    notes: "Strong performance on MTEB benchmark."
  },
  "gte-large": {
    id: "Xenova/gte-large",
    name: "GTE Large",
    dimensions: 1024,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "gte",
    sizeCategory: "large",
    notes: "Top MTEB scores. Slow but very accurate."
  },
  // ============================================================
  // Nomic Models - Long context support
  // ============================================================
  "nomic-embed-text-v1": {
    id: "nomic-ai/nomic-embed-text-v1",
    name: "Nomic Embed Text v1",
    dimensions: 768,
    pooling: "mean",
    normalize: true,
    queryPrefix: "search_query: ",
    docPrefix: "search_document: ",
    category: "nomic",
    sizeCategory: "base",
    notes: "8192 token context. May need trust_remote_code."
  },
  "nomic-embed-text-v1.5": {
    id: "nomic-ai/nomic-embed-text-v1.5",
    name: "Nomic Embed Text v1.5",
    dimensions: 768,
    pooling: "mean",
    normalize: true,
    queryPrefix: "search_query: ",
    docPrefix: "search_document: ",
    category: "nomic",
    sizeCategory: "base",
    notes: "8192 token context. Matryoshka embeddings support."
  },
  // ============================================================
  // Other Notable Models
  // ============================================================
  "jina-embeddings-v2-small-en": {
    id: "Xenova/jina-embeddings-v2-small-en",
    name: "Jina Embeddings v2 Small",
    dimensions: 512,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "other",
    sizeCategory: "small",
    notes: "8192 token context. Good for long documents."
  },
  "jina-embeddings-v2-base-en": {
    id: "Xenova/jina-embeddings-v2-base-en",
    name: "Jina Embeddings v2 Base",
    dimensions: 768,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: "",
    category: "other",
    sizeCategory: "base",
    notes: "8192 token context. Larger Jina variant."
  }
};

/** Registry key used when no model is configured or the configured one is unknown. */
var DEFAULT_MODEL_ID = "bge-small-en-v1.5";

/**
 * Look up a model configuration by registry key or by full model `id`.
 *
 * @param {string} modelId - Short key (e.g. "gte-small") or full id
 *   (e.g. "Xenova/gte-small").
 * @returns {object|undefined} The registry entry, or `undefined` if unknown.
 */
function getModelConfig(modelId) {
  // Own-property check only: the `in` operator also matches inherited names
  // such as "toString" or "constructor", which would hand back an
  // Object.prototype member instead of a model entry.
  if (Object.hasOwn(MODEL_REGISTRY, modelId)) {
    return MODEL_REGISTRY[modelId];
  }
  return Object.values(MODEL_REGISTRY).find((config) => config.id === modelId);
}

/**
 * Resolve the model selected through the environment.
 *
 * BK_MODEL takes precedence over BK_EMBEDDING_MODEL. An empty value counts as
 * unset, so an empty BK_MODEL no longer hides a populated BK_EMBEDDING_MODEL.
 *
 * @returns {string} The configured model key/id when it is known, otherwise
 *   DEFAULT_MODEL_ID (a warning is printed for an unknown model).
 */
function getConfiguredModelId() {
  const envModel = [process.env["BK_MODEL"], process.env["BK_EMBEDDING_MODEL"]].find(
    (value) => value !== void 0 && value !== ""
  );
  if (envModel === void 0) {
    return DEFAULT_MODEL_ID;
  }
  if (getModelConfig(envModel) === void 0) {
    console.warn(`Warning: Unknown model "${envModel}", using default "${DEFAULT_MODEL_ID}"`);
    return DEFAULT_MODEL_ID;
  }
  return envModel;
}
|
|
2397
|
+
|
|
2398
|
+
// src/db/embeddings.ts
|
|
2399
|
+
// Set the transformers `env.cacheDir` to <home>/.cache/huggingface-transformers.
// NOTE(review): presumably this is where downloaded model files are cached — confirm against the library docs.
env.cacheDir = join5(homedir(), ".cache", "huggingface-transformers");
|
|
2400
|
+
/**
 * Read the fine-tuned model location from BK_FINETUNED_MODEL.
 *
 * @returns {string|undefined} The configured value, or `undefined` when the
 *   variable is unset or empty.
 */
function getFinetunedModelPath() {
  const configured = process.env["BK_FINETUNED_MODEL"];
  const isUnset = configured === void 0 || configured === "";
  return isUnset ? void 0 : configured;
}
|
|
2407
|
+
/**
 * Build an embedding configuration for a model.
 *
 * Precedence per field: explicit `overrides`, then environment (BK_POOLING and
 * BK_QUERY_PREFIX only), then the registry entry for `modelId`, then generic
 * fallbacks for models that are not in the registry.
 *
 * @param {string} modelId - Registry key, full model id, or any other model
 *   identifier (used verbatim when not found by getModelConfig).
 * @param {object} [overrides] - Optional per-field overrides (batchSize, dtype,
 *   pooling, normalize, queryPrefix, docPrefix, maxInFlightBatches).
 * @returns {object} Configuration with model, batchSize, dtype, pooling,
 *   normalize, queryPrefix, docPrefix and maxInFlightBatches.
 */
function buildEmbeddingConfig(modelId, overrides) {
  const poolingFromEnv = process.env["BK_POOLING"];
  // Only the three recognised pooling modes are honoured; anything else is ignored.
  const envPooling = ["mean", "cls", "none"].includes(poolingFromEnv) ? poolingFromEnv : void 0;
  const envQueryPrefix = process.env["BK_QUERY_PREFIX"];
  // Unknown models get generic defaults and keep the identifier they were given.
  const base = getModelConfig(modelId) ?? {
    id: modelId,
    pooling: "mean",
    normalize: true,
    queryPrefix: "",
    docPrefix: ""
  };
  return {
    model: base.id,
    batchSize: overrides?.batchSize ?? 32,
    dtype: overrides?.dtype ?? "fp32",
    pooling: overrides?.pooling ?? envPooling ?? base.pooling,
    normalize: overrides?.normalize ?? base.normalize,
    queryPrefix: overrides?.queryPrefix ?? envQueryPrefix ?? base.queryPrefix,
    docPrefix: overrides?.docPrefix ?? base.docPrefix,
    maxInFlightBatches: overrides?.maxInFlightBatches ?? 1
  };
}
|
|
2435
|
+
// Default embedding configuration, computed once when this module is evaluated:
// a BK_FINETUNED_MODEL value wins over the configured/default registry model.
var DEFAULT_EMBEDDING_CONFIG = buildEmbeddingConfig(getFinetunedModelPath() ?? getConfiguredModelId());
|
|
2438
|
+
/**
 * Wraps a feature-extraction pipeline and turns text into Float32Array
 * embeddings, with lazy initialization, batching and explicit disposal.
 */
var EmbeddingEngine = class {
  // Loaded pipeline; null until initialize() succeeds or after dispose()/reset().
  extractor = null;
  // Initialization promise shared by concurrent initialize() callers.
  initPromise = null;
  // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed() and embedBatch()
  _dimensions = null;
  // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
  disposed = false;
  config;
  /**
   * @param {object} [config] - Embedding configuration (model, batchSize, dtype,
   *   pooling, normalize, queryPrefix, docPrefix, maxInFlightBatches).
   */
  constructor(config = DEFAULT_EMBEDDING_CONFIG) {
    if (process.env["BK_DEBUG"] !== void 0 && process.env["BK_DEBUG"] !== "") {
      console.log("[EmbeddingEngine] Using model:", config.model);
    }
    this.config = config;
  }
  /**
   * Guard against use-after-dispose
   * @throws {Error} If dispose() has been called.
   */
  assertNotDisposed() {
    if (this.disposed) {
      throw new Error("EmbeddingEngine has been disposed");
    }
  }
  /**
   * Initialize the embedding pipeline (concurrency-safe).
   * Multiple concurrent calls will share the same initialization promise.
   * A failed initialization clears the promise so a later call can retry.
   */
  async initialize() {
    this.assertNotDisposed();
    if (this.extractor !== null) return;
    this.initPromise ??= (async () => {
      try {
        this.extractor = await pipeline("feature-extraction", this.config.model, {
          dtype: this.config.dtype
        });
      } catch (error) {
        this.initPromise = null;
        throw error;
      }
    })();
    await this.initPromise;
  }
  /**
   * Internal: return the loaded pipeline, initializing it on first use.
   * @throws {Error} If the pipeline is still missing after initialization.
   */
  async ensureExtractor() {
    if (this.extractor === null) {
      await this.initialize();
    }
    if (this.extractor === null) {
      throw new Error("Failed to initialize embedding model");
    }
    return this.extractor;
  }
  /**
   * Embed a search query. Applies queryPrefix for asymmetric models.
   * @param {string} text
   * @returns {Promise<Float32Array>}
   */
  async embedQuery(text) {
    return this.embedText(this.config.queryPrefix + text);
  }
  /**
   * Embed a document for indexing. Applies docPrefix for asymmetric models.
   * @param {string} text
   * @returns {Promise<Float32Array>}
   */
  async embedDocument(text) {
    return this.embedText(this.config.docPrefix + text);
  }
  /**
   * Internal: embed text without prefix.
   * @param {string} text
   * @returns {Promise<Float32Array>}
   */
  async embedText(text) {
    this.assertNotDisposed();
    const extractor = await this.ensureExtractor();
    const output = await extractor(text, {
      pooling: this.config.pooling,
      normalize: this.config.normalize
    });
    // The last entry of dims is taken as the embedding width.
    const dim = output.dims[output.dims.length - 1] ?? 0;
    this._dimensions ??= dim;
    return Float32Array.from(output.data);
  }
  /**
   * Embed a batch of documents with optional parallelism.
   * When maxInFlightBatches > 1, processes multiple batches concurrently.
   *
   * @param {string[]} texts - Documents to embed (docPrefix is applied to each).
   * @returns {Promise<Float32Array[]>} One embedding per input, in input order.
   * @throws {Error} If config.batchSize is not a positive integer.
   */
  async embedBatch(texts) {
    this.assertNotDisposed();
    const { batchSize } = this.config;
    // A zero/negative step would never advance the chunking loop below, and a
    // NaN or string step would silently drop or mangle input, so fail loudly.
    if (!Number.isInteger(batchSize) || batchSize < 1) {
      throw new Error(`Embedding batchSize must be a positive integer, got: ${String(batchSize)}`);
    }
    await this.ensureExtractor();
    const batches = [];
    for (let i = 0; i < texts.length; i += batchSize) {
      batches.push(texts.slice(i, i + batchSize));
    }
    if (batches.length === 0) {
      return [];
    }
    if (this.config.maxInFlightBatches <= 1) {
      return this.embedBatchesSequential(batches);
    }
    return this.embedBatchesConcurrent(batches);
  }
  /**
   * Process batches sequentially (original behavior).
   * @param {string[][]} batches
   * @returns {Promise<Float32Array[]>}
   */
  async embedBatchesSequential(batches) {
    const results = [];
    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      if (batch === void 0) continue;
      const batchResults = await this.processSingleBatch(batch);
      results.push(...batchResults);
      if (i < batches.length - 1) {
        // Yield to the event loop between batches.
        await new Promise((resolve4) => setImmediate(resolve4));
      }
    }
    return results;
  }
  /**
   * Process batches with controlled concurrency.
   *
   * Runs at most maxInFlightBatches workers; each worker pulls the next
   * unclaimed batch index, so no task has to poll while waiting for a slot.
   *
   * @param {string[][]} batches
   * @returns {Promise<Float32Array[]>} Embeddings flattened in batch order.
   */
  async embedBatchesConcurrent(batches) {
    const results = new Array(batches.length);
    const limit = Math.max(1, Math.ceil(this.config.maxInFlightBatches));
    const workerCount = Math.min(limit, batches.length);
    let nextIndex = 0;
    let failed = false;
    const worker = async () => {
      // Stop claiming new batches once any worker has failed.
      while (!failed && nextIndex < batches.length) {
        const idx = nextIndex++;
        try {
          results[idx] = await this.processSingleBatch(batches[idx]);
        } catch (error) {
          failed = true;
          throw error;
        }
      }
    };
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results.flat();
  }
  /**
   * Process a single batch and return embeddings.
   * @param {string[]} batch
   * @returns {Promise<Float32Array[]>} One embedding per batch entry.
   * @throws {Error} If the pipeline has not been initialized.
   */
  async processSingleBatch(batch) {
    if (this.extractor === null) {
      throw new Error("Extractor not initialized");
    }
    const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
    const output = await this.extractor(prefixedBatch, {
      pooling: this.config.pooling,
      normalize: this.config.normalize
    });
    const dim = output.dims[output.dims.length - 1] ?? 0;
    const batchResults = [];
    // output.data is read as batch.length consecutive runs of `dim` values.
    for (let b = 0; b < batch.length; b++) {
      const start = b * dim;
      const end = start + dim;
      batchResults.push(Float32Array.from(output.data.slice(start, end)));
    }
    this._dimensions ??= dim;
    return batchResults;
  }
  /**
   * Get cached embedding dimensions. Throws if embed() hasn't been called yet.
   * Use ensureDimensions() if you need to guarantee dimensions are available.
   */
  getDimensions() {
    if (this._dimensions === null) {
      throw new Error("Cannot get dimensions before first embed() call");
    }
    return this._dimensions;
  }
  /**
   * Check if the embedding pipeline is initialized.
   */
  isInitialized() {
    return this.extractor !== null;
  }
  /**
   * Check if this engine has been disposed.
   */
  isDisposed() {
    return this.disposed;
  }
  /**
   * Reset the engine to uninitialized state, allowing reuse after disposal.
   * If currently initialized, disposes the pipeline first.
   */
  async reset() {
    if (this.extractor !== null) {
      await this.extractor.dispose();
      this.extractor = null;
    }
    this.initPromise = null;
    this._dimensions = null;
    this.disposed = false;
  }
  /**
   * Ensure dimensions are available, initializing the model if needed.
   * Returns the embedding dimensions for the current model.
   */
  async ensureDimensions() {
    if (this._dimensions === null) {
      await this.embedText("dimension probe");
    }
    if (this._dimensions === null) {
      throw new Error("Failed to determine embedding dimensions");
    }
    return this._dimensions;
  }
  /**
   * Dispose the embedding pipeline to free resources.
   * Should be called before process exit to prevent ONNX runtime cleanup issues on macOS.
   * After disposal, this engine cannot be used again.
   */
  async dispose() {
    // NOTE(review): an initialize() still in flight when this runs will assign
    // `extractor` afterwards; confirm callers never dispose during initialization.
    if (this.extractor !== null) {
      await this.extractor.dispose();
      this.extractor = null;
    }
    this.initPromise = null;
    this._dimensions = null;
    this.disposed = true;
  }
};
|
|
2065
2658
|
|
|
2066
2659
|
// src/types/config.ts
|
|
2067
2660
|
var DEFAULT_CONFIG = {
|
|
@@ -2082,6 +2675,7 @@ var DEFAULT_CONFIG = {
|
|
|
2082
2675
|
chunkSize: 1e3,
|
|
2083
2676
|
chunkOverlap: 150,
|
|
2084
2677
|
ignorePatterns: ["node_modules/**", ".git/**", "*.min.js", "*.map"],
|
|
2678
|
+
prependPath: false,
|
|
2085
2679
|
maxFileSizeBytes: 1048576
|
|
2086
2680
|
// 1MB
|
|
2087
2681
|
},
|
|
@@ -2097,6 +2691,12 @@ var DEFAULT_CONFIG = {
|
|
|
2097
2691
|
server: {
|
|
2098
2692
|
port: 3847,
|
|
2099
2693
|
host: "127.0.0.1"
|
|
2694
|
+
},
|
|
2695
|
+
reranker: {
|
|
2696
|
+
enabled: false,
|
|
2697
|
+
model: "Xenova/ms-marco-MiniLM-L-6-v2",
|
|
2698
|
+
topK: 20,
|
|
2699
|
+
returnK: 10
|
|
2100
2700
|
}
|
|
2101
2701
|
};
|
|
2102
2702
|
|
|
@@ -2148,7 +2748,7 @@ var ConfigService = class {
|
|
|
2148
2748
|
if (configPath !== void 0 && configPath !== "") {
|
|
2149
2749
|
this.configPath = this.expandPath(configPath, this.projectRoot);
|
|
2150
2750
|
} else {
|
|
2151
|
-
this.configPath =
|
|
2751
|
+
this.configPath = join6(this.projectRoot, DEFAULT_CONFIG_PATH);
|
|
2152
2752
|
}
|
|
2153
2753
|
if (dataDir !== void 0 && dataDir !== "") {
|
|
2154
2754
|
this.dataDir = this.expandPath(dataDir, this.projectRoot);
|
|
@@ -2170,15 +2770,60 @@ var ConfigService = class {
|
|
|
2170
2770
|
if (!exists) {
|
|
2171
2771
|
this.config = { ...DEFAULT_CONFIG };
|
|
2172
2772
|
await this.save(this.config);
|
|
2173
|
-
|
|
2773
|
+
} else {
|
|
2774
|
+
const content = await readFile2(this.configPath, "utf-8");
|
|
2775
|
+
try {
|
|
2776
|
+
this.config = deepMerge(DEFAULT_CONFIG, JSON.parse(content));
|
|
2777
|
+
} catch (error) {
|
|
2778
|
+
throw new Error(
|
|
2779
|
+
`Failed to parse config file at ${this.configPath}: ${error instanceof Error ? error.message : String(error)}`
|
|
2780
|
+
);
|
|
2781
|
+
}
|
|
2174
2782
|
}
|
|
2175
|
-
const
|
|
2176
|
-
|
|
2177
|
-
this.config =
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2783
|
+
const finetunedPath = getFinetunedModelPath();
|
|
2784
|
+
if (finetunedPath !== void 0) {
|
|
2785
|
+
this.config = {
|
|
2786
|
+
...this.config,
|
|
2787
|
+
embedding: buildEmbeddingConfig(finetunedPath, {
|
|
2788
|
+
batchSize: this.config.embedding.batchSize,
|
|
2789
|
+
maxInFlightBatches: this.config.embedding.maxInFlightBatches
|
|
2790
|
+
})
|
|
2791
|
+
};
|
|
2792
|
+
} else {
|
|
2793
|
+
const configuredModelId = getConfiguredModelId();
|
|
2794
|
+
if (configuredModelId !== this.config.embedding.model) {
|
|
2795
|
+
this.config = {
|
|
2796
|
+
...this.config,
|
|
2797
|
+
embedding: buildEmbeddingConfig(configuredModelId, {
|
|
2798
|
+
batchSize: this.config.embedding.batchSize,
|
|
2799
|
+
maxInFlightBatches: this.config.embedding.maxInFlightBatches
|
|
2800
|
+
})
|
|
2801
|
+
};
|
|
2802
|
+
}
|
|
2803
|
+
}
|
|
2804
|
+
const rawPooling = process.env["BK_POOLING"];
|
|
2805
|
+
const envPooling = rawPooling === "mean" || rawPooling === "cls" || rawPooling === "none" ? rawPooling : void 0;
|
|
2806
|
+
const envQueryPrefix = process.env["BK_QUERY_PREFIX"];
|
|
2807
|
+
if (envPooling !== void 0 || envQueryPrefix !== void 0) {
|
|
2808
|
+
this.config = {
|
|
2809
|
+
...this.config,
|
|
2810
|
+
embedding: {
|
|
2811
|
+
...this.config.embedding,
|
|
2812
|
+
...envPooling !== void 0 ? { pooling: envPooling } : {},
|
|
2813
|
+
...envQueryPrefix !== void 0 ? { queryPrefix: envQueryPrefix } : {}
|
|
2814
|
+
}
|
|
2815
|
+
};
|
|
2816
|
+
}
|
|
2817
|
+
const rerankerOverrides = parseRerankerEnvOverrides(false);
|
|
2818
|
+
if (rerankerOverrides.enabled !== void 0 || rerankerOverrides.topK !== void 0) {
|
|
2819
|
+
this.config = {
|
|
2820
|
+
...this.config,
|
|
2821
|
+
reranker: {
|
|
2822
|
+
...this.config.reranker,
|
|
2823
|
+
...rerankerOverrides.enabled !== void 0 ? { enabled: rerankerOverrides.enabled } : {},
|
|
2824
|
+
...rerankerOverrides.topK !== void 0 ? { topK: rerankerOverrides.topK } : {}
|
|
2825
|
+
}
|
|
2826
|
+
};
|
|
2182
2827
|
}
|
|
2183
2828
|
return this.config;
|
|
2184
2829
|
}
|
|
@@ -2194,7 +2839,7 @@ var ConfigService = class {
|
|
|
2194
2839
|
}
|
|
2195
2840
|
expandPath(path4, baseDir) {
|
|
2196
2841
|
if (path4.startsWith("~")) {
|
|
2197
|
-
return path4.replace("~",
|
|
2842
|
+
return path4.replace("~", homedir2());
|
|
2198
2843
|
}
|
|
2199
2844
|
if (!isAbsolute(path4)) {
|
|
2200
2845
|
return resolve(baseDir, path4);
|
|
@@ -2205,19 +2850,38 @@ var ConfigService = class {
|
|
|
2205
2850
|
|
|
2206
2851
|
// src/services/gitignore.service.ts
|
|
2207
2852
|
import { readFile as readFile3, writeFile as writeFile3, access as access2 } from "fs/promises";
|
|
2208
|
-
import { join as
|
|
2853
|
+
import { join as join7 } from "path";
|
|
2209
2854
|
var REQUIRED_PATTERNS = [
|
|
2210
|
-
".bluera
|
|
2855
|
+
".bluera/",
|
|
2856
|
+
"!.bluera/",
|
|
2211
2857
|
"!.bluera/bluera-knowledge/",
|
|
2212
|
-
".bluera/bluera-knowledge/*",
|
|
2213
2858
|
"!.bluera/bluera-knowledge/stores.config.json",
|
|
2214
2859
|
"!.bluera/bluera-knowledge/config.json",
|
|
2215
2860
|
"!.bluera/bluera-knowledge/skill-activation.json",
|
|
2216
2861
|
".bluera/bluera-knowledge/data/",
|
|
2217
2862
|
".bluera/bluera-knowledge/logs/"
|
|
2218
2863
|
];
|
|
2219
|
-
|
|
2220
|
-
|
|
2864
|
+
/**
 * Decide whether a required .gitignore pattern is already represented.
 *
 * A pattern counts as covered when the existing lines contain it exactly,
 * contain the same pattern with the trailing slash added or removed, or — for
 * the two `.bluera/` root patterns — contain the `.bluera/*` wildcard.
 *
 * @param {string} pattern - Required pattern.
 * @param {string[]} existingLines - Lines already present in the file.
 * @returns {boolean} True when no new line is needed for `pattern`.
 */
function isPatternCovered(pattern, existingLines) {
  const has = (candidate) => existingLines.includes(candidate);
  if (has(pattern)) return true;
  // Same pattern with the trailing slash toggled.
  const slashVariant = pattern.endsWith("/") ? pattern.slice(0, -1) : `${pattern}/`;
  if (has(slashVariant)) return true;
  const isRootPattern = pattern === ".bluera/" || pattern === "!.bluera/";
  return isRootPattern && has(".bluera/*");
}
|
|
2880
|
+
// Comment header for the Bluera Knowledge .gitignore section; begins and ends with a newline.
var SECTION_HEADER = [
  "",
  "# Bluera Knowledge",
  "# Config files (stores.config.json, config.json, skill-activation.json) can be committed",
  "# Data directory (vector DB, cloned repos) and logs are not committed",
  ""
].join("\n");
|
|
2221
2885
|
async function fileExists2(path4) {
|
|
2222
2886
|
try {
|
|
2223
2887
|
await access2(path4);
|
|
@@ -2226,73 +2890,13 @@ async function fileExists2(path4) {
|
|
|
2226
2890
|
return false;
|
|
2227
2891
|
}
|
|
2228
2892
|
}
|
|
2229
|
-
function isPatternSatisfied(requiredPattern, existingLines) {
|
|
2230
|
-
if (existingLines.includes(requiredPattern)) {
|
|
2231
|
-
return true;
|
|
2232
|
-
}
|
|
2233
|
-
switch (requiredPattern) {
|
|
2234
|
-
case ".bluera/*":
|
|
2235
|
-
return existingLines.includes(".bluera/");
|
|
2236
|
-
case "!.bluera/bluera-knowledge/":
|
|
2237
|
-
return existingLines.includes("!.bluera/bluera-knowledge");
|
|
2238
|
-
case ".bluera/bluera-knowledge/data/":
|
|
2239
|
-
case ".bluera/bluera-knowledge/logs/":
|
|
2240
|
-
return existingLines.includes(".bluera/bluera-knowledge/*");
|
|
2241
|
-
default:
|
|
2242
|
-
return false;
|
|
2243
|
-
}
|
|
2244
|
-
}
|
|
2245
|
-
function removeBkSection(content) {
|
|
2246
|
-
const lines = content.split("\n");
|
|
2247
|
-
const outputLines = [];
|
|
2248
|
-
let removed = false;
|
|
2249
|
-
let i = 0;
|
|
2250
|
-
while (i < lines.length) {
|
|
2251
|
-
const line = lines[i] ?? "";
|
|
2252
|
-
const trimmed = line.trim();
|
|
2253
|
-
if (trimmed === SECTION_BEGIN) {
|
|
2254
|
-
removed = true;
|
|
2255
|
-
i++;
|
|
2256
|
-
while (i < lines.length && (lines[i] ?? "").trim() !== SECTION_END) {
|
|
2257
|
-
i++;
|
|
2258
|
-
}
|
|
2259
|
-
if (i < lines.length) {
|
|
2260
|
-
i++;
|
|
2261
|
-
}
|
|
2262
|
-
continue;
|
|
2263
|
-
}
|
|
2264
|
-
if (trimmed === "# Bluera Knowledge") {
|
|
2265
|
-
removed = true;
|
|
2266
|
-
i++;
|
|
2267
|
-
while (i < lines.length) {
|
|
2268
|
-
const lt = (lines[i] ?? "").trim();
|
|
2269
|
-
if (lt === "" || lt.startsWith("#") || lt.includes(".bluera")) {
|
|
2270
|
-
i++;
|
|
2271
|
-
} else {
|
|
2272
|
-
break;
|
|
2273
|
-
}
|
|
2274
|
-
}
|
|
2275
|
-
continue;
|
|
2276
|
-
}
|
|
2277
|
-
outputLines.push(line);
|
|
2278
|
-
i++;
|
|
2279
|
-
}
|
|
2280
|
-
let lastIdx = outputLines.length - 1;
|
|
2281
|
-
while (lastIdx >= 0 && outputLines[lastIdx]?.trim() === "") {
|
|
2282
|
-
outputLines.pop();
|
|
2283
|
-
lastIdx--;
|
|
2284
|
-
}
|
|
2285
|
-
const cleaned = outputLines.length > 0 ? `${outputLines.join("\n")}
|
|
2286
|
-
` : "";
|
|
2287
|
-
return { cleaned, removed };
|
|
2288
|
-
}
|
|
2289
2893
|
var GitignoreService = class {
|
|
2290
2894
|
gitignorePath;
|
|
2291
2895
|
constructor(projectRoot) {
|
|
2292
|
-
this.gitignorePath =
|
|
2896
|
+
this.gitignorePath = join7(projectRoot, ".gitignore");
|
|
2293
2897
|
}
|
|
2294
2898
|
/**
|
|
2295
|
-
* Check if all required patterns are
|
|
2899
|
+
* Check if all required patterns are present in .gitignore
|
|
2296
2900
|
*/
|
|
2297
2901
|
async hasRequiredPatterns() {
|
|
2298
2902
|
const exists = await fileExists2(this.gitignorePath);
|
|
@@ -2301,62 +2905,52 @@ var GitignoreService = class {
|
|
|
2301
2905
|
}
|
|
2302
2906
|
const content = await readFile3(this.gitignorePath, "utf-8");
|
|
2303
2907
|
const lines = content.split("\n").map((l) => l.trim());
|
|
2304
|
-
|
|
2908
|
+
for (const pattern of REQUIRED_PATTERNS) {
|
|
2909
|
+
if (!isPatternCovered(pattern, lines)) {
|
|
2910
|
+
return false;
|
|
2911
|
+
}
|
|
2912
|
+
}
|
|
2913
|
+
return true;
|
|
2305
2914
|
}
|
|
2306
2915
|
/**
|
|
2307
2916
|
* Ensure required .gitignore patterns are present.
|
|
2308
2917
|
*
|
|
2309
2918
|
* - Creates .gitignore if it doesn't exist
|
|
2310
|
-
* -
|
|
2311
|
-
* -
|
|
2312
|
-
* - Persists cleanup even when no patterns are missing
|
|
2919
|
+
* - Appends missing patterns if .gitignore exists
|
|
2920
|
+
* - Does nothing if all patterns are already present
|
|
2313
2921
|
*
|
|
2314
2922
|
* @returns Object with updated flag and descriptive message
|
|
2315
2923
|
*/
|
|
2316
2924
|
async ensureGitignorePatterns() {
|
|
2317
2925
|
const exists = await fileExists2(this.gitignorePath);
|
|
2318
2926
|
if (!exists) {
|
|
2319
|
-
const content =
|
|
2927
|
+
const content = `${SECTION_HEADER.trim()}
|
|
2928
|
+
${REQUIRED_PATTERNS.join("\n")}
|
|
2929
|
+
`;
|
|
2320
2930
|
await writeFile3(this.gitignorePath, content);
|
|
2321
2931
|
return {
|
|
2322
2932
|
updated: true,
|
|
2323
2933
|
message: "Created .gitignore with Bluera Knowledge patterns"
|
|
2324
2934
|
};
|
|
2325
2935
|
}
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
const { cleaned, removed } = removeBkSection(rawContent);
|
|
2329
|
-
const cleanedLines = cleaned.split("\n").map((l) => l.trim());
|
|
2936
|
+
const existingContent = await readFile3(this.gitignorePath, "utf-8");
|
|
2937
|
+
const lines = existingContent.split("\n").map((l) => l.trim());
|
|
2330
2938
|
const missingPatterns = REQUIRED_PATTERNS.filter(
|
|
2331
|
-
(pattern) => !
|
|
2939
|
+
(pattern) => !isPatternCovered(pattern, lines)
|
|
2332
2940
|
);
|
|
2333
2941
|
if (missingPatterns.length === 0) {
|
|
2334
|
-
if (removed) {
|
|
2335
|
-
await writeFile3(this.gitignorePath, cleaned);
|
|
2336
|
-
return {
|
|
2337
|
-
updated: true,
|
|
2338
|
-
message: "Cleaned redundant Bluera Knowledge section from .gitignore"
|
|
2339
|
-
};
|
|
2340
|
-
}
|
|
2341
2942
|
return {
|
|
2342
2943
|
updated: false,
|
|
2343
2944
|
message: "All Bluera Knowledge patterns already present in .gitignore"
|
|
2344
2945
|
};
|
|
2345
2946
|
}
|
|
2346
|
-
let newContent =
|
|
2347
|
-
if (newContent.
|
|
2348
|
-
if (!newContent.endsWith("\n")) {
|
|
2349
|
-
newContent += "\n";
|
|
2350
|
-
}
|
|
2947
|
+
let newContent = existingContent;
|
|
2948
|
+
if (!newContent.endsWith("\n")) {
|
|
2351
2949
|
newContent += "\n";
|
|
2352
2950
|
}
|
|
2353
|
-
newContent +=
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
updated: false,
|
|
2357
|
-
message: "All Bluera Knowledge patterns already present in .gitignore"
|
|
2358
|
-
};
|
|
2359
|
-
}
|
|
2951
|
+
newContent += SECTION_HEADER;
|
|
2952
|
+
newContent += `${missingPatterns.join("\n")}
|
|
2953
|
+
`;
|
|
2360
2954
|
await writeFile3(this.gitignorePath, newContent);
|
|
2361
2955
|
return {
|
|
2362
2956
|
updated: true,
|
|
@@ -2375,9 +2969,9 @@ var GitignoreService = class {
|
|
|
2375
2969
|
import { execFile } from "child_process";
|
|
2376
2970
|
import { createHash as createHash3 } from "crypto";
|
|
2377
2971
|
import { open, readFile as readFile5, readdir, stat as stat2 } from "fs/promises";
|
|
2378
|
-
import { join as
|
|
2972
|
+
import { join as join8, extname, basename, relative } from "path";
|
|
2379
2973
|
import { promisify } from "util";
|
|
2380
|
-
import { minimatch } from "minimatch";
|
|
2974
|
+
import { minimatch as minimatchFn } from "minimatch";
|
|
2381
2975
|
|
|
2382
2976
|
// src/services/chunking.service.ts
|
|
2383
2977
|
var CHUNK_PRESETS = {
|
|
@@ -2759,8 +3353,9 @@ var DriftService = class {
|
|
|
2759
3353
|
};
|
|
2760
3354
|
|
|
2761
3355
|
// src/services/index.service.ts
|
|
3356
|
+
var minimatch = minimatchFn;
|
|
2762
3357
|
var execFileAsync = promisify(execFile);
|
|
2763
|
-
var
|
|
3358
|
+
var logger2 = createLogger("index-service");
|
|
2764
3359
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
2765
3360
|
// Text/docs
|
|
2766
3361
|
".txt",
|
|
@@ -2892,6 +3487,110 @@ async function isBinaryFile(filePath) {
|
|
|
2892
3487
|
await fd.close();
|
|
2893
3488
|
}
|
|
2894
3489
|
}
|
|
3490
|
+
var EXT_TO_LANGUAGE = {
|
|
3491
|
+
".ts": "typescript",
|
|
3492
|
+
".tsx": "typescript",
|
|
3493
|
+
".mts": "typescript",
|
|
3494
|
+
".cts": "typescript",
|
|
3495
|
+
".js": "javascript",
|
|
3496
|
+
".jsx": "javascript",
|
|
3497
|
+
".mjs": "javascript",
|
|
3498
|
+
".cjs": "javascript",
|
|
3499
|
+
".py": "python",
|
|
3500
|
+
".pyi": "python",
|
|
3501
|
+
".pyx": "python",
|
|
3502
|
+
".rs": "rust",
|
|
3503
|
+
".go": "go",
|
|
3504
|
+
".java": "java",
|
|
3505
|
+
".kt": "kotlin",
|
|
3506
|
+
".kts": "kotlin",
|
|
3507
|
+
".scala": "scala",
|
|
3508
|
+
".groovy": "groovy",
|
|
3509
|
+
".rb": "ruby",
|
|
3510
|
+
".erb": "ruby",
|
|
3511
|
+
".rake": "ruby",
|
|
3512
|
+
".php": "php",
|
|
3513
|
+
".swift": "swift",
|
|
3514
|
+
".m": "objective-c",
|
|
3515
|
+
".mm": "objective-c",
|
|
3516
|
+
".c": "c",
|
|
3517
|
+
".h": "c",
|
|
3518
|
+
".cpp": "cpp",
|
|
3519
|
+
".cc": "cpp",
|
|
3520
|
+
".cxx": "cpp",
|
|
3521
|
+
".hpp": "cpp",
|
|
3522
|
+
".hxx": "cpp",
|
|
3523
|
+
".cs": "csharp",
|
|
3524
|
+
".fs": "fsharp",
|
|
3525
|
+
".vb": "vb",
|
|
3526
|
+
".sh": "shell",
|
|
3527
|
+
".bash": "shell",
|
|
3528
|
+
".zsh": "shell",
|
|
3529
|
+
".fish": "shell",
|
|
3530
|
+
".ps1": "powershell",
|
|
3531
|
+
".psm1": "powershell",
|
|
3532
|
+
".sql": "sql",
|
|
3533
|
+
".md": "markdown",
|
|
3534
|
+
".rst": "restructuredtext",
|
|
3535
|
+
".lua": "lua",
|
|
3536
|
+
".r": "r",
|
|
3537
|
+
".R": "r",
|
|
3538
|
+
".jl": "julia",
|
|
3539
|
+
".ex": "elixir",
|
|
3540
|
+
".exs": "elixir",
|
|
3541
|
+
".erl": "erlang",
|
|
3542
|
+
".hrl": "erlang",
|
|
3543
|
+
".clj": "clojure",
|
|
3544
|
+
".cljs": "clojure",
|
|
3545
|
+
".cljc": "clojure",
|
|
3546
|
+
".hs": "haskell",
|
|
3547
|
+
".elm": "elm",
|
|
3548
|
+
".dart": "dart",
|
|
3549
|
+
".zig": "zig",
|
|
3550
|
+
".nim": "nim",
|
|
3551
|
+
".v": "v",
|
|
3552
|
+
".pl": "perl",
|
|
3553
|
+
".pm": "perl",
|
|
3554
|
+
".tf": "terraform",
|
|
3555
|
+
".hcl": "hcl",
|
|
3556
|
+
".proto": "protobuf",
|
|
3557
|
+
".graphql": "graphql",
|
|
3558
|
+
".gql": "graphql",
|
|
3559
|
+
".vue": "vue",
|
|
3560
|
+
".svelte": "svelte",
|
|
3561
|
+
".html": "html",
|
|
3562
|
+
".htm": "html",
|
|
3563
|
+
".css": "css",
|
|
3564
|
+
".scss": "scss",
|
|
3565
|
+
".sass": "sass",
|
|
3566
|
+
".less": "less",
|
|
3567
|
+
".json": "json",
|
|
3568
|
+
".yaml": "yaml",
|
|
3569
|
+
".yml": "yaml",
|
|
3570
|
+
".toml": "toml",
|
|
3571
|
+
".xml": "xml"
|
|
3572
|
+
};
|
|
3573
|
+
var ENTRY_POINT_NAMES = /* @__PURE__ */ new Set([
|
|
3574
|
+
"index.ts",
|
|
3575
|
+
"index.js",
|
|
3576
|
+
"index.mjs",
|
|
3577
|
+
"index.tsx",
|
|
3578
|
+
"index.jsx",
|
|
3579
|
+
"main.ts",
|
|
3580
|
+
"main.js",
|
|
3581
|
+
"main.py",
|
|
3582
|
+
"main.go",
|
|
3583
|
+
"main.rs",
|
|
3584
|
+
"app.ts",
|
|
3585
|
+
"app.js",
|
|
3586
|
+
"app.py",
|
|
3587
|
+
"mod.rs",
|
|
3588
|
+
"lib.rs",
|
|
3589
|
+
"__init__.py",
|
|
3590
|
+
"server.ts",
|
|
3591
|
+
"server.js",
|
|
3592
|
+
"server.py"
|
|
3593
|
+
]);
|
|
2895
3594
|
var IndexService = class {
|
|
2896
3595
|
lanceStore;
|
|
2897
3596
|
embeddingEngine;
|
|
@@ -2902,6 +3601,7 @@ var IndexService = class {
|
|
|
2902
3601
|
concurrency;
|
|
2903
3602
|
ignoreDirs;
|
|
2904
3603
|
ignoreFilePatterns;
|
|
3604
|
+
prependPath;
|
|
2905
3605
|
maxFileSizeBytes;
|
|
2906
3606
|
constructor(lanceStore, embeddingEngine, options = {}) {
|
|
2907
3607
|
this.lanceStore = lanceStore;
|
|
@@ -2914,13 +3614,14 @@ var IndexService = class {
|
|
|
2914
3614
|
this.manifestService = options.manifestService;
|
|
2915
3615
|
this.driftService = new DriftService();
|
|
2916
3616
|
this.concurrency = options.concurrency ?? 4;
|
|
3617
|
+
this.prependPath = options.prependPath ?? false;
|
|
2917
3618
|
const parsed = parseIgnorePatternsForScanning(options.ignorePatterns ?? []);
|
|
2918
3619
|
this.ignoreDirs = parsed.dirs;
|
|
2919
3620
|
this.ignoreFilePatterns = parsed.fileMatchers;
|
|
2920
3621
|
this.maxFileSizeBytes = options.maxFileSizeBytes ?? 1048576;
|
|
2921
3622
|
}
|
|
2922
3623
|
async indexStore(store, onProgress) {
|
|
2923
|
-
|
|
3624
|
+
logger2.info(
|
|
2924
3625
|
{
|
|
2925
3626
|
storeId: store.id,
|
|
2926
3627
|
storeName: store.name,
|
|
@@ -2932,13 +3633,13 @@ var IndexService = class {
|
|
|
2932
3633
|
if (store.type === "file" || store.type === "repo") {
|
|
2933
3634
|
return await this.indexFileStore(store, onProgress);
|
|
2934
3635
|
}
|
|
2935
|
-
|
|
3636
|
+
logger2.error(
|
|
2936
3637
|
{ storeId: store.id, storeType: store.type },
|
|
2937
3638
|
"Unsupported store type for indexing"
|
|
2938
3639
|
);
|
|
2939
3640
|
return err(new Error(`Indexing not supported for store type: ${store.type}`));
|
|
2940
3641
|
} catch (error) {
|
|
2941
|
-
|
|
3642
|
+
logger2.error(
|
|
2942
3643
|
{
|
|
2943
3644
|
storeId: store.id,
|
|
2944
3645
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -2963,7 +3664,7 @@ var IndexService = class {
|
|
|
2963
3664
|
if (store.type !== "file" && store.type !== "repo") {
|
|
2964
3665
|
return err(new Error(`Incremental indexing not supported for store type: ${store.type}`));
|
|
2965
3666
|
}
|
|
2966
|
-
|
|
3667
|
+
logger2.info(
|
|
2967
3668
|
{
|
|
2968
3669
|
storeId: store.id,
|
|
2969
3670
|
storeName: store.name,
|
|
@@ -2979,7 +3680,7 @@ var IndexService = class {
|
|
|
2979
3680
|
filePaths.map((path4) => this.driftService.getFileState(path4))
|
|
2980
3681
|
);
|
|
2981
3682
|
const drift = await this.driftService.detectChanges(manifest, currentFiles);
|
|
2982
|
-
|
|
3683
|
+
logger2.debug(
|
|
2983
3684
|
{
|
|
2984
3685
|
storeId: store.id,
|
|
2985
3686
|
added: drift.added.length,
|
|
@@ -2998,7 +3699,7 @@ var IndexService = class {
|
|
|
2998
3699
|
}
|
|
2999
3700
|
if (documentIdsToDelete.length > 0) {
|
|
3000
3701
|
await this.lanceStore.deleteDocuments(store.id, documentIdsToDelete);
|
|
3001
|
-
|
|
3702
|
+
logger2.debug(
|
|
3002
3703
|
{ storeId: store.id, count: documentIdsToDelete.length },
|
|
3003
3704
|
"Deleted old documents"
|
|
3004
3705
|
);
|
|
@@ -3034,7 +3735,7 @@ var IndexService = class {
|
|
|
3034
3735
|
fileState: state
|
|
3035
3736
|
};
|
|
3036
3737
|
} catch (error) {
|
|
3037
|
-
|
|
3738
|
+
logger2.warn(
|
|
3038
3739
|
{ filePath, error: error instanceof Error ? error.message : String(error) },
|
|
3039
3740
|
"Failed to process file during incremental indexing, skipping"
|
|
3040
3741
|
);
|
|
@@ -3081,13 +3782,13 @@ var IndexService = class {
|
|
|
3081
3782
|
if (allSourceFiles.length > 0) {
|
|
3082
3783
|
const graph = await this.codeGraphService.buildGraph(allSourceFiles);
|
|
3083
3784
|
await this.codeGraphService.saveGraph(store.id, graph);
|
|
3084
|
-
|
|
3785
|
+
logger2.debug(
|
|
3085
3786
|
{ storeId: store.id, sourceFiles: allSourceFiles.length },
|
|
3086
3787
|
"Rebuilt code graph during incremental indexing"
|
|
3087
3788
|
);
|
|
3088
3789
|
} else {
|
|
3089
3790
|
await this.codeGraphService.deleteGraph(store.id);
|
|
3090
|
-
|
|
3791
|
+
logger2.debug(
|
|
3091
3792
|
{ storeId: store.id },
|
|
3092
3793
|
"Deleted stale code graph (no source files remain)"
|
|
3093
3794
|
);
|
|
@@ -3108,7 +3809,7 @@ var IndexService = class {
|
|
|
3108
3809
|
message: "Incremental indexing complete"
|
|
3109
3810
|
});
|
|
3110
3811
|
const timeMs = Date.now() - startTime;
|
|
3111
|
-
|
|
3812
|
+
logger2.info(
|
|
3112
3813
|
{
|
|
3113
3814
|
storeId: store.id,
|
|
3114
3815
|
storeName: store.name,
|
|
@@ -3131,7 +3832,7 @@ var IndexService = class {
|
|
|
3131
3832
|
filesUnchanged: drift.unchanged.length
|
|
3132
3833
|
});
|
|
3133
3834
|
} catch (error) {
|
|
3134
|
-
|
|
3835
|
+
logger2.error(
|
|
3135
3836
|
{
|
|
3136
3837
|
storeId: store.id,
|
|
3137
3838
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -3150,7 +3851,7 @@ var IndexService = class {
|
|
|
3150
3851
|
const files = await this.discoverFiles(store.path, store.ingest);
|
|
3151
3852
|
const documents = [];
|
|
3152
3853
|
let filesProcessed = 0;
|
|
3153
|
-
|
|
3854
|
+
logger2.debug(
|
|
3154
3855
|
{
|
|
3155
3856
|
storeId: store.id,
|
|
3156
3857
|
path: store.path,
|
|
@@ -3174,7 +3875,7 @@ var IndexService = class {
|
|
|
3174
3875
|
try {
|
|
3175
3876
|
return await this.processFile(filePath, store);
|
|
3176
3877
|
} catch (error) {
|
|
3177
|
-
|
|
3878
|
+
logger2.warn(
|
|
3178
3879
|
{ filePath, error: error instanceof Error ? error.message : String(error) },
|
|
3179
3880
|
"Failed to process file, skipping"
|
|
3180
3881
|
);
|
|
@@ -3189,11 +3890,19 @@ var IndexService = class {
|
|
|
3189
3890
|
}
|
|
3190
3891
|
}
|
|
3191
3892
|
filesProcessed += batch.length;
|
|
3893
|
+
const elapsedMs = Date.now() - startTime;
|
|
3894
|
+
const filesPerSecond = elapsedMs > 0 ? filesProcessed / elapsedMs * 1e3 : 0;
|
|
3895
|
+
const lastFile = batch.at(-1);
|
|
3192
3896
|
onProgress?.({
|
|
3193
3897
|
type: "progress",
|
|
3194
3898
|
current: filesProcessed,
|
|
3195
3899
|
total: files.length,
|
|
3196
|
-
message: `Indexed ${String(filesProcessed)}/${String(files.length)} files
|
|
3900
|
+
message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`,
|
|
3901
|
+
details: {
|
|
3902
|
+
...lastFile !== void 0 ? { currentFile: basename(lastFile) } : {},
|
|
3903
|
+
elapsedMs,
|
|
3904
|
+
filesPerSecond
|
|
3905
|
+
}
|
|
3197
3906
|
});
|
|
3198
3907
|
}
|
|
3199
3908
|
if (documents.length > 0) {
|
|
@@ -3206,14 +3915,18 @@ var IndexService = class {
|
|
|
3206
3915
|
} else if (this.codeGraphService) {
|
|
3207
3916
|
await this.codeGraphService.deleteGraph(store.id);
|
|
3208
3917
|
}
|
|
3918
|
+
const timeMs = Date.now() - startTime;
|
|
3209
3919
|
onProgress?.({
|
|
3210
3920
|
type: "complete",
|
|
3211
3921
|
current: files.length,
|
|
3212
3922
|
total: files.length,
|
|
3213
|
-
message: "Indexing complete"
|
|
3923
|
+
message: "Indexing complete",
|
|
3924
|
+
details: {
|
|
3925
|
+
elapsedMs: timeMs,
|
|
3926
|
+
filesPerSecond: timeMs > 0 ? files.length / timeMs * 1e3 : 0
|
|
3927
|
+
}
|
|
3214
3928
|
});
|
|
3215
|
-
|
|
3216
|
-
logger.info(
|
|
3929
|
+
logger2.info(
|
|
3217
3930
|
{
|
|
3218
3931
|
storeId: store.id,
|
|
3219
3932
|
storeName: store.name,
|
|
@@ -3235,14 +3948,20 @@ var IndexService = class {
|
|
|
3235
3948
|
* Extracted for parallel processing.
|
|
3236
3949
|
*/
|
|
3237
3950
|
async processFile(filePath, store) {
|
|
3238
|
-
const
|
|
3239
|
-
const fileHash = createHash3("md5").update(
|
|
3240
|
-
const chunks = this.chunker.chunk(content, filePath);
|
|
3951
|
+
const rawContent = await readFile5(filePath, "utf-8");
|
|
3952
|
+
const fileHash = createHash3("md5").update(rawContent).digest("hex");
|
|
3241
3953
|
const relativePath = relative(store.path, filePath);
|
|
3954
|
+
const content = this.prependPath ? `[${relativePath}]
|
|
3955
|
+
${rawContent}` : rawContent;
|
|
3956
|
+
const chunks = this.chunker.chunk(content, filePath);
|
|
3242
3957
|
const pathHash = createHash3("md5").update(relativePath).digest("hex").slice(0, 8);
|
|
3243
3958
|
const ext = extname(filePath).toLowerCase();
|
|
3244
3959
|
const fileName = basename(filePath).toLowerCase();
|
|
3245
3960
|
const fileType = this.classifyFileType(ext, fileName, filePath);
|
|
3961
|
+
const language = EXT_TO_LANGUAGE[ext];
|
|
3962
|
+
const normalizedRelPath = relativePath.replaceAll("\\", "/");
|
|
3963
|
+
const depth = normalizedRelPath.split("/").length - 1;
|
|
3964
|
+
const isEntryPoint = ENTRY_POINT_NAMES.has(basename(filePath));
|
|
3246
3965
|
const sourceFile = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"].includes(ext) ? { path: filePath, content } : void 0;
|
|
3247
3966
|
if (chunks.length === 0) {
|
|
3248
3967
|
return { documents: [], sourceFile };
|
|
@@ -3275,7 +3994,11 @@ var IndexService = class {
|
|
|
3275
3994
|
sectionHeader: chunk.sectionHeader,
|
|
3276
3995
|
functionName: chunk.functionName,
|
|
3277
3996
|
hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
|
|
3278
|
-
docSummary: chunk.docSummary
|
|
3997
|
+
docSummary: chunk.docSummary,
|
|
3998
|
+
relativePath: normalizedRelPath,
|
|
3999
|
+
language,
|
|
4000
|
+
depth,
|
|
4001
|
+
isEntryPoint
|
|
3279
4002
|
}
|
|
3280
4003
|
});
|
|
3281
4004
|
}
|
|
@@ -3286,11 +4009,11 @@ var IndexService = class {
|
|
|
3286
4009
|
*/
|
|
3287
4010
|
async getTrackedFiles(repoPath) {
|
|
3288
4011
|
try {
|
|
3289
|
-
const gitDir =
|
|
4012
|
+
const gitDir = join8(repoPath, ".git");
|
|
3290
4013
|
try {
|
|
3291
4014
|
await stat2(gitDir);
|
|
3292
4015
|
} catch {
|
|
3293
|
-
|
|
4016
|
+
logger2.info({ repoPath }, "Not a git repository, using filesystem walk");
|
|
3294
4017
|
return null;
|
|
3295
4018
|
}
|
|
3296
4019
|
const { stdout } = await execFileAsync("git", ["ls-files", "-z"], {
|
|
@@ -3300,12 +4023,12 @@ var IndexService = class {
|
|
|
3300
4023
|
});
|
|
3301
4024
|
const files = stdout.split("\0").filter(Boolean);
|
|
3302
4025
|
if (files.some((f) => f === ".gitmodules")) {
|
|
3303
|
-
|
|
4026
|
+
logger2.info({ repoPath }, "Repository has submodules (skipped by ls-files)");
|
|
3304
4027
|
}
|
|
3305
4028
|
return files;
|
|
3306
4029
|
} catch (error) {
|
|
3307
4030
|
const reason = error instanceof Error ? error.message : String(error);
|
|
3308
|
-
|
|
4031
|
+
logger2.warn({ repoPath, reason }, "git ls-files failed, using filesystem walk");
|
|
3309
4032
|
return null;
|
|
3310
4033
|
}
|
|
3311
4034
|
}
|
|
@@ -3318,11 +4041,11 @@ var IndexService = class {
|
|
|
3318
4041
|
const trackedFiles = await this.getTrackedFiles(storePath);
|
|
3319
4042
|
let candidates;
|
|
3320
4043
|
if (trackedFiles !== null) {
|
|
3321
|
-
candidates = trackedFiles.map((f) =>
|
|
3322
|
-
|
|
4044
|
+
candidates = trackedFiles.map((f) => join8(storePath, f));
|
|
4045
|
+
logger2.debug({ storePath, count: candidates.length }, "Using git ls-files for discovery");
|
|
3323
4046
|
} else {
|
|
3324
4047
|
candidates = await this.scanDirectory(storePath);
|
|
3325
|
-
|
|
4048
|
+
logger2.debug({ storePath, count: candidates.length }, "Using filesystem walk for discovery");
|
|
3326
4049
|
}
|
|
3327
4050
|
return this.filterFiles(candidates, storePath, ingestConfig);
|
|
3328
4051
|
}
|
|
@@ -3344,6 +4067,12 @@ var IndexService = class {
|
|
|
3344
4067
|
const ext = extname(filePath).toLowerCase();
|
|
3345
4068
|
const filename = basename(filePath);
|
|
3346
4069
|
if (!TEXT_EXTENSIONS.has(ext)) continue;
|
|
4070
|
+
const relativePath = relative(storePath, filePath).replaceAll("\\", "/");
|
|
4071
|
+
const pathSegments = relativePath.split("/");
|
|
4072
|
+
const dirSegments = pathSegments.slice(0, -1);
|
|
4073
|
+
if (dirSegments.some((segment) => this.ignoreDirs.has(segment))) {
|
|
4074
|
+
continue;
|
|
4075
|
+
}
|
|
3347
4076
|
const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(filename));
|
|
3348
4077
|
if (shouldIgnore) continue;
|
|
3349
4078
|
if (skipMinified && isMinifiedFile(filename)) {
|
|
@@ -3351,10 +4080,10 @@ var IndexService = class {
|
|
|
3351
4080
|
continue;
|
|
3352
4081
|
}
|
|
3353
4082
|
if (excludeGlobs.length > 0) {
|
|
3354
|
-
const
|
|
3355
|
-
const excluded = excludeGlobs.some((glob) => minimatch(
|
|
4083
|
+
const relativePath2 = relative(storePath, filePath);
|
|
4084
|
+
const excluded = excludeGlobs.some((glob) => minimatch(relativePath2, glob));
|
|
3356
4085
|
if (excluded) {
|
|
3357
|
-
skippedExcluded.push(
|
|
4086
|
+
skippedExcluded.push(relativePath2);
|
|
3358
4087
|
continue;
|
|
3359
4088
|
}
|
|
3360
4089
|
}
|
|
@@ -3382,7 +4111,7 @@ var IndexService = class {
|
|
|
3382
4111
|
result.push(filePath);
|
|
3383
4112
|
}
|
|
3384
4113
|
if (skippedLarge.length > 0) {
|
|
3385
|
-
|
|
4114
|
+
logger2.info(
|
|
3386
4115
|
{
|
|
3387
4116
|
storePath,
|
|
3388
4117
|
count: skippedLarge.length,
|
|
@@ -3393,19 +4122,19 @@ var IndexService = class {
|
|
|
3393
4122
|
);
|
|
3394
4123
|
}
|
|
3395
4124
|
if (skippedMinified.length > 0) {
|
|
3396
|
-
|
|
4125
|
+
logger2.debug(
|
|
3397
4126
|
{ storePath, count: skippedMinified.length, examples: skippedMinified.slice(0, 5) },
|
|
3398
4127
|
"Skipped minified files"
|
|
3399
4128
|
);
|
|
3400
4129
|
}
|
|
3401
4130
|
if (skippedBinary.length > 0) {
|
|
3402
|
-
|
|
4131
|
+
logger2.debug(
|
|
3403
4132
|
{ storePath, count: skippedBinary.length, examples: skippedBinary.slice(0, 5) },
|
|
3404
4133
|
"Skipped binary files"
|
|
3405
4134
|
);
|
|
3406
4135
|
}
|
|
3407
4136
|
if (skippedExcluded.length > 0) {
|
|
3408
|
-
|
|
4137
|
+
logger2.debug(
|
|
3409
4138
|
{ storePath, count: skippedExcluded.length, examples: skippedExcluded.slice(0, 5) },
|
|
3410
4139
|
"Skipped excluded files"
|
|
3411
4140
|
);
|
|
@@ -3416,7 +4145,7 @@ var IndexService = class {
|
|
|
3416
4145
|
const files = [];
|
|
3417
4146
|
const entries = await readdir(dir, { withFileTypes: true });
|
|
3418
4147
|
for (const entry of entries) {
|
|
3419
|
-
const fullPath =
|
|
4148
|
+
const fullPath = join8(dir, entry.name);
|
|
3420
4149
|
if (entry.isDirectory()) {
|
|
3421
4150
|
if (!this.ignoreDirs.has(entry.name)) {
|
|
3422
4151
|
files.push(...await this.scanDirectory(fullPath));
|
|
@@ -3517,7 +4246,7 @@ function classifyWebContentType(url, title) {
|
|
|
3517
4246
|
|
|
3518
4247
|
// src/services/manifest.service.ts
|
|
3519
4248
|
import { readFile as readFile6, access as access3, mkdir as mkdir3 } from "fs/promises";
|
|
3520
|
-
import { join as
|
|
4249
|
+
import { join as join9 } from "path";
|
|
3521
4250
|
|
|
3522
4251
|
// src/types/manifest.ts
|
|
3523
4252
|
import { z as z2 } from "zod";
|
|
@@ -3554,7 +4283,7 @@ function createEmptyManifest(storeId) {
|
|
|
3554
4283
|
var ManifestService = class {
|
|
3555
4284
|
manifestsDir;
|
|
3556
4285
|
constructor(dataDir) {
|
|
3557
|
-
this.manifestsDir =
|
|
4286
|
+
this.manifestsDir = join9(dataDir, "manifests");
|
|
3558
4287
|
}
|
|
3559
4288
|
/**
|
|
3560
4289
|
* Initialize the manifests directory.
|
|
@@ -3566,7 +4295,7 @@ var ManifestService = class {
|
|
|
3566
4295
|
* Get the file path for a store's manifest.
|
|
3567
4296
|
*/
|
|
3568
4297
|
getManifestPath(storeId) {
|
|
3569
|
-
return
|
|
4298
|
+
return join9(this.manifestsDir, `${storeId}.manifest.json`);
|
|
3570
4299
|
}
|
|
3571
4300
|
/**
|
|
3572
4301
|
* Load a store's manifest.
|
|
@@ -3650,6 +4379,202 @@ var ManifestService = class {
|
|
|
3650
4379
|
}
|
|
3651
4380
|
};
|
|
3652
4381
|
|
|
4382
|
+
// src/services/reranker.service.ts
|
|
4383
|
+
import { homedir as homedir3 } from "os";
|
|
4384
|
+
import { join as join10 } from "path";
|
|
4385
|
+
import { env as env2, AutoModelForSequenceClassification, AutoTokenizer } from "@huggingface/transformers";
|
|
4386
|
+
env2.cacheDir = join10(homedir3(), ".cache", "huggingface-transformers");
|
|
4387
|
+
var DEBUG_RERANKER = process.env["BK_DEBUG_RERANKER"] === "1";
|
|
4388
|
+
var debugStats = {
|
|
4389
|
+
totalQueries: 0,
|
|
4390
|
+
top1Reordered: 0,
|
|
4391
|
+
top3Reordered: 0,
|
|
4392
|
+
scoreVariancePositive: 0
|
|
4393
|
+
};
|
|
4394
|
+
if (DEBUG_RERANKER) {
|
|
4395
|
+
process.on("beforeExit", () => {
|
|
4396
|
+
RerankerService.logDebugStats();
|
|
4397
|
+
});
|
|
4398
|
+
}
|
|
4399
|
+
var RerankerService = class {
|
|
4400
|
+
model = null;
|
|
4401
|
+
tokenizer = null;
|
|
4402
|
+
initPromise = null;
|
|
4403
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
|
|
4404
|
+
disposed = false;
|
|
4405
|
+
config;
|
|
4406
|
+
constructor(config) {
|
|
4407
|
+
this.config = config;
|
|
4408
|
+
}
|
|
4409
|
+
/**
|
|
4410
|
+
* Guard against use-after-dispose
|
|
4411
|
+
*/
|
|
4412
|
+
assertNotDisposed() {
|
|
4413
|
+
if (this.disposed) {
|
|
4414
|
+
throw new Error("RerankerService has been disposed");
|
|
4415
|
+
}
|
|
4416
|
+
}
|
|
4417
|
+
/**
|
|
4418
|
+
* Check if reranking is enabled
|
|
4419
|
+
*/
|
|
4420
|
+
isEnabled() {
|
|
4421
|
+
return this.config.enabled;
|
|
4422
|
+
}
|
|
4423
|
+
/**
|
|
4424
|
+
* Initialize the reranker model (concurrency-safe).
|
|
4425
|
+
* Multiple concurrent calls will share the same initialization promise.
|
|
4426
|
+
*/
|
|
4427
|
+
async initialize() {
|
|
4428
|
+
this.assertNotDisposed();
|
|
4429
|
+
if (this.model !== null && this.tokenizer !== null) return;
|
|
4430
|
+
this.initPromise ??= (async () => {
|
|
4431
|
+
try {
|
|
4432
|
+
const [model, tokenizer] = await Promise.all([
|
|
4433
|
+
AutoModelForSequenceClassification.from_pretrained(this.config.model, {
|
|
4434
|
+
dtype: "fp32"
|
|
4435
|
+
}),
|
|
4436
|
+
AutoTokenizer.from_pretrained(this.config.model)
|
|
4437
|
+
]);
|
|
4438
|
+
this.model = model;
|
|
4439
|
+
this.tokenizer = tokenizer;
|
|
4440
|
+
} catch (error) {
|
|
4441
|
+
this.initPromise = null;
|
|
4442
|
+
throw error;
|
|
4443
|
+
}
|
|
4444
|
+
})();
|
|
4445
|
+
await this.initPromise;
|
|
4446
|
+
}
|
|
4447
|
+
/**
|
|
4448
|
+
* Rerank candidates by scoring query-document pairs with the cross-encoder.
|
|
4449
|
+
* Returns results sorted by reranker score (descending).
|
|
4450
|
+
*/
|
|
4451
|
+
async rerank(query, candidates) {
|
|
4452
|
+
this.assertNotDisposed();
|
|
4453
|
+
const startTime = Date.now();
|
|
4454
|
+
if (!this.config.enabled) {
|
|
4455
|
+
return {
|
|
4456
|
+
results: candidates.map((c) => ({
|
|
4457
|
+
id: c.id,
|
|
4458
|
+
originalScore: c.score,
|
|
4459
|
+
rerankerScore: c.score
|
|
4460
|
+
})),
|
|
4461
|
+
timeMs: Date.now() - startTime
|
|
4462
|
+
};
|
|
4463
|
+
}
|
|
4464
|
+
if (candidates.length === 0) {
|
|
4465
|
+
return { results: [], timeMs: Date.now() - startTime };
|
|
4466
|
+
}
|
|
4467
|
+
if (this.model === null || this.tokenizer === null) {
|
|
4468
|
+
await this.initialize();
|
|
4469
|
+
}
|
|
4470
|
+
if (this.model === null || this.tokenizer === null) {
|
|
4471
|
+
throw new Error("Failed to initialize reranker model");
|
|
4472
|
+
}
|
|
4473
|
+
const toRerank = candidates.slice(0, this.config.topK);
|
|
4474
|
+
const scoredResults = [];
|
|
4475
|
+
for (const candidate of toRerank) {
|
|
4476
|
+
const score = await this.scoreQueryDocPair(query, candidate.content);
|
|
4477
|
+
scoredResults.push({
|
|
4478
|
+
id: candidate.id,
|
|
4479
|
+
originalScore: candidate.score,
|
|
4480
|
+
rerankerScore: score
|
|
4481
|
+
});
|
|
4482
|
+
}
|
|
4483
|
+
const preRankIds = scoredResults.map((r) => r.id);
|
|
4484
|
+
scoredResults.sort((a, b) => b.rerankerScore - a.rerankerScore);
|
|
4485
|
+
if (DEBUG_RERANKER) {
|
|
4486
|
+
const postRankIds = scoredResults.map((r) => r.id);
|
|
4487
|
+
debugStats.totalQueries++;
|
|
4488
|
+
if (preRankIds[0] !== postRankIds[0]) debugStats.top1Reordered++;
|
|
4489
|
+
const preTop3 = preRankIds.slice(0, 3).join(",");
|
|
4490
|
+
const postTop3 = postRankIds.slice(0, 3).join(",");
|
|
4491
|
+
if (preTop3 !== postTop3) debugStats.top3Reordered++;
|
|
4492
|
+
const scores = scoredResults.map((r) => r.rerankerScore);
|
|
4493
|
+
const mean = scores.reduce((a, b) => a + b, 0) / scores.length;
|
|
4494
|
+
const variance = scores.reduce((a, b) => a + (b - mean) ** 2, 0) / scores.length;
|
|
4495
|
+
if (variance > 1e-10) debugStats.scoreVariancePositive++;
|
|
4496
|
+
if (debugStats.totalQueries <= 3) {
|
|
4497
|
+
console.error(
|
|
4498
|
+
`[DEBUG_RERANKER] query #${String(debugStats.totalQueries)}: pre=[${preRankIds.slice(0, 3).join(", ")}] post=[${postRankIds.slice(0, 3).join(", ")}] scores=[${scores.slice(0, 3).map((s) => s.toFixed(4)).join(", ")}] variance=${variance.toFixed(6)}`
|
|
4499
|
+
);
|
|
4500
|
+
}
|
|
4501
|
+
}
|
|
4502
|
+
const finalResults = scoredResults.slice(0, this.config.returnK);
|
|
4503
|
+
return {
|
|
4504
|
+
results: finalResults,
|
|
4505
|
+
timeMs: Date.now() - startTime
|
|
4506
|
+
};
|
|
4507
|
+
}
|
|
4508
|
+
/**
|
|
4509
|
+
* Score a single query-document pair using the cross-encoder.
|
|
4510
|
+
*/
|
|
4511
|
+
async scoreQueryDocPair(query, document) {
|
|
4512
|
+
if (this.model === null || this.tokenizer === null) {
|
|
4513
|
+
throw new Error("Model not initialized");
|
|
4514
|
+
}
|
|
4515
|
+
const inputs = await this.tokenizer([query], {
|
|
4516
|
+
text_pair: [document],
|
|
4517
|
+
padding: true,
|
|
4518
|
+
truncation: true,
|
|
4519
|
+
max_length: 512
|
|
4520
|
+
});
|
|
4521
|
+
const output = await this.model(inputs);
|
|
4522
|
+
const logits = output.logits;
|
|
4523
|
+
const scores = Array.from(logits.data);
|
|
4524
|
+
const score = scores[0];
|
|
4525
|
+
if (typeof score !== "number") {
|
|
4526
|
+
throw new Error("Invalid reranker output: expected numeric score");
|
|
4527
|
+
}
|
|
4528
|
+
return score;
|
|
4529
|
+
}
|
|
4530
|
+
/**
|
|
4531
|
+
* Check if the reranker is initialized.
|
|
4532
|
+
*/
|
|
4533
|
+
isInitialized() {
|
|
4534
|
+
return this.model !== null && this.tokenizer !== null;
|
|
4535
|
+
}
|
|
4536
|
+
/**
|
|
4537
|
+
* Check if this service has been disposed.
|
|
4538
|
+
*/
|
|
4539
|
+
isDisposed() {
|
|
4540
|
+
return this.disposed;
|
|
4541
|
+
}
|
|
4542
|
+
/**
|
|
4543
|
+
* Reset the service to uninitialized state, allowing reuse after disposal.
|
|
4544
|
+
*/
|
|
4545
|
+
async reset() {
|
|
4546
|
+
if (this.model !== null) {
|
|
4547
|
+
await this.model.dispose();
|
|
4548
|
+
this.model = null;
|
|
4549
|
+
}
|
|
4550
|
+
this.tokenizer = null;
|
|
4551
|
+
this.initPromise = null;
|
|
4552
|
+
this.disposed = false;
|
|
4553
|
+
}
|
|
4554
|
+
/**
|
|
4555
|
+
* Log aggregate reranker debug stats. Only meaningful when BK_DEBUG_RERANKER=1.
|
|
4556
|
+
*/
|
|
4557
|
+
static logDebugStats() {
|
|
4558
|
+
if (!DEBUG_RERANKER || debugStats.totalQueries === 0) return;
|
|
4559
|
+
const pct = (n) => (n / debugStats.totalQueries * 100).toFixed(1);
|
|
4560
|
+
console.error(
|
|
4561
|
+
`[DEBUG_RERANKER] Aggregate: ${String(debugStats.totalQueries)} queries, top1 reordered: ${pct(debugStats.top1Reordered)}% (${String(debugStats.top1Reordered)}/${String(debugStats.totalQueries)}), top3 reordered: ${pct(debugStats.top3Reordered)}% (${String(debugStats.top3Reordered)}/${String(debugStats.totalQueries)}), score variance>0: ${pct(debugStats.scoreVariancePositive)}% (${String(debugStats.scoreVariancePositive)}/${String(debugStats.totalQueries)})`
|
|
4562
|
+
);
|
|
4563
|
+
}
|
|
4564
|
+
/**
|
|
4565
|
+
* Dispose the reranker to free resources.
|
|
4566
|
+
*/
|
|
4567
|
+
async dispose() {
|
|
4568
|
+
if (this.model !== null) {
|
|
4569
|
+
await this.model.dispose();
|
|
4570
|
+
this.model = null;
|
|
4571
|
+
}
|
|
4572
|
+
this.tokenizer = null;
|
|
4573
|
+
this.initPromise = null;
|
|
4574
|
+
this.disposed = true;
|
|
4575
|
+
}
|
|
4576
|
+
};
|
|
4577
|
+
|
|
3653
4578
|
// src/services/code-unit.service.ts
|
|
3654
4579
|
var CodeUnitService = class {
|
|
3655
4580
|
extractCodeUnit(code, symbolName, language) {
|
|
@@ -3819,8 +4744,51 @@ var CodeUnitService = class {
|
|
|
3819
4744
|
}
|
|
3820
4745
|
};
|
|
3821
4746
|
|
|
4747
|
+
// src/services/search-env.ts
|
|
4748
|
+
var logger3 = createLogger("search-env");
|
|
4749
|
+
function parseSearchEnvOverrides(strict) {
|
|
4750
|
+
return {
|
|
4751
|
+
rrfK: parseRrfK(process.env["BK_RRF_K"], strict),
|
|
4752
|
+
vectorWeight: parseVectorWeight(process.env["BK_RRF_VECTOR_WEIGHT"], strict),
|
|
4753
|
+
candidateMultiplier: parseCandidateMultiplier(process.env["BK_CANDIDATE_MULTIPLIER"], strict)
|
|
4754
|
+
};
|
|
4755
|
+
}
|
|
4756
|
+
function parseRrfK(raw, strict) {
|
|
4757
|
+
if (raw === void 0 || raw === "") return void 0;
|
|
4758
|
+
const parsed = Number.parseInt(raw, 10);
|
|
4759
|
+
if (Number.isNaN(parsed) || parsed <= 0) {
|
|
4760
|
+
const msg = `BK_RRF_K must be a positive integer, got: "${raw}"`;
|
|
4761
|
+
if (strict) throw new Error(msg);
|
|
4762
|
+
logger3.warn(msg);
|
|
4763
|
+
return void 0;
|
|
4764
|
+
}
|
|
4765
|
+
return parsed;
|
|
4766
|
+
}
|
|
4767
|
+
function parseVectorWeight(raw, strict) {
|
|
4768
|
+
if (raw === void 0 || raw === "") return void 0;
|
|
4769
|
+
const parsed = Number.parseFloat(raw);
|
|
4770
|
+
if (Number.isNaN(parsed) || parsed < 0 || parsed > 1) {
|
|
4771
|
+
const msg = `BK_RRF_VECTOR_WEIGHT must be a float between 0 and 1, got: "${raw}"`;
|
|
4772
|
+
if (strict) throw new Error(msg);
|
|
4773
|
+
logger3.warn(msg);
|
|
4774
|
+
return void 0;
|
|
4775
|
+
}
|
|
4776
|
+
return parsed;
|
|
4777
|
+
}
|
|
4778
|
+
function parseCandidateMultiplier(raw, strict) {
|
|
4779
|
+
if (raw === void 0 || raw === "") return void 0;
|
|
4780
|
+
const parsed = Number.parseInt(raw, 10);
|
|
4781
|
+
if (Number.isNaN(parsed) || parsed < 1) {
|
|
4782
|
+
const msg = `BK_CANDIDATE_MULTIPLIER must be an integer >= 1, got: "${raw}"`;
|
|
4783
|
+
if (strict) throw new Error(msg);
|
|
4784
|
+
logger3.warn(msg);
|
|
4785
|
+
return void 0;
|
|
4786
|
+
}
|
|
4787
|
+
return parsed;
|
|
4788
|
+
}
|
|
4789
|
+
|
|
3822
4790
|
// src/services/search.service.ts
|
|
3823
|
-
var
|
|
4791
|
+
var logger4 = createLogger("search-service");
|
|
3824
4792
|
var INTENT_FILE_BOOSTS = {
|
|
3825
4793
|
"how-to": {
|
|
3826
4794
|
"documentation-primary": 1.3,
|
|
@@ -3982,9 +4950,10 @@ function mapSearchIntentToQueryIntent(intent) {
|
|
|
3982
4950
|
}
|
|
3983
4951
|
}
|
|
3984
4952
|
var RRF_PRESETS = {
|
|
3985
|
-
code: { k:
|
|
3986
|
-
web: { k: 30, vectorWeight: 0.
|
|
4953
|
+
code: { k: 25, vectorWeight: 0.75, ftsWeight: 0.25 },
|
|
4954
|
+
web: { k: 30, vectorWeight: 0.7, ftsWeight: 0.3 }
|
|
3987
4955
|
};
|
|
4956
|
+
var DEFAULT_CANDIDATE_MULTIPLIER = 2;
|
|
3988
4957
|
function detectContentType(results) {
|
|
3989
4958
|
const webCount = results.filter((r) => "url" in r.metadata).length;
|
|
3990
4959
|
return webCount > results.length / 2 ? "web" : "code";
|
|
@@ -3993,13 +4962,15 @@ var SearchService = class {
|
|
|
3993
4962
|
lanceStore;
|
|
3994
4963
|
codeUnitService;
|
|
3995
4964
|
codeGraphService;
|
|
4965
|
+
rerankerService;
|
|
3996
4966
|
graphCache;
|
|
3997
4967
|
searchConfig;
|
|
3998
4968
|
unsubscribeCacheInvalidation;
|
|
3999
|
-
constructor(lanceStore, codeGraphService, searchConfig) {
|
|
4969
|
+
constructor(lanceStore, codeGraphService, searchConfig, rerankerService) {
|
|
4000
4970
|
this.lanceStore = lanceStore;
|
|
4001
4971
|
this.codeUnitService = new CodeUnitService();
|
|
4002
4972
|
this.codeGraphService = codeGraphService;
|
|
4973
|
+
this.rerankerService = rerankerService;
|
|
4003
4974
|
this.graphCache = /* @__PURE__ */ new Map();
|
|
4004
4975
|
this.searchConfig = searchConfig;
|
|
4005
4976
|
if (codeGraphService) {
|
|
@@ -4047,7 +5018,7 @@ var SearchService = class {
|
|
|
4047
5018
|
const detail = query.detail ?? "minimal";
|
|
4048
5019
|
const intents = classifyQueryIntents(query.query);
|
|
4049
5020
|
const primaryIntent = query.intent !== void 0 ? mapSearchIntentToQueryIntent(query.intent) : getPrimaryIntent(intents);
|
|
4050
|
-
|
|
5021
|
+
logger4.debug(
|
|
4051
5022
|
{
|
|
4052
5023
|
query: query.query,
|
|
4053
5024
|
mode,
|
|
@@ -4063,6 +5034,7 @@ var SearchService = class {
|
|
|
4063
5034
|
);
|
|
4064
5035
|
let allResults = [];
|
|
4065
5036
|
let maxRawScore = 0;
|
|
5037
|
+
let rerankTimeMs;
|
|
4066
5038
|
const fetchLimit = limit * 3;
|
|
4067
5039
|
if (mode === "vector") {
|
|
4068
5040
|
const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
|
|
@@ -4079,16 +5051,17 @@ var SearchService = class {
|
|
|
4079
5051
|
);
|
|
4080
5052
|
allResults = hybridResult.results;
|
|
4081
5053
|
maxRawScore = hybridResult.maxRawScore;
|
|
5054
|
+
rerankTimeMs = hybridResult.rerankTimeMs;
|
|
4082
5055
|
}
|
|
4083
5056
|
if (query.minRelevance !== void 0) {
|
|
4084
5057
|
if (mode === "fts") {
|
|
4085
|
-
|
|
5058
|
+
logger4.warn(
|
|
4086
5059
|
{ query: query.query, minRelevance: query.minRelevance },
|
|
4087
5060
|
"minRelevance filter ignored in FTS mode (no vector scores available)"
|
|
4088
5061
|
);
|
|
4089
5062
|
} else if (maxRawScore < query.minRelevance) {
|
|
4090
5063
|
const timeMs2 = Date.now() - startTime;
|
|
4091
|
-
|
|
5064
|
+
logger4.info(
|
|
4092
5065
|
{
|
|
4093
5066
|
query: query.query,
|
|
4094
5067
|
mode,
|
|
@@ -4125,7 +5098,7 @@ var SearchService = class {
|
|
|
4125
5098
|
});
|
|
4126
5099
|
const timeMs = Date.now() - startTime;
|
|
4127
5100
|
const confidence = mode !== "fts" ? this.calculateConfidence(maxRawScore) : void 0;
|
|
4128
|
-
|
|
5101
|
+
logger4.info(
|
|
4129
5102
|
{
|
|
4130
5103
|
query: query.query,
|
|
4131
5104
|
mode,
|
|
@@ -4138,7 +5111,7 @@ var SearchService = class {
|
|
|
4138
5111
|
},
|
|
4139
5112
|
"Search complete"
|
|
4140
5113
|
);
|
|
4141
|
-
|
|
5114
|
+
const response = {
|
|
4142
5115
|
query: query.query,
|
|
4143
5116
|
mode,
|
|
4144
5117
|
stores,
|
|
@@ -4148,6 +5121,10 @@ var SearchService = class {
|
|
|
4148
5121
|
confidence,
|
|
4149
5122
|
maxRawScore: mode !== "fts" ? maxRawScore : void 0
|
|
4150
5123
|
};
|
|
5124
|
+
if (rerankTimeMs !== void 0) {
|
|
5125
|
+
Object.assign(response, { rerankTimeMs });
|
|
5126
|
+
}
|
|
5127
|
+
return response;
|
|
4151
5128
|
}
|
|
4152
5129
|
/**
|
|
4153
5130
|
* Deduplicate results by source file path.
|
|
@@ -4253,14 +5230,16 @@ var SearchService = class {
|
|
|
4253
5230
|
*/
|
|
4254
5231
|
async hybridSearchWithMetadata(query, stores, limit, threshold) {
|
|
4255
5232
|
const intents = classifyQueryIntents(query);
|
|
4256
|
-
const
|
|
5233
|
+
const envOverrides = parseSearchEnvOverrides(false);
|
|
5234
|
+
const candidateMultiplier = envOverrides.candidateMultiplier ?? DEFAULT_CANDIDATE_MULTIPLIER;
|
|
5235
|
+
const rawVectorResults = await this.vectorSearchRaw(query, stores, limit * candidateMultiplier);
|
|
4257
5236
|
const rawVectorScores = /* @__PURE__ */ new Map();
|
|
4258
5237
|
rawVectorResults.forEach((r) => {
|
|
4259
5238
|
rawVectorScores.set(r.id, r.score);
|
|
4260
5239
|
});
|
|
4261
5240
|
const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
|
|
4262
5241
|
const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
|
|
4263
|
-
const ftsResults = await this.ftsSearch(query, stores, limit *
|
|
5242
|
+
const ftsResults = await this.ftsSearch(query, stores, limit * candidateMultiplier);
|
|
4264
5243
|
const vectorRanks = /* @__PURE__ */ new Map();
|
|
4265
5244
|
const ftsRanks = /* @__PURE__ */ new Map();
|
|
4266
5245
|
const allDocs = /* @__PURE__ */ new Map();
|
|
@@ -4276,8 +5255,11 @@ var SearchService = class {
|
|
|
4276
5255
|
});
|
|
4277
5256
|
const rrfScores = [];
|
|
4278
5257
|
const contentType = detectContentType([...allDocs.values()]);
|
|
4279
|
-
const
|
|
4280
|
-
|
|
5258
|
+
const preset = RRF_PRESETS[contentType];
|
|
5259
|
+
const k = envOverrides.rrfK ?? preset.k;
|
|
5260
|
+
const vectorWeight = envOverrides.vectorWeight ?? preset.vectorWeight;
|
|
5261
|
+
const ftsWeight = 1 - vectorWeight;
|
|
5262
|
+
for (const [id, result2] of allDocs) {
|
|
4281
5263
|
const vectorRank = vectorRanks.get(id) ?? Infinity;
|
|
4282
5264
|
const ftsRank = ftsRanks.get(id) ?? Infinity;
|
|
4283
5265
|
const rawVectorScore = rawVectorScores.get(id);
|
|
@@ -4285,19 +5267,23 @@ var SearchService = class {
|
|
|
4285
5267
|
const ftsRRF = ftsRank !== Infinity ? ftsWeight / (k + ftsRank) : 0;
|
|
4286
5268
|
const fileTypeBoost = this.getFileTypeBoost(
|
|
4287
5269
|
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
4288
|
-
|
|
5270
|
+
result2.metadata["fileType"],
|
|
4289
5271
|
intents
|
|
4290
5272
|
);
|
|
4291
|
-
const frameworkBoost = this.getFrameworkContextBoost(query,
|
|
4292
|
-
const urlKeywordBoost = this.getUrlKeywordBoost(query,
|
|
4293
|
-
const pathKeywordBoost = this.getPathKeywordBoost(query,
|
|
5273
|
+
const frameworkBoost = this.getFrameworkContextBoost(query, result2);
|
|
5274
|
+
const urlKeywordBoost = this.getUrlKeywordBoost(query, result2);
|
|
5275
|
+
const pathKeywordBoost = this.getPathKeywordBoost(query, result2);
|
|
5276
|
+
const depthBoost = this.getDepthBoost(result2, getPrimaryIntent(intents));
|
|
5277
|
+
const entryPointBoost = this.getEntryPointBoost(result2, getPrimaryIntent(intents));
|
|
4294
5278
|
const metadata = {
|
|
4295
5279
|
vectorRRF,
|
|
4296
5280
|
ftsRRF,
|
|
4297
5281
|
fileTypeBoost,
|
|
4298
5282
|
frameworkBoost,
|
|
4299
5283
|
urlKeywordBoost,
|
|
4300
|
-
pathKeywordBoost
|
|
5284
|
+
pathKeywordBoost,
|
|
5285
|
+
depthBoost,
|
|
5286
|
+
entryPointBoost
|
|
4301
5287
|
};
|
|
4302
5288
|
if (vectorRank !== Infinity) {
|
|
4303
5289
|
metadata.vectorRank = vectorRank;
|
|
@@ -4310,13 +5296,34 @@ var SearchService = class {
|
|
|
4310
5296
|
}
|
|
4311
5297
|
rrfScores.push({
|
|
4312
5298
|
id,
|
|
4313
|
-
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
|
|
4314
|
-
result,
|
|
5299
|
+
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost * depthBoost * entryPointBoost,
|
|
5300
|
+
result: result2,
|
|
4315
5301
|
rawVectorScore,
|
|
4316
5302
|
metadata
|
|
4317
5303
|
});
|
|
4318
5304
|
}
|
|
4319
|
-
const
|
|
5305
|
+
const sortedAll = rrfScores.sort((a, b) => b.score - a.score);
|
|
5306
|
+
let rerankTimeMs;
|
|
5307
|
+
let sorted;
|
|
5308
|
+
if (this.rerankerService !== void 0 && this.rerankerService.isEnabled() && sortedAll.length > 0) {
|
|
5309
|
+
const candidates = sortedAll.map((r) => ({
|
|
5310
|
+
id: r.id,
|
|
5311
|
+
content: r.result.content,
|
|
5312
|
+
score: r.score
|
|
5313
|
+
}));
|
|
5314
|
+
const reranked = await this.rerankerService.rerank(query, candidates);
|
|
5315
|
+
rerankTimeMs = reranked.timeMs;
|
|
5316
|
+
const rerankedScores = /* @__PURE__ */ new Map();
|
|
5317
|
+
reranked.results.forEach((r) => {
|
|
5318
|
+
rerankedScores.set(r.id, r.rerankerScore);
|
|
5319
|
+
});
|
|
5320
|
+
sorted = sortedAll.map((r) => ({
|
|
5321
|
+
...r,
|
|
5322
|
+
rerankerScore: rerankedScores.get(r.id)
|
|
5323
|
+
})).sort((a, b) => (b.rerankerScore ?? -Infinity) - (a.rerankerScore ?? -Infinity)).slice(0, limit);
|
|
5324
|
+
} else {
|
|
5325
|
+
sorted = sortedAll.slice(0, limit);
|
|
5326
|
+
}
|
|
4320
5327
|
let normalizedResults;
|
|
4321
5328
|
if (sorted.length > 0) {
|
|
4322
5329
|
const first = sorted[0];
|
|
@@ -4351,7 +5358,14 @@ var SearchService = class {
|
|
|
4351
5358
|
if (threshold !== void 0) {
|
|
4352
5359
|
normalizedResults = normalizedResults.filter((r) => r.score >= threshold);
|
|
4353
5360
|
}
|
|
4354
|
-
|
|
5361
|
+
const result = {
|
|
5362
|
+
results: normalizedResults,
|
|
5363
|
+
maxRawScore
|
|
5364
|
+
};
|
|
5365
|
+
if (rerankTimeMs !== void 0) {
|
|
5366
|
+
result.rerankTimeMs = rerankTimeMs;
|
|
5367
|
+
}
|
|
5368
|
+
return result;
|
|
4355
5369
|
}
|
|
4356
5370
|
async searchAllStores(query, storeIds) {
|
|
4357
5371
|
return this.search({
|
|
@@ -4500,6 +5514,33 @@ var SearchService = class {
|
|
|
4500
5514
|
const matchRatio = matchingTerms.length / queryTerms.length;
|
|
4501
5515
|
return 1 + 1 * matchRatio;
|
|
4502
5516
|
}
|
|
5517
|
+
/**
|
|
5518
|
+
* Get a depth-based score multiplier, gated by query intent.
|
|
5519
|
+
* Root-level files (depth 0) are boosted for how-to and conceptual queries
|
|
5520
|
+
* where high-level docs/READMEs are most relevant.
|
|
5521
|
+
* Returns 1.0 (no-op) for implementation/debugging/comparison intents
|
|
5522
|
+
* to avoid disturbing candidate ordering before reranker.
|
|
5523
|
+
*/
|
|
5524
|
+
getDepthBoost(result, intent) {
|
|
5525
|
+
if (intent !== "how-to" && intent !== "conceptual") return 1;
|
|
5526
|
+
const depth = result.metadata["depth"];
|
|
5527
|
+
if (typeof depth !== "number") return 1;
|
|
5528
|
+
if (depth === 0) return 1.08;
|
|
5529
|
+
if (depth === 1) return 1.04;
|
|
5530
|
+
return 1;
|
|
5531
|
+
}
|
|
5532
|
+
/**
|
|
5533
|
+
* Get an entry-point score multiplier, gated by query intent.
|
|
5534
|
+
* Entry-point files (index.ts, main.py, etc.) are boosted for how-to
|
|
5535
|
+
* and implementation queries where API surfaces are most relevant.
|
|
5536
|
+
* Returns 1.0 (no-op) for debugging/comparison/conceptual intents.
|
|
5537
|
+
*/
|
|
5538
|
+
getEntryPointBoost(result, intent) {
|
|
5539
|
+
if (intent !== "how-to" && intent !== "implementation") return 1;
|
|
5540
|
+
const isEntryPoint = result.metadata["isEntryPoint"];
|
|
5541
|
+
if (isEntryPoint !== true) return 1;
|
|
5542
|
+
return 1.08;
|
|
5543
|
+
}
|
|
4503
5544
|
/**
|
|
4504
5545
|
* Get a score multiplier based on framework context.
|
|
4505
5546
|
* If query mentions a framework, boost results from that framework's files.
|
|
@@ -4798,7 +5839,7 @@ var SearchService = class {
|
|
|
4798
5839
|
|
|
4799
5840
|
// src/services/store-definition.service.ts
|
|
4800
5841
|
import { readFile as readFile7, access as access4 } from "fs/promises";
|
|
4801
|
-
import { resolve as resolve2, isAbsolute as isAbsolute2, join as
|
|
5842
|
+
import { resolve as resolve2, isAbsolute as isAbsolute2, join as join11 } from "path";
|
|
4802
5843
|
|
|
4803
5844
|
// src/types/store-definition.ts
|
|
4804
5845
|
import { z as z3 } from "zod";
|
|
@@ -4889,7 +5930,7 @@ var StoreDefinitionService = class {
|
|
|
4889
5930
|
config = null;
|
|
4890
5931
|
constructor(projectRoot) {
|
|
4891
5932
|
this.projectRoot = projectRoot ?? ProjectRootService.resolve();
|
|
4892
|
-
this.configPath =
|
|
5933
|
+
this.configPath = join11(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
|
|
4893
5934
|
}
|
|
4894
5935
|
/**
|
|
4895
5936
|
* Load store definitions from config file.
|
|
@@ -5028,12 +6069,12 @@ var StoreDefinitionService = class {
|
|
|
5028
6069
|
// src/services/store.service.ts
|
|
5029
6070
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
5030
6071
|
import { readFile as readFile8, mkdir as mkdir5, stat as stat3, access as access5 } from "fs/promises";
|
|
5031
|
-
import { join as
|
|
6072
|
+
import { join as join12, resolve as resolve3, relative as relative2, isAbsolute as isAbsolute3 } from "path";
|
|
5032
6073
|
|
|
5033
6074
|
// src/plugin/git-clone.ts
|
|
5034
6075
|
import { spawn } from "child_process";
|
|
5035
6076
|
import { mkdir as mkdir4 } from "fs/promises";
|
|
5036
|
-
var
|
|
6077
|
+
var logger5 = createLogger("git-clone");
|
|
5037
6078
|
function sanitizeUrl(url) {
|
|
5038
6079
|
try {
|
|
5039
6080
|
const parsed = new URL(url);
|
|
@@ -5065,7 +6106,7 @@ async function cloneRepository(options) {
|
|
|
5065
6106
|
args.push("--branch", branch);
|
|
5066
6107
|
}
|
|
5067
6108
|
args.push(url, targetDir);
|
|
5068
|
-
|
|
6109
|
+
logger5.info(
|
|
5069
6110
|
{
|
|
5070
6111
|
url: sanitizeUrl(url),
|
|
5071
6112
|
targetDir,
|
|
@@ -5102,7 +6143,7 @@ async function cloneRepository(options) {
|
|
|
5102
6143
|
clearTimeout(timeout);
|
|
5103
6144
|
if (forceKillTimeout) clearTimeout(forceKillTimeout);
|
|
5104
6145
|
if (timedOut) {
|
|
5105
|
-
resolve4(err(new Error(`Git clone timed out after ${String(timeoutMs)}ms`)));
|
|
6146
|
+
resolve4(err(new Error(`Git clone timed out after ${String(timeoutMs)}ms for: ${url}`)));
|
|
5106
6147
|
} else if (code === 0) {
|
|
5107
6148
|
resolve4(ok(targetDir));
|
|
5108
6149
|
} else {
|
|
@@ -5127,7 +6168,7 @@ function extractRepoName(url) {
|
|
|
5127
6168
|
var CURRENT_SCHEMA_VERSION = 3;
|
|
5128
6169
|
|
|
5129
6170
|
// src/services/store.service.ts
|
|
5130
|
-
var
|
|
6171
|
+
var logger6 = createLogger("store-service");
|
|
5131
6172
|
async function fileExists4(path4) {
|
|
5132
6173
|
try {
|
|
5133
6174
|
await access5(path4);
|
|
@@ -5176,10 +6217,10 @@ var StoreService = class {
|
|
|
5176
6217
|
}
|
|
5177
6218
|
const relativePath = relative2(this.projectRoot, absolutePath);
|
|
5178
6219
|
if (relativePath.startsWith("..") || isAbsolute3(relativePath)) {
|
|
5179
|
-
|
|
6220
|
+
logger6.info(`Path outside projectRoot, storing absolute: ${absolutePath}`);
|
|
5180
6221
|
return { path: absolutePath, pathType: "absolute" };
|
|
5181
6222
|
}
|
|
5182
|
-
|
|
6223
|
+
logger6.debug(`Storing relative path: ${relativePath} (resolved from ${absolutePath})`);
|
|
5183
6224
|
return { path: relativePath, pathType: "relative" };
|
|
5184
6225
|
}
|
|
5185
6226
|
/**
|
|
@@ -5194,11 +6235,11 @@ var StoreService = class {
|
|
|
5194
6235
|
return storedPath;
|
|
5195
6236
|
}
|
|
5196
6237
|
if (this.projectRoot === void 0) {
|
|
5197
|
-
|
|
6238
|
+
logger6.error(`Store has relative path but no projectRoot: ${storedPath}`);
|
|
5198
6239
|
return storedPath;
|
|
5199
6240
|
}
|
|
5200
6241
|
const resolved = resolve3(this.projectRoot, storedPath);
|
|
5201
|
-
|
|
6242
|
+
logger6.debug(`Resolved relative path: ${storedPath} \u2192 ${resolved}`);
|
|
5202
6243
|
return resolved;
|
|
5203
6244
|
}
|
|
5204
6245
|
async initialize() {
|
|
@@ -5210,7 +6251,7 @@ var StoreService = class {
|
|
|
5210
6251
|
* This enables MCP server to see stores created by CLI without restart.
|
|
5211
6252
|
*/
|
|
5212
6253
|
async ensureRegistryFresh() {
|
|
5213
|
-
const registryPath =
|
|
6254
|
+
const registryPath = join12(this.dataDir, "stores.json");
|
|
5214
6255
|
try {
|
|
5215
6256
|
const stats = await stat3(registryPath);
|
|
5216
6257
|
if (stats.mtimeMs > this.registryMtime) {
|
|
@@ -5366,7 +6407,7 @@ var StoreService = class {
|
|
|
5366
6407
|
case "repo": {
|
|
5367
6408
|
let repoPath = input.path;
|
|
5368
6409
|
if (input.url !== void 0) {
|
|
5369
|
-
const cloneDir =
|
|
6410
|
+
const cloneDir = join12(this.dataDir, "repos", id);
|
|
5370
6411
|
const result = await cloneRepository({
|
|
5371
6412
|
url: input.url,
|
|
5372
6413
|
targetDir: cloneDir,
|
|
@@ -5567,7 +6608,7 @@ var StoreService = class {
|
|
|
5567
6608
|
await this.saveRegistry();
|
|
5568
6609
|
}
|
|
5569
6610
|
async loadRegistry() {
|
|
5570
|
-
const registryPath =
|
|
6611
|
+
const registryPath = join12(this.dataDir, "stores.json");
|
|
5571
6612
|
const exists = await fileExists4(registryPath);
|
|
5572
6613
|
if (!exists) {
|
|
5573
6614
|
this.registry = { stores: [] };
|
|
@@ -5602,7 +6643,7 @@ var StoreService = class {
|
|
|
5602
6643
|
})
|
|
5603
6644
|
};
|
|
5604
6645
|
if (migrationNeeded) {
|
|
5605
|
-
|
|
6646
|
+
logger6.debug("Schema migration needed - will upgrade on next save");
|
|
5606
6647
|
this.needsMigration = true;
|
|
5607
6648
|
}
|
|
5608
6649
|
} catch (error) {
|
|
@@ -5614,7 +6655,7 @@ var StoreService = class {
|
|
|
5614
6655
|
this.registryMtime = loadedStats.mtimeMs;
|
|
5615
6656
|
}
|
|
5616
6657
|
async saveRegistry() {
|
|
5617
|
-
const registryPath =
|
|
6658
|
+
const registryPath = join12(this.dataDir, "stores.json");
|
|
5618
6659
|
const storedStores = this.registry.stores.map((store) => {
|
|
5619
6660
|
const schemaVersion = this.needsMigration ? CURRENT_SCHEMA_VERSION : store.schemaVersion;
|
|
5620
6661
|
if (store.type === "file") {
|
|
@@ -5642,7 +6683,7 @@ var StoreService = class {
|
|
|
5642
6683
|
}
|
|
5643
6684
|
});
|
|
5644
6685
|
if (this.needsMigration) {
|
|
5645
|
-
|
|
6686
|
+
logger6.info("Schema migration complete - upgraded to v3");
|
|
5646
6687
|
this.needsMigration = false;
|
|
5647
6688
|
}
|
|
5648
6689
|
await atomicWriteFile(registryPath, JSON.stringify({ stores: storedStores }, null, 2));
|
|
@@ -5693,7 +6734,7 @@ function validateParsePythonResult(data) {
|
|
|
5693
6734
|
}
|
|
5694
6735
|
|
|
5695
6736
|
// src/crawl/bridge.ts
|
|
5696
|
-
var
|
|
6737
|
+
var logger7 = createLogger("python-bridge");
|
|
5697
6738
|
function getPythonExecutable() {
|
|
5698
6739
|
return process.platform === "win32" ? "python" : "python3";
|
|
5699
6740
|
}
|
|
@@ -5728,7 +6769,7 @@ var PythonBridge = class {
|
|
|
5728
6769
|
pythonWorkerPath = path3.join(projectRoot, "python", "ast_worker.py");
|
|
5729
6770
|
pythonPath = getPythonExecutable();
|
|
5730
6771
|
}
|
|
5731
|
-
|
|
6772
|
+
logger7.debug(
|
|
5732
6773
|
{ pythonWorkerPath, pythonPath, currentFilePath, isProduction },
|
|
5733
6774
|
"Starting Python bridge process"
|
|
5734
6775
|
);
|
|
@@ -5736,15 +6777,15 @@ var PythonBridge = class {
|
|
|
5736
6777
|
stdio: ["pipe", "pipe", "pipe"]
|
|
5737
6778
|
});
|
|
5738
6779
|
this.process.on("error", (err2) => {
|
|
5739
|
-
|
|
6780
|
+
logger7.error({ error: err2.message, stack: err2.stack }, "Python bridge process error");
|
|
5740
6781
|
this.rejectAllPending(new Error(`Process error: ${err2.message}`));
|
|
5741
6782
|
});
|
|
5742
6783
|
this.process.on("exit", (code, signal) => {
|
|
5743
6784
|
if (code !== 0 && code !== null) {
|
|
5744
|
-
|
|
6785
|
+
logger7.error({ code }, "Python bridge process exited with non-zero code");
|
|
5745
6786
|
this.rejectAllPending(new Error(`Process exited with code ${String(code)}`));
|
|
5746
6787
|
} else if (signal && !this.stoppingIntentionally) {
|
|
5747
|
-
|
|
6788
|
+
logger7.error({ signal }, "Python bridge process killed with signal");
|
|
5748
6789
|
this.rejectAllPending(new Error(`Process killed with signal ${signal}`));
|
|
5749
6790
|
}
|
|
5750
6791
|
this.process = null;
|
|
@@ -5753,7 +6794,7 @@ var PythonBridge = class {
|
|
|
5753
6794
|
if (this.process.stderr) {
|
|
5754
6795
|
this.stderrReadline = createInterface({ input: this.process.stderr });
|
|
5755
6796
|
this.stderrReadline.on("line", (line) => {
|
|
5756
|
-
|
|
6797
|
+
logger7.warn({ stderr: line }, "Python bridge stderr output");
|
|
5757
6798
|
});
|
|
5758
6799
|
}
|
|
5759
6800
|
if (this.process.stdout === null) {
|
|
@@ -5782,7 +6823,7 @@ var PythonBridge = class {
|
|
|
5782
6823
|
pending.resolve(validated);
|
|
5783
6824
|
} catch (error) {
|
|
5784
6825
|
if (error instanceof ZodError) {
|
|
5785
|
-
|
|
6826
|
+
logger7.error(
|
|
5786
6827
|
{
|
|
5787
6828
|
issues: error.issues,
|
|
5788
6829
|
response: JSON.stringify(response.result)
|
|
@@ -5794,14 +6835,14 @@ var PythonBridge = class {
|
|
|
5794
6835
|
);
|
|
5795
6836
|
} else {
|
|
5796
6837
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
5797
|
-
|
|
6838
|
+
logger7.error({ error: errorMessage }, "Response validation error");
|
|
5798
6839
|
pending.reject(new Error(`Response validation error: ${errorMessage}`));
|
|
5799
6840
|
}
|
|
5800
6841
|
}
|
|
5801
6842
|
}
|
|
5802
6843
|
}
|
|
5803
6844
|
} catch (err2) {
|
|
5804
|
-
|
|
6845
|
+
logger7.error(
|
|
5805
6846
|
{
|
|
5806
6847
|
error: err2 instanceof Error ? err2.message : String(err2),
|
|
5807
6848
|
line
|
|
@@ -5897,242 +6938,9 @@ var PythonBridge = class {
|
|
|
5897
6938
|
}
|
|
5898
6939
|
};
|
|
5899
6940
|
|
|
5900
|
-
// src/db/embeddings.ts
|
|
5901
|
-
import { homedir as homedir2 } from "os";
|
|
5902
|
-
import { join as join11 } from "path";
|
|
5903
|
-
import { pipeline, env } from "@huggingface/transformers";
|
|
5904
|
-
env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
|
|
5905
|
-
var DEFAULT_EMBEDDING_CONFIG = {
|
|
5906
|
-
model: "Xenova/bge-small-en-v1.5",
|
|
5907
|
-
batchSize: 32,
|
|
5908
|
-
dtype: "fp32",
|
|
5909
|
-
pooling: "mean",
|
|
5910
|
-
normalize: true,
|
|
5911
|
-
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
5912
|
-
docPrefix: "",
|
|
5913
|
-
maxInFlightBatches: 1
|
|
5914
|
-
};
|
|
5915
|
-
var EmbeddingEngine = class {
|
|
5916
|
-
extractor = null;
|
|
5917
|
-
initPromise = null;
|
|
5918
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed() and embedBatch()
|
|
5919
|
-
_dimensions = null;
|
|
5920
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
|
|
5921
|
-
disposed = false;
|
|
5922
|
-
config;
|
|
5923
|
-
constructor(config = DEFAULT_EMBEDDING_CONFIG) {
|
|
5924
|
-
this.config = config;
|
|
5925
|
-
}
|
|
5926
|
-
/**
|
|
5927
|
-
* Guard against use-after-dispose
|
|
5928
|
-
*/
|
|
5929
|
-
assertNotDisposed() {
|
|
5930
|
-
if (this.disposed) {
|
|
5931
|
-
throw new Error("EmbeddingEngine has been disposed");
|
|
5932
|
-
}
|
|
5933
|
-
}
|
|
5934
|
-
/**
|
|
5935
|
-
* Initialize the embedding pipeline (concurrency-safe).
|
|
5936
|
-
* Multiple concurrent calls will share the same initialization promise.
|
|
5937
|
-
*/
|
|
5938
|
-
async initialize() {
|
|
5939
|
-
this.assertNotDisposed();
|
|
5940
|
-
if (this.extractor !== null) return;
|
|
5941
|
-
this.initPromise ??= (async () => {
|
|
5942
|
-
try {
|
|
5943
|
-
this.extractor = await pipeline("feature-extraction", this.config.model, {
|
|
5944
|
-
dtype: this.config.dtype
|
|
5945
|
-
});
|
|
5946
|
-
} catch (error) {
|
|
5947
|
-
this.initPromise = null;
|
|
5948
|
-
throw error;
|
|
5949
|
-
}
|
|
5950
|
-
})();
|
|
5951
|
-
await this.initPromise;
|
|
5952
|
-
}
|
|
5953
|
-
/**
|
|
5954
|
-
* Embed a search query. Applies queryPrefix for asymmetric models.
|
|
5955
|
-
*/
|
|
5956
|
-
async embedQuery(text) {
|
|
5957
|
-
return this.embedText(this.config.queryPrefix + text);
|
|
5958
|
-
}
|
|
5959
|
-
/**
|
|
5960
|
-
* Embed a document for indexing. Applies docPrefix for asymmetric models.
|
|
5961
|
-
*/
|
|
5962
|
-
async embedDocument(text) {
|
|
5963
|
-
return this.embedText(this.config.docPrefix + text);
|
|
5964
|
-
}
|
|
5965
|
-
/**
|
|
5966
|
-
* Internal: embed text without prefix.
|
|
5967
|
-
*/
|
|
5968
|
-
async embedText(text) {
|
|
5969
|
-
this.assertNotDisposed();
|
|
5970
|
-
if (this.extractor === null) {
|
|
5971
|
-
await this.initialize();
|
|
5972
|
-
}
|
|
5973
|
-
if (this.extractor === null) {
|
|
5974
|
-
throw new Error("Failed to initialize embedding model");
|
|
5975
|
-
}
|
|
5976
|
-
const output = await this.extractor(text, {
|
|
5977
|
-
pooling: this.config.pooling,
|
|
5978
|
-
normalize: this.config.normalize
|
|
5979
|
-
});
|
|
5980
|
-
const dim = output.dims[output.dims.length - 1] ?? 0;
|
|
5981
|
-
this._dimensions ??= dim;
|
|
5982
|
-
return Float32Array.from(output.data);
|
|
5983
|
-
}
|
|
5984
|
-
/**
|
|
5985
|
-
* Embed a batch of documents with optional parallelism.
|
|
5986
|
-
* When maxInFlightBatches > 1, processes multiple batches concurrently.
|
|
5987
|
-
*/
|
|
5988
|
-
async embedBatch(texts) {
|
|
5989
|
-
this.assertNotDisposed();
|
|
5990
|
-
if (this.extractor === null) {
|
|
5991
|
-
await this.initialize();
|
|
5992
|
-
}
|
|
5993
|
-
if (this.extractor === null) {
|
|
5994
|
-
throw new Error("Failed to initialize embedding model");
|
|
5995
|
-
}
|
|
5996
|
-
const batches = [];
|
|
5997
|
-
for (let i = 0; i < texts.length; i += this.config.batchSize) {
|
|
5998
|
-
batches.push(texts.slice(i, i + this.config.batchSize));
|
|
5999
|
-
}
|
|
6000
|
-
if (batches.length === 0) {
|
|
6001
|
-
return [];
|
|
6002
|
-
}
|
|
6003
|
-
if (this.config.maxInFlightBatches <= 1) {
|
|
6004
|
-
return this.embedBatchesSequential(batches);
|
|
6005
|
-
} else {
|
|
6006
|
-
return this.embedBatchesConcurrent(batches);
|
|
6007
|
-
}
|
|
6008
|
-
}
|
|
6009
|
-
/**
|
|
6010
|
-
* Process batches sequentially (original behavior).
|
|
6011
|
-
*/
|
|
6012
|
-
async embedBatchesSequential(batches) {
|
|
6013
|
-
const results = [];
|
|
6014
|
-
for (let i = 0; i < batches.length; i++) {
|
|
6015
|
-
const batch = batches[i];
|
|
6016
|
-
if (batch === void 0) continue;
|
|
6017
|
-
const batchResults = await this.processSingleBatch(batch);
|
|
6018
|
-
results.push(...batchResults);
|
|
6019
|
-
if (i < batches.length - 1) {
|
|
6020
|
-
await new Promise((resolve4) => setImmediate(resolve4));
|
|
6021
|
-
}
|
|
6022
|
-
}
|
|
6023
|
-
return results;
|
|
6024
|
-
}
|
|
6025
|
-
/**
|
|
6026
|
-
* Process batches with controlled concurrency.
|
|
6027
|
-
*/
|
|
6028
|
-
async embedBatchesConcurrent(batches) {
|
|
6029
|
-
const results = new Array(batches.length);
|
|
6030
|
-
let inFlight = 0;
|
|
6031
|
-
const maxConcurrent = this.config.maxInFlightBatches;
|
|
6032
|
-
await Promise.all(
|
|
6033
|
-
batches.map(async (batch, idx) => {
|
|
6034
|
-
while (inFlight >= maxConcurrent) {
|
|
6035
|
-
await new Promise((resolve4) => setImmediate(resolve4));
|
|
6036
|
-
}
|
|
6037
|
-
inFlight++;
|
|
6038
|
-
try {
|
|
6039
|
-
results[idx] = await this.processSingleBatch(batch);
|
|
6040
|
-
} finally {
|
|
6041
|
-
inFlight--;
|
|
6042
|
-
}
|
|
6043
|
-
})
|
|
6044
|
-
);
|
|
6045
|
-
return results.flat();
|
|
6046
|
-
}
|
|
6047
|
-
/**
|
|
6048
|
-
* Process a single batch and return embeddings.
|
|
6049
|
-
*/
|
|
6050
|
-
async processSingleBatch(batch) {
|
|
6051
|
-
if (this.extractor === null) {
|
|
6052
|
-
throw new Error("Extractor not initialized");
|
|
6053
|
-
}
|
|
6054
|
-
const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
|
|
6055
|
-
const output = await this.extractor(prefixedBatch, {
|
|
6056
|
-
pooling: this.config.pooling,
|
|
6057
|
-
normalize: this.config.normalize
|
|
6058
|
-
});
|
|
6059
|
-
const dim = output.dims[output.dims.length - 1] ?? 0;
|
|
6060
|
-
const batchResults = [];
|
|
6061
|
-
for (let b = 0; b < batch.length; b++) {
|
|
6062
|
-
const start = b * dim;
|
|
6063
|
-
const end = start + dim;
|
|
6064
|
-
batchResults.push(Float32Array.from(output.data.slice(start, end)));
|
|
6065
|
-
}
|
|
6066
|
-
this._dimensions ??= dim;
|
|
6067
|
-
return batchResults;
|
|
6068
|
-
}
|
|
6069
|
-
/**
|
|
6070
|
-
* Get cached embedding dimensions. Throws if embed() hasn't been called yet.
|
|
6071
|
-
* Use ensureDimensions() if you need to guarantee dimensions are available.
|
|
6072
|
-
*/
|
|
6073
|
-
getDimensions() {
|
|
6074
|
-
if (this._dimensions === null) {
|
|
6075
|
-
throw new Error("Cannot get dimensions before first embed() call");
|
|
6076
|
-
}
|
|
6077
|
-
return this._dimensions;
|
|
6078
|
-
}
|
|
6079
|
-
/**
|
|
6080
|
-
* Check if the embedding pipeline is initialized.
|
|
6081
|
-
*/
|
|
6082
|
-
isInitialized() {
|
|
6083
|
-
return this.extractor !== null;
|
|
6084
|
-
}
|
|
6085
|
-
/**
|
|
6086
|
-
* Check if this engine has been disposed.
|
|
6087
|
-
*/
|
|
6088
|
-
isDisposed() {
|
|
6089
|
-
return this.disposed;
|
|
6090
|
-
}
|
|
6091
|
-
/**
|
|
6092
|
-
* Reset the engine to uninitialized state, allowing reuse after disposal.
|
|
6093
|
-
* If currently initialized, disposes the pipeline first.
|
|
6094
|
-
*/
|
|
6095
|
-
async reset() {
|
|
6096
|
-
if (this.extractor !== null) {
|
|
6097
|
-
await this.extractor.dispose();
|
|
6098
|
-
this.extractor = null;
|
|
6099
|
-
}
|
|
6100
|
-
this.initPromise = null;
|
|
6101
|
-
this._dimensions = null;
|
|
6102
|
-
this.disposed = false;
|
|
6103
|
-
}
|
|
6104
|
-
/**
|
|
6105
|
-
* Ensure dimensions are available, initializing the model if needed.
|
|
6106
|
-
* Returns the embedding dimensions for the current model.
|
|
6107
|
-
*/
|
|
6108
|
-
async ensureDimensions() {
|
|
6109
|
-
if (this._dimensions === null) {
|
|
6110
|
-
await this.embedText("dimension probe");
|
|
6111
|
-
}
|
|
6112
|
-
if (this._dimensions === null) {
|
|
6113
|
-
throw new Error("Failed to determine embedding dimensions");
|
|
6114
|
-
}
|
|
6115
|
-
return this._dimensions;
|
|
6116
|
-
}
|
|
6117
|
-
/**
|
|
6118
|
-
* Dispose the embedding pipeline to free resources.
|
|
6119
|
-
* Should be called before process exit to prevent ONNX runtime cleanup issues on macOS.
|
|
6120
|
-
* After disposal, this engine cannot be used again.
|
|
6121
|
-
*/
|
|
6122
|
-
async dispose() {
|
|
6123
|
-
if (this.extractor !== null) {
|
|
6124
|
-
await this.extractor.dispose();
|
|
6125
|
-
this.extractor = null;
|
|
6126
|
-
}
|
|
6127
|
-
this.initPromise = null;
|
|
6128
|
-
this._dimensions = null;
|
|
6129
|
-
this.disposed = true;
|
|
6130
|
-
}
|
|
6131
|
-
};
|
|
6132
|
-
|
|
6133
6941
|
// src/db/lance.ts
|
|
6134
6942
|
import { rm as rm2 } from "fs/promises";
|
|
6135
|
-
import { join as
|
|
6943
|
+
import { join as join13 } from "path";
|
|
6136
6944
|
import * as lancedb from "@lancedb/lancedb";
|
|
6137
6945
|
import { LanceSchema } from "@lancedb/lancedb/embedding";
|
|
6138
6946
|
import { Utf8 } from "apache-arrow";
|
|
@@ -6147,17 +6955,19 @@ var HuggingFaceEmbeddingFunction = class extends TextEmbeddingFunction {
|
|
|
6147
6955
|
constructor(optionsRaw) {
|
|
6148
6956
|
super();
|
|
6149
6957
|
const options = this.resolveVariables(optionsRaw ?? {});
|
|
6150
|
-
|
|
6151
|
-
|
|
6152
|
-
|
|
6153
|
-
|
|
6154
|
-
pooling: options.pooling ?? "mean",
|
|
6155
|
-
normalize: options.normalize ?? true,
|
|
6156
|
-
queryPrefix: options.queryPrefix ?? "",
|
|
6157
|
-
docPrefix: options.docPrefix ?? "",
|
|
6158
|
-
maxInFlightBatches: 1
|
|
6958
|
+
const finetunedPath = getFinetunedModelPath();
|
|
6959
|
+
const model = finetunedPath ?? options.model ?? getConfiguredModelId();
|
|
6960
|
+
const overrides = {
|
|
6961
|
+
maxInFlightBatches: 1,
|
|
6159
6962
|
// Single-threaded for LanceDB integration
|
|
6963
|
+
...options.batchSize !== void 0 && { batchSize: options.batchSize },
|
|
6964
|
+
...options.dtype !== void 0 && { dtype: options.dtype },
|
|
6965
|
+
...options.pooling !== void 0 && { pooling: options.pooling },
|
|
6966
|
+
...options.normalize !== void 0 && { normalize: options.normalize },
|
|
6967
|
+
...options.queryPrefix !== void 0 && { queryPrefix: options.queryPrefix },
|
|
6968
|
+
...options.docPrefix !== void 0 && { docPrefix: options.docPrefix }
|
|
6160
6969
|
};
|
|
6970
|
+
this.embeddingConfig = buildEmbeddingConfig(model, overrides);
|
|
6161
6971
|
this.engine = new EmbeddingEngine(this.embeddingConfig);
|
|
6162
6972
|
}
|
|
6163
6973
|
/**
|
|
@@ -6233,11 +7043,15 @@ var DocumentMetadataSchema = z5.object({
|
|
|
6233
7043
|
}).loose();
|
|
6234
7044
|
|
|
6235
7045
|
// src/db/lance.ts
|
|
6236
|
-
var
|
|
7046
|
+
var logger8 = createLogger("lance");
|
|
6237
7047
|
function isSearchHit(value) {
|
|
6238
7048
|
if (typeof value !== "object" || value === null) return false;
|
|
6239
7049
|
return "id" in value && "content" in value && "metadata" in value && "_distance" in value && typeof value.id === "string" && typeof value.content === "string" && typeof value.metadata === "string" && typeof value._distance === "number";
|
|
6240
7050
|
}
|
|
7051
|
+
function isDocumentRecord(value) {
|
|
7052
|
+
if (typeof value !== "object" || value === null) return false;
|
|
7053
|
+
return "id" in value && "content" in value && "metadata" in value && typeof value.id === "string" && typeof value.content === "string" && typeof value.metadata === "string";
|
|
7054
|
+
}
|
|
6241
7055
|
function parseDocumentMetadata(jsonStr) {
|
|
6242
7056
|
const parsed = DocumentMetadataSchema.parse(JSON.parse(jsonStr));
|
|
6243
7057
|
return {
|
|
@@ -6405,6 +7219,29 @@ var LanceStore = class {
|
|
|
6405
7219
|
};
|
|
6406
7220
|
});
|
|
6407
7221
|
}
|
|
7222
|
+
/**
|
|
7223
|
+
* Get all documents from a store (for training data generation).
|
|
7224
|
+
* Returns documents in batches to avoid memory issues with large stores.
|
|
7225
|
+
*/
|
|
7226
|
+
async getAllDocuments(storeId, options) {
|
|
7227
|
+
const table = await this.getTable(storeId);
|
|
7228
|
+
const limit = options?.limit ?? 1e4;
|
|
7229
|
+
const offset = options?.offset ?? 0;
|
|
7230
|
+
const rawResults = await table.query().limit(limit).offset(offset).toArray();
|
|
7231
|
+
const results = rawResults.filter(isDocumentRecord);
|
|
7232
|
+
return results.map((r) => ({
|
|
7233
|
+
id: createDocumentId(r.id),
|
|
7234
|
+
content: r.content,
|
|
7235
|
+
metadata: parseDocumentMetadata(r.metadata)
|
|
7236
|
+
}));
|
|
7237
|
+
}
|
|
7238
|
+
/**
|
|
7239
|
+
* Count total documents in a store.
|
|
7240
|
+
*/
|
|
7241
|
+
async countDocuments(storeId) {
|
|
7242
|
+
const table = await this.getTable(storeId);
|
|
7243
|
+
return table.countRows();
|
|
7244
|
+
}
|
|
6408
7245
|
async deleteStore(storeId) {
|
|
6409
7246
|
const tableName = this.getTableName(storeId);
|
|
6410
7247
|
this.connection ??= await lancedb.connect(this.dataDir);
|
|
@@ -6413,11 +7250,11 @@ var LanceStore = class {
|
|
|
6413
7250
|
await this.connection.dropTable(tableName);
|
|
6414
7251
|
}
|
|
6415
7252
|
this.tables.delete(tableName);
|
|
6416
|
-
const lanceDir =
|
|
7253
|
+
const lanceDir = join13(this.dataDir, `${tableName}.lance`);
|
|
6417
7254
|
try {
|
|
6418
7255
|
await rm2(lanceDir, { recursive: true, force: true });
|
|
6419
7256
|
} catch (error) {
|
|
6420
|
-
|
|
7257
|
+
logger8.warn({ lanceDir, error }, "Failed to remove .lance directory");
|
|
6421
7258
|
}
|
|
6422
7259
|
}
|
|
6423
7260
|
close() {
|
|
@@ -6454,7 +7291,7 @@ var LanceStore = class {
|
|
|
6454
7291
|
};
|
|
6455
7292
|
|
|
6456
7293
|
// src/services/index.ts
|
|
6457
|
-
var
|
|
7294
|
+
var logger9 = createLogger("services");
|
|
6458
7295
|
var LazyServiceContainer = class {
|
|
6459
7296
|
// Eagerly initialized (lightweight)
|
|
6460
7297
|
config;
|
|
@@ -6469,6 +7306,8 @@ var LazyServiceContainer = class {
|
|
|
6469
7306
|
_manifest = null;
|
|
6470
7307
|
_embeddings = null;
|
|
6471
7308
|
_codeGraph = null;
|
|
7309
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in lazy getter
|
|
7310
|
+
_reranker = null;
|
|
6472
7311
|
_search = null;
|
|
6473
7312
|
_index = null;
|
|
6474
7313
|
constructor(config, appConfig, dataDir, store, lance, pythonBridge) {
|
|
@@ -6485,7 +7324,7 @@ var LazyServiceContainer = class {
|
|
|
6485
7324
|
*/
|
|
6486
7325
|
get embeddings() {
|
|
6487
7326
|
if (this._embeddings === null) {
|
|
6488
|
-
|
|
7327
|
+
logger9.debug("Lazy-initializing EmbeddingEngine");
|
|
6489
7328
|
this._embeddings = new EmbeddingEngine(this.appConfig.embedding);
|
|
6490
7329
|
}
|
|
6491
7330
|
return this._embeddings;
|
|
@@ -6495,18 +7334,34 @@ var LazyServiceContainer = class {
|
|
|
6495
7334
|
*/
|
|
6496
7335
|
get codeGraph() {
|
|
6497
7336
|
if (this._codeGraph === null) {
|
|
6498
|
-
|
|
7337
|
+
logger9.debug("Lazy-initializing CodeGraphService");
|
|
6499
7338
|
this._codeGraph = new CodeGraphService(this.dataDir, this.pythonBridge);
|
|
6500
7339
|
}
|
|
6501
7340
|
return this._codeGraph;
|
|
6502
7341
|
}
|
|
7342
|
+
/**
|
|
7343
|
+
* RerankerService is lazily created on first access.
|
|
7344
|
+
* Only created if reranker config exists and is enabled.
|
|
7345
|
+
*/
|
|
7346
|
+
get reranker() {
|
|
7347
|
+
if (this._reranker === null && this.appConfig.reranker?.enabled) {
|
|
7348
|
+
logger9.debug("Lazy-initializing RerankerService");
|
|
7349
|
+
this._reranker = new RerankerService(this.appConfig.reranker);
|
|
7350
|
+
}
|
|
7351
|
+
return this._reranker ?? void 0;
|
|
7352
|
+
}
|
|
6503
7353
|
/**
|
|
6504
7354
|
* SearchService is lazily created on first access.
|
|
6505
7355
|
*/
|
|
6506
7356
|
get search() {
|
|
6507
7357
|
if (this._search === null) {
|
|
6508
|
-
|
|
6509
|
-
this._search = new SearchService(
|
|
7358
|
+
logger9.debug("Lazy-initializing SearchService");
|
|
7359
|
+
this._search = new SearchService(
|
|
7360
|
+
this.lance,
|
|
7361
|
+
this.codeGraph,
|
|
7362
|
+
this.appConfig.search,
|
|
7363
|
+
this.reranker
|
|
7364
|
+
);
|
|
6510
7365
|
}
|
|
6511
7366
|
return this._search;
|
|
6512
7367
|
}
|
|
@@ -6515,14 +7370,15 @@ var LazyServiceContainer = class {
|
|
|
6515
7370
|
*/
|
|
6516
7371
|
get index() {
|
|
6517
7372
|
if (this._index === null) {
|
|
6518
|
-
|
|
7373
|
+
logger9.debug("Lazy-initializing IndexService");
|
|
6519
7374
|
this._index = new IndexService(this.lance, this.embeddings, {
|
|
6520
7375
|
codeGraphService: this.codeGraph,
|
|
6521
7376
|
manifestService: this.manifest,
|
|
6522
7377
|
chunkSize: this.appConfig.indexing.chunkSize,
|
|
6523
7378
|
chunkOverlap: this.appConfig.indexing.chunkOverlap,
|
|
6524
7379
|
concurrency: this.appConfig.indexing.concurrency,
|
|
6525
|
-
ignorePatterns: this.appConfig.indexing.ignorePatterns
|
|
7380
|
+
ignorePatterns: this.appConfig.indexing.ignorePatterns,
|
|
7381
|
+
prependPath: this.appConfig.indexing.prependPath
|
|
6526
7382
|
});
|
|
6527
7383
|
}
|
|
6528
7384
|
return this._index;
|
|
@@ -6532,7 +7388,7 @@ var LazyServiceContainer = class {
|
|
|
6532
7388
|
*/
|
|
6533
7389
|
get manifest() {
|
|
6534
7390
|
if (this._manifest === null) {
|
|
6535
|
-
|
|
7391
|
+
logger9.debug("Lazy-initializing ManifestService");
|
|
6536
7392
|
this._manifest = new ManifestService(this.dataDir);
|
|
6537
7393
|
}
|
|
6538
7394
|
return this._manifest;
|
|
@@ -6551,7 +7407,7 @@ var LazyServiceContainer = class {
|
|
|
6551
7407
|
}
|
|
6552
7408
|
};
|
|
6553
7409
|
async function createLazyServices(configPath, dataDir, projectRoot) {
|
|
6554
|
-
|
|
7410
|
+
logger9.info({ configPath, dataDir, projectRoot }, "Initializing lazy services");
|
|
6555
7411
|
const startTime = Date.now();
|
|
6556
7412
|
const config = new ConfigService(configPath, dataDir, projectRoot);
|
|
6557
7413
|
const appConfig = await config.load();
|
|
@@ -6572,14 +7428,14 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
|
|
|
6572
7428
|
await store.initialize();
|
|
6573
7429
|
await lance.setEmbeddingFunction(appConfig.embedding);
|
|
6574
7430
|
const durationMs = Date.now() - startTime;
|
|
6575
|
-
|
|
7431
|
+
logger9.info(
|
|
6576
7432
|
{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot, durationMs },
|
|
6577
7433
|
"Lazy services initialized"
|
|
6578
7434
|
);
|
|
6579
7435
|
return new LazyServiceContainer(config, appConfig, resolvedDataDir, store, lance, pythonBridge);
|
|
6580
7436
|
}
|
|
6581
7437
|
async function createServices(configPath, dataDir, projectRoot) {
|
|
6582
|
-
|
|
7438
|
+
logger9.info({ configPath, dataDir, projectRoot }, "Initializing services");
|
|
6583
7439
|
const config = new ConfigService(configPath, dataDir, projectRoot);
|
|
6584
7440
|
const appConfig = await config.load();
|
|
6585
7441
|
const resolvedDataDir = config.resolveDataDir();
|
|
@@ -6602,16 +7458,18 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
6602
7458
|
await store.initialize();
|
|
6603
7459
|
const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
|
|
6604
7460
|
const manifest = new ManifestService(resolvedDataDir);
|
|
6605
|
-
const
|
|
7461
|
+
const reranker = appConfig.reranker?.enabled ? new RerankerService(appConfig.reranker) : void 0;
|
|
7462
|
+
const search = new SearchService(lance, codeGraph, appConfig.search, reranker);
|
|
6606
7463
|
const index = new IndexService(lance, embeddings, {
|
|
6607
7464
|
codeGraphService: codeGraph,
|
|
6608
7465
|
manifestService: manifest,
|
|
6609
7466
|
chunkSize: appConfig.indexing.chunkSize,
|
|
6610
7467
|
chunkOverlap: appConfig.indexing.chunkOverlap,
|
|
6611
7468
|
concurrency: appConfig.indexing.concurrency,
|
|
6612
|
-
ignorePatterns: appConfig.indexing.ignorePatterns
|
|
7469
|
+
ignorePatterns: appConfig.indexing.ignorePatterns,
|
|
7470
|
+
prependPath: appConfig.indexing.prependPath
|
|
6613
7471
|
});
|
|
6614
|
-
|
|
7472
|
+
logger9.info(
|
|
6615
7473
|
{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot },
|
|
6616
7474
|
"Services initialized successfully"
|
|
6617
7475
|
);
|
|
@@ -6628,20 +7486,20 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
6628
7486
|
};
|
|
6629
7487
|
}
|
|
6630
7488
|
async function destroyServices(services) {
|
|
6631
|
-
|
|
7489
|
+
logger9.info("Shutting down services");
|
|
6632
7490
|
const errors = [];
|
|
6633
7491
|
const isLazyContainer = services instanceof LazyServiceContainer;
|
|
6634
7492
|
const shouldCleanupSearch = !isLazyContainer || services.hasSearch;
|
|
6635
7493
|
if (shouldCleanupSearch) {
|
|
6636
7494
|
services.search.cleanup();
|
|
6637
7495
|
} else {
|
|
6638
|
-
|
|
7496
|
+
logger9.debug("Skipping search cleanup (not initialized)");
|
|
6639
7497
|
}
|
|
6640
7498
|
try {
|
|
6641
7499
|
await services.pythonBridge.stop();
|
|
6642
7500
|
} catch (e) {
|
|
6643
7501
|
const error = e instanceof Error ? e : new Error(String(e));
|
|
6644
|
-
|
|
7502
|
+
logger9.error({ error }, "Error stopping Python bridge");
|
|
6645
7503
|
errors.push(error);
|
|
6646
7504
|
}
|
|
6647
7505
|
const shouldDisposeEmbeddings = !isLazyContainer || services.hasEmbeddings;
|
|
@@ -6650,17 +7508,17 @@ async function destroyServices(services) {
|
|
|
6650
7508
|
await services.embeddings.dispose();
|
|
6651
7509
|
} catch (e) {
|
|
6652
7510
|
const error = e instanceof Error ? e : new Error(String(e));
|
|
6653
|
-
|
|
7511
|
+
logger9.error({ error }, "Error disposing EmbeddingEngine");
|
|
6654
7512
|
errors.push(error);
|
|
6655
7513
|
}
|
|
6656
7514
|
} else {
|
|
6657
|
-
|
|
7515
|
+
logger9.debug("Skipping embeddings disposal (not initialized)");
|
|
6658
7516
|
}
|
|
6659
7517
|
try {
|
|
6660
7518
|
await services.lance.closeAsync();
|
|
6661
7519
|
} catch (e) {
|
|
6662
7520
|
const error = e instanceof Error ? e : new Error(String(e));
|
|
6663
|
-
|
|
7521
|
+
logger9.error({ error }, "Error closing LanceStore");
|
|
6664
7522
|
errors.push(error);
|
|
6665
7523
|
}
|
|
6666
7524
|
await shutdownLogger();
|
|
@@ -6694,4 +7552,4 @@ export {
|
|
|
6694
7552
|
createServices,
|
|
6695
7553
|
destroyServices
|
|
6696
7554
|
};
|
|
6697
|
-
//# sourceMappingURL=chunk-
|
|
7555
|
+
//# sourceMappingURL=chunk-U27UECDZ.js.map
|