bluera-knowledge 0.28.1 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/README.md +194 -13
- package/dist/{chunk-UXT3BCAH.js → chunk-AEXFPA57.js} +398 -106
- package/dist/chunk-AEXFPA57.js.map +1 -0
- package/dist/{chunk-WP2GERAJ.js → chunk-B335UOU7.js} +1328 -467
- package/dist/chunk-B335UOU7.js.map +1 -0
- package/dist/{chunk-BYLIDCWD.js → chunk-KCI4U6FH.js} +2 -2
- package/dist/{chunk-H25AEF47.js → chunk-N3XYMAU3.js} +81 -2
- package/dist/chunk-N3XYMAU3.js.map +1 -0
- package/dist/index.js +8 -5
- package/dist/index.js.map +1 -1
- package/dist/mcp/bootstrap.js +9 -1
- package/dist/mcp/bootstrap.js.map +1 -1
- package/dist/mcp/server.d.ts +135 -10
- package/dist/mcp/server.js +3 -3
- package/dist/{watch.service-THP6X5ZZ.js → watch.service-LRFCT52P.js} +2 -2
- package/dist/workers/background-worker-cli.js +3 -3
- package/package.json +12 -3
- package/dist/chunk-H25AEF47.js.map +0 -1
- package/dist/chunk-UXT3BCAH.js.map +0 -1
- package/dist/chunk-WP2GERAJ.js.map +0 -1
- /package/dist/{chunk-BYLIDCWD.js.map → chunk-KCI4U6FH.js.map} +0 -0
- /package/dist/{watch.service-THP6X5ZZ.js.map → watch.service-LRFCT52P.js.map} +0 -0
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
} from "./chunk-CLIMKLTW.js";
|
|
5
5
|
import {
|
|
6
6
|
parseIgnorePatternsForScanning
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-N3XYMAU3.js";
|
|
8
8
|
import {
|
|
9
9
|
__require
|
|
10
10
|
} from "./chunk-DGUM43GV.js";
|
|
@@ -2060,8 +2060,601 @@ var CodeGraphService = class {
|
|
|
2060
2060
|
|
|
2061
2061
|
// src/services/config.service.ts
|
|
2062
2062
|
import { readFile as readFile2, access } from "fs/promises";
|
|
2063
|
+
import { homedir as homedir2 } from "os";
|
|
2064
|
+
import { isAbsolute, join as join6, resolve } from "path";
|
|
2065
|
+
|
|
2066
|
+
// src/services/reranker-env.ts
|
|
2067
|
+
var logger = createLogger("reranker-env");
|
|
2068
|
+
/**
 * Collect the reranker settings supplied through environment variables.
 *
 * Reads BK_RERANKER_ENABLED and BK_RERANKER_TOPK and hands each raw value to
 * its dedicated parser.
 *
 * @param strict - Forwarded unchanged to parseEnabled() and parseTopK().
 * @returns An object with `enabled` and `topK`, each holding whatever the
 *   corresponding parser returned.
 */
function parseRerankerEnvOverrides(strict) {
  const enabled = parseEnabled(process.env["BK_RERANKER_ENABLED"], strict);
  const topK = parseTopK(process.env["BK_RERANKER_TOPK"], strict);
  return { enabled, topK };
}
|
|
2074
|
+
/**
 * Interpret the raw BK_RERANKER_ENABLED value.
 *
 * @param raw - Raw environment value (string or undefined).
 * @param strict - When truthy an unrecognised value throws; otherwise it is
 *   reported through logger.warn and ignored.
 * @returns true for "1", false for "0", undefined when the value is unset,
 *   empty, or (non-strict only) unrecognised.
 * @throws {Error} In strict mode when the value is neither "0" nor "1".
 */
function parseEnabled(raw, strict) {
  switch (raw) {
    case void 0:
    case "":
      return void 0;
    case "1":
      return true;
    case "0":
      return false;
    default: {
      const problem = `BK_RERANKER_ENABLED must be '0' or '1', got: "${raw}"`;
      if (strict) {
        throw new Error(problem);
      }
      logger.warn(problem);
      return void 0;
    }
  }
}
|
|
2083
|
+
/**
 * Interpret the raw BK_RERANKER_TOPK value as a positive integer.
 *
 * The whole (whitespace-trimmed) value must be decimal digits, optionally
 * preceded by "+". Number.parseInt alone is not used for validation because it
 * stops at the first non-digit and would silently accept "5abc" as 5, "1.5"
 * as 1 and "1e3" as 1.
 *
 * @param raw - Raw environment value (string or undefined).
 * @param strict - When truthy an invalid value throws; otherwise it is
 *   reported through logger.warn and ignored.
 * @returns The parsed integer (>= 1), or undefined when the value is unset,
 *   empty, or (non-strict only) invalid.
 * @throws {Error} In strict mode when the value is not a positive integer.
 */
function parseTopK(raw, strict) {
  if (raw === void 0 || raw === "") return void 0;
  const trimmed = raw.trim();
  const parsed = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
  // isSafeInteger also rejects NaN and digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    const msg = `BK_RERANKER_TOPK must be a positive integer, got: "${raw}"`;
    if (strict) throw new Error(msg);
    logger.warn(msg);
    return void 0;
  }
  return parsed;
}
|
|
2094
|
+
|
|
2095
|
+
// src/db/embeddings.ts
|
|
2063
2096
|
import { homedir } from "os";
|
|
2064
|
-
import {
|
|
2097
|
+
import { join as join5 } from "path";
|
|
2098
|
+
import { pipeline, env } from "@huggingface/transformers";
|
|
2099
|
+
|
|
2100
|
+
// src/models/registry.ts
|
|
2101
|
+
var MODEL_REGISTRY = {
|
|
2102
|
+
// ============================================================
|
|
2103
|
+
// BGE Models (BAAI) - Best for retrieval tasks
|
|
2104
|
+
// ============================================================
|
|
2105
|
+
"bge-small-en-v1.5": {
|
|
2106
|
+
id: "Xenova/bge-small-en-v1.5",
|
|
2107
|
+
name: "BGE Small English v1.5",
|
|
2108
|
+
dimensions: 384,
|
|
2109
|
+
pooling: "cls",
|
|
2110
|
+
normalize: true,
|
|
2111
|
+
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
2112
|
+
docPrefix: "",
|
|
2113
|
+
category: "bge",
|
|
2114
|
+
sizeCategory: "small",
|
|
2115
|
+
notes: "Default model. Best balance of speed and quality for code search."
|
|
2116
|
+
},
|
|
2117
|
+
"bge-base-en-v1.5": {
|
|
2118
|
+
id: "Xenova/bge-base-en-v1.5",
|
|
2119
|
+
name: "BGE Base English v1.5",
|
|
2120
|
+
dimensions: 768,
|
|
2121
|
+
pooling: "cls",
|
|
2122
|
+
normalize: true,
|
|
2123
|
+
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
2124
|
+
docPrefix: "",
|
|
2125
|
+
category: "bge",
|
|
2126
|
+
sizeCategory: "base",
|
|
2127
|
+
notes: "53% slower than small. Tested: regression on code search."
|
|
2128
|
+
},
|
|
2129
|
+
"bge-large-en-v1.5": {
|
|
2130
|
+
id: "Xenova/bge-large-en-v1.5",
|
|
2131
|
+
name: "BGE Large English v1.5",
|
|
2132
|
+
dimensions: 1024,
|
|
2133
|
+
pooling: "cls",
|
|
2134
|
+
normalize: true,
|
|
2135
|
+
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
2136
|
+
docPrefix: "",
|
|
2137
|
+
category: "bge",
|
|
2138
|
+
sizeCategory: "large",
|
|
2139
|
+
notes: "Highest quality BGE but slow. Use for accuracy-critical tasks."
|
|
2140
|
+
},
|
|
2141
|
+
"bge-small-en": {
|
|
2142
|
+
id: "Xenova/bge-small-en",
|
|
2143
|
+
name: "BGE Small English v1.0",
|
|
2144
|
+
dimensions: 384,
|
|
2145
|
+
pooling: "cls",
|
|
2146
|
+
normalize: true,
|
|
2147
|
+
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
2148
|
+
docPrefix: "",
|
|
2149
|
+
category: "bge",
|
|
2150
|
+
sizeCategory: "small",
|
|
2151
|
+
notes: "Older version. Use v1.5 instead."
|
|
2152
|
+
},
|
|
2153
|
+
"bge-base-en": {
|
|
2154
|
+
id: "Xenova/bge-base-en",
|
|
2155
|
+
name: "BGE Base English v1.0",
|
|
2156
|
+
dimensions: 768,
|
|
2157
|
+
pooling: "cls",
|
|
2158
|
+
normalize: true,
|
|
2159
|
+
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
2160
|
+
docPrefix: "",
|
|
2161
|
+
category: "bge",
|
|
2162
|
+
sizeCategory: "base",
|
|
2163
|
+
notes: "Older version. Use v1.5 instead."
|
|
2164
|
+
},
|
|
2165
|
+
// ============================================================
|
|
2166
|
+
// E5 Models (Microsoft) - Asymmetric retrieval
|
|
2167
|
+
// ============================================================
|
|
2168
|
+
"e5-small-v2": {
|
|
2169
|
+
id: "Xenova/e5-small-v2",
|
|
2170
|
+
name: "E5 Small v2",
|
|
2171
|
+
dimensions: 384,
|
|
2172
|
+
pooling: "mean",
|
|
2173
|
+
normalize: true,
|
|
2174
|
+
queryPrefix: "query: ",
|
|
2175
|
+
docPrefix: "passage: ",
|
|
2176
|
+
category: "e5",
|
|
2177
|
+
sizeCategory: "small",
|
|
2178
|
+
notes: "Tested: underperformed BGE on code search. Better for general text."
|
|
2179
|
+
},
|
|
2180
|
+
"e5-base-v2": {
|
|
2181
|
+
id: "Xenova/e5-base-v2",
|
|
2182
|
+
name: "E5 Base v2",
|
|
2183
|
+
dimensions: 768,
|
|
2184
|
+
pooling: "mean",
|
|
2185
|
+
normalize: true,
|
|
2186
|
+
queryPrefix: "query: ",
|
|
2187
|
+
docPrefix: "passage: ",
|
|
2188
|
+
category: "e5",
|
|
2189
|
+
sizeCategory: "base",
|
|
2190
|
+
notes: 'Larger E5 variant. Requires "query:" and "passage:" prefixes.'
|
|
2191
|
+
},
|
|
2192
|
+
"e5-large-v2": {
|
|
2193
|
+
id: "Xenova/e5-large-v2",
|
|
2194
|
+
name: "E5 Large v2",
|
|
2195
|
+
dimensions: 1024,
|
|
2196
|
+
pooling: "mean",
|
|
2197
|
+
normalize: true,
|
|
2198
|
+
queryPrefix: "query: ",
|
|
2199
|
+
docPrefix: "passage: ",
|
|
2200
|
+
category: "e5",
|
|
2201
|
+
sizeCategory: "large",
|
|
2202
|
+
notes: "Highest quality E5. Slow but accurate for general retrieval."
|
|
2203
|
+
},
|
|
2204
|
+
"multilingual-e5-small": {
|
|
2205
|
+
id: "Xenova/multilingual-e5-small",
|
|
2206
|
+
name: "Multilingual E5 Small",
|
|
2207
|
+
dimensions: 384,
|
|
2208
|
+
pooling: "mean",
|
|
2209
|
+
normalize: true,
|
|
2210
|
+
queryPrefix: "query: ",
|
|
2211
|
+
docPrefix: "passage: ",
|
|
2212
|
+
category: "e5",
|
|
2213
|
+
sizeCategory: "small",
|
|
2214
|
+
notes: "Supports 100+ languages. Good for multilingual codebases."
|
|
2215
|
+
},
|
|
2216
|
+
"multilingual-e5-base": {
|
|
2217
|
+
id: "Xenova/multilingual-e5-base",
|
|
2218
|
+
name: "Multilingual E5 Base",
|
|
2219
|
+
dimensions: 768,
|
|
2220
|
+
pooling: "mean",
|
|
2221
|
+
normalize: true,
|
|
2222
|
+
queryPrefix: "query: ",
|
|
2223
|
+
docPrefix: "passage: ",
|
|
2224
|
+
category: "e5",
|
|
2225
|
+
sizeCategory: "base",
|
|
2226
|
+
notes: "Supports 100+ languages. Larger multilingual variant."
|
|
2227
|
+
},
|
|
2228
|
+
// ============================================================
|
|
2229
|
+
// MiniLM Models (Sentence Transformers) - Fast general-purpose
|
|
2230
|
+
// ============================================================
|
|
2231
|
+
"all-MiniLM-L6-v2": {
|
|
2232
|
+
id: "Xenova/all-MiniLM-L6-v2",
|
|
2233
|
+
name: "all-MiniLM-L6-v2",
|
|
2234
|
+
dimensions: 384,
|
|
2235
|
+
pooling: "mean",
|
|
2236
|
+
normalize: true,
|
|
2237
|
+
queryPrefix: "",
|
|
2238
|
+
docPrefix: "",
|
|
2239
|
+
category: "minilm",
|
|
2240
|
+
sizeCategory: "small",
|
|
2241
|
+
notes: "Popular universal model. No prefixes needed. Very fast."
|
|
2242
|
+
},
|
|
2243
|
+
"all-MiniLM-L12-v2": {
|
|
2244
|
+
id: "Xenova/all-MiniLM-L12-v2",
|
|
2245
|
+
name: "all-MiniLM-L12-v2",
|
|
2246
|
+
dimensions: 384,
|
|
2247
|
+
pooling: "mean",
|
|
2248
|
+
normalize: true,
|
|
2249
|
+
queryPrefix: "",
|
|
2250
|
+
docPrefix: "",
|
|
2251
|
+
category: "minilm",
|
|
2252
|
+
sizeCategory: "small",
|
|
2253
|
+
notes: "Deeper MiniLM. Slightly better quality than L6."
|
|
2254
|
+
},
|
|
2255
|
+
"paraphrase-MiniLM-L6-v2": {
|
|
2256
|
+
id: "Xenova/paraphrase-MiniLM-L6-v2",
|
|
2257
|
+
name: "paraphrase-MiniLM-L6-v2",
|
|
2258
|
+
dimensions: 384,
|
|
2259
|
+
pooling: "mean",
|
|
2260
|
+
normalize: true,
|
|
2261
|
+
queryPrefix: "",
|
|
2262
|
+
docPrefix: "",
|
|
2263
|
+
category: "minilm",
|
|
2264
|
+
sizeCategory: "small",
|
|
2265
|
+
notes: "Optimized for paraphrase detection. Good for similarity."
|
|
2266
|
+
},
|
|
2267
|
+
"multi-qa-MiniLM-L6-cos-v1": {
|
|
2268
|
+
id: "Xenova/multi-qa-MiniLM-L6-cos-v1",
|
|
2269
|
+
name: "multi-qa-MiniLM-L6-cos-v1",
|
|
2270
|
+
dimensions: 384,
|
|
2271
|
+
pooling: "mean",
|
|
2272
|
+
normalize: true,
|
|
2273
|
+
queryPrefix: "",
|
|
2274
|
+
docPrefix: "",
|
|
2275
|
+
category: "minilm",
|
|
2276
|
+
sizeCategory: "small",
|
|
2277
|
+
notes: "Trained on 215M QA pairs. Good for question answering."
|
|
2278
|
+
},
|
|
2279
|
+
// ============================================================
|
|
2280
|
+
// GTE Models (Alibaba) - State-of-the-art small models
|
|
2281
|
+
// ============================================================
|
|
2282
|
+
"gte-small": {
|
|
2283
|
+
id: "Xenova/gte-small",
|
|
2284
|
+
name: "GTE Small",
|
|
2285
|
+
dimensions: 384,
|
|
2286
|
+
pooling: "mean",
|
|
2287
|
+
normalize: true,
|
|
2288
|
+
queryPrefix: "",
|
|
2289
|
+
docPrefix: "",
|
|
2290
|
+
category: "gte",
|
|
2291
|
+
sizeCategory: "small",
|
|
2292
|
+
notes: "Competitive with larger models. No prefixes needed."
|
|
2293
|
+
},
|
|
2294
|
+
"gte-base": {
|
|
2295
|
+
id: "Xenova/gte-base",
|
|
2296
|
+
name: "GTE Base",
|
|
2297
|
+
dimensions: 768,
|
|
2298
|
+
pooling: "mean",
|
|
2299
|
+
normalize: true,
|
|
2300
|
+
queryPrefix: "",
|
|
2301
|
+
docPrefix: "",
|
|
2302
|
+
category: "gte",
|
|
2303
|
+
sizeCategory: "base",
|
|
2304
|
+
notes: "Strong performance on MTEB benchmark."
|
|
2305
|
+
},
|
|
2306
|
+
"gte-large": {
|
|
2307
|
+
id: "Xenova/gte-large",
|
|
2308
|
+
name: "GTE Large",
|
|
2309
|
+
dimensions: 1024,
|
|
2310
|
+
pooling: "mean",
|
|
2311
|
+
normalize: true,
|
|
2312
|
+
queryPrefix: "",
|
|
2313
|
+
docPrefix: "",
|
|
2314
|
+
category: "gte",
|
|
2315
|
+
sizeCategory: "large",
|
|
2316
|
+
notes: "Top MTEB scores. Slow but very accurate."
|
|
2317
|
+
},
|
|
2318
|
+
// ============================================================
|
|
2319
|
+
// Nomic Models - Long context support
|
|
2320
|
+
// ============================================================
|
|
2321
|
+
"nomic-embed-text-v1": {
|
|
2322
|
+
id: "nomic-ai/nomic-embed-text-v1",
|
|
2323
|
+
name: "Nomic Embed Text v1",
|
|
2324
|
+
dimensions: 768,
|
|
2325
|
+
pooling: "mean",
|
|
2326
|
+
normalize: true,
|
|
2327
|
+
queryPrefix: "search_query: ",
|
|
2328
|
+
docPrefix: "search_document: ",
|
|
2329
|
+
category: "nomic",
|
|
2330
|
+
sizeCategory: "base",
|
|
2331
|
+
notes: "8192 token context. May need trust_remote_code."
|
|
2332
|
+
},
|
|
2333
|
+
"nomic-embed-text-v1.5": {
|
|
2334
|
+
id: "nomic-ai/nomic-embed-text-v1.5",
|
|
2335
|
+
name: "Nomic Embed Text v1.5",
|
|
2336
|
+
dimensions: 768,
|
|
2337
|
+
pooling: "mean",
|
|
2338
|
+
normalize: true,
|
|
2339
|
+
queryPrefix: "search_query: ",
|
|
2340
|
+
docPrefix: "search_document: ",
|
|
2341
|
+
category: "nomic",
|
|
2342
|
+
sizeCategory: "base",
|
|
2343
|
+
notes: "8192 token context. Matryoshka embeddings support."
|
|
2344
|
+
},
|
|
2345
|
+
// ============================================================
|
|
2346
|
+
// Other Notable Models
|
|
2347
|
+
// ============================================================
|
|
2348
|
+
"jina-embeddings-v2-small-en": {
|
|
2349
|
+
id: "Xenova/jina-embeddings-v2-small-en",
|
|
2350
|
+
name: "Jina Embeddings v2 Small",
|
|
2351
|
+
dimensions: 512,
|
|
2352
|
+
pooling: "mean",
|
|
2353
|
+
normalize: true,
|
|
2354
|
+
queryPrefix: "",
|
|
2355
|
+
docPrefix: "",
|
|
2356
|
+
category: "other",
|
|
2357
|
+
sizeCategory: "small",
|
|
2358
|
+
notes: "8192 token context. Good for long documents."
|
|
2359
|
+
},
|
|
2360
|
+
"jina-embeddings-v2-base-en": {
|
|
2361
|
+
id: "Xenova/jina-embeddings-v2-base-en",
|
|
2362
|
+
name: "Jina Embeddings v2 Base",
|
|
2363
|
+
dimensions: 768,
|
|
2364
|
+
pooling: "mean",
|
|
2365
|
+
normalize: true,
|
|
2366
|
+
queryPrefix: "",
|
|
2367
|
+
docPrefix: "",
|
|
2368
|
+
category: "other",
|
|
2369
|
+
sizeCategory: "base",
|
|
2370
|
+
notes: "8192 token context. Larger Jina variant."
|
|
2371
|
+
}
|
|
2372
|
+
};
|
|
2373
|
+
var DEFAULT_MODEL_ID = "bge-small-en-v1.5";
|
|
2374
|
+
/**
 * Look up a model's registry entry.
 *
 * The identifier is matched first against the registry's own keys and then
 * against each entry's `id` field.
 *
 * Only own keys are considered: the `in` operator also matches inherited
 * names such as "constructor" or "toString", which would make those strings
 * look like known models and return a function instead of a config.
 *
 * @param modelId - Registry key or entry `id` to look up.
 * @returns The matching registry entry, or undefined when nothing matches.
 */
function getModelConfig(modelId) {
  if (Object.hasOwn(MODEL_REGISTRY, modelId)) {
    return MODEL_REGISTRY[modelId];
  }
  return Object.values(MODEL_REGISTRY).find((config) => config.id === modelId);
}
|
|
2385
|
+
/**
 * Resolve which embedding model identifier to use from the environment.
 *
 * BK_MODEL takes precedence over BK_EMBEDDING_MODEL. An empty value counts as
 * "not set" for both variables, so an empty BK_MODEL no longer hides a valid
 * BK_EMBEDDING_MODEL.
 *
 * @returns The environment-provided identifier when getModelConfig()
 *   recognises it; otherwise DEFAULT_MODEL_ID. An unrecognised identifier
 *   additionally emits a console warning.
 */
function getConfiguredModelId() {
  const candidates = [process.env["BK_MODEL"], process.env["BK_EMBEDDING_MODEL"]];
  const envModel = candidates.find((value) => value !== void 0 && value !== "");
  if (envModel === void 0) {
    return DEFAULT_MODEL_ID;
  }
  if (getModelConfig(envModel) === void 0) {
    console.warn(`Warning: Unknown model "${envModel}", using default "${DEFAULT_MODEL_ID}"`);
    return DEFAULT_MODEL_ID;
  }
  return envModel;
}
|
|
2397
|
+
|
|
2398
|
+
// src/db/embeddings.ts
|
|
2399
|
+
env.cacheDir = join5(homedir(), ".cache", "huggingface-transformers");
|
|
2400
|
+
/**
 * Read the fine-tuned model location from BK_FINETUNED_MODEL.
 *
 * @returns The variable's value, or undefined when it is unset or empty.
 */
function getFinetunedModelPath() {
  const configured = process.env["BK_FINETUNED_MODEL"];
  return configured === void 0 || configured === "" ? void 0 : configured;
}
|
|
2407
|
+
/**
 * Assemble the embedding configuration for a model.
 *
 * Each field is resolved in this order: explicit `overrides`, then the
 * environment (BK_POOLING for pooling, BK_QUERY_PREFIX for queryPrefix), then
 * the model's entry from getModelConfig(). When there is no such entry,
 * `modelId` is used verbatim as the model with generic defaults (mean pooling,
 * normalisation on, no prefixes).
 *
 * BK_POOLING is honoured only when it is exactly "mean", "cls" or "none".
 *
 * @param modelId - Registry key, registry entry id, or any other model identifier.
 * @param overrides - Optional partial config whose fields take top priority.
 * @returns An object with model, batchSize, dtype, pooling, normalize,
 *   queryPrefix, docPrefix and maxInFlightBatches.
 */
function buildEmbeddingConfig(modelId, overrides) {
  const requestedPooling = process.env["BK_POOLING"];
  const envPooling = ["mean", "cls", "none"].includes(requestedPooling) ? requestedPooling : void 0;
  const envQueryPrefix = process.env["BK_QUERY_PREFIX"];

  // Unknown identifiers fall back to a generic profile keyed by the raw id.
  const registered = getModelConfig(modelId);
  const profile =
    registered === void 0
      ? { id: modelId, pooling: "mean", normalize: true, queryPrefix: "", docPrefix: "" }
      : registered;

  return {
    model: profile.id,
    batchSize: overrides?.batchSize ?? 32,
    dtype: overrides?.dtype ?? "fp32",
    pooling: overrides?.pooling ?? envPooling ?? profile.pooling,
    normalize: overrides?.normalize ?? profile.normalize,
    queryPrefix: overrides?.queryPrefix ?? envQueryPrefix ?? profile.queryPrefix,
    docPrefix: overrides?.docPrefix ?? profile.docPrefix,
    maxInFlightBatches: overrides?.maxInFlightBatches ?? 1
  };
}
|
|
2435
|
+
// Embedding settings used when a caller supplies none. Computed once at module
// load: a fine-tuned model path from the environment wins over the configured
// model id.
var DEFAULT_EMBEDDING_CONFIG = buildEmbeddingConfig(getFinetunedModelPath() ?? getConfiguredModelId());
|
|
2438
|
+
/**
 * Lazily initialised wrapper around a "feature-extraction" pipeline.
 *
 * The pipeline is created on first use (or via initialize()) from
 * `config.model` / `config.dtype`, and every extraction call passes
 * `config.pooling` and `config.normalize`. Query and document texts receive
 * `config.queryPrefix` / `config.docPrefix` respectively.
 */
var EmbeddingEngine = class {
  // Loaded pipeline; null until initialize() succeeds and again after reset()/dispose().
  extractor = null;
  // Initialization promise shared by concurrent initialize() callers.
  initPromise = null;
  // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed() and embedBatch()
  _dimensions = null;
  // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
  disposed = false;
  config;
  /**
   * @param config - Embedding configuration; defaults to DEFAULT_EMBEDDING_CONFIG.
   *   The chosen model is logged when BK_DEBUG is set to a non-empty value.
   */
  constructor(config = DEFAULT_EMBEDDING_CONFIG) {
    if (process.env["BK_DEBUG"] !== void 0 && process.env["BK_DEBUG"] !== "") {
      console.log("[EmbeddingEngine] Using model:", config.model);
    }
    this.config = config;
  }
  /**
   * Guard against use-after-dispose.
   * @throws {Error} When dispose() has been called and reset() has not.
   */
  assertNotDisposed() {
    if (this.disposed) {
      throw new Error("EmbeddingEngine has been disposed");
    }
  }
  /**
   * Initialize the embedding pipeline (concurrency-safe).
   * Multiple concurrent calls share the same initialization promise. A failed
   * attempt clears that promise so a later call can retry.
   */
  async initialize() {
    this.assertNotDisposed();
    if (this.extractor !== null) return;
    this.initPromise ??= (async () => {
      try {
        this.extractor = await pipeline("feature-extraction", this.config.model, {
          dtype: this.config.dtype
        });
      } catch (error) {
        this.initPromise = null;
        throw error;
      }
    })();
    await this.initPromise;
  }
  /**
   * Shared precondition for every embed call: the engine must not be disposed
   * and the pipeline must be loaded (loading it on demand).
   * @throws {Error} When disposed, or when the pipeline is still missing after initialize().
   */
  async requireExtractor() {
    this.assertNotDisposed();
    if (this.extractor === null) {
      await this.initialize();
    }
    if (this.extractor === null) {
      throw new Error("Failed to initialize embedding model");
    }
  }
  /**
   * Embed a search query. Applies queryPrefix for asymmetric models.
   * @returns A Float32Array built from the pipeline output.
   */
  async embedQuery(text) {
    return this.embedText(this.config.queryPrefix + text);
  }
  /**
   * Embed a document for indexing. Applies docPrefix for asymmetric models.
   * @returns A Float32Array built from the pipeline output.
   */
  async embedDocument(text) {
    return this.embedText(this.config.docPrefix + text);
  }
  /**
   * Internal: embed text exactly as given (no prefix is added here).
   * Caches the output's last dimension as the embedding size on first use.
   */
  async embedText(text) {
    await this.requireExtractor();
    const output = await this.extractor(text, {
      pooling: this.config.pooling,
      normalize: this.config.normalize
    });
    const dim = output.dims[output.dims.length - 1] ?? 0;
    this._dimensions ??= dim;
    return Float32Array.from(output.data);
  }
  /**
   * Embed a batch of documents with optional parallelism.
   * Texts are split into chunks of `config.batchSize`; when
   * maxInFlightBatches > 1 several chunks are processed concurrently.
   * Results are returned in input order.
   *
   * @throws {Error} When there is work to do and `config.batchSize` is not at
   *   least 1 (a zero or negative size would make the chunking loop never end).
   */
  async embedBatch(texts) {
    await this.requireExtractor();
    if (texts.length === 0) {
      return [];
    }
    const { batchSize } = this.config;
    if (!(batchSize >= 1)) {
      throw new Error(`Embedding batchSize must be at least 1, got: ${String(batchSize)}`);
    }
    const batches = [];
    for (let i = 0; i < texts.length; i += batchSize) {
      batches.push(texts.slice(i, i + batchSize));
    }
    if (this.config.maxInFlightBatches <= 1) {
      return this.embedBatchesSequential(batches);
    }
    return this.embedBatchesConcurrent(batches);
  }
  /**
   * Process batches one after another, yielding to the event loop between
   * batches so other pending work can run.
   */
  async embedBatchesSequential(batches) {
    const results = [];
    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      if (batch === void 0) continue;
      const batchResults = await this.processSingleBatch(batch);
      results.push(...batchResults);
      if (i < batches.length - 1) {
        await new Promise((resolve4) => setImmediate(resolve4));
      }
    }
    return results;
  }
  /**
   * Process batches with at most `config.maxInFlightBatches` running at once.
   *
   * A fixed set of workers pulls the next unclaimed batch index, so waiting
   * costs nothing (no polling), and no new batch is started once any batch
   * has failed. Each result is stored at its batch index to preserve order.
   */
  async embedBatchesConcurrent(batches) {
    const results = new Array(batches.length);
    const workerCount = Math.min(this.config.maxInFlightBatches, batches.length);
    let nextIndex = 0;
    let failed = false;
    const runWorker = async () => {
      while (!failed && nextIndex < batches.length) {
        // Claiming the index is synchronous, so two workers never take the same batch.
        const idx = nextIndex++;
        try {
          results[idx] = await this.processSingleBatch(batches[idx]);
        } catch (error) {
          failed = true;
          throw error;
        }
      }
    };
    await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
    return results.flat();
  }
  /**
   * Process a single batch and return one Float32Array per input text.
   * docPrefix is prepended to every text before extraction.
   *
   * The flat output is cut into consecutive slices of `dim` values (the
   * output's last dimension), one per input, i.e. this expects exactly one
   * vector per text.
   * NOTE(review): with pooling "none" the output presumably has more than one
   * vector per text, which this slicing would not handle — confirm.
   *
   * @throws {Error} When the pipeline has not been initialized.
   */
  async processSingleBatch(batch) {
    if (this.extractor === null) {
      throw new Error("Extractor not initialized");
    }
    const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
    const output = await this.extractor(prefixedBatch, {
      pooling: this.config.pooling,
      normalize: this.config.normalize
    });
    const dim = output.dims[output.dims.length - 1] ?? 0;
    const batchResults = [];
    for (let b = 0; b < batch.length; b++) {
      const start = b * dim;
      const end = start + dim;
      batchResults.push(Float32Array.from(output.data.slice(start, end)));
    }
    this._dimensions ??= dim;
    return batchResults;
  }
  /**
   * Get cached embedding dimensions. Throws if nothing has been embedded yet
   * (embedQuery / embedDocument / embedBatch / ensureDimensions).
   * Use ensureDimensions() if you need to guarantee dimensions are available.
   */
  getDimensions() {
    if (this._dimensions === null) {
      throw new Error("Cannot get dimensions before first embed() call");
    }
    return this._dimensions;
  }
  /**
   * Check if the embedding pipeline is initialized.
   */
  isInitialized() {
    return this.extractor !== null;
  }
  /**
   * Check if this engine has been disposed.
   */
  isDisposed() {
    return this.disposed;
  }
  /**
   * Reset the engine to uninitialized state, allowing reuse after disposal.
   * If currently initialized, disposes the pipeline first.
   */
  async reset() {
    if (this.extractor !== null) {
      await this.extractor.dispose();
      this.extractor = null;
    }
    this.initPromise = null;
    this._dimensions = null;
    this.disposed = false;
  }
  /**
   * Ensure dimensions are available, initializing the model if needed.
   * Embeds a short probe text when no dimension has been cached yet.
   * @returns The embedding dimensions for the current model.
   */
  async ensureDimensions() {
    if (this._dimensions === null) {
      await this.embedText("dimension probe");
    }
    if (this._dimensions === null) {
      throw new Error("Failed to determine embedding dimensions");
    }
    return this._dimensions;
  }
  /**
   * Dispose the embedding pipeline to free resources.
   * Should be called before process exit to prevent ONNX runtime cleanup issues on macOS.
   * After disposal every embed call throws until reset() is called.
   */
  async dispose() {
    if (this.extractor !== null) {
      await this.extractor.dispose();
      this.extractor = null;
    }
    this.initPromise = null;
    this._dimensions = null;
    this.disposed = true;
  }
};
|
|
2065
2658
|
|
|
2066
2659
|
// src/types/config.ts
|
|
2067
2660
|
var DEFAULT_CONFIG = {
|
|
@@ -2082,6 +2675,7 @@ var DEFAULT_CONFIG = {
|
|
|
2082
2675
|
chunkSize: 1e3,
|
|
2083
2676
|
chunkOverlap: 150,
|
|
2084
2677
|
ignorePatterns: ["node_modules/**", ".git/**", "*.min.js", "*.map"],
|
|
2678
|
+
prependPath: false,
|
|
2085
2679
|
maxFileSizeBytes: 1048576
|
|
2086
2680
|
// 1MB
|
|
2087
2681
|
},
|
|
@@ -2097,6 +2691,12 @@ var DEFAULT_CONFIG = {
|
|
|
2097
2691
|
server: {
|
|
2098
2692
|
port: 3847,
|
|
2099
2693
|
host: "127.0.0.1"
|
|
2694
|
+
},
|
|
2695
|
+
reranker: {
|
|
2696
|
+
enabled: false,
|
|
2697
|
+
model: "Xenova/ms-marco-MiniLM-L-6-v2",
|
|
2698
|
+
topK: 20,
|
|
2699
|
+
returnK: 10
|
|
2100
2700
|
}
|
|
2101
2701
|
};
|
|
2102
2702
|
|
|
@@ -2148,7 +2748,7 @@ var ConfigService = class {
|
|
|
2148
2748
|
if (configPath !== void 0 && configPath !== "") {
|
|
2149
2749
|
this.configPath = this.expandPath(configPath, this.projectRoot);
|
|
2150
2750
|
} else {
|
|
2151
|
-
this.configPath =
|
|
2751
|
+
this.configPath = join6(this.projectRoot, DEFAULT_CONFIG_PATH);
|
|
2152
2752
|
}
|
|
2153
2753
|
if (dataDir !== void 0 && dataDir !== "") {
|
|
2154
2754
|
this.dataDir = this.expandPath(dataDir, this.projectRoot);
|
|
@@ -2170,15 +2770,60 @@ var ConfigService = class {
|
|
|
2170
2770
|
if (!exists) {
|
|
2171
2771
|
this.config = { ...DEFAULT_CONFIG };
|
|
2172
2772
|
await this.save(this.config);
|
|
2173
|
-
|
|
2773
|
+
} else {
|
|
2774
|
+
const content = await readFile2(this.configPath, "utf-8");
|
|
2775
|
+
try {
|
|
2776
|
+
this.config = deepMerge(DEFAULT_CONFIG, JSON.parse(content));
|
|
2777
|
+
} catch (error) {
|
|
2778
|
+
throw new Error(
|
|
2779
|
+
`Failed to parse config file at ${this.configPath}: ${error instanceof Error ? error.message : String(error)}`
|
|
2780
|
+
);
|
|
2781
|
+
}
|
|
2174
2782
|
}
|
|
2175
|
-
const
|
|
2176
|
-
|
|
2177
|
-
this.config =
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2783
|
+
const finetunedPath = getFinetunedModelPath();
|
|
2784
|
+
if (finetunedPath !== void 0) {
|
|
2785
|
+
this.config = {
|
|
2786
|
+
...this.config,
|
|
2787
|
+
embedding: buildEmbeddingConfig(finetunedPath, {
|
|
2788
|
+
batchSize: this.config.embedding.batchSize,
|
|
2789
|
+
maxInFlightBatches: this.config.embedding.maxInFlightBatches
|
|
2790
|
+
})
|
|
2791
|
+
};
|
|
2792
|
+
} else {
|
|
2793
|
+
const configuredModelId = getConfiguredModelId();
|
|
2794
|
+
if (configuredModelId !== this.config.embedding.model) {
|
|
2795
|
+
this.config = {
|
|
2796
|
+
...this.config,
|
|
2797
|
+
embedding: buildEmbeddingConfig(configuredModelId, {
|
|
2798
|
+
batchSize: this.config.embedding.batchSize,
|
|
2799
|
+
maxInFlightBatches: this.config.embedding.maxInFlightBatches
|
|
2800
|
+
})
|
|
2801
|
+
};
|
|
2802
|
+
}
|
|
2803
|
+
}
|
|
2804
|
+
const rawPooling = process.env["BK_POOLING"];
|
|
2805
|
+
const envPooling = rawPooling === "mean" || rawPooling === "cls" || rawPooling === "none" ? rawPooling : void 0;
|
|
2806
|
+
const envQueryPrefix = process.env["BK_QUERY_PREFIX"];
|
|
2807
|
+
if (envPooling !== void 0 || envQueryPrefix !== void 0) {
|
|
2808
|
+
this.config = {
|
|
2809
|
+
...this.config,
|
|
2810
|
+
embedding: {
|
|
2811
|
+
...this.config.embedding,
|
|
2812
|
+
...envPooling !== void 0 ? { pooling: envPooling } : {},
|
|
2813
|
+
...envQueryPrefix !== void 0 ? { queryPrefix: envQueryPrefix } : {}
|
|
2814
|
+
}
|
|
2815
|
+
};
|
|
2816
|
+
}
|
|
2817
|
+
const rerankerOverrides = parseRerankerEnvOverrides(false);
|
|
2818
|
+
if (rerankerOverrides.enabled !== void 0 || rerankerOverrides.topK !== void 0) {
|
|
2819
|
+
this.config = {
|
|
2820
|
+
...this.config,
|
|
2821
|
+
reranker: {
|
|
2822
|
+
...this.config.reranker,
|
|
2823
|
+
...rerankerOverrides.enabled !== void 0 ? { enabled: rerankerOverrides.enabled } : {},
|
|
2824
|
+
...rerankerOverrides.topK !== void 0 ? { topK: rerankerOverrides.topK } : {}
|
|
2825
|
+
}
|
|
2826
|
+
};
|
|
2182
2827
|
}
|
|
2183
2828
|
return this.config;
|
|
2184
2829
|
}
|
|
@@ -2194,7 +2839,7 @@ var ConfigService = class {
|
|
|
2194
2839
|
}
|
|
2195
2840
|
expandPath(path4, baseDir) {
|
|
2196
2841
|
if (path4.startsWith("~")) {
|
|
2197
|
-
return path4.replace("~",
|
|
2842
|
+
return path4.replace("~", homedir2());
|
|
2198
2843
|
}
|
|
2199
2844
|
if (!isAbsolute(path4)) {
|
|
2200
2845
|
return resolve(baseDir, path4);
|
|
@@ -2205,19 +2850,38 @@ var ConfigService = class {
|
|
|
2205
2850
|
|
|
2206
2851
|
// src/services/gitignore.service.ts
|
|
2207
2852
|
import { readFile as readFile3, writeFile as writeFile3, access as access2 } from "fs/promises";
|
|
2208
|
-
import { join as
|
|
2853
|
+
import { join as join7 } from "path";
|
|
2209
2854
|
var REQUIRED_PATTERNS = [
|
|
2210
|
-
".bluera
|
|
2855
|
+
".bluera/",
|
|
2856
|
+
"!.bluera/",
|
|
2211
2857
|
"!.bluera/bluera-knowledge/",
|
|
2212
|
-
".bluera/bluera-knowledge/*",
|
|
2213
2858
|
"!.bluera/bluera-knowledge/stores.config.json",
|
|
2214
2859
|
"!.bluera/bluera-knowledge/config.json",
|
|
2215
2860
|
"!.bluera/bluera-knowledge/skill-activation.json",
|
|
2216
2861
|
".bluera/bluera-knowledge/data/",
|
|
2217
2862
|
".bluera/bluera-knowledge/logs/"
|
|
2218
2863
|
];
|
|
2219
|
-
|
|
2220
|
-
|
|
2864
|
+
/**
 * Decide whether a required ignore pattern is already present in a list of
 * existing lines.
 *
 * A pattern counts as covered when the list contains:
 *  - the pattern itself,
 *  - the same pattern with its trailing "/" removed (or added, when absent), or
 *  - ".bluera/*", for the two patterns ".bluera/" and "!.bluera/".
 *
 * Lines are compared exactly; no trimming is performed here.
 *
 * @param pattern - Required pattern to look for.
 * @param existingLines - Lines to search.
 * @returns true when covered, false otherwise.
 */
function isPatternCovered(pattern, existingLines) {
  const hasLine = (line) => existingLines.includes(line);
  const slashVariant = pattern.endsWith("/") ? pattern.slice(0, -1) : `${pattern}/`;
  if (hasLine(pattern) || hasLine(slashVariant)) {
    return true;
  }
  const isBlueraRoot = pattern === ".bluera/" || pattern === "!.bluera/";
  return isBlueraRoot && hasLine(".bluera/*");
}
|
|
2880
|
+
// Comment header for the Bluera Knowledge section of a .gitignore file.
// It begins with a newline and ends with one after its last comment line.
var SECTION_HEADER = [
  "",
  "# Bluera Knowledge",
  "# Config files (stores.config.json, config.json, skill-activation.json) can be committed",
  "# Data directory (vector DB, cloned repos) and logs are not committed",
  ""
].join("\n");
|
|
2221
2885
|
async function fileExists2(path4) {
|
|
2222
2886
|
try {
|
|
2223
2887
|
await access2(path4);
|
|
@@ -2226,73 +2890,13 @@ async function fileExists2(path4) {
|
|
|
2226
2890
|
return false;
|
|
2227
2891
|
}
|
|
2228
2892
|
}
|
|
2229
|
-
function isPatternSatisfied(requiredPattern, existingLines) {
|
|
2230
|
-
if (existingLines.includes(requiredPattern)) {
|
|
2231
|
-
return true;
|
|
2232
|
-
}
|
|
2233
|
-
switch (requiredPattern) {
|
|
2234
|
-
case ".bluera/*":
|
|
2235
|
-
return existingLines.includes(".bluera/");
|
|
2236
|
-
case "!.bluera/bluera-knowledge/":
|
|
2237
|
-
return existingLines.includes("!.bluera/bluera-knowledge");
|
|
2238
|
-
case ".bluera/bluera-knowledge/data/":
|
|
2239
|
-
case ".bluera/bluera-knowledge/logs/":
|
|
2240
|
-
return existingLines.includes(".bluera/bluera-knowledge/*");
|
|
2241
|
-
default:
|
|
2242
|
-
return false;
|
|
2243
|
-
}
|
|
2244
|
-
}
|
|
2245
|
-
function removeBkSection(content) {
|
|
2246
|
-
const lines = content.split("\n");
|
|
2247
|
-
const outputLines = [];
|
|
2248
|
-
let removed = false;
|
|
2249
|
-
let i = 0;
|
|
2250
|
-
while (i < lines.length) {
|
|
2251
|
-
const line = lines[i] ?? "";
|
|
2252
|
-
const trimmed = line.trim();
|
|
2253
|
-
if (trimmed === SECTION_BEGIN) {
|
|
2254
|
-
removed = true;
|
|
2255
|
-
i++;
|
|
2256
|
-
while (i < lines.length && (lines[i] ?? "").trim() !== SECTION_END) {
|
|
2257
|
-
i++;
|
|
2258
|
-
}
|
|
2259
|
-
if (i < lines.length) {
|
|
2260
|
-
i++;
|
|
2261
|
-
}
|
|
2262
|
-
continue;
|
|
2263
|
-
}
|
|
2264
|
-
if (trimmed === "# Bluera Knowledge") {
|
|
2265
|
-
removed = true;
|
|
2266
|
-
i++;
|
|
2267
|
-
while (i < lines.length) {
|
|
2268
|
-
const lt = (lines[i] ?? "").trim();
|
|
2269
|
-
if (lt === "" || lt.startsWith("#") || lt.includes(".bluera")) {
|
|
2270
|
-
i++;
|
|
2271
|
-
} else {
|
|
2272
|
-
break;
|
|
2273
|
-
}
|
|
2274
|
-
}
|
|
2275
|
-
continue;
|
|
2276
|
-
}
|
|
2277
|
-
outputLines.push(line);
|
|
2278
|
-
i++;
|
|
2279
|
-
}
|
|
2280
|
-
let lastIdx = outputLines.length - 1;
|
|
2281
|
-
while (lastIdx >= 0 && outputLines[lastIdx]?.trim() === "") {
|
|
2282
|
-
outputLines.pop();
|
|
2283
|
-
lastIdx--;
|
|
2284
|
-
}
|
|
2285
|
-
const cleaned = outputLines.length > 0 ? `${outputLines.join("\n")}
|
|
2286
|
-
` : "";
|
|
2287
|
-
return { cleaned, removed };
|
|
2288
|
-
}
|
|
2289
2893
|
var GitignoreService = class {
|
|
2290
2894
|
gitignorePath;
|
|
2291
2895
|
constructor(projectRoot) {
|
|
2292
|
-
this.gitignorePath =
|
|
2896
|
+
this.gitignorePath = join7(projectRoot, ".gitignore");
|
|
2293
2897
|
}
|
|
2294
2898
|
/**
|
|
2295
|
-
* Check if all required patterns are
|
|
2899
|
+
* Check if all required patterns are present in .gitignore
|
|
2296
2900
|
*/
|
|
2297
2901
|
async hasRequiredPatterns() {
|
|
2298
2902
|
const exists = await fileExists2(this.gitignorePath);
|
|
@@ -2301,62 +2905,52 @@ var GitignoreService = class {
|
|
|
2301
2905
|
}
|
|
2302
2906
|
const content = await readFile3(this.gitignorePath, "utf-8");
|
|
2303
2907
|
const lines = content.split("\n").map((l) => l.trim());
|
|
2304
|
-
|
|
2908
|
+
for (const pattern of REQUIRED_PATTERNS) {
|
|
2909
|
+
if (!isPatternCovered(pattern, lines)) {
|
|
2910
|
+
return false;
|
|
2911
|
+
}
|
|
2912
|
+
}
|
|
2913
|
+
return true;
|
|
2305
2914
|
}
|
|
2306
2915
|
/**
|
|
2307
2916
|
* Ensure required .gitignore patterns are present.
|
|
2308
2917
|
*
|
|
2309
2918
|
* - Creates .gitignore if it doesn't exist
|
|
2310
|
-
* -
|
|
2311
|
-
* -
|
|
2312
|
-
* - Persists cleanup even when no patterns are missing
|
|
2919
|
+
* - Appends missing patterns if .gitignore exists
|
|
2920
|
+
* - Does nothing if all patterns are already present
|
|
2313
2921
|
*
|
|
2314
2922
|
* @returns Object with updated flag and descriptive message
|
|
2315
2923
|
*/
|
|
2316
2924
|
async ensureGitignorePatterns() {
|
|
2317
2925
|
const exists = await fileExists2(this.gitignorePath);
|
|
2318
2926
|
if (!exists) {
|
|
2319
|
-
const content =
|
|
2927
|
+
const content = `${SECTION_HEADER.trim()}
|
|
2928
|
+
${REQUIRED_PATTERNS.join("\n")}
|
|
2929
|
+
`;
|
|
2320
2930
|
await writeFile3(this.gitignorePath, content);
|
|
2321
2931
|
return {
|
|
2322
2932
|
updated: true,
|
|
2323
2933
|
message: "Created .gitignore with Bluera Knowledge patterns"
|
|
2324
2934
|
};
|
|
2325
2935
|
}
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
const { cleaned, removed } = removeBkSection(rawContent);
|
|
2329
|
-
const cleanedLines = cleaned.split("\n").map((l) => l.trim());
|
|
2936
|
+
const existingContent = await readFile3(this.gitignorePath, "utf-8");
|
|
2937
|
+
const lines = existingContent.split("\n").map((l) => l.trim());
|
|
2330
2938
|
const missingPatterns = REQUIRED_PATTERNS.filter(
|
|
2331
|
-
(pattern) => !
|
|
2939
|
+
(pattern) => !isPatternCovered(pattern, lines)
|
|
2332
2940
|
);
|
|
2333
2941
|
if (missingPatterns.length === 0) {
|
|
2334
|
-
if (removed) {
|
|
2335
|
-
await writeFile3(this.gitignorePath, cleaned);
|
|
2336
|
-
return {
|
|
2337
|
-
updated: true,
|
|
2338
|
-
message: "Cleaned redundant Bluera Knowledge section from .gitignore"
|
|
2339
|
-
};
|
|
2340
|
-
}
|
|
2341
2942
|
return {
|
|
2342
2943
|
updated: false,
|
|
2343
2944
|
message: "All Bluera Knowledge patterns already present in .gitignore"
|
|
2344
2945
|
};
|
|
2345
2946
|
}
|
|
2346
|
-
let newContent =
|
|
2347
|
-
if (newContent.
|
|
2348
|
-
if (!newContent.endsWith("\n")) {
|
|
2349
|
-
newContent += "\n";
|
|
2350
|
-
}
|
|
2947
|
+
let newContent = existingContent;
|
|
2948
|
+
if (!newContent.endsWith("\n")) {
|
|
2351
2949
|
newContent += "\n";
|
|
2352
2950
|
}
|
|
2353
|
-
newContent +=
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
updated: false,
|
|
2357
|
-
message: "All Bluera Knowledge patterns already present in .gitignore"
|
|
2358
|
-
};
|
|
2359
|
-
}
|
|
2951
|
+
newContent += SECTION_HEADER;
|
|
2952
|
+
newContent += `${missingPatterns.join("\n")}
|
|
2953
|
+
`;
|
|
2360
2954
|
await writeFile3(this.gitignorePath, newContent);
|
|
2361
2955
|
return {
|
|
2362
2956
|
updated: true,
|
|
@@ -2375,9 +2969,9 @@ var GitignoreService = class {
|
|
|
2375
2969
|
import { execFile } from "child_process";
|
|
2376
2970
|
import { createHash as createHash3 } from "crypto";
|
|
2377
2971
|
import { open, readFile as readFile5, readdir, stat as stat2 } from "fs/promises";
|
|
2378
|
-
import { join as
|
|
2972
|
+
import { join as join8, extname, basename, relative } from "path";
|
|
2379
2973
|
import { promisify } from "util";
|
|
2380
|
-
import { minimatch } from "minimatch";
|
|
2974
|
+
import { minimatch as minimatchFn } from "minimatch";
|
|
2381
2975
|
|
|
2382
2976
|
// src/services/chunking.service.ts
|
|
2383
2977
|
var CHUNK_PRESETS = {
|
|
@@ -2758,9 +3352,7 @@ var DriftService = class {
|
|
|
2758
3352
|
}
|
|
2759
3353
|
};
|
|
2760
3354
|
|
|
2761
|
-
// src/
|
|
2762
|
-
var execFileAsync = promisify(execFile);
|
|
2763
|
-
var logger = createLogger("index-service");
|
|
3355
|
+
// src/utils/text-extensions.ts
|
|
2764
3356
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
2765
3357
|
// Text/docs
|
|
2766
3358
|
".txt",
|
|
@@ -2870,6 +3462,11 @@ var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
|
2870
3462
|
".makefile",
|
|
2871
3463
|
".cmake"
|
|
2872
3464
|
]);
|
|
3465
|
+
|
|
3466
|
+
// src/services/index.service.ts
|
|
3467
|
+
var minimatch = minimatchFn;
|
|
3468
|
+
var execFileAsync = promisify(execFile);
|
|
3469
|
+
var logger2 = createLogger("index-service");
|
|
2873
3470
|
function isMinifiedFile(filename) {
|
|
2874
3471
|
const minPatterns = [
|
|
2875
3472
|
/\.min\.(js|css|mjs|cjs)$/i,
|
|
@@ -2892,6 +3489,110 @@ async function isBinaryFile(filePath) {
|
|
|
2892
3489
|
await fd.close();
|
|
2893
3490
|
}
|
|
2894
3491
|
}
|
|
3492
|
+
// Lookup table from a file extension (leading dot included) to a language label.
// Written as [language, extensions] groups and flattened into a plain object;
// the resulting keys appear in the same order as the groups below.
// NOTE(review): the lookup visible in this file lowercases the extension first,
// so the ".R" key looks unreachable from that path — confirm before removing it.
var EXT_TO_LANGUAGE = Object.fromEntries(
  [
    ["typescript", [".ts", ".tsx", ".mts", ".cts"]],
    ["javascript", [".js", ".jsx", ".mjs", ".cjs"]],
    ["python", [".py", ".pyi", ".pyx"]],
    ["rust", [".rs"]],
    ["go", [".go"]],
    ["java", [".java"]],
    ["kotlin", [".kt", ".kts"]],
    ["scala", [".scala"]],
    ["groovy", [".groovy"]],
    ["ruby", [".rb", ".erb", ".rake"]],
    ["php", [".php"]],
    ["swift", [".swift"]],
    ["objective-c", [".m", ".mm"]],
    ["c", [".c", ".h"]],
    ["cpp", [".cpp", ".cc", ".cxx", ".hpp", ".hxx"]],
    ["csharp", [".cs"]],
    ["fsharp", [".fs"]],
    ["vb", [".vb"]],
    ["shell", [".sh", ".bash", ".zsh", ".fish"]],
    ["powershell", [".ps1", ".psm1"]],
    ["sql", [".sql"]],
    ["markdown", [".md"]],
    ["restructuredtext", [".rst"]],
    ["lua", [".lua"]],
    ["r", [".r", ".R"]],
    ["julia", [".jl"]],
    ["elixir", [".ex", ".exs"]],
    ["erlang", [".erl", ".hrl"]],
    ["clojure", [".clj", ".cljs", ".cljc"]],
    ["haskell", [".hs"]],
    ["elm", [".elm"]],
    ["dart", [".dart"]],
    ["zig", [".zig"]],
    ["nim", [".nim"]],
    ["v", [".v"]],
    ["perl", [".pl", ".pm"]],
    ["terraform", [".tf"]],
    ["hcl", [".hcl"]],
    ["protobuf", [".proto"]],
    ["graphql", [".graphql", ".gql"]],
    ["vue", [".vue"]],
    ["svelte", [".svelte"]],
    ["html", [".html", ".htm"]],
    ["css", [".css"]],
    ["scss", [".scss"]],
    ["sass", [".sass"]],
    ["less", [".less"]],
    ["json", [".json"]],
    ["yaml", [".yaml", ".yml"]],
    ["toml", [".toml"]],
    ["xml", [".xml"]]
  ].flatMap(([language, extensions]) => extensions.map((extension) => [extension, language]))
);
|
|
3575
|
+
// File names (base name only, compared case-sensitively by Set membership)
// that are flagged as entry points. Grouped by stem for readability.
var ENTRY_POINT_NAMES = /* @__PURE__ */ new Set(
  [
    "index.ts index.js index.mjs index.tsx index.jsx",
    "main.ts main.js main.py main.go main.rs",
    "app.ts app.js app.py",
    "mod.rs lib.rs __init__.py",
    "server.ts server.js server.py"
  ].flatMap((group) => group.split(" "))
);
|
|
2895
3596
|
var IndexService = class {
|
|
2896
3597
|
lanceStore;
|
|
2897
3598
|
embeddingEngine;
|
|
@@ -2902,6 +3603,7 @@ var IndexService = class {
|
|
|
2902
3603
|
concurrency;
|
|
2903
3604
|
ignoreDirs;
|
|
2904
3605
|
ignoreFilePatterns;
|
|
3606
|
+
prependPath;
|
|
2905
3607
|
maxFileSizeBytes;
|
|
2906
3608
|
constructor(lanceStore, embeddingEngine, options = {}) {
|
|
2907
3609
|
this.lanceStore = lanceStore;
|
|
@@ -2914,13 +3616,14 @@ var IndexService = class {
|
|
|
2914
3616
|
this.manifestService = options.manifestService;
|
|
2915
3617
|
this.driftService = new DriftService();
|
|
2916
3618
|
this.concurrency = options.concurrency ?? 4;
|
|
3619
|
+
this.prependPath = options.prependPath ?? false;
|
|
2917
3620
|
const parsed = parseIgnorePatternsForScanning(options.ignorePatterns ?? []);
|
|
2918
3621
|
this.ignoreDirs = parsed.dirs;
|
|
2919
3622
|
this.ignoreFilePatterns = parsed.fileMatchers;
|
|
2920
3623
|
this.maxFileSizeBytes = options.maxFileSizeBytes ?? 1048576;
|
|
2921
3624
|
}
|
|
2922
3625
|
async indexStore(store, onProgress) {
|
|
2923
|
-
|
|
3626
|
+
logger2.info(
|
|
2924
3627
|
{
|
|
2925
3628
|
storeId: store.id,
|
|
2926
3629
|
storeName: store.name,
|
|
@@ -2932,13 +3635,13 @@ var IndexService = class {
|
|
|
2932
3635
|
if (store.type === "file" || store.type === "repo") {
|
|
2933
3636
|
return await this.indexFileStore(store, onProgress);
|
|
2934
3637
|
}
|
|
2935
|
-
|
|
3638
|
+
logger2.error(
|
|
2936
3639
|
{ storeId: store.id, storeType: store.type },
|
|
2937
3640
|
"Unsupported store type for indexing"
|
|
2938
3641
|
);
|
|
2939
3642
|
return err(new Error(`Indexing not supported for store type: ${store.type}`));
|
|
2940
3643
|
} catch (error) {
|
|
2941
|
-
|
|
3644
|
+
logger2.error(
|
|
2942
3645
|
{
|
|
2943
3646
|
storeId: store.id,
|
|
2944
3647
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -2963,7 +3666,7 @@ var IndexService = class {
|
|
|
2963
3666
|
if (store.type !== "file" && store.type !== "repo") {
|
|
2964
3667
|
return err(new Error(`Incremental indexing not supported for store type: ${store.type}`));
|
|
2965
3668
|
}
|
|
2966
|
-
|
|
3669
|
+
logger2.info(
|
|
2967
3670
|
{
|
|
2968
3671
|
storeId: store.id,
|
|
2969
3672
|
storeName: store.name,
|
|
@@ -2979,7 +3682,7 @@ var IndexService = class {
|
|
|
2979
3682
|
filePaths.map((path4) => this.driftService.getFileState(path4))
|
|
2980
3683
|
);
|
|
2981
3684
|
const drift = await this.driftService.detectChanges(manifest, currentFiles);
|
|
2982
|
-
|
|
3685
|
+
logger2.debug(
|
|
2983
3686
|
{
|
|
2984
3687
|
storeId: store.id,
|
|
2985
3688
|
added: drift.added.length,
|
|
@@ -2998,7 +3701,7 @@ var IndexService = class {
|
|
|
2998
3701
|
}
|
|
2999
3702
|
if (documentIdsToDelete.length > 0) {
|
|
3000
3703
|
await this.lanceStore.deleteDocuments(store.id, documentIdsToDelete);
|
|
3001
|
-
|
|
3704
|
+
logger2.debug(
|
|
3002
3705
|
{ storeId: store.id, count: documentIdsToDelete.length },
|
|
3003
3706
|
"Deleted old documents"
|
|
3004
3707
|
);
|
|
@@ -3034,7 +3737,7 @@ var IndexService = class {
|
|
|
3034
3737
|
fileState: state
|
|
3035
3738
|
};
|
|
3036
3739
|
} catch (error) {
|
|
3037
|
-
|
|
3740
|
+
logger2.warn(
|
|
3038
3741
|
{ filePath, error: error instanceof Error ? error.message : String(error) },
|
|
3039
3742
|
"Failed to process file during incremental indexing, skipping"
|
|
3040
3743
|
);
|
|
@@ -3081,13 +3784,13 @@ var IndexService = class {
|
|
|
3081
3784
|
if (allSourceFiles.length > 0) {
|
|
3082
3785
|
const graph = await this.codeGraphService.buildGraph(allSourceFiles);
|
|
3083
3786
|
await this.codeGraphService.saveGraph(store.id, graph);
|
|
3084
|
-
|
|
3787
|
+
logger2.debug(
|
|
3085
3788
|
{ storeId: store.id, sourceFiles: allSourceFiles.length },
|
|
3086
3789
|
"Rebuilt code graph during incremental indexing"
|
|
3087
3790
|
);
|
|
3088
3791
|
} else {
|
|
3089
3792
|
await this.codeGraphService.deleteGraph(store.id);
|
|
3090
|
-
|
|
3793
|
+
logger2.debug(
|
|
3091
3794
|
{ storeId: store.id },
|
|
3092
3795
|
"Deleted stale code graph (no source files remain)"
|
|
3093
3796
|
);
|
|
@@ -3108,7 +3811,7 @@ var IndexService = class {
|
|
|
3108
3811
|
message: "Incremental indexing complete"
|
|
3109
3812
|
});
|
|
3110
3813
|
const timeMs = Date.now() - startTime;
|
|
3111
|
-
|
|
3814
|
+
logger2.info(
|
|
3112
3815
|
{
|
|
3113
3816
|
storeId: store.id,
|
|
3114
3817
|
storeName: store.name,
|
|
@@ -3131,7 +3834,7 @@ var IndexService = class {
|
|
|
3131
3834
|
filesUnchanged: drift.unchanged.length
|
|
3132
3835
|
});
|
|
3133
3836
|
} catch (error) {
|
|
3134
|
-
|
|
3837
|
+
logger2.error(
|
|
3135
3838
|
{
|
|
3136
3839
|
storeId: store.id,
|
|
3137
3840
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -3150,7 +3853,7 @@ var IndexService = class {
|
|
|
3150
3853
|
const files = await this.discoverFiles(store.path, store.ingest);
|
|
3151
3854
|
const documents = [];
|
|
3152
3855
|
let filesProcessed = 0;
|
|
3153
|
-
|
|
3856
|
+
logger2.debug(
|
|
3154
3857
|
{
|
|
3155
3858
|
storeId: store.id,
|
|
3156
3859
|
path: store.path,
|
|
@@ -3174,7 +3877,7 @@ var IndexService = class {
|
|
|
3174
3877
|
try {
|
|
3175
3878
|
return await this.processFile(filePath, store);
|
|
3176
3879
|
} catch (error) {
|
|
3177
|
-
|
|
3880
|
+
logger2.warn(
|
|
3178
3881
|
{ filePath, error: error instanceof Error ? error.message : String(error) },
|
|
3179
3882
|
"Failed to process file, skipping"
|
|
3180
3883
|
);
|
|
@@ -3189,11 +3892,19 @@ var IndexService = class {
|
|
|
3189
3892
|
}
|
|
3190
3893
|
}
|
|
3191
3894
|
filesProcessed += batch.length;
|
|
3895
|
+
const elapsedMs = Date.now() - startTime;
|
|
3896
|
+
const filesPerSecond = elapsedMs > 0 ? filesProcessed / elapsedMs * 1e3 : 0;
|
|
3897
|
+
const lastFile = batch.at(-1);
|
|
3192
3898
|
onProgress?.({
|
|
3193
3899
|
type: "progress",
|
|
3194
3900
|
current: filesProcessed,
|
|
3195
3901
|
total: files.length,
|
|
3196
|
-
message: `Indexed ${String(filesProcessed)}/${String(files.length)} files
|
|
3902
|
+
message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`,
|
|
3903
|
+
details: {
|
|
3904
|
+
...lastFile !== void 0 ? { currentFile: basename(lastFile) } : {},
|
|
3905
|
+
elapsedMs,
|
|
3906
|
+
filesPerSecond
|
|
3907
|
+
}
|
|
3197
3908
|
});
|
|
3198
3909
|
}
|
|
3199
3910
|
if (documents.length > 0) {
|
|
@@ -3206,14 +3917,18 @@ var IndexService = class {
|
|
|
3206
3917
|
} else if (this.codeGraphService) {
|
|
3207
3918
|
await this.codeGraphService.deleteGraph(store.id);
|
|
3208
3919
|
}
|
|
3920
|
+
const timeMs = Date.now() - startTime;
|
|
3209
3921
|
onProgress?.({
|
|
3210
3922
|
type: "complete",
|
|
3211
3923
|
current: files.length,
|
|
3212
3924
|
total: files.length,
|
|
3213
|
-
message: "Indexing complete"
|
|
3925
|
+
message: "Indexing complete",
|
|
3926
|
+
details: {
|
|
3927
|
+
elapsedMs: timeMs,
|
|
3928
|
+
filesPerSecond: timeMs > 0 ? files.length / timeMs * 1e3 : 0
|
|
3929
|
+
}
|
|
3214
3930
|
});
|
|
3215
|
-
|
|
3216
|
-
logger.info(
|
|
3931
|
+
logger2.info(
|
|
3217
3932
|
{
|
|
3218
3933
|
storeId: store.id,
|
|
3219
3934
|
storeName: store.name,
|
|
@@ -3235,14 +3950,20 @@ var IndexService = class {
|
|
|
3235
3950
|
* Extracted for parallel processing.
|
|
3236
3951
|
*/
|
|
3237
3952
|
async processFile(filePath, store) {
|
|
3238
|
-
const
|
|
3239
|
-
const fileHash = createHash3("md5").update(
|
|
3240
|
-
const chunks = this.chunker.chunk(content, filePath);
|
|
3953
|
+
const rawContent = await readFile5(filePath, "utf-8");
|
|
3954
|
+
const fileHash = createHash3("md5").update(rawContent).digest("hex");
|
|
3241
3955
|
const relativePath = relative(store.path, filePath);
|
|
3956
|
+
const content = this.prependPath ? `[${relativePath}]
|
|
3957
|
+
${rawContent}` : rawContent;
|
|
3958
|
+
const chunks = this.chunker.chunk(content, filePath);
|
|
3242
3959
|
const pathHash = createHash3("md5").update(relativePath).digest("hex").slice(0, 8);
|
|
3243
3960
|
const ext = extname(filePath).toLowerCase();
|
|
3244
3961
|
const fileName = basename(filePath).toLowerCase();
|
|
3245
3962
|
const fileType = this.classifyFileType(ext, fileName, filePath);
|
|
3963
|
+
const language = EXT_TO_LANGUAGE[ext];
|
|
3964
|
+
const normalizedRelPath = relativePath.replaceAll("\\", "/");
|
|
3965
|
+
const depth = normalizedRelPath.split("/").length - 1;
|
|
3966
|
+
const isEntryPoint = ENTRY_POINT_NAMES.has(basename(filePath));
|
|
3246
3967
|
const sourceFile = [".ts", ".tsx", ".js", ".jsx", ".py", ".rs", ".go"].includes(ext) ? { path: filePath, content } : void 0;
|
|
3247
3968
|
if (chunks.length === 0) {
|
|
3248
3969
|
return { documents: [], sourceFile };
|
|
@@ -3275,7 +3996,11 @@ var IndexService = class {
|
|
|
3275
3996
|
sectionHeader: chunk.sectionHeader,
|
|
3276
3997
|
functionName: chunk.functionName,
|
|
3277
3998
|
hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
|
|
3278
|
-
docSummary: chunk.docSummary
|
|
3999
|
+
docSummary: chunk.docSummary,
|
|
4000
|
+
relativePath: normalizedRelPath,
|
|
4001
|
+
language,
|
|
4002
|
+
depth,
|
|
4003
|
+
isEntryPoint
|
|
3279
4004
|
}
|
|
3280
4005
|
});
|
|
3281
4006
|
}
|
|
@@ -3286,11 +4011,11 @@ var IndexService = class {
|
|
|
3286
4011
|
*/
|
|
3287
4012
|
async getTrackedFiles(repoPath) {
|
|
3288
4013
|
try {
|
|
3289
|
-
const gitDir =
|
|
4014
|
+
const gitDir = join8(repoPath, ".git");
|
|
3290
4015
|
try {
|
|
3291
4016
|
await stat2(gitDir);
|
|
3292
4017
|
} catch {
|
|
3293
|
-
|
|
4018
|
+
logger2.info({ repoPath }, "Not a git repository, using filesystem walk");
|
|
3294
4019
|
return null;
|
|
3295
4020
|
}
|
|
3296
4021
|
const { stdout } = await execFileAsync("git", ["ls-files", "-z"], {
|
|
@@ -3300,12 +4025,12 @@ var IndexService = class {
|
|
|
3300
4025
|
});
|
|
3301
4026
|
const files = stdout.split("\0").filter(Boolean);
|
|
3302
4027
|
if (files.some((f) => f === ".gitmodules")) {
|
|
3303
|
-
|
|
4028
|
+
logger2.info({ repoPath }, "Repository has submodules (skipped by ls-files)");
|
|
3304
4029
|
}
|
|
3305
4030
|
return files;
|
|
3306
4031
|
} catch (error) {
|
|
3307
4032
|
const reason = error instanceof Error ? error.message : String(error);
|
|
3308
|
-
|
|
4033
|
+
logger2.warn({ repoPath, reason }, "git ls-files failed, using filesystem walk");
|
|
3309
4034
|
return null;
|
|
3310
4035
|
}
|
|
3311
4036
|
}
|
|
@@ -3318,11 +4043,11 @@ var IndexService = class {
|
|
|
3318
4043
|
const trackedFiles = await this.getTrackedFiles(storePath);
|
|
3319
4044
|
let candidates;
|
|
3320
4045
|
if (trackedFiles !== null) {
|
|
3321
|
-
candidates = trackedFiles.map((f) =>
|
|
3322
|
-
|
|
4046
|
+
candidates = trackedFiles.map((f) => join8(storePath, f));
|
|
4047
|
+
logger2.debug({ storePath, count: candidates.length }, "Using git ls-files for discovery");
|
|
3323
4048
|
} else {
|
|
3324
4049
|
candidates = await this.scanDirectory(storePath);
|
|
3325
|
-
|
|
4050
|
+
logger2.debug({ storePath, count: candidates.length }, "Using filesystem walk for discovery");
|
|
3326
4051
|
}
|
|
3327
4052
|
return this.filterFiles(candidates, storePath, ingestConfig);
|
|
3328
4053
|
}
|
|
@@ -3344,6 +4069,12 @@ var IndexService = class {
|
|
|
3344
4069
|
const ext = extname(filePath).toLowerCase();
|
|
3345
4070
|
const filename = basename(filePath);
|
|
3346
4071
|
if (!TEXT_EXTENSIONS.has(ext)) continue;
|
|
4072
|
+
const relativePath = relative(storePath, filePath).replaceAll("\\", "/");
|
|
4073
|
+
const pathSegments = relativePath.split("/");
|
|
4074
|
+
const dirSegments = pathSegments.slice(0, -1);
|
|
4075
|
+
if (dirSegments.some((segment) => this.ignoreDirs.has(segment))) {
|
|
4076
|
+
continue;
|
|
4077
|
+
}
|
|
3347
4078
|
const shouldIgnore = this.ignoreFilePatterns.some((matcher) => matcher(filename));
|
|
3348
4079
|
if (shouldIgnore) continue;
|
|
3349
4080
|
if (skipMinified && isMinifiedFile(filename)) {
|
|
@@ -3351,10 +4082,10 @@ var IndexService = class {
|
|
|
3351
4082
|
continue;
|
|
3352
4083
|
}
|
|
3353
4084
|
if (excludeGlobs.length > 0) {
|
|
3354
|
-
const
|
|
3355
|
-
const excluded = excludeGlobs.some((glob) => minimatch(
|
|
4085
|
+
const relativePath2 = relative(storePath, filePath);
|
|
4086
|
+
const excluded = excludeGlobs.some((glob) => minimatch(relativePath2, glob));
|
|
3356
4087
|
if (excluded) {
|
|
3357
|
-
skippedExcluded.push(
|
|
4088
|
+
skippedExcluded.push(relativePath2);
|
|
3358
4089
|
continue;
|
|
3359
4090
|
}
|
|
3360
4091
|
}
|
|
@@ -3382,7 +4113,7 @@ var IndexService = class {
|
|
|
3382
4113
|
result.push(filePath);
|
|
3383
4114
|
}
|
|
3384
4115
|
if (skippedLarge.length > 0) {
|
|
3385
|
-
|
|
4116
|
+
logger2.info(
|
|
3386
4117
|
{
|
|
3387
4118
|
storePath,
|
|
3388
4119
|
count: skippedLarge.length,
|
|
@@ -3393,19 +4124,19 @@ var IndexService = class {
|
|
|
3393
4124
|
);
|
|
3394
4125
|
}
|
|
3395
4126
|
if (skippedMinified.length > 0) {
|
|
3396
|
-
|
|
4127
|
+
logger2.debug(
|
|
3397
4128
|
{ storePath, count: skippedMinified.length, examples: skippedMinified.slice(0, 5) },
|
|
3398
4129
|
"Skipped minified files"
|
|
3399
4130
|
);
|
|
3400
4131
|
}
|
|
3401
4132
|
if (skippedBinary.length > 0) {
|
|
3402
|
-
|
|
4133
|
+
logger2.debug(
|
|
3403
4134
|
{ storePath, count: skippedBinary.length, examples: skippedBinary.slice(0, 5) },
|
|
3404
4135
|
"Skipped binary files"
|
|
3405
4136
|
);
|
|
3406
4137
|
}
|
|
3407
4138
|
if (skippedExcluded.length > 0) {
|
|
3408
|
-
|
|
4139
|
+
logger2.debug(
|
|
3409
4140
|
{ storePath, count: skippedExcluded.length, examples: skippedExcluded.slice(0, 5) },
|
|
3410
4141
|
"Skipped excluded files"
|
|
3411
4142
|
);
|
|
@@ -3416,7 +4147,7 @@ var IndexService = class {
|
|
|
3416
4147
|
const files = [];
|
|
3417
4148
|
const entries = await readdir(dir, { withFileTypes: true });
|
|
3418
4149
|
for (const entry of entries) {
|
|
3419
|
-
const fullPath =
|
|
4150
|
+
const fullPath = join8(dir, entry.name);
|
|
3420
4151
|
if (entry.isDirectory()) {
|
|
3421
4152
|
if (!this.ignoreDirs.has(entry.name)) {
|
|
3422
4153
|
files.push(...await this.scanDirectory(fullPath));
|
|
@@ -3517,7 +4248,7 @@ function classifyWebContentType(url, title) {
|
|
|
3517
4248
|
|
|
3518
4249
|
// src/services/manifest.service.ts
|
|
3519
4250
|
import { readFile as readFile6, access as access3, mkdir as mkdir3 } from "fs/promises";
|
|
3520
|
-
import { join as
|
|
4251
|
+
import { join as join9 } from "path";
|
|
3521
4252
|
|
|
3522
4253
|
// src/types/manifest.ts
|
|
3523
4254
|
import { z as z2 } from "zod";
|
|
@@ -3554,7 +4285,7 @@ function createEmptyManifest(storeId) {
|
|
|
3554
4285
|
var ManifestService = class {
|
|
3555
4286
|
manifestsDir;
|
|
3556
4287
|
constructor(dataDir) {
|
|
3557
|
-
this.manifestsDir =
|
|
4288
|
+
this.manifestsDir = join9(dataDir, "manifests");
|
|
3558
4289
|
}
|
|
3559
4290
|
/**
|
|
3560
4291
|
* Initialize the manifests directory.
|
|
@@ -3566,7 +4297,7 @@ var ManifestService = class {
|
|
|
3566
4297
|
* Get the file path for a store's manifest.
|
|
3567
4298
|
*/
|
|
3568
4299
|
getManifestPath(storeId) {
|
|
3569
|
-
return
|
|
4300
|
+
return join9(this.manifestsDir, `${storeId}.manifest.json`);
|
|
3570
4301
|
}
|
|
3571
4302
|
/**
|
|
3572
4303
|
* Load a store's manifest.
|
|
@@ -3650,6 +4381,202 @@ var ManifestService = class {
|
|
|
3650
4381
|
}
|
|
3651
4382
|
};
|
|
3652
4383
|
|
|
4384
|
+
// src/services/reranker.service.ts
|
|
4385
|
+
import { homedir as homedir3 } from "os";
|
|
4386
|
+
import { join as join10 } from "path";
|
|
4387
|
+
import { env as env2, AutoModelForSequenceClassification, AutoTokenizer } from "@huggingface/transformers";
|
|
4388
|
+
// Point the transformers library's cache at ~/.cache/huggingface-transformers.
env2.cacheDir = join10(homedir3(), ".cache", "huggingface-transformers");

// Debug statistics are collected only when BK_DEBUG_RERANKER is exactly "1".
var DEBUG_RERANKER = process.env.BK_DEBUG_RERANKER === "1";

// Module-wide counters updated by RerankerService.rerank() in debug mode.
var debugStats = {
  totalQueries: 0,
  top1Reordered: 0,
  top3Reordered: 0,
  scoreVariancePositive: 0
};

// In debug mode, print the aggregate counters on the process "beforeExit" event.
if (DEBUG_RERANKER) {
  const printAggregateStats = () => {
    RerankerService.logDebugStats();
  };
  process.on("beforeExit", printAggregateStats);
}
|
|
4401
|
+
var RerankerService = class {
|
|
4402
|
+
model = null;
|
|
4403
|
+
tokenizer = null;
|
|
4404
|
+
initPromise = null;
|
|
4405
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
|
|
4406
|
+
disposed = false;
|
|
4407
|
+
config;
|
|
4408
|
+
constructor(config) {
|
|
4409
|
+
this.config = config;
|
|
4410
|
+
}
|
|
4411
|
+
/**
|
|
4412
|
+
* Guard against use-after-dispose
|
|
4413
|
+
*/
|
|
4414
|
+
assertNotDisposed() {
|
|
4415
|
+
if (this.disposed) {
|
|
4416
|
+
throw new Error("RerankerService has been disposed");
|
|
4417
|
+
}
|
|
4418
|
+
}
|
|
4419
|
+
/**
|
|
4420
|
+
* Check if reranking is enabled
|
|
4421
|
+
*/
|
|
4422
|
+
isEnabled() {
|
|
4423
|
+
return this.config.enabled;
|
|
4424
|
+
}
|
|
4425
|
+
/**
|
|
4426
|
+
* Initialize the reranker model (concurrency-safe).
|
|
4427
|
+
* Multiple concurrent calls will share the same initialization promise.
|
|
4428
|
+
*/
|
|
4429
|
+
async initialize() {
|
|
4430
|
+
this.assertNotDisposed();
|
|
4431
|
+
if (this.model !== null && this.tokenizer !== null) return;
|
|
4432
|
+
this.initPromise ??= (async () => {
|
|
4433
|
+
try {
|
|
4434
|
+
const [model, tokenizer] = await Promise.all([
|
|
4435
|
+
AutoModelForSequenceClassification.from_pretrained(this.config.model, {
|
|
4436
|
+
dtype: "fp32"
|
|
4437
|
+
}),
|
|
4438
|
+
AutoTokenizer.from_pretrained(this.config.model)
|
|
4439
|
+
]);
|
|
4440
|
+
this.model = model;
|
|
4441
|
+
this.tokenizer = tokenizer;
|
|
4442
|
+
} catch (error) {
|
|
4443
|
+
this.initPromise = null;
|
|
4444
|
+
throw error;
|
|
4445
|
+
}
|
|
4446
|
+
})();
|
|
4447
|
+
await this.initPromise;
|
|
4448
|
+
}
|
|
4449
|
+
/**
|
|
4450
|
+
* Rerank candidates by scoring query-document pairs with the cross-encoder.
|
|
4451
|
+
* Returns results sorted by reranker score (descending).
|
|
4452
|
+
*/
|
|
4453
|
+
async rerank(query, candidates) {
|
|
4454
|
+
this.assertNotDisposed();
|
|
4455
|
+
const startTime = Date.now();
|
|
4456
|
+
if (!this.config.enabled) {
|
|
4457
|
+
return {
|
|
4458
|
+
results: candidates.map((c) => ({
|
|
4459
|
+
id: c.id,
|
|
4460
|
+
originalScore: c.score,
|
|
4461
|
+
rerankerScore: c.score
|
|
4462
|
+
})),
|
|
4463
|
+
timeMs: Date.now() - startTime
|
|
4464
|
+
};
|
|
4465
|
+
}
|
|
4466
|
+
if (candidates.length === 0) {
|
|
4467
|
+
return { results: [], timeMs: Date.now() - startTime };
|
|
4468
|
+
}
|
|
4469
|
+
if (this.model === null || this.tokenizer === null) {
|
|
4470
|
+
await this.initialize();
|
|
4471
|
+
}
|
|
4472
|
+
if (this.model === null || this.tokenizer === null) {
|
|
4473
|
+
throw new Error("Failed to initialize reranker model");
|
|
4474
|
+
}
|
|
4475
|
+
const toRerank = candidates.slice(0, this.config.topK);
|
|
4476
|
+
const scoredResults = [];
|
|
4477
|
+
for (const candidate of toRerank) {
|
|
4478
|
+
const score = await this.scoreQueryDocPair(query, candidate.content);
|
|
4479
|
+
scoredResults.push({
|
|
4480
|
+
id: candidate.id,
|
|
4481
|
+
originalScore: candidate.score,
|
|
4482
|
+
rerankerScore: score
|
|
4483
|
+
});
|
|
4484
|
+
}
|
|
4485
|
+
const preRankIds = scoredResults.map((r) => r.id);
|
|
4486
|
+
scoredResults.sort((a, b) => b.rerankerScore - a.rerankerScore);
|
|
4487
|
+
if (DEBUG_RERANKER) {
|
|
4488
|
+
const postRankIds = scoredResults.map((r) => r.id);
|
|
4489
|
+
debugStats.totalQueries++;
|
|
4490
|
+
if (preRankIds[0] !== postRankIds[0]) debugStats.top1Reordered++;
|
|
4491
|
+
const preTop3 = preRankIds.slice(0, 3).join(",");
|
|
4492
|
+
const postTop3 = postRankIds.slice(0, 3).join(",");
|
|
4493
|
+
if (preTop3 !== postTop3) debugStats.top3Reordered++;
|
|
4494
|
+
const scores = scoredResults.map((r) => r.rerankerScore);
|
|
4495
|
+
const mean = scores.reduce((a, b) => a + b, 0) / scores.length;
|
|
4496
|
+
const variance = scores.reduce((a, b) => a + (b - mean) ** 2, 0) / scores.length;
|
|
4497
|
+
if (variance > 1e-10) debugStats.scoreVariancePositive++;
|
|
4498
|
+
if (debugStats.totalQueries <= 3) {
|
|
4499
|
+
console.error(
|
|
4500
|
+
`[DEBUG_RERANKER] query #${String(debugStats.totalQueries)}: pre=[${preRankIds.slice(0, 3).join(", ")}] post=[${postRankIds.slice(0, 3).join(", ")}] scores=[${scores.slice(0, 3).map((s) => s.toFixed(4)).join(", ")}] variance=${variance.toFixed(6)}`
|
|
4501
|
+
);
|
|
4502
|
+
}
|
|
4503
|
+
}
|
|
4504
|
+
const finalResults = scoredResults.slice(0, this.config.returnK);
|
|
4505
|
+
return {
|
|
4506
|
+
results: finalResults,
|
|
4507
|
+
timeMs: Date.now() - startTime
|
|
4508
|
+
};
|
|
4509
|
+
}
|
|
4510
|
+
/**
|
|
4511
|
+
* Score a single query-document pair using the cross-encoder.
|
|
4512
|
+
*/
|
|
4513
|
+
async scoreQueryDocPair(query, document) {
|
|
4514
|
+
if (this.model === null || this.tokenizer === null) {
|
|
4515
|
+
throw new Error("Model not initialized");
|
|
4516
|
+
}
|
|
4517
|
+
const inputs = await this.tokenizer([query], {
|
|
4518
|
+
text_pair: [document],
|
|
4519
|
+
padding: true,
|
|
4520
|
+
truncation: true,
|
|
4521
|
+
max_length: 512
|
|
4522
|
+
});
|
|
4523
|
+
const output = await this.model(inputs);
|
|
4524
|
+
const logits = output.logits;
|
|
4525
|
+
const scores = Array.from(logits.data);
|
|
4526
|
+
const score = scores[0];
|
|
4527
|
+
if (typeof score !== "number") {
|
|
4528
|
+
throw new Error("Invalid reranker output: expected numeric score");
|
|
4529
|
+
}
|
|
4530
|
+
return score;
|
|
4531
|
+
}
|
|
4532
|
+
/**
|
|
4533
|
+
* Check if the reranker is initialized.
|
|
4534
|
+
*/
|
|
4535
|
+
isInitialized() {
|
|
4536
|
+
return this.model !== null && this.tokenizer !== null;
|
|
4537
|
+
}
|
|
4538
|
+
/**
|
|
4539
|
+
* Check if this service has been disposed.
|
|
4540
|
+
*/
|
|
4541
|
+
isDisposed() {
|
|
4542
|
+
return this.disposed;
|
|
4543
|
+
}
|
|
4544
|
+
/**
|
|
4545
|
+
* Reset the service to uninitialized state, allowing reuse after disposal.
|
|
4546
|
+
*/
|
|
4547
|
+
async reset() {
|
|
4548
|
+
if (this.model !== null) {
|
|
4549
|
+
await this.model.dispose();
|
|
4550
|
+
this.model = null;
|
|
4551
|
+
}
|
|
4552
|
+
this.tokenizer = null;
|
|
4553
|
+
this.initPromise = null;
|
|
4554
|
+
this.disposed = false;
|
|
4555
|
+
}
|
|
4556
|
+
/**
|
|
4557
|
+
* Log aggregate reranker debug stats. Only meaningful when BK_DEBUG_RERANKER=1.
|
|
4558
|
+
*/
|
|
4559
|
+
static logDebugStats() {
|
|
4560
|
+
if (!DEBUG_RERANKER || debugStats.totalQueries === 0) return;
|
|
4561
|
+
const pct = (n) => (n / debugStats.totalQueries * 100).toFixed(1);
|
|
4562
|
+
console.error(
|
|
4563
|
+
`[DEBUG_RERANKER] Aggregate: ${String(debugStats.totalQueries)} queries, top1 reordered: ${pct(debugStats.top1Reordered)}% (${String(debugStats.top1Reordered)}/${String(debugStats.totalQueries)}), top3 reordered: ${pct(debugStats.top3Reordered)}% (${String(debugStats.top3Reordered)}/${String(debugStats.totalQueries)}), score variance>0: ${pct(debugStats.scoreVariancePositive)}% (${String(debugStats.scoreVariancePositive)}/${String(debugStats.totalQueries)})`
|
|
4564
|
+
);
|
|
4565
|
+
}
|
|
4566
|
+
/**
|
|
4567
|
+
* Dispose the reranker to free resources.
|
|
4568
|
+
*/
|
|
4569
|
+
async dispose() {
|
|
4570
|
+
if (this.model !== null) {
|
|
4571
|
+
await this.model.dispose();
|
|
4572
|
+
this.model = null;
|
|
4573
|
+
}
|
|
4574
|
+
this.tokenizer = null;
|
|
4575
|
+
this.initPromise = null;
|
|
4576
|
+
this.disposed = true;
|
|
4577
|
+
}
|
|
4578
|
+
};
|
|
4579
|
+
|
|
3653
4580
|
// src/services/code-unit.service.ts
|
|
3654
4581
|
var CodeUnitService = class {
|
|
3655
4582
|
extractCodeUnit(code, symbolName, language) {
|
|
@@ -3819,8 +4746,51 @@ var CodeUnitService = class {
|
|
|
3819
4746
|
}
|
|
3820
4747
|
};
|
|
3821
4748
|
|
|
4749
|
+
// src/services/search-env.ts
|
|
4750
|
+
var logger3 = createLogger("search-env");
|
|
4751
|
+
/**
 * Read the search tuning overrides from the environment.
 *
 * @param strict - Forwarded to each parser; when truthy an invalid value throws,
 *   otherwise the parsers warn and yield `undefined`.
 * @returns Object with `rrfK`, `vectorWeight` and `candidateMultiplier`, each
 *   `undefined` when the corresponding variable is unset, empty, or rejected.
 */
function parseSearchEnvOverrides(strict) {
  const { env } = process;
  const rrfK = parseRrfK(env["BK_RRF_K"], strict);
  const vectorWeight = parseVectorWeight(env["BK_RRF_VECTOR_WEIGHT"], strict);
  const candidateMultiplier = parseCandidateMultiplier(env["BK_CANDIDATE_MULTIPLIER"], strict);
  return { rrfK, vectorWeight, candidateMultiplier };
}
|
|
4758
|
+
/**
 * Parse the BK_RRF_K override.
 *
 * @param raw - Raw environment value; `undefined` or `""` means "not set".
 * @param strict - When truthy, an invalid value throws; otherwise it is logged
 *   as a warning and ignored.
 * @returns The positive integer, or `undefined` when unset or invalid (non-strict).
 * @throws {Error} In strict mode when the value is not a positive integer.
 */
function parseRrfK(raw, strict) {
  if (raw === void 0 || raw === "") return void 0;
  const text = String(raw).trim();
  // Number.parseInt alone would accept "60abc" (-> 60) and silently truncate
  // "1.5" (-> 1); require the whole value to be decimal digits first.
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    const msg = `BK_RRF_K must be a positive integer, got: "${raw}"`;
    if (strict) throw new Error(msg);
    logger3.warn(msg);
    return void 0;
  }
  return parsed;
}
|
|
4769
|
+
/**
 * Parse the BK_RRF_VECTOR_WEIGHT override.
 *
 * @param raw - Raw environment value; `undefined` or `""` means "not set".
 * @param strict - When truthy, an invalid value throws; otherwise it is logged
 *   as a warning and ignored.
 * @returns A number in [0, 1], or `undefined` when unset or invalid (non-strict).
 * @throws {Error} In strict mode when the value is not a float between 0 and 1.
 */
function parseVectorWeight(raw, strict) {
  if (raw === void 0 || raw === "") return void 0;
  const text = String(raw).trim();
  // Number.parseFloat alone would accept trailing garbage such as "0.5abc";
  // require the whole value to be a decimal number first.
  const isDecimal = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(text);
  const parsed = isDecimal ? Number.parseFloat(text) : Number.NaN;
  if (Number.isNaN(parsed) || parsed < 0 || parsed > 1) {
    const msg = `BK_RRF_VECTOR_WEIGHT must be a float between 0 and 1, got: "${raw}"`;
    if (strict) throw new Error(msg);
    logger3.warn(msg);
    return void 0;
  }
  return parsed;
}
|
|
4780
|
+
/**
 * Parse the BK_CANDIDATE_MULTIPLIER override.
 *
 * @param raw - Raw environment value; `undefined` or `""` means "not set".
 * @param strict - When truthy, an invalid value throws; otherwise it is logged
 *   as a warning and ignored.
 * @returns An integer >= 1, or `undefined` when unset or invalid (non-strict).
 * @throws {Error} In strict mode when the value is not an integer >= 1.
 */
function parseCandidateMultiplier(raw, strict) {
  if (raw === void 0 || raw === "") return void 0;
  const text = String(raw).trim();
  // Number.parseInt alone would accept "3x" (-> 3) and silently truncate
  // "2.9" (-> 2); require the whole value to be decimal digits first.
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    const msg = `BK_CANDIDATE_MULTIPLIER must be an integer >= 1, got: "${raw}"`;
    if (strict) throw new Error(msg);
    logger3.warn(msg);
    return void 0;
  }
  return parsed;
}
|
|
4791
|
+
|
|
3822
4792
|
// src/services/search.service.ts
|
|
3823
|
-
var
|
|
4793
|
+
var logger4 = createLogger("search-service");
|
|
3824
4794
|
var INTENT_FILE_BOOSTS = {
|
|
3825
4795
|
"how-to": {
|
|
3826
4796
|
"documentation-primary": 1.3,
|
|
@@ -3982,9 +4952,10 @@ function mapSearchIntentToQueryIntent(intent) {
|
|
|
3982
4952
|
}
|
|
3983
4953
|
}
|
|
3984
4954
|
var RRF_PRESETS = {
|
|
3985
|
-
code: { k:
|
|
3986
|
-
web: { k: 30, vectorWeight: 0.
|
|
4955
|
+
code: { k: 25, vectorWeight: 0.75, ftsWeight: 0.25 },
|
|
4956
|
+
web: { k: 30, vectorWeight: 0.7, ftsWeight: 0.3 }
|
|
3987
4957
|
};
|
|
4958
|
+
var DEFAULT_CANDIDATE_MULTIPLIER = 2;
|
|
3988
4959
|
function detectContentType(results) {
|
|
3989
4960
|
const webCount = results.filter((r) => "url" in r.metadata).length;
|
|
3990
4961
|
return webCount > results.length / 2 ? "web" : "code";
|
|
@@ -3993,13 +4964,15 @@ var SearchService = class {
|
|
|
3993
4964
|
lanceStore;
|
|
3994
4965
|
codeUnitService;
|
|
3995
4966
|
codeGraphService;
|
|
4967
|
+
rerankerService;
|
|
3996
4968
|
graphCache;
|
|
3997
4969
|
searchConfig;
|
|
3998
4970
|
unsubscribeCacheInvalidation;
|
|
3999
|
-
constructor(lanceStore, codeGraphService, searchConfig) {
|
|
4971
|
+
constructor(lanceStore, codeGraphService, searchConfig, rerankerService) {
|
|
4000
4972
|
this.lanceStore = lanceStore;
|
|
4001
4973
|
this.codeUnitService = new CodeUnitService();
|
|
4002
4974
|
this.codeGraphService = codeGraphService;
|
|
4975
|
+
this.rerankerService = rerankerService;
|
|
4003
4976
|
this.graphCache = /* @__PURE__ */ new Map();
|
|
4004
4977
|
this.searchConfig = searchConfig;
|
|
4005
4978
|
if (codeGraphService) {
|
|
@@ -4047,7 +5020,7 @@ var SearchService = class {
|
|
|
4047
5020
|
const detail = query.detail ?? "minimal";
|
|
4048
5021
|
const intents = classifyQueryIntents(query.query);
|
|
4049
5022
|
const primaryIntent = query.intent !== void 0 ? mapSearchIntentToQueryIntent(query.intent) : getPrimaryIntent(intents);
|
|
4050
|
-
|
|
5023
|
+
logger4.debug(
|
|
4051
5024
|
{
|
|
4052
5025
|
query: query.query,
|
|
4053
5026
|
mode,
|
|
@@ -4063,6 +5036,7 @@ var SearchService = class {
|
|
|
4063
5036
|
);
|
|
4064
5037
|
let allResults = [];
|
|
4065
5038
|
let maxRawScore = 0;
|
|
5039
|
+
let rerankTimeMs;
|
|
4066
5040
|
const fetchLimit = limit * 3;
|
|
4067
5041
|
if (mode === "vector") {
|
|
4068
5042
|
const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
|
|
@@ -4079,16 +5053,17 @@ var SearchService = class {
|
|
|
4079
5053
|
);
|
|
4080
5054
|
allResults = hybridResult.results;
|
|
4081
5055
|
maxRawScore = hybridResult.maxRawScore;
|
|
5056
|
+
rerankTimeMs = hybridResult.rerankTimeMs;
|
|
4082
5057
|
}
|
|
4083
5058
|
if (query.minRelevance !== void 0) {
|
|
4084
5059
|
if (mode === "fts") {
|
|
4085
|
-
|
|
5060
|
+
logger4.warn(
|
|
4086
5061
|
{ query: query.query, minRelevance: query.minRelevance },
|
|
4087
5062
|
"minRelevance filter ignored in FTS mode (no vector scores available)"
|
|
4088
5063
|
);
|
|
4089
5064
|
} else if (maxRawScore < query.minRelevance) {
|
|
4090
5065
|
const timeMs2 = Date.now() - startTime;
|
|
4091
|
-
|
|
5066
|
+
logger4.info(
|
|
4092
5067
|
{
|
|
4093
5068
|
query: query.query,
|
|
4094
5069
|
mode,
|
|
@@ -4125,7 +5100,7 @@ var SearchService = class {
|
|
|
4125
5100
|
});
|
|
4126
5101
|
const timeMs = Date.now() - startTime;
|
|
4127
5102
|
const confidence = mode !== "fts" ? this.calculateConfidence(maxRawScore) : void 0;
|
|
4128
|
-
|
|
5103
|
+
logger4.info(
|
|
4129
5104
|
{
|
|
4130
5105
|
query: query.query,
|
|
4131
5106
|
mode,
|
|
@@ -4138,7 +5113,7 @@ var SearchService = class {
|
|
|
4138
5113
|
},
|
|
4139
5114
|
"Search complete"
|
|
4140
5115
|
);
|
|
4141
|
-
|
|
5116
|
+
const response = {
|
|
4142
5117
|
query: query.query,
|
|
4143
5118
|
mode,
|
|
4144
5119
|
stores,
|
|
@@ -4148,6 +5123,10 @@ var SearchService = class {
|
|
|
4148
5123
|
confidence,
|
|
4149
5124
|
maxRawScore: mode !== "fts" ? maxRawScore : void 0
|
|
4150
5125
|
};
|
|
5126
|
+
if (rerankTimeMs !== void 0) {
|
|
5127
|
+
Object.assign(response, { rerankTimeMs });
|
|
5128
|
+
}
|
|
5129
|
+
return response;
|
|
4151
5130
|
}
|
|
4152
5131
|
/**
|
|
4153
5132
|
* Deduplicate results by source file path.
|
|
@@ -4253,14 +5232,16 @@ var SearchService = class {
|
|
|
4253
5232
|
*/
|
|
4254
5233
|
async hybridSearchWithMetadata(query, stores, limit, threshold) {
|
|
4255
5234
|
const intents = classifyQueryIntents(query);
|
|
4256
|
-
const
|
|
5235
|
+
const envOverrides = parseSearchEnvOverrides(false);
|
|
5236
|
+
const candidateMultiplier = envOverrides.candidateMultiplier ?? DEFAULT_CANDIDATE_MULTIPLIER;
|
|
5237
|
+
const rawVectorResults = await this.vectorSearchRaw(query, stores, limit * candidateMultiplier);
|
|
4257
5238
|
const rawVectorScores = /* @__PURE__ */ new Map();
|
|
4258
5239
|
rawVectorResults.forEach((r) => {
|
|
4259
5240
|
rawVectorScores.set(r.id, r.score);
|
|
4260
5241
|
});
|
|
4261
5242
|
const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
|
|
4262
5243
|
const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
|
|
4263
|
-
const ftsResults = await this.ftsSearch(query, stores, limit *
|
|
5244
|
+
const ftsResults = await this.ftsSearch(query, stores, limit * candidateMultiplier);
|
|
4264
5245
|
const vectorRanks = /* @__PURE__ */ new Map();
|
|
4265
5246
|
const ftsRanks = /* @__PURE__ */ new Map();
|
|
4266
5247
|
const allDocs = /* @__PURE__ */ new Map();
|
|
@@ -4276,8 +5257,11 @@ var SearchService = class {
|
|
|
4276
5257
|
});
|
|
4277
5258
|
const rrfScores = [];
|
|
4278
5259
|
const contentType = detectContentType([...allDocs.values()]);
|
|
4279
|
-
const
|
|
4280
|
-
|
|
5260
|
+
const preset = RRF_PRESETS[contentType];
|
|
5261
|
+
const k = envOverrides.rrfK ?? preset.k;
|
|
5262
|
+
const vectorWeight = envOverrides.vectorWeight ?? preset.vectorWeight;
|
|
5263
|
+
const ftsWeight = 1 - vectorWeight;
|
|
5264
|
+
for (const [id, result2] of allDocs) {
|
|
4281
5265
|
const vectorRank = vectorRanks.get(id) ?? Infinity;
|
|
4282
5266
|
const ftsRank = ftsRanks.get(id) ?? Infinity;
|
|
4283
5267
|
const rawVectorScore = rawVectorScores.get(id);
|
|
@@ -4285,19 +5269,23 @@ var SearchService = class {
|
|
|
4285
5269
|
const ftsRRF = ftsRank !== Infinity ? ftsWeight / (k + ftsRank) : 0;
|
|
4286
5270
|
const fileTypeBoost = this.getFileTypeBoost(
|
|
4287
5271
|
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
4288
|
-
|
|
5272
|
+
result2.metadata["fileType"],
|
|
4289
5273
|
intents
|
|
4290
5274
|
);
|
|
4291
|
-
const frameworkBoost = this.getFrameworkContextBoost(query,
|
|
4292
|
-
const urlKeywordBoost = this.getUrlKeywordBoost(query,
|
|
4293
|
-
const pathKeywordBoost = this.getPathKeywordBoost(query,
|
|
5275
|
+
const frameworkBoost = this.getFrameworkContextBoost(query, result2);
|
|
5276
|
+
const urlKeywordBoost = this.getUrlKeywordBoost(query, result2);
|
|
5277
|
+
const pathKeywordBoost = this.getPathKeywordBoost(query, result2);
|
|
5278
|
+
const depthBoost = this.getDepthBoost(result2, getPrimaryIntent(intents));
|
|
5279
|
+
const entryPointBoost = this.getEntryPointBoost(result2, getPrimaryIntent(intents));
|
|
4294
5280
|
const metadata = {
|
|
4295
5281
|
vectorRRF,
|
|
4296
5282
|
ftsRRF,
|
|
4297
5283
|
fileTypeBoost,
|
|
4298
5284
|
frameworkBoost,
|
|
4299
5285
|
urlKeywordBoost,
|
|
4300
|
-
pathKeywordBoost
|
|
5286
|
+
pathKeywordBoost,
|
|
5287
|
+
depthBoost,
|
|
5288
|
+
entryPointBoost
|
|
4301
5289
|
};
|
|
4302
5290
|
if (vectorRank !== Infinity) {
|
|
4303
5291
|
metadata.vectorRank = vectorRank;
|
|
@@ -4310,13 +5298,34 @@ var SearchService = class {
|
|
|
4310
5298
|
}
|
|
4311
5299
|
rrfScores.push({
|
|
4312
5300
|
id,
|
|
4313
|
-
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
|
|
4314
|
-
result,
|
|
5301
|
+
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost * depthBoost * entryPointBoost,
|
|
5302
|
+
result: result2,
|
|
4315
5303
|
rawVectorScore,
|
|
4316
5304
|
metadata
|
|
4317
5305
|
});
|
|
4318
5306
|
}
|
|
4319
|
-
const
|
|
5307
|
+
const sortedAll = rrfScores.sort((a, b) => b.score - a.score);
|
|
5308
|
+
let rerankTimeMs;
|
|
5309
|
+
let sorted;
|
|
5310
|
+
if (this.rerankerService !== void 0 && this.rerankerService.isEnabled() && sortedAll.length > 0) {
|
|
5311
|
+
const candidates = sortedAll.map((r) => ({
|
|
5312
|
+
id: r.id,
|
|
5313
|
+
content: r.result.content,
|
|
5314
|
+
score: r.score
|
|
5315
|
+
}));
|
|
5316
|
+
const reranked = await this.rerankerService.rerank(query, candidates);
|
|
5317
|
+
rerankTimeMs = reranked.timeMs;
|
|
5318
|
+
const rerankedScores = /* @__PURE__ */ new Map();
|
|
5319
|
+
reranked.results.forEach((r) => {
|
|
5320
|
+
rerankedScores.set(r.id, r.rerankerScore);
|
|
5321
|
+
});
|
|
5322
|
+
sorted = sortedAll.map((r) => ({
|
|
5323
|
+
...r,
|
|
5324
|
+
rerankerScore: rerankedScores.get(r.id)
|
|
5325
|
+
})).sort((a, b) => (b.rerankerScore ?? -Infinity) - (a.rerankerScore ?? -Infinity)).slice(0, limit);
|
|
5326
|
+
} else {
|
|
5327
|
+
sorted = sortedAll.slice(0, limit);
|
|
5328
|
+
}
|
|
4320
5329
|
let normalizedResults;
|
|
4321
5330
|
if (sorted.length > 0) {
|
|
4322
5331
|
const first = sorted[0];
|
|
@@ -4351,7 +5360,14 @@ var SearchService = class {
|
|
|
4351
5360
|
if (threshold !== void 0) {
|
|
4352
5361
|
normalizedResults = normalizedResults.filter((r) => r.score >= threshold);
|
|
4353
5362
|
}
|
|
4354
|
-
|
|
5363
|
+
const result = {
|
|
5364
|
+
results: normalizedResults,
|
|
5365
|
+
maxRawScore
|
|
5366
|
+
};
|
|
5367
|
+
if (rerankTimeMs !== void 0) {
|
|
5368
|
+
result.rerankTimeMs = rerankTimeMs;
|
|
5369
|
+
}
|
|
5370
|
+
return result;
|
|
4355
5371
|
}
|
|
4356
5372
|
async searchAllStores(query, storeIds) {
|
|
4357
5373
|
return this.search({
|
|
@@ -4500,6 +5516,33 @@ var SearchService = class {
|
|
|
4500
5516
|
const matchRatio = matchingTerms.length / queryTerms.length;
|
|
4501
5517
|
return 1 + 1 * matchRatio;
|
|
4502
5518
|
}
|
|
5519
|
+
/**
|
|
5520
|
+
* Get a depth-based score multiplier, gated by query intent.
|
|
5521
|
+
* Root-level files (depth 0) are boosted for how-to and conceptual queries
|
|
5522
|
+
* where high-level docs/READMEs are most relevant.
|
|
5523
|
+
* Returns 1.0 (no-op) for implementation/debugging/comparison intents
|
|
5524
|
+
* to avoid disturbing candidate ordering before reranker.
|
|
5525
|
+
*/
|
|
5526
|
+
getDepthBoost(result, intent) {
|
|
5527
|
+
if (intent !== "how-to" && intent !== "conceptual") return 1;
|
|
5528
|
+
const depth = result.metadata["depth"];
|
|
5529
|
+
if (typeof depth !== "number") return 1;
|
|
5530
|
+
if (depth === 0) return 1.08;
|
|
5531
|
+
if (depth === 1) return 1.04;
|
|
5532
|
+
return 1;
|
|
5533
|
+
}
|
|
5534
|
+
/**
|
|
5535
|
+
* Get an entry-point score multiplier, gated by query intent.
|
|
5536
|
+
* Entry-point files (index.ts, main.py, etc.) are boosted for how-to
|
|
5537
|
+
* and implementation queries where API surfaces are most relevant.
|
|
5538
|
+
* Returns 1.0 (no-op) for debugging/comparison/conceptual intents.
|
|
5539
|
+
*/
|
|
5540
|
+
getEntryPointBoost(result, intent) {
|
|
5541
|
+
if (intent !== "how-to" && intent !== "implementation") return 1;
|
|
5542
|
+
const isEntryPoint = result.metadata["isEntryPoint"];
|
|
5543
|
+
if (isEntryPoint !== true) return 1;
|
|
5544
|
+
return 1.08;
|
|
5545
|
+
}
|
|
4503
5546
|
/**
|
|
4504
5547
|
* Get a score multiplier based on framework context.
|
|
4505
5548
|
* If query mentions a framework, boost results from that framework's files.
|
|
@@ -4798,7 +5841,7 @@ var SearchService = class {
|
|
|
4798
5841
|
|
|
4799
5842
|
// src/services/store-definition.service.ts
|
|
4800
5843
|
import { readFile as readFile7, access as access4 } from "fs/promises";
|
|
4801
|
-
import { resolve as resolve2, isAbsolute as isAbsolute2, join as
|
|
5844
|
+
import { resolve as resolve2, isAbsolute as isAbsolute2, join as join11 } from "path";
|
|
4802
5845
|
|
|
4803
5846
|
// src/types/store-definition.ts
|
|
4804
5847
|
import { z as z3 } from "zod";
|
|
@@ -4889,7 +5932,7 @@ var StoreDefinitionService = class {
|
|
|
4889
5932
|
config = null;
|
|
4890
5933
|
constructor(projectRoot) {
|
|
4891
5934
|
this.projectRoot = projectRoot ?? ProjectRootService.resolve();
|
|
4892
|
-
this.configPath =
|
|
5935
|
+
this.configPath = join11(this.projectRoot, ".bluera/bluera-knowledge/stores.config.json");
|
|
4893
5936
|
}
|
|
4894
5937
|
/**
|
|
4895
5938
|
* Load store definitions from config file.
|
|
@@ -5028,12 +6071,12 @@ var StoreDefinitionService = class {
|
|
|
5028
6071
|
// src/services/store.service.ts
|
|
5029
6072
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
5030
6073
|
import { readFile as readFile8, mkdir as mkdir5, stat as stat3, access as access5 } from "fs/promises";
|
|
5031
|
-
import { join as
|
|
6074
|
+
import { join as join12, resolve as resolve3, relative as relative2, isAbsolute as isAbsolute3 } from "path";
|
|
5032
6075
|
|
|
5033
6076
|
// src/plugin/git-clone.ts
|
|
5034
6077
|
import { spawn } from "child_process";
|
|
5035
6078
|
import { mkdir as mkdir4 } from "fs/promises";
|
|
5036
|
-
var
|
|
6079
|
+
var logger5 = createLogger("git-clone");
|
|
5037
6080
|
function sanitizeUrl(url) {
|
|
5038
6081
|
try {
|
|
5039
6082
|
const parsed = new URL(url);
|
|
@@ -5065,7 +6108,7 @@ async function cloneRepository(options) {
|
|
|
5065
6108
|
args.push("--branch", branch);
|
|
5066
6109
|
}
|
|
5067
6110
|
args.push(url, targetDir);
|
|
5068
|
-
|
|
6111
|
+
logger5.info(
|
|
5069
6112
|
{
|
|
5070
6113
|
url: sanitizeUrl(url),
|
|
5071
6114
|
targetDir,
|
|
@@ -5102,7 +6145,7 @@ async function cloneRepository(options) {
|
|
|
5102
6145
|
clearTimeout(timeout);
|
|
5103
6146
|
if (forceKillTimeout) clearTimeout(forceKillTimeout);
|
|
5104
6147
|
if (timedOut) {
|
|
5105
|
-
resolve4(err(new Error(`Git clone timed out after ${String(timeoutMs)}ms`)));
|
|
6148
|
+
resolve4(err(new Error(`Git clone timed out after ${String(timeoutMs)}ms for: ${url}`)));
|
|
5106
6149
|
} else if (code === 0) {
|
|
5107
6150
|
resolve4(ok(targetDir));
|
|
5108
6151
|
} else {
|
|
@@ -5127,7 +6170,7 @@ function extractRepoName(url) {
|
|
|
5127
6170
|
var CURRENT_SCHEMA_VERSION = 3;
|
|
5128
6171
|
|
|
5129
6172
|
// src/services/store.service.ts
|
|
5130
|
-
var
|
|
6173
|
+
var logger6 = createLogger("store-service");
|
|
5131
6174
|
async function fileExists4(path4) {
|
|
5132
6175
|
try {
|
|
5133
6176
|
await access5(path4);
|
|
@@ -5176,10 +6219,10 @@ var StoreService = class {
|
|
|
5176
6219
|
}
|
|
5177
6220
|
const relativePath = relative2(this.projectRoot, absolutePath);
|
|
5178
6221
|
if (relativePath.startsWith("..") || isAbsolute3(relativePath)) {
|
|
5179
|
-
|
|
6222
|
+
logger6.info(`Path outside projectRoot, storing absolute: ${absolutePath}`);
|
|
5180
6223
|
return { path: absolutePath, pathType: "absolute" };
|
|
5181
6224
|
}
|
|
5182
|
-
|
|
6225
|
+
logger6.debug(`Storing relative path: ${relativePath} (resolved from ${absolutePath})`);
|
|
5183
6226
|
return { path: relativePath, pathType: "relative" };
|
|
5184
6227
|
}
|
|
5185
6228
|
/**
|
|
@@ -5194,11 +6237,11 @@ var StoreService = class {
|
|
|
5194
6237
|
return storedPath;
|
|
5195
6238
|
}
|
|
5196
6239
|
if (this.projectRoot === void 0) {
|
|
5197
|
-
|
|
6240
|
+
logger6.error(`Store has relative path but no projectRoot: ${storedPath}`);
|
|
5198
6241
|
return storedPath;
|
|
5199
6242
|
}
|
|
5200
6243
|
const resolved = resolve3(this.projectRoot, storedPath);
|
|
5201
|
-
|
|
6244
|
+
logger6.debug(`Resolved relative path: ${storedPath} \u2192 ${resolved}`);
|
|
5202
6245
|
return resolved;
|
|
5203
6246
|
}
|
|
5204
6247
|
async initialize() {
|
|
@@ -5210,7 +6253,7 @@ var StoreService = class {
|
|
|
5210
6253
|
* This enables MCP server to see stores created by CLI without restart.
|
|
5211
6254
|
*/
|
|
5212
6255
|
async ensureRegistryFresh() {
|
|
5213
|
-
const registryPath =
|
|
6256
|
+
const registryPath = join12(this.dataDir, "stores.json");
|
|
5214
6257
|
try {
|
|
5215
6258
|
const stats = await stat3(registryPath);
|
|
5216
6259
|
if (stats.mtimeMs > this.registryMtime) {
|
|
@@ -5366,7 +6409,7 @@ var StoreService = class {
|
|
|
5366
6409
|
case "repo": {
|
|
5367
6410
|
let repoPath = input.path;
|
|
5368
6411
|
if (input.url !== void 0) {
|
|
5369
|
-
const cloneDir =
|
|
6412
|
+
const cloneDir = join12(this.dataDir, "repos", id);
|
|
5370
6413
|
const result = await cloneRepository({
|
|
5371
6414
|
url: input.url,
|
|
5372
6415
|
targetDir: cloneDir,
|
|
@@ -5567,7 +6610,7 @@ var StoreService = class {
|
|
|
5567
6610
|
await this.saveRegistry();
|
|
5568
6611
|
}
|
|
5569
6612
|
async loadRegistry() {
|
|
5570
|
-
const registryPath =
|
|
6613
|
+
const registryPath = join12(this.dataDir, "stores.json");
|
|
5571
6614
|
const exists = await fileExists4(registryPath);
|
|
5572
6615
|
if (!exists) {
|
|
5573
6616
|
this.registry = { stores: [] };
|
|
@@ -5602,7 +6645,7 @@ var StoreService = class {
|
|
|
5602
6645
|
})
|
|
5603
6646
|
};
|
|
5604
6647
|
if (migrationNeeded) {
|
|
5605
|
-
|
|
6648
|
+
logger6.debug("Schema migration needed - will upgrade on next save");
|
|
5606
6649
|
this.needsMigration = true;
|
|
5607
6650
|
}
|
|
5608
6651
|
} catch (error) {
|
|
@@ -5614,7 +6657,7 @@ var StoreService = class {
|
|
|
5614
6657
|
this.registryMtime = loadedStats.mtimeMs;
|
|
5615
6658
|
}
|
|
5616
6659
|
async saveRegistry() {
|
|
5617
|
-
const registryPath =
|
|
6660
|
+
const registryPath = join12(this.dataDir, "stores.json");
|
|
5618
6661
|
const storedStores = this.registry.stores.map((store) => {
|
|
5619
6662
|
const schemaVersion = this.needsMigration ? CURRENT_SCHEMA_VERSION : store.schemaVersion;
|
|
5620
6663
|
if (store.type === "file") {
|
|
@@ -5642,7 +6685,7 @@ var StoreService = class {
|
|
|
5642
6685
|
}
|
|
5643
6686
|
});
|
|
5644
6687
|
if (this.needsMigration) {
|
|
5645
|
-
|
|
6688
|
+
logger6.info("Schema migration complete - upgraded to v3");
|
|
5646
6689
|
this.needsMigration = false;
|
|
5647
6690
|
}
|
|
5648
6691
|
await atomicWriteFile(registryPath, JSON.stringify({ stores: storedStores }, null, 2));
|
|
@@ -5693,7 +6736,7 @@ function validateParsePythonResult(data) {
|
|
|
5693
6736
|
}
|
|
5694
6737
|
|
|
5695
6738
|
// src/crawl/bridge.ts
|
|
5696
|
-
var
|
|
6739
|
+
var logger7 = createLogger("python-bridge");
|
|
5697
6740
|
function getPythonExecutable() {
|
|
5698
6741
|
return process.platform === "win32" ? "python" : "python3";
|
|
5699
6742
|
}
|
|
@@ -5728,7 +6771,7 @@ var PythonBridge = class {
|
|
|
5728
6771
|
pythonWorkerPath = path3.join(projectRoot, "python", "ast_worker.py");
|
|
5729
6772
|
pythonPath = getPythonExecutable();
|
|
5730
6773
|
}
|
|
5731
|
-
|
|
6774
|
+
logger7.debug(
|
|
5732
6775
|
{ pythonWorkerPath, pythonPath, currentFilePath, isProduction },
|
|
5733
6776
|
"Starting Python bridge process"
|
|
5734
6777
|
);
|
|
@@ -5736,15 +6779,15 @@ var PythonBridge = class {
|
|
|
5736
6779
|
stdio: ["pipe", "pipe", "pipe"]
|
|
5737
6780
|
});
|
|
5738
6781
|
this.process.on("error", (err2) => {
|
|
5739
|
-
|
|
6782
|
+
logger7.error({ error: err2.message, stack: err2.stack }, "Python bridge process error");
|
|
5740
6783
|
this.rejectAllPending(new Error(`Process error: ${err2.message}`));
|
|
5741
6784
|
});
|
|
5742
6785
|
this.process.on("exit", (code, signal) => {
|
|
5743
6786
|
if (code !== 0 && code !== null) {
|
|
5744
|
-
|
|
6787
|
+
logger7.error({ code }, "Python bridge process exited with non-zero code");
|
|
5745
6788
|
this.rejectAllPending(new Error(`Process exited with code ${String(code)}`));
|
|
5746
6789
|
} else if (signal && !this.stoppingIntentionally) {
|
|
5747
|
-
|
|
6790
|
+
logger7.error({ signal }, "Python bridge process killed with signal");
|
|
5748
6791
|
this.rejectAllPending(new Error(`Process killed with signal ${signal}`));
|
|
5749
6792
|
}
|
|
5750
6793
|
this.process = null;
|
|
@@ -5753,7 +6796,7 @@ var PythonBridge = class {
|
|
|
5753
6796
|
if (this.process.stderr) {
|
|
5754
6797
|
this.stderrReadline = createInterface({ input: this.process.stderr });
|
|
5755
6798
|
this.stderrReadline.on("line", (line) => {
|
|
5756
|
-
|
|
6799
|
+
logger7.warn({ stderr: line }, "Python bridge stderr output");
|
|
5757
6800
|
});
|
|
5758
6801
|
}
|
|
5759
6802
|
if (this.process.stdout === null) {
|
|
@@ -5782,7 +6825,7 @@ var PythonBridge = class {
|
|
|
5782
6825
|
pending.resolve(validated);
|
|
5783
6826
|
} catch (error) {
|
|
5784
6827
|
if (error instanceof ZodError) {
|
|
5785
|
-
|
|
6828
|
+
logger7.error(
|
|
5786
6829
|
{
|
|
5787
6830
|
issues: error.issues,
|
|
5788
6831
|
response: JSON.stringify(response.result)
|
|
@@ -5794,14 +6837,14 @@ var PythonBridge = class {
|
|
|
5794
6837
|
);
|
|
5795
6838
|
} else {
|
|
5796
6839
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
5797
|
-
|
|
6840
|
+
logger7.error({ error: errorMessage }, "Response validation error");
|
|
5798
6841
|
pending.reject(new Error(`Response validation error: ${errorMessage}`));
|
|
5799
6842
|
}
|
|
5800
6843
|
}
|
|
5801
6844
|
}
|
|
5802
6845
|
}
|
|
5803
6846
|
} catch (err2) {
|
|
5804
|
-
|
|
6847
|
+
logger7.error(
|
|
5805
6848
|
{
|
|
5806
6849
|
error: err2 instanceof Error ? err2.message : String(err2),
|
|
5807
6850
|
line
|
|
@@ -5897,242 +6940,9 @@ var PythonBridge = class {
|
|
|
5897
6940
|
}
|
|
5898
6941
|
};
|
|
5899
6942
|
|
|
5900
|
-
// src/db/embeddings.ts
|
|
5901
|
-
import { homedir as homedir2 } from "os";
|
|
5902
|
-
import { join as join11 } from "path";
|
|
5903
|
-
import { pipeline, env } from "@huggingface/transformers";
|
|
5904
|
-
env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
|
|
5905
|
-
var DEFAULT_EMBEDDING_CONFIG = {
|
|
5906
|
-
model: "Xenova/bge-small-en-v1.5",
|
|
5907
|
-
batchSize: 32,
|
|
5908
|
-
dtype: "fp32",
|
|
5909
|
-
pooling: "mean",
|
|
5910
|
-
normalize: true,
|
|
5911
|
-
queryPrefix: "Represent this sentence for searching relevant passages: ",
|
|
5912
|
-
docPrefix: "",
|
|
5913
|
-
maxInFlightBatches: 1
|
|
5914
|
-
};
|
|
5915
|
-
var EmbeddingEngine = class {
|
|
5916
|
-
extractor = null;
|
|
5917
|
-
initPromise = null;
|
|
5918
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed() and embedBatch()
|
|
5919
|
-
_dimensions = null;
|
|
5920
|
-
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
|
|
5921
|
-
disposed = false;
|
|
5922
|
-
config;
|
|
5923
|
-
constructor(config = DEFAULT_EMBEDDING_CONFIG) {
|
|
5924
|
-
this.config = config;
|
|
5925
|
-
}
|
|
5926
|
-
/**
|
|
5927
|
-
* Guard against use-after-dispose
|
|
5928
|
-
*/
|
|
5929
|
-
assertNotDisposed() {
|
|
5930
|
-
if (this.disposed) {
|
|
5931
|
-
throw new Error("EmbeddingEngine has been disposed");
|
|
5932
|
-
}
|
|
5933
|
-
}
|
|
5934
|
-
/**
|
|
5935
|
-
* Initialize the embedding pipeline (concurrency-safe).
|
|
5936
|
-
* Multiple concurrent calls will share the same initialization promise.
|
|
5937
|
-
*/
|
|
5938
|
-
async initialize() {
|
|
5939
|
-
this.assertNotDisposed();
|
|
5940
|
-
if (this.extractor !== null) return;
|
|
5941
|
-
this.initPromise ??= (async () => {
|
|
5942
|
-
try {
|
|
5943
|
-
this.extractor = await pipeline("feature-extraction", this.config.model, {
|
|
5944
|
-
dtype: this.config.dtype
|
|
5945
|
-
});
|
|
5946
|
-
} catch (error) {
|
|
5947
|
-
this.initPromise = null;
|
|
5948
|
-
throw error;
|
|
5949
|
-
}
|
|
5950
|
-
})();
|
|
5951
|
-
await this.initPromise;
|
|
5952
|
-
}
|
|
5953
|
-
/**
|
|
5954
|
-
* Embed a search query. Applies queryPrefix for asymmetric models.
|
|
5955
|
-
*/
|
|
5956
|
-
async embedQuery(text) {
|
|
5957
|
-
return this.embedText(this.config.queryPrefix + text);
|
|
5958
|
-
}
|
|
5959
|
-
/**
|
|
5960
|
-
* Embed a document for indexing. Applies docPrefix for asymmetric models.
|
|
5961
|
-
*/
|
|
5962
|
-
async embedDocument(text) {
|
|
5963
|
-
return this.embedText(this.config.docPrefix + text);
|
|
5964
|
-
}
|
|
5965
|
-
/**
|
|
5966
|
-
* Internal: embed text without prefix.
|
|
5967
|
-
*/
|
|
5968
|
-
async embedText(text) {
|
|
5969
|
-
this.assertNotDisposed();
|
|
5970
|
-
if (this.extractor === null) {
|
|
5971
|
-
await this.initialize();
|
|
5972
|
-
}
|
|
5973
|
-
if (this.extractor === null) {
|
|
5974
|
-
throw new Error("Failed to initialize embedding model");
|
|
5975
|
-
}
|
|
5976
|
-
const output = await this.extractor(text, {
|
|
5977
|
-
pooling: this.config.pooling,
|
|
5978
|
-
normalize: this.config.normalize
|
|
5979
|
-
});
|
|
5980
|
-
const dim = output.dims[output.dims.length - 1] ?? 0;
|
|
5981
|
-
this._dimensions ??= dim;
|
|
5982
|
-
return Float32Array.from(output.data);
|
|
5983
|
-
}
|
|
5984
|
-
/**
|
|
5985
|
-
* Embed a batch of documents with optional parallelism.
|
|
5986
|
-
* When maxInFlightBatches > 1, processes multiple batches concurrently.
|
|
5987
|
-
*/
|
|
5988
|
-
async embedBatch(texts) {
|
|
5989
|
-
this.assertNotDisposed();
|
|
5990
|
-
if (this.extractor === null) {
|
|
5991
|
-
await this.initialize();
|
|
5992
|
-
}
|
|
5993
|
-
if (this.extractor === null) {
|
|
5994
|
-
throw new Error("Failed to initialize embedding model");
|
|
5995
|
-
}
|
|
5996
|
-
const batches = [];
|
|
5997
|
-
for (let i = 0; i < texts.length; i += this.config.batchSize) {
|
|
5998
|
-
batches.push(texts.slice(i, i + this.config.batchSize));
|
|
5999
|
-
}
|
|
6000
|
-
if (batches.length === 0) {
|
|
6001
|
-
return [];
|
|
6002
|
-
}
|
|
6003
|
-
if (this.config.maxInFlightBatches <= 1) {
|
|
6004
|
-
return this.embedBatchesSequential(batches);
|
|
6005
|
-
} else {
|
|
6006
|
-
return this.embedBatchesConcurrent(batches);
|
|
6007
|
-
}
|
|
6008
|
-
}
|
|
6009
|
-
/**
|
|
6010
|
-
* Process batches sequentially (original behavior).
|
|
6011
|
-
*/
|
|
6012
|
-
async embedBatchesSequential(batches) {
|
|
6013
|
-
const results = [];
|
|
6014
|
-
for (let i = 0; i < batches.length; i++) {
|
|
6015
|
-
const batch = batches[i];
|
|
6016
|
-
if (batch === void 0) continue;
|
|
6017
|
-
const batchResults = await this.processSingleBatch(batch);
|
|
6018
|
-
results.push(...batchResults);
|
|
6019
|
-
if (i < batches.length - 1) {
|
|
6020
|
-
await new Promise((resolve4) => setImmediate(resolve4));
|
|
6021
|
-
}
|
|
6022
|
-
}
|
|
6023
|
-
return results;
|
|
6024
|
-
}
|
|
6025
|
-
/**
|
|
6026
|
-
* Process batches with controlled concurrency.
|
|
6027
|
-
*/
|
|
6028
|
-
async embedBatchesConcurrent(batches) {
|
|
6029
|
-
const results = new Array(batches.length);
|
|
6030
|
-
let inFlight = 0;
|
|
6031
|
-
const maxConcurrent = this.config.maxInFlightBatches;
|
|
6032
|
-
await Promise.all(
|
|
6033
|
-
batches.map(async (batch, idx) => {
|
|
6034
|
-
while (inFlight >= maxConcurrent) {
|
|
6035
|
-
await new Promise((resolve4) => setImmediate(resolve4));
|
|
6036
|
-
}
|
|
6037
|
-
inFlight++;
|
|
6038
|
-
try {
|
|
6039
|
-
results[idx] = await this.processSingleBatch(batch);
|
|
6040
|
-
} finally {
|
|
6041
|
-
inFlight--;
|
|
6042
|
-
}
|
|
6043
|
-
})
|
|
6044
|
-
);
|
|
6045
|
-
return results.flat();
|
|
6046
|
-
}
|
|
6047
|
-
/**
|
|
6048
|
-
* Process a single batch and return embeddings.
|
|
6049
|
-
*/
|
|
6050
|
-
async processSingleBatch(batch) {
|
|
6051
|
-
if (this.extractor === null) {
|
|
6052
|
-
throw new Error("Extractor not initialized");
|
|
6053
|
-
}
|
|
6054
|
-
const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
|
|
6055
|
-
const output = await this.extractor(prefixedBatch, {
|
|
6056
|
-
pooling: this.config.pooling,
|
|
6057
|
-
normalize: this.config.normalize
|
|
6058
|
-
});
|
|
6059
|
-
const dim = output.dims[output.dims.length - 1] ?? 0;
|
|
6060
|
-
const batchResults = [];
|
|
6061
|
-
for (let b = 0; b < batch.length; b++) {
|
|
6062
|
-
const start = b * dim;
|
|
6063
|
-
const end = start + dim;
|
|
6064
|
-
batchResults.push(Float32Array.from(output.data.slice(start, end)));
|
|
6065
|
-
}
|
|
6066
|
-
this._dimensions ??= dim;
|
|
6067
|
-
return batchResults;
|
|
6068
|
-
}
|
|
6069
|
-
/**
|
|
6070
|
-
* Get cached embedding dimensions. Throws if embed() hasn't been called yet.
|
|
6071
|
-
* Use ensureDimensions() if you need to guarantee dimensions are available.
|
|
6072
|
-
*/
|
|
6073
|
-
getDimensions() {
|
|
6074
|
-
if (this._dimensions === null) {
|
|
6075
|
-
throw new Error("Cannot get dimensions before first embed() call");
|
|
6076
|
-
}
|
|
6077
|
-
return this._dimensions;
|
|
6078
|
-
}
|
|
6079
|
-
/**
|
|
6080
|
-
* Check if the embedding pipeline is initialized.
|
|
6081
|
-
*/
|
|
6082
|
-
isInitialized() {
|
|
6083
|
-
return this.extractor !== null;
|
|
6084
|
-
}
|
|
6085
|
-
/**
|
|
6086
|
-
* Check if this engine has been disposed.
|
|
6087
|
-
*/
|
|
6088
|
-
isDisposed() {
|
|
6089
|
-
return this.disposed;
|
|
6090
|
-
}
|
|
6091
|
-
/**
|
|
6092
|
-
* Reset the engine to uninitialized state, allowing reuse after disposal.
|
|
6093
|
-
* If currently initialized, disposes the pipeline first.
|
|
6094
|
-
*/
|
|
6095
|
-
async reset() {
|
|
6096
|
-
if (this.extractor !== null) {
|
|
6097
|
-
await this.extractor.dispose();
|
|
6098
|
-
this.extractor = null;
|
|
6099
|
-
}
|
|
6100
|
-
this.initPromise = null;
|
|
6101
|
-
this._dimensions = null;
|
|
6102
|
-
this.disposed = false;
|
|
6103
|
-
}
|
|
6104
|
-
/**
|
|
6105
|
-
* Ensure dimensions are available, initializing the model if needed.
|
|
6106
|
-
* Returns the embedding dimensions for the current model.
|
|
6107
|
-
*/
|
|
6108
|
-
async ensureDimensions() {
|
|
6109
|
-
if (this._dimensions === null) {
|
|
6110
|
-
await this.embedText("dimension probe");
|
|
6111
|
-
}
|
|
6112
|
-
if (this._dimensions === null) {
|
|
6113
|
-
throw new Error("Failed to determine embedding dimensions");
|
|
6114
|
-
}
|
|
6115
|
-
return this._dimensions;
|
|
6116
|
-
}
|
|
6117
|
-
/**
|
|
6118
|
-
* Dispose the embedding pipeline to free resources.
|
|
6119
|
-
* Should be called before process exit to prevent ONNX runtime cleanup issues on macOS.
|
|
6120
|
-
* After disposal, this engine cannot be used again.
|
|
6121
|
-
*/
|
|
6122
|
-
async dispose() {
|
|
6123
|
-
if (this.extractor !== null) {
|
|
6124
|
-
await this.extractor.dispose();
|
|
6125
|
-
this.extractor = null;
|
|
6126
|
-
}
|
|
6127
|
-
this.initPromise = null;
|
|
6128
|
-
this._dimensions = null;
|
|
6129
|
-
this.disposed = true;
|
|
6130
|
-
}
|
|
6131
|
-
};
|
|
6132
|
-
|
|
6133
6943
|
// src/db/lance.ts
|
|
6134
6944
|
import { rm as rm2 } from "fs/promises";
|
|
6135
|
-
import { join as
|
|
6945
|
+
import { join as join13 } from "path";
|
|
6136
6946
|
import * as lancedb from "@lancedb/lancedb";
|
|
6137
6947
|
import { LanceSchema } from "@lancedb/lancedb/embedding";
|
|
6138
6948
|
import { Utf8 } from "apache-arrow";
|
|
@@ -6147,17 +6957,19 @@ var HuggingFaceEmbeddingFunction = class extends TextEmbeddingFunction {
|
|
|
6147
6957
|
constructor(optionsRaw) {
|
|
6148
6958
|
super();
|
|
6149
6959
|
const options = this.resolveVariables(optionsRaw ?? {});
|
|
6150
|
-
|
|
6151
|
-
|
|
6152
|
-
|
|
6153
|
-
|
|
6154
|
-
pooling: options.pooling ?? "mean",
|
|
6155
|
-
normalize: options.normalize ?? true,
|
|
6156
|
-
queryPrefix: options.queryPrefix ?? "",
|
|
6157
|
-
docPrefix: options.docPrefix ?? "",
|
|
6158
|
-
maxInFlightBatches: 1
|
|
6960
|
+
const finetunedPath = getFinetunedModelPath();
|
|
6961
|
+
const model = finetunedPath ?? options.model ?? getConfiguredModelId();
|
|
6962
|
+
const overrides = {
|
|
6963
|
+
maxInFlightBatches: 1,
|
|
6159
6964
|
// Single-threaded for LanceDB integration
|
|
6965
|
+
...options.batchSize !== void 0 && { batchSize: options.batchSize },
|
|
6966
|
+
...options.dtype !== void 0 && { dtype: options.dtype },
|
|
6967
|
+
...options.pooling !== void 0 && { pooling: options.pooling },
|
|
6968
|
+
...options.normalize !== void 0 && { normalize: options.normalize },
|
|
6969
|
+
...options.queryPrefix !== void 0 && { queryPrefix: options.queryPrefix },
|
|
6970
|
+
...options.docPrefix !== void 0 && { docPrefix: options.docPrefix }
|
|
6160
6971
|
};
|
|
6972
|
+
this.embeddingConfig = buildEmbeddingConfig(model, overrides);
|
|
6161
6973
|
this.engine = new EmbeddingEngine(this.embeddingConfig);
|
|
6162
6974
|
}
|
|
6163
6975
|
/**
|
|
@@ -6233,11 +7045,15 @@ var DocumentMetadataSchema = z5.object({
|
|
|
6233
7045
|
}).loose();
|
|
6234
7046
|
|
|
6235
7047
|
// src/db/lance.ts
|
|
6236
|
-
var
|
|
7048
|
+
var logger8 = createLogger("lance");
|
|
6237
7049
|
function isSearchHit(value) {
|
|
6238
7050
|
if (typeof value !== "object" || value === null) return false;
|
|
6239
7051
|
return "id" in value && "content" in value && "metadata" in value && "_distance" in value && typeof value.id === "string" && typeof value.content === "string" && typeof value.metadata === "string" && typeof value._distance === "number";
|
|
6240
7052
|
}
|
|
7053
|
+
function isDocumentRecord(value) {
|
|
7054
|
+
if (typeof value !== "object" || value === null) return false;
|
|
7055
|
+
return "id" in value && "content" in value && "metadata" in value && typeof value.id === "string" && typeof value.content === "string" && typeof value.metadata === "string";
|
|
7056
|
+
}
|
|
6241
7057
|
function parseDocumentMetadata(jsonStr) {
|
|
6242
7058
|
const parsed = DocumentMetadataSchema.parse(JSON.parse(jsonStr));
|
|
6243
7059
|
return {
|
|
@@ -6405,6 +7221,29 @@ var LanceStore = class {
|
|
|
6405
7221
|
};
|
|
6406
7222
|
});
|
|
6407
7223
|
}
|
|
7224
|
+
/**
|
|
7225
|
+
* Get all documents from a store (for training data generation).
|
|
7226
|
+
* Returns documents in batches to avoid memory issues with large stores.
|
|
7227
|
+
*/
|
|
7228
|
+
async getAllDocuments(storeId, options) {
|
|
7229
|
+
const table = await this.getTable(storeId);
|
|
7230
|
+
const limit = options?.limit ?? 1e4;
|
|
7231
|
+
const offset = options?.offset ?? 0;
|
|
7232
|
+
const rawResults = await table.query().limit(limit).offset(offset).toArray();
|
|
7233
|
+
const results = rawResults.filter(isDocumentRecord);
|
|
7234
|
+
return results.map((r) => ({
|
|
7235
|
+
id: createDocumentId(r.id),
|
|
7236
|
+
content: r.content,
|
|
7237
|
+
metadata: parseDocumentMetadata(r.metadata)
|
|
7238
|
+
}));
|
|
7239
|
+
}
|
|
7240
|
+
/**
|
|
7241
|
+
* Count total documents in a store.
|
|
7242
|
+
*/
|
|
7243
|
+
async countDocuments(storeId) {
|
|
7244
|
+
const table = await this.getTable(storeId);
|
|
7245
|
+
return table.countRows();
|
|
7246
|
+
}
|
|
6408
7247
|
async deleteStore(storeId) {
|
|
6409
7248
|
const tableName = this.getTableName(storeId);
|
|
6410
7249
|
this.connection ??= await lancedb.connect(this.dataDir);
|
|
@@ -6413,11 +7252,11 @@ var LanceStore = class {
|
|
|
6413
7252
|
await this.connection.dropTable(tableName);
|
|
6414
7253
|
}
|
|
6415
7254
|
this.tables.delete(tableName);
|
|
6416
|
-
const lanceDir =
|
|
7255
|
+
const lanceDir = join13(this.dataDir, `${tableName}.lance`);
|
|
6417
7256
|
try {
|
|
6418
7257
|
await rm2(lanceDir, { recursive: true, force: true });
|
|
6419
7258
|
} catch (error) {
|
|
6420
|
-
|
|
7259
|
+
logger8.warn({ lanceDir, error }, "Failed to remove .lance directory");
|
|
6421
7260
|
}
|
|
6422
7261
|
}
|
|
6423
7262
|
close() {
|
|
@@ -6454,7 +7293,7 @@ var LanceStore = class {
|
|
|
6454
7293
|
};
|
|
6455
7294
|
|
|
6456
7295
|
// src/services/index.ts
|
|
6457
|
-
var
|
|
7296
|
+
var logger9 = createLogger("services");
|
|
6458
7297
|
var LazyServiceContainer = class {
|
|
6459
7298
|
// Eagerly initialized (lightweight)
|
|
6460
7299
|
config;
|
|
@@ -6469,6 +7308,8 @@ var LazyServiceContainer = class {
|
|
|
6469
7308
|
_manifest = null;
|
|
6470
7309
|
_embeddings = null;
|
|
6471
7310
|
_codeGraph = null;
|
|
7311
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in lazy getter
|
|
7312
|
+
_reranker = null;
|
|
6472
7313
|
_search = null;
|
|
6473
7314
|
_index = null;
|
|
6474
7315
|
constructor(config, appConfig, dataDir, store, lance, pythonBridge) {
|
|
@@ -6485,7 +7326,7 @@ var LazyServiceContainer = class {
|
|
|
6485
7326
|
*/
|
|
6486
7327
|
get embeddings() {
|
|
6487
7328
|
if (this._embeddings === null) {
|
|
6488
|
-
|
|
7329
|
+
logger9.debug("Lazy-initializing EmbeddingEngine");
|
|
6489
7330
|
this._embeddings = new EmbeddingEngine(this.appConfig.embedding);
|
|
6490
7331
|
}
|
|
6491
7332
|
return this._embeddings;
|
|
@@ -6495,18 +7336,34 @@ var LazyServiceContainer = class {
|
|
|
6495
7336
|
*/
|
|
6496
7337
|
get codeGraph() {
|
|
6497
7338
|
if (this._codeGraph === null) {
|
|
6498
|
-
|
|
7339
|
+
logger9.debug("Lazy-initializing CodeGraphService");
|
|
6499
7340
|
this._codeGraph = new CodeGraphService(this.dataDir, this.pythonBridge);
|
|
6500
7341
|
}
|
|
6501
7342
|
return this._codeGraph;
|
|
6502
7343
|
}
|
|
7344
|
+
/**
|
|
7345
|
+
* RerankerService is lazily created on first access.
|
|
7346
|
+
* Only created if reranker config exists and is enabled.
|
|
7347
|
+
*/
|
|
7348
|
+
get reranker() {
|
|
7349
|
+
if (this._reranker === null && this.appConfig.reranker?.enabled) {
|
|
7350
|
+
logger9.debug("Lazy-initializing RerankerService");
|
|
7351
|
+
this._reranker = new RerankerService(this.appConfig.reranker);
|
|
7352
|
+
}
|
|
7353
|
+
return this._reranker ?? void 0;
|
|
7354
|
+
}
|
|
6503
7355
|
/**
|
|
6504
7356
|
* SearchService is lazily created on first access.
|
|
6505
7357
|
*/
|
|
6506
7358
|
get search() {
|
|
6507
7359
|
if (this._search === null) {
|
|
6508
|
-
|
|
6509
|
-
this._search = new SearchService(
|
|
7360
|
+
logger9.debug("Lazy-initializing SearchService");
|
|
7361
|
+
this._search = new SearchService(
|
|
7362
|
+
this.lance,
|
|
7363
|
+
this.codeGraph,
|
|
7364
|
+
this.appConfig.search,
|
|
7365
|
+
this.reranker
|
|
7366
|
+
);
|
|
6510
7367
|
}
|
|
6511
7368
|
return this._search;
|
|
6512
7369
|
}
|
|
@@ -6515,14 +7372,15 @@ var LazyServiceContainer = class {
|
|
|
6515
7372
|
*/
|
|
6516
7373
|
get index() {
|
|
6517
7374
|
if (this._index === null) {
|
|
6518
|
-
|
|
7375
|
+
logger9.debug("Lazy-initializing IndexService");
|
|
6519
7376
|
this._index = new IndexService(this.lance, this.embeddings, {
|
|
6520
7377
|
codeGraphService: this.codeGraph,
|
|
6521
7378
|
manifestService: this.manifest,
|
|
6522
7379
|
chunkSize: this.appConfig.indexing.chunkSize,
|
|
6523
7380
|
chunkOverlap: this.appConfig.indexing.chunkOverlap,
|
|
6524
7381
|
concurrency: this.appConfig.indexing.concurrency,
|
|
6525
|
-
ignorePatterns: this.appConfig.indexing.ignorePatterns
|
|
7382
|
+
ignorePatterns: this.appConfig.indexing.ignorePatterns,
|
|
7383
|
+
prependPath: this.appConfig.indexing.prependPath
|
|
6526
7384
|
});
|
|
6527
7385
|
}
|
|
6528
7386
|
return this._index;
|
|
@@ -6532,7 +7390,7 @@ var LazyServiceContainer = class {
|
|
|
6532
7390
|
*/
|
|
6533
7391
|
get manifest() {
|
|
6534
7392
|
if (this._manifest === null) {
|
|
6535
|
-
|
|
7393
|
+
logger9.debug("Lazy-initializing ManifestService");
|
|
6536
7394
|
this._manifest = new ManifestService(this.dataDir);
|
|
6537
7395
|
}
|
|
6538
7396
|
return this._manifest;
|
|
@@ -6551,7 +7409,7 @@ var LazyServiceContainer = class {
|
|
|
6551
7409
|
}
|
|
6552
7410
|
};
|
|
6553
7411
|
async function createLazyServices(configPath, dataDir, projectRoot) {
|
|
6554
|
-
|
|
7412
|
+
logger9.info({ configPath, dataDir, projectRoot }, "Initializing lazy services");
|
|
6555
7413
|
const startTime = Date.now();
|
|
6556
7414
|
const config = new ConfigService(configPath, dataDir, projectRoot);
|
|
6557
7415
|
const appConfig = await config.load();
|
|
@@ -6572,14 +7430,14 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
|
|
|
6572
7430
|
await store.initialize();
|
|
6573
7431
|
await lance.setEmbeddingFunction(appConfig.embedding);
|
|
6574
7432
|
const durationMs = Date.now() - startTime;
|
|
6575
|
-
|
|
7433
|
+
logger9.info(
|
|
6576
7434
|
{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot, durationMs },
|
|
6577
7435
|
"Lazy services initialized"
|
|
6578
7436
|
);
|
|
6579
7437
|
return new LazyServiceContainer(config, appConfig, resolvedDataDir, store, lance, pythonBridge);
|
|
6580
7438
|
}
|
|
6581
7439
|
async function createServices(configPath, dataDir, projectRoot) {
|
|
6582
|
-
|
|
7440
|
+
logger9.info({ configPath, dataDir, projectRoot }, "Initializing services");
|
|
6583
7441
|
const config = new ConfigService(configPath, dataDir, projectRoot);
|
|
6584
7442
|
const appConfig = await config.load();
|
|
6585
7443
|
const resolvedDataDir = config.resolveDataDir();
|
|
@@ -6602,16 +7460,18 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
6602
7460
|
await store.initialize();
|
|
6603
7461
|
const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
|
|
6604
7462
|
const manifest = new ManifestService(resolvedDataDir);
|
|
6605
|
-
const
|
|
7463
|
+
const reranker = appConfig.reranker?.enabled ? new RerankerService(appConfig.reranker) : void 0;
|
|
7464
|
+
const search = new SearchService(lance, codeGraph, appConfig.search, reranker);
|
|
6606
7465
|
const index = new IndexService(lance, embeddings, {
|
|
6607
7466
|
codeGraphService: codeGraph,
|
|
6608
7467
|
manifestService: manifest,
|
|
6609
7468
|
chunkSize: appConfig.indexing.chunkSize,
|
|
6610
7469
|
chunkOverlap: appConfig.indexing.chunkOverlap,
|
|
6611
7470
|
concurrency: appConfig.indexing.concurrency,
|
|
6612
|
-
ignorePatterns: appConfig.indexing.ignorePatterns
|
|
7471
|
+
ignorePatterns: appConfig.indexing.ignorePatterns,
|
|
7472
|
+
prependPath: appConfig.indexing.prependPath
|
|
6613
7473
|
});
|
|
6614
|
-
|
|
7474
|
+
logger9.info(
|
|
6615
7475
|
{ dataDir: resolvedDataDir, projectRoot: resolvedProjectRoot },
|
|
6616
7476
|
"Services initialized successfully"
|
|
6617
7477
|
);
|
|
@@ -6628,20 +7488,20 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
6628
7488
|
};
|
|
6629
7489
|
}
|
|
6630
7490
|
async function destroyServices(services) {
|
|
6631
|
-
|
|
7491
|
+
logger9.info("Shutting down services");
|
|
6632
7492
|
const errors = [];
|
|
6633
7493
|
const isLazyContainer = services instanceof LazyServiceContainer;
|
|
6634
7494
|
const shouldCleanupSearch = !isLazyContainer || services.hasSearch;
|
|
6635
7495
|
if (shouldCleanupSearch) {
|
|
6636
7496
|
services.search.cleanup();
|
|
6637
7497
|
} else {
|
|
6638
|
-
|
|
7498
|
+
logger9.debug("Skipping search cleanup (not initialized)");
|
|
6639
7499
|
}
|
|
6640
7500
|
try {
|
|
6641
7501
|
await services.pythonBridge.stop();
|
|
6642
7502
|
} catch (e) {
|
|
6643
7503
|
const error = e instanceof Error ? e : new Error(String(e));
|
|
6644
|
-
|
|
7504
|
+
logger9.error({ error }, "Error stopping Python bridge");
|
|
6645
7505
|
errors.push(error);
|
|
6646
7506
|
}
|
|
6647
7507
|
const shouldDisposeEmbeddings = !isLazyContainer || services.hasEmbeddings;
|
|
@@ -6650,17 +7510,17 @@ async function destroyServices(services) {
|
|
|
6650
7510
|
await services.embeddings.dispose();
|
|
6651
7511
|
} catch (e) {
|
|
6652
7512
|
const error = e instanceof Error ? e : new Error(String(e));
|
|
6653
|
-
|
|
7513
|
+
logger9.error({ error }, "Error disposing EmbeddingEngine");
|
|
6654
7514
|
errors.push(error);
|
|
6655
7515
|
}
|
|
6656
7516
|
} else {
|
|
6657
|
-
|
|
7517
|
+
logger9.debug("Skipping embeddings disposal (not initialized)");
|
|
6658
7518
|
}
|
|
6659
7519
|
try {
|
|
6660
7520
|
await services.lance.closeAsync();
|
|
6661
7521
|
} catch (e) {
|
|
6662
7522
|
const error = e instanceof Error ? e : new Error(String(e));
|
|
6663
|
-
|
|
7523
|
+
logger9.error({ error }, "Error closing LanceStore");
|
|
6664
7524
|
errors.push(error);
|
|
6665
7525
|
}
|
|
6666
7526
|
await shutdownLogger();
|
|
@@ -6682,6 +7542,7 @@ export {
|
|
|
6682
7542
|
ASTParser,
|
|
6683
7543
|
ok,
|
|
6684
7544
|
err,
|
|
7545
|
+
TEXT_EXTENSIONS,
|
|
6685
7546
|
classifyWebContentType,
|
|
6686
7547
|
isFileStoreDefinition,
|
|
6687
7548
|
isRepoStoreDefinition,
|
|
@@ -6694,4 +7555,4 @@ export {
|
|
|
6694
7555
|
createServices,
|
|
6695
7556
|
destroyServices
|
|
6696
7557
|
};
|
|
6697
|
-
//# sourceMappingURL=chunk-
|
|
7558
|
+
//# sourceMappingURL=chunk-B335UOU7.js.map
|