raggrep 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +32 -4
- package/dist/cli/main.js +1459 -635
- package/dist/cli/main.js.map +17 -11
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/ports/embedding.d.ts +4 -1
- package/dist/domain/ports/index.d.ts +2 -1
- package/dist/domain/ports/logger.d.ts +66 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.d.ts +45 -8
- package/dist/index.js +1500 -679
- package/dist/index.js.map +17 -11
- package/dist/infrastructure/index.d.ts +1 -0
- package/dist/infrastructure/logger/index.d.ts +6 -0
- package/dist/infrastructure/logger/loggers.d.ts +75 -0
- package/dist/modules/data/json/index.d.ts +47 -0
- package/dist/modules/docs/markdown/index.d.ts +47 -0
- package/dist/modules/language/typescript/index.d.ts +10 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/package.json +1 -1
package/dist/cli/main.js
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
var __create = Object.create;
|
|
4
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
2
5
|
var __defProp = Object.defineProperty;
|
|
6
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __toESM = (mod, isNodeMode, target) => {
|
|
9
|
+
target = mod != null ? __create(__getProtoOf(mod)) : {};
|
|
10
|
+
const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
|
|
11
|
+
for (let key of __getOwnPropNames(mod))
|
|
12
|
+
if (!__hasOwnProp.call(to, key))
|
|
13
|
+
__defProp(to, key, {
|
|
14
|
+
get: () => mod[key],
|
|
15
|
+
enumerable: true
|
|
16
|
+
});
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
3
19
|
var __export = (target, all) => {
|
|
4
20
|
for (var name in all)
|
|
5
21
|
__defProp(target, name, {
|
|
@@ -10,6 +26,7 @@ var __export = (target, all) => {
|
|
|
10
26
|
});
|
|
11
27
|
};
|
|
12
28
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
29
|
+
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
13
30
|
|
|
14
31
|
// src/infrastructure/embeddings/transformersEmbedding.ts
|
|
15
32
|
import {
|
|
@@ -27,7 +44,8 @@ class TransformersEmbeddingProvider {
|
|
|
27
44
|
constructor(config) {
|
|
28
45
|
this.config = {
|
|
29
46
|
model: config?.model ?? "all-MiniLM-L6-v2",
|
|
30
|
-
showProgress: config?.showProgress ?? false
|
|
47
|
+
showProgress: config?.showProgress ?? false,
|
|
48
|
+
logger: config?.logger
|
|
31
49
|
};
|
|
32
50
|
}
|
|
33
51
|
async initialize(config) {
|
|
@@ -49,29 +67,55 @@ class TransformersEmbeddingProvider {
|
|
|
49
67
|
this.isInitializing = true;
|
|
50
68
|
this.initPromise = (async () => {
|
|
51
69
|
const modelId = EMBEDDING_MODELS[this.config.model];
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
}
|
|
70
|
+
const logger = this.config.logger;
|
|
71
|
+
const showProgress = this.config.showProgress || !!logger;
|
|
72
|
+
const isCached = await isModelCached(this.config.model);
|
|
73
|
+
let hasDownloads = false;
|
|
57
74
|
try {
|
|
58
75
|
this.pipeline = await pipeline("feature-extraction", modelId, {
|
|
59
|
-
progress_callback:
|
|
76
|
+
progress_callback: showProgress && !isCached ? (progress) => {
|
|
60
77
|
if (progress.status === "progress" && progress.file) {
|
|
78
|
+
if (!hasDownloads) {
|
|
79
|
+
hasDownloads = true;
|
|
80
|
+
if (logger) {
|
|
81
|
+
logger.info(`Downloading embedding model: ${this.config.model}`);
|
|
82
|
+
} else {
|
|
83
|
+
console.log(`
|
|
84
|
+
Loading embedding model: ${this.config.model}`);
|
|
85
|
+
console.log(` Cache: ${CACHE_DIR}`);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
61
88
|
const pct = progress.progress ? Math.round(progress.progress) : 0;
|
|
62
|
-
|
|
89
|
+
if (logger) {
|
|
90
|
+
logger.progress(` Downloading ${progress.file}: ${pct}%`);
|
|
91
|
+
} else {
|
|
92
|
+
process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
|
|
93
|
+
}
|
|
63
94
|
} else if (progress.status === "done" && progress.file) {
|
|
64
|
-
|
|
95
|
+
if (logger) {
|
|
96
|
+
logger.clearProgress();
|
|
97
|
+
logger.info(` Downloaded ${progress.file}`);
|
|
98
|
+
} else if (hasDownloads) {
|
|
99
|
+
process.stdout.write(`\r Downloaded ${progress.file}
|
|
65
100
|
`);
|
|
101
|
+
}
|
|
66
102
|
}
|
|
67
103
|
} : undefined
|
|
68
104
|
});
|
|
69
|
-
if (
|
|
70
|
-
|
|
105
|
+
if (hasDownloads) {
|
|
106
|
+
if (logger) {
|
|
107
|
+
logger.clearProgress();
|
|
108
|
+
logger.info(`Model ready: ${this.config.model}`);
|
|
109
|
+
} else {
|
|
110
|
+
console.log(` Model ready.
|
|
71
111
|
`);
|
|
112
|
+
}
|
|
72
113
|
}
|
|
73
114
|
} catch (error) {
|
|
74
115
|
this.pipeline = null;
|
|
116
|
+
if (logger) {
|
|
117
|
+
logger.clearProgress();
|
|
118
|
+
}
|
|
75
119
|
throw new Error(`Failed to load embedding model: ${error}`);
|
|
76
120
|
} finally {
|
|
77
121
|
this.isInitializing = false;
|
|
@@ -125,9 +169,21 @@ class TransformersEmbeddingProvider {
|
|
|
125
169
|
function getCacheDir() {
|
|
126
170
|
return CACHE_DIR;
|
|
127
171
|
}
|
|
172
|
+
async function isModelCached(model) {
|
|
173
|
+
const modelId = EMBEDDING_MODELS[model];
|
|
174
|
+
const modelPath = path.join(CACHE_DIR, modelId);
|
|
175
|
+
try {
|
|
176
|
+
const fs = await import("fs/promises");
|
|
177
|
+
const onnxPath = path.join(modelPath, "onnx", "model_quantized.onnx");
|
|
178
|
+
await fs.access(onnxPath);
|
|
179
|
+
return true;
|
|
180
|
+
} catch {
|
|
181
|
+
return false;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
128
184
|
function configureEmbeddings(config) {
|
|
129
185
|
const newConfig = { ...globalConfig, ...config };
|
|
130
|
-
if (newConfig.model !== globalConfig.model) {
|
|
186
|
+
if (newConfig.model !== globalConfig.model || newConfig.logger !== globalConfig.logger) {
|
|
131
187
|
globalProvider = null;
|
|
132
188
|
}
|
|
133
189
|
globalConfig = newConfig;
|
|
@@ -163,7 +219,8 @@ var init_transformersEmbedding = __esm(() => {
|
|
|
163
219
|
};
|
|
164
220
|
globalConfig = {
|
|
165
221
|
model: "all-MiniLM-L6-v2",
|
|
166
|
-
showProgress: false
|
|
222
|
+
showProgress: false,
|
|
223
|
+
logger: undefined
|
|
167
224
|
};
|
|
168
225
|
});
|
|
169
226
|
|
|
@@ -171,6 +228,96 @@ var init_transformersEmbedding = __esm(() => {
|
|
|
171
228
|
var init_embeddings = __esm(() => {
|
|
172
229
|
init_transformersEmbedding();
|
|
173
230
|
});
|
|
231
|
+
|
|
232
|
+
// src/infrastructure/logger/loggers.ts
|
|
233
|
+
class ConsoleLogger {
|
|
234
|
+
verbose;
|
|
235
|
+
constructor(options) {
|
|
236
|
+
this.verbose = options?.verbose ?? false;
|
|
237
|
+
}
|
|
238
|
+
info(message) {
|
|
239
|
+
console.log(message);
|
|
240
|
+
}
|
|
241
|
+
warn(message) {
|
|
242
|
+
console.warn(message);
|
|
243
|
+
}
|
|
244
|
+
error(message) {
|
|
245
|
+
console.error(message);
|
|
246
|
+
}
|
|
247
|
+
debug(message) {
|
|
248
|
+
if (this.verbose) {
|
|
249
|
+
console.log(message);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
progress(message) {
|
|
253
|
+
console.log(message);
|
|
254
|
+
}
|
|
255
|
+
clearProgress() {}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
class InlineProgressLogger {
|
|
259
|
+
verbose;
|
|
260
|
+
lastProgressLength = 0;
|
|
261
|
+
hasProgress = false;
|
|
262
|
+
constructor(options) {
|
|
263
|
+
this.verbose = options?.verbose ?? false;
|
|
264
|
+
}
|
|
265
|
+
info(message) {
|
|
266
|
+
this.clearProgress();
|
|
267
|
+
console.log(message);
|
|
268
|
+
}
|
|
269
|
+
warn(message) {
|
|
270
|
+
this.clearProgress();
|
|
271
|
+
console.warn(message);
|
|
272
|
+
}
|
|
273
|
+
error(message) {
|
|
274
|
+
this.clearProgress();
|
|
275
|
+
console.error(message);
|
|
276
|
+
}
|
|
277
|
+
debug(message) {
|
|
278
|
+
if (this.verbose) {
|
|
279
|
+
this.clearProgress();
|
|
280
|
+
console.log(message);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
progress(message) {
|
|
284
|
+
process.stdout.write(`\r${message}`);
|
|
285
|
+
const padding = Math.max(0, this.lastProgressLength - message.length);
|
|
286
|
+
if (padding > 0) {
|
|
287
|
+
process.stdout.write(" ".repeat(padding));
|
|
288
|
+
}
|
|
289
|
+
this.lastProgressLength = message.length;
|
|
290
|
+
this.hasProgress = true;
|
|
291
|
+
}
|
|
292
|
+
clearProgress() {
|
|
293
|
+
if (this.hasProgress && this.lastProgressLength > 0) {
|
|
294
|
+
process.stdout.write("\r" + " ".repeat(this.lastProgressLength) + "\r");
|
|
295
|
+
this.lastProgressLength = 0;
|
|
296
|
+
this.hasProgress = false;
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
class SilentLogger {
|
|
302
|
+
info() {}
|
|
303
|
+
warn() {}
|
|
304
|
+
error() {}
|
|
305
|
+
debug() {}
|
|
306
|
+
progress() {}
|
|
307
|
+
clearProgress() {}
|
|
308
|
+
}
|
|
309
|
+
function createLogger(options) {
|
|
310
|
+
return new ConsoleLogger(options);
|
|
311
|
+
}
|
|
312
|
+
function createInlineLogger(options) {
|
|
313
|
+
return new InlineProgressLogger(options);
|
|
314
|
+
}
|
|
315
|
+
function createSilentLogger() {
|
|
316
|
+
return new SilentLogger;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
// src/infrastructure/logger/index.ts
|
|
320
|
+
var init_logger = () => {};
|
|
174
321
|
// src/domain/entities/searchResult.ts
|
|
175
322
|
var DEFAULT_SEARCH_OPTIONS;
|
|
176
323
|
var init_searchResult = __esm(() => {
|
|
@@ -201,6 +348,20 @@ function createDefaultConfig() {
|
|
|
201
348
|
options: {
|
|
202
349
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
203
350
|
}
|
|
351
|
+
},
|
|
352
|
+
{
|
|
353
|
+
id: "data/json",
|
|
354
|
+
enabled: true,
|
|
355
|
+
options: {
|
|
356
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
357
|
+
}
|
|
358
|
+
},
|
|
359
|
+
{
|
|
360
|
+
id: "docs/markdown",
|
|
361
|
+
enabled: true,
|
|
362
|
+
options: {
|
|
363
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
364
|
+
}
|
|
204
365
|
}
|
|
205
366
|
]
|
|
206
367
|
};
|
|
@@ -244,16 +405,18 @@ var init_config = __esm(() => {
|
|
|
244
405
|
".jsx",
|
|
245
406
|
".mjs",
|
|
246
407
|
".cjs",
|
|
408
|
+
".mts",
|
|
409
|
+
".cts",
|
|
410
|
+
".json",
|
|
411
|
+
".md",
|
|
247
412
|
".py",
|
|
248
413
|
".go",
|
|
249
414
|
".rs",
|
|
250
415
|
".java",
|
|
251
|
-
".json",
|
|
252
416
|
".yaml",
|
|
253
417
|
".yml",
|
|
254
418
|
".toml",
|
|
255
419
|
".sql",
|
|
256
|
-
".md",
|
|
257
420
|
".txt"
|
|
258
421
|
];
|
|
259
422
|
});
|
|
@@ -2145,6 +2308,194 @@ var init_core = __esm(() => {
|
|
|
2145
2308
|
init_symbols();
|
|
2146
2309
|
});
|
|
2147
2310
|
|
|
2311
|
+
// src/domain/services/keywords.ts
|
|
2312
|
+
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2313
|
+
const keywords = new Set;
|
|
2314
|
+
if (name) {
|
|
2315
|
+
keywords.add(name.toLowerCase());
|
|
2316
|
+
const parts = name.split(/(?=[A-Z])/).map((p) => p.toLowerCase());
|
|
2317
|
+
parts.forEach((p) => p.length > 2 && keywords.add(p));
|
|
2318
|
+
}
|
|
2319
|
+
const identifierRegex = /\b([a-zA-Z_][a-zA-Z0-9_]{2,})\b/g;
|
|
2320
|
+
let match;
|
|
2321
|
+
while ((match = identifierRegex.exec(content)) !== null) {
|
|
2322
|
+
const word = match[1].toLowerCase();
|
|
2323
|
+
if (!COMMON_KEYWORDS.has(word) && word.length > 2) {
|
|
2324
|
+
keywords.add(word);
|
|
2325
|
+
}
|
|
2326
|
+
}
|
|
2327
|
+
return Array.from(keywords).slice(0, maxKeywords);
|
|
2328
|
+
}
|
|
2329
|
+
function splitIdentifier(str) {
|
|
2330
|
+
return str.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[_-]/g, " ").split(/\s+/).map((s) => s.toLowerCase()).filter((s) => s.length > 1);
|
|
2331
|
+
}
|
|
2332
|
+
function extractPathKeywords(filepath) {
|
|
2333
|
+
const keywords = new Set;
|
|
2334
|
+
const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
|
|
2335
|
+
const segments = pathWithoutExt.split(/[/\\]/);
|
|
2336
|
+
for (const segment of segments) {
|
|
2337
|
+
if (segment.length < 2)
|
|
2338
|
+
continue;
|
|
2339
|
+
const lower = segment.toLowerCase();
|
|
2340
|
+
if (!COMMON_KEYWORDS.has(lower) && lower.length > 2) {
|
|
2341
|
+
keywords.add(lower);
|
|
2342
|
+
}
|
|
2343
|
+
const parts = splitIdentifier(segment);
|
|
2344
|
+
for (const part of parts) {
|
|
2345
|
+
if (!COMMON_KEYWORDS.has(part) && part.length > 2) {
|
|
2346
|
+
keywords.add(part);
|
|
2347
|
+
}
|
|
2348
|
+
}
|
|
2349
|
+
}
|
|
2350
|
+
return Array.from(keywords);
|
|
2351
|
+
}
|
|
2352
|
+
function parsePathContext(filepath) {
|
|
2353
|
+
const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
|
|
2354
|
+
const allSegments = pathWithoutExt.split(/[/\\]/);
|
|
2355
|
+
const filename = allSegments[allSegments.length - 1];
|
|
2356
|
+
const dirSegments = allSegments.slice(0, -1);
|
|
2357
|
+
const keywords = extractPathKeywords(filepath);
|
|
2358
|
+
let layer;
|
|
2359
|
+
const allLower = [...dirSegments, filename].map((s) => s.toLowerCase()).join(" ");
|
|
2360
|
+
const filenameLower = filename.toLowerCase();
|
|
2361
|
+
for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS2)) {
|
|
2362
|
+
for (const pattern of patterns) {
|
|
2363
|
+
if (filenameLower.includes(pattern)) {
|
|
2364
|
+
layer = layerName;
|
|
2365
|
+
break;
|
|
2366
|
+
}
|
|
2367
|
+
if (dirSegments.some((s) => s.toLowerCase() === pattern)) {
|
|
2368
|
+
layer = layerName;
|
|
2369
|
+
break;
|
|
2370
|
+
}
|
|
2371
|
+
}
|
|
2372
|
+
if (layer)
|
|
2373
|
+
break;
|
|
2374
|
+
}
|
|
2375
|
+
let domain;
|
|
2376
|
+
const layerPatternSet = new Set(Object.values(LAYER_PATTERNS2).flat());
|
|
2377
|
+
const reversedSegments = [...dirSegments].reverse();
|
|
2378
|
+
for (const segment of reversedSegments) {
|
|
2379
|
+
const lower = segment.toLowerCase();
|
|
2380
|
+
if (["src", "lib", "app", "packages", "modules"].includes(lower))
|
|
2381
|
+
continue;
|
|
2382
|
+
if (layerPatternSet.has(lower))
|
|
2383
|
+
continue;
|
|
2384
|
+
if (lower.length > 2) {
|
|
2385
|
+
domain = lower;
|
|
2386
|
+
break;
|
|
2387
|
+
}
|
|
2388
|
+
}
|
|
2389
|
+
return {
|
|
2390
|
+
segments: dirSegments,
|
|
2391
|
+
layer,
|
|
2392
|
+
domain,
|
|
2393
|
+
depth: dirSegments.length,
|
|
2394
|
+
keywords
|
|
2395
|
+
};
|
|
2396
|
+
}
|
|
2397
|
+
function formatPathContextForEmbedding(pathContext) {
|
|
2398
|
+
const parts = [];
|
|
2399
|
+
if (pathContext.domain) {
|
|
2400
|
+
parts.push(pathContext.domain);
|
|
2401
|
+
}
|
|
2402
|
+
if (pathContext.layer) {
|
|
2403
|
+
parts.push(pathContext.layer);
|
|
2404
|
+
}
|
|
2405
|
+
const significantSegments = pathContext.segments.slice(-3).filter((s) => s.length > 2 && !["src", "lib", "app"].includes(s.toLowerCase()));
|
|
2406
|
+
if (significantSegments.length > 0) {
|
|
2407
|
+
parts.push(...significantSegments.map((s) => s.toLowerCase()));
|
|
2408
|
+
}
|
|
2409
|
+
if (parts.length === 0)
|
|
2410
|
+
return "";
|
|
2411
|
+
const unique = [...new Set(parts)];
|
|
2412
|
+
return `[${unique.join(" ")}]`;
|
|
2413
|
+
}
|
|
2414
|
+
var COMMON_KEYWORDS, LAYER_PATTERNS2;
|
|
2415
|
+
var init_keywords = __esm(() => {
|
|
2416
|
+
COMMON_KEYWORDS = new Set([
|
|
2417
|
+
"const",
|
|
2418
|
+
"let",
|
|
2419
|
+
"var",
|
|
2420
|
+
"function",
|
|
2421
|
+
"class",
|
|
2422
|
+
"interface",
|
|
2423
|
+
"type",
|
|
2424
|
+
"enum",
|
|
2425
|
+
"export",
|
|
2426
|
+
"import",
|
|
2427
|
+
"from",
|
|
2428
|
+
"return",
|
|
2429
|
+
"async",
|
|
2430
|
+
"await",
|
|
2431
|
+
"new",
|
|
2432
|
+
"this",
|
|
2433
|
+
"true",
|
|
2434
|
+
"false",
|
|
2435
|
+
"null",
|
|
2436
|
+
"undefined",
|
|
2437
|
+
"if",
|
|
2438
|
+
"else",
|
|
2439
|
+
"for",
|
|
2440
|
+
"while",
|
|
2441
|
+
"switch",
|
|
2442
|
+
"case",
|
|
2443
|
+
"break",
|
|
2444
|
+
"continue",
|
|
2445
|
+
"try",
|
|
2446
|
+
"catch",
|
|
2447
|
+
"finally",
|
|
2448
|
+
"throw",
|
|
2449
|
+
"typeof",
|
|
2450
|
+
"instanceof",
|
|
2451
|
+
"void",
|
|
2452
|
+
"delete",
|
|
2453
|
+
"in",
|
|
2454
|
+
"of",
|
|
2455
|
+
"string",
|
|
2456
|
+
"number",
|
|
2457
|
+
"boolean",
|
|
2458
|
+
"any",
|
|
2459
|
+
"unknown",
|
|
2460
|
+
"never",
|
|
2461
|
+
"object",
|
|
2462
|
+
"public",
|
|
2463
|
+
"private",
|
|
2464
|
+
"protected",
|
|
2465
|
+
"static",
|
|
2466
|
+
"readonly",
|
|
2467
|
+
"abstract",
|
|
2468
|
+
"implements",
|
|
2469
|
+
"extends",
|
|
2470
|
+
"super",
|
|
2471
|
+
"get",
|
|
2472
|
+
"set",
|
|
2473
|
+
"constructor",
|
|
2474
|
+
"the",
|
|
2475
|
+
"and",
|
|
2476
|
+
"for",
|
|
2477
|
+
"not",
|
|
2478
|
+
"with",
|
|
2479
|
+
"are",
|
|
2480
|
+
"was",
|
|
2481
|
+
"has",
|
|
2482
|
+
"have"
|
|
2483
|
+
]);
|
|
2484
|
+
LAYER_PATTERNS2 = {
|
|
2485
|
+
controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
|
|
2486
|
+
service: ["service", "services", "usecase", "usecases", "application"],
|
|
2487
|
+
repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
|
|
2488
|
+
model: ["model", "models", "entity", "entities", "schema", "schemas"],
|
|
2489
|
+
util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
|
|
2490
|
+
config: ["config", "configs", "configuration", "settings"],
|
|
2491
|
+
middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
|
|
2492
|
+
domain: ["domain", "core", "business"],
|
|
2493
|
+
infrastructure: ["infrastructure", "infra", "external", "adapters"],
|
|
2494
|
+
presentation: ["presentation", "view", "views", "component", "components", "ui"],
|
|
2495
|
+
test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
|
|
2496
|
+
};
|
|
2497
|
+
});
|
|
2498
|
+
|
|
2148
2499
|
// src/domain/services/similarity.ts
|
|
2149
2500
|
function cosineSimilarity(a, b) {
|
|
2150
2501
|
if (a.length !== b.length) {
|
|
@@ -2164,17 +2515,151 @@ function cosineSimilarity(a, b) {
|
|
|
2164
2515
|
return dotProduct / magnitude;
|
|
2165
2516
|
}
|
|
2166
2517
|
|
|
2167
|
-
// src/
|
|
2168
|
-
import * as
|
|
2169
|
-
function
|
|
2170
|
-
const
|
|
2171
|
-
|
|
2172
|
-
|
|
2518
|
+
// src/domain/services/queryIntent.ts
|
|
2519
|
+
import * as path7 from "path";
|
|
2520
|
+
function detectQueryIntent(queryTerms) {
|
|
2521
|
+
const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
|
|
2522
|
+
const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
|
|
2523
|
+
if (hasDocumentationTerm) {
|
|
2524
|
+
return "documentation";
|
|
2525
|
+
}
|
|
2526
|
+
if (hasImplementationTerm) {
|
|
2527
|
+
return "implementation";
|
|
2173
2528
|
}
|
|
2174
|
-
return
|
|
2529
|
+
return "neutral";
|
|
2175
2530
|
}
|
|
2176
|
-
function
|
|
2177
|
-
|
|
2531
|
+
function extractQueryTerms(query) {
|
|
2532
|
+
return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
|
|
2533
|
+
}
|
|
2534
|
+
function isSourceCodeFile(filepath) {
|
|
2535
|
+
const ext = path7.extname(filepath).toLowerCase();
|
|
2536
|
+
return SOURCE_CODE_EXTENSIONS.includes(ext);
|
|
2537
|
+
}
|
|
2538
|
+
function isDocFile(filepath) {
|
|
2539
|
+
const ext = path7.extname(filepath).toLowerCase();
|
|
2540
|
+
return DOC_EXTENSIONS.includes(ext);
|
|
2541
|
+
}
|
|
2542
|
+
function calculateFileTypeBoost(filepath, queryTerms) {
|
|
2543
|
+
const isSourceCode = isSourceCodeFile(filepath);
|
|
2544
|
+
const isDoc = isDocFile(filepath);
|
|
2545
|
+
const intent = detectQueryIntent(queryTerms);
|
|
2546
|
+
if (intent === "implementation") {
|
|
2547
|
+
if (isSourceCode) {
|
|
2548
|
+
return 0.06;
|
|
2549
|
+
}
|
|
2550
|
+
return 0;
|
|
2551
|
+
}
|
|
2552
|
+
if (intent === "documentation") {
|
|
2553
|
+
if (isDoc) {
|
|
2554
|
+
return 0.08;
|
|
2555
|
+
}
|
|
2556
|
+
return 0;
|
|
2557
|
+
}
|
|
2558
|
+
return 0;
|
|
2559
|
+
}
|
|
2560
|
+
var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
|
|
2561
|
+
var init_queryIntent = __esm(() => {
|
|
2562
|
+
IMPLEMENTATION_TERMS = [
|
|
2563
|
+
"function",
|
|
2564
|
+
"method",
|
|
2565
|
+
"class",
|
|
2566
|
+
"interface",
|
|
2567
|
+
"implement",
|
|
2568
|
+
"implementation",
|
|
2569
|
+
"endpoint",
|
|
2570
|
+
"route",
|
|
2571
|
+
"handler",
|
|
2572
|
+
"controller",
|
|
2573
|
+
"module",
|
|
2574
|
+
"code"
|
|
2575
|
+
];
|
|
2576
|
+
DOCUMENTATION_TERMS = [
|
|
2577
|
+
"documentation",
|
|
2578
|
+
"docs",
|
|
2579
|
+
"guide",
|
|
2580
|
+
"tutorial",
|
|
2581
|
+
"readme",
|
|
2582
|
+
"how",
|
|
2583
|
+
"what",
|
|
2584
|
+
"why",
|
|
2585
|
+
"explain",
|
|
2586
|
+
"overview",
|
|
2587
|
+
"getting",
|
|
2588
|
+
"started",
|
|
2589
|
+
"requirements",
|
|
2590
|
+
"setup",
|
|
2591
|
+
"install",
|
|
2592
|
+
"configure",
|
|
2593
|
+
"configuration"
|
|
2594
|
+
];
|
|
2595
|
+
SOURCE_CODE_EXTENSIONS = [
|
|
2596
|
+
".ts",
|
|
2597
|
+
".tsx",
|
|
2598
|
+
".js",
|
|
2599
|
+
".jsx",
|
|
2600
|
+
".mjs",
|
|
2601
|
+
".cjs",
|
|
2602
|
+
".py",
|
|
2603
|
+
".go",
|
|
2604
|
+
".rs",
|
|
2605
|
+
".java"
|
|
2606
|
+
];
|
|
2607
|
+
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
2608
|
+
});
|
|
2609
|
+
|
|
2610
|
+
// src/domain/services/chunking.ts
|
|
2611
|
+
function createLineBasedChunks(content, options = {}) {
|
|
2612
|
+
const {
|
|
2613
|
+
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
2614
|
+
overlap = DEFAULT_OVERLAP,
|
|
2615
|
+
minLinesForMultipleChunks = chunkSize
|
|
2616
|
+
} = options;
|
|
2617
|
+
const lines = content.split(`
|
|
2618
|
+
`);
|
|
2619
|
+
const chunks = [];
|
|
2620
|
+
if (lines.length <= minLinesForMultipleChunks) {
|
|
2621
|
+
return [
|
|
2622
|
+
{
|
|
2623
|
+
content,
|
|
2624
|
+
startLine: 1,
|
|
2625
|
+
endLine: lines.length,
|
|
2626
|
+
type: "file"
|
|
2627
|
+
}
|
|
2628
|
+
];
|
|
2629
|
+
}
|
|
2630
|
+
for (let i = 0;i < lines.length; i += chunkSize - overlap) {
|
|
2631
|
+
const endIdx = Math.min(i + chunkSize, lines.length);
|
|
2632
|
+
chunks.push({
|
|
2633
|
+
content: lines.slice(i, endIdx).join(`
|
|
2634
|
+
`),
|
|
2635
|
+
startLine: i + 1,
|
|
2636
|
+
endLine: endIdx,
|
|
2637
|
+
type: "block"
|
|
2638
|
+
});
|
|
2639
|
+
if (endIdx >= lines.length)
|
|
2640
|
+
break;
|
|
2641
|
+
}
|
|
2642
|
+
return chunks;
|
|
2643
|
+
}
|
|
2644
|
+
function generateChunkId(filepath, startLine, endLine) {
|
|
2645
|
+
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2646
|
+
return `${safePath}-${startLine}-${endLine}`;
|
|
2647
|
+
}
|
|
2648
|
+
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2649
|
+
|
|
2650
|
+
// src/domain/services/index.ts
|
|
2651
|
+
var init_services = __esm(() => {
|
|
2652
|
+
init_keywords();
|
|
2653
|
+
init_queryIntent();
|
|
2654
|
+
});
|
|
2655
|
+
|
|
2656
|
+
// src/modules/language/typescript/parseCode.ts
|
|
2657
|
+
import * as ts from "typescript";
|
|
2658
|
+
function parseTypeScriptCode(content, filepath) {
|
|
2659
|
+
return parseTypeScript(content, filepath);
|
|
2660
|
+
}
|
|
2661
|
+
function parseTypeScript(content, filepath) {
|
|
2662
|
+
const chunks = [];
|
|
2178
2663
|
const lines = content.split(`
|
|
2179
2664
|
`);
|
|
2180
2665
|
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
@@ -2315,41 +2800,20 @@ function parseTypeScript(content, filepath) {
|
|
|
2315
2800
|
}
|
|
2316
2801
|
ts.forEachChild(sourceFile, visit);
|
|
2317
2802
|
if (chunks.length === 0) {
|
|
2318
|
-
|
|
2319
|
-
}
|
|
2320
|
-
return chunks;
|
|
2321
|
-
}
|
|
2322
|
-
function parseGenericCode(content) {
|
|
2323
|
-
const chunks = [];
|
|
2324
|
-
const lines = content.split(`
|
|
2803
|
+
const lines2 = content.split(`
|
|
2325
2804
|
`);
|
|
2326
|
-
const CHUNK_SIZE = 30;
|
|
2327
|
-
const OVERLAP = 5;
|
|
2328
|
-
if (lines.length <= CHUNK_SIZE) {
|
|
2329
2805
|
return [
|
|
2330
2806
|
{
|
|
2331
2807
|
content,
|
|
2332
2808
|
startLine: 1,
|
|
2333
|
-
endLine:
|
|
2809
|
+
endLine: lines2.length,
|
|
2334
2810
|
type: "file"
|
|
2335
2811
|
}
|
|
2336
2812
|
];
|
|
2337
2813
|
}
|
|
2338
|
-
for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
|
|
2339
|
-
const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
|
|
2340
|
-
chunks.push({
|
|
2341
|
-
content: lines.slice(i, endIdx).join(`
|
|
2342
|
-
`),
|
|
2343
|
-
startLine: i + 1,
|
|
2344
|
-
endLine: endIdx,
|
|
2345
|
-
type: "block"
|
|
2346
|
-
});
|
|
2347
|
-
if (endIdx >= lines.length)
|
|
2348
|
-
break;
|
|
2349
|
-
}
|
|
2350
2814
|
return chunks;
|
|
2351
2815
|
}
|
|
2352
|
-
function
|
|
2816
|
+
function generateChunkId2(filepath, startLine, endLine) {
|
|
2353
2817
|
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2354
2818
|
return `${safePath}-${startLine}-${endLine}`;
|
|
2355
2819
|
}
|
|
@@ -2360,256 +2824,68 @@ var init_fileIndexStorage = __esm(() => {
|
|
|
2360
2824
|
init_entities();
|
|
2361
2825
|
});
|
|
2362
2826
|
|
|
2363
|
-
// src/
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2827
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
2828
|
+
import * as fs3 from "fs/promises";
|
|
2829
|
+
import * as path8 from "path";
|
|
2830
|
+
|
|
2831
|
+
class SymbolicIndex {
|
|
2832
|
+
meta = null;
|
|
2833
|
+
fileSummaries = new Map;
|
|
2834
|
+
bm25Index = null;
|
|
2835
|
+
symbolicPath;
|
|
2836
|
+
moduleId;
|
|
2837
|
+
constructor(indexDir, moduleId) {
|
|
2838
|
+
this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
|
|
2839
|
+
this.moduleId = moduleId;
|
|
2370
2840
|
}
|
|
2371
|
-
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
2376
|
-
|
|
2841
|
+
async initialize() {
|
|
2842
|
+
try {
|
|
2843
|
+
await this.load();
|
|
2844
|
+
} catch {
|
|
2845
|
+
this.meta = {
|
|
2846
|
+
version: "1.0.0",
|
|
2847
|
+
lastUpdated: new Date().toISOString(),
|
|
2848
|
+
moduleId: this.moduleId,
|
|
2849
|
+
fileCount: 0,
|
|
2850
|
+
bm25Data: {
|
|
2851
|
+
avgDocLength: 0,
|
|
2852
|
+
documentFrequencies: {},
|
|
2853
|
+
totalDocs: 0
|
|
2854
|
+
}
|
|
2855
|
+
};
|
|
2856
|
+
this.bm25Index = new BM25Index;
|
|
2377
2857
|
}
|
|
2378
2858
|
}
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2859
|
+
addFile(summary) {
|
|
2860
|
+
this.fileSummaries.set(summary.filepath, summary);
|
|
2861
|
+
}
|
|
2862
|
+
removeFile(filepath) {
|
|
2863
|
+
return this.fileSummaries.delete(filepath);
|
|
2864
|
+
}
|
|
2865
|
+
buildBM25Index() {
|
|
2866
|
+
this.bm25Index = new BM25Index;
|
|
2867
|
+
for (const [filepath, summary] of this.fileSummaries) {
|
|
2868
|
+
const content = [
|
|
2869
|
+
...summary.keywords,
|
|
2870
|
+
...summary.exports,
|
|
2871
|
+
...extractPathKeywords(filepath)
|
|
2872
|
+
].join(" ");
|
|
2873
|
+
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
2394
2874
|
}
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
keywords.add(part);
|
|
2399
|
-
}
|
|
2875
|
+
if (this.meta) {
|
|
2876
|
+
this.meta.fileCount = this.fileSummaries.size;
|
|
2877
|
+
this.meta.bm25Data.totalDocs = this.fileSummaries.size;
|
|
2400
2878
|
}
|
|
2401
2879
|
}
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
|
|
2406
|
-
const allSegments = pathWithoutExt.split(/[/\\]/);
|
|
2407
|
-
const filename = allSegments[allSegments.length - 1];
|
|
2408
|
-
const dirSegments = allSegments.slice(0, -1);
|
|
2409
|
-
const keywords = extractPathKeywords(filepath);
|
|
2410
|
-
let layer;
|
|
2411
|
-
const allLower = [...dirSegments, filename].map((s) => s.toLowerCase()).join(" ");
|
|
2412
|
-
const filenameLower = filename.toLowerCase();
|
|
2413
|
-
for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS2)) {
|
|
2414
|
-
for (const pattern of patterns) {
|
|
2415
|
-
if (filenameLower.includes(pattern)) {
|
|
2416
|
-
layer = layerName;
|
|
2417
|
-
break;
|
|
2418
|
-
}
|
|
2419
|
-
if (dirSegments.some((s) => s.toLowerCase() === pattern)) {
|
|
2420
|
-
layer = layerName;
|
|
2421
|
-
break;
|
|
2422
|
-
}
|
|
2880
|
+
findCandidates(query, maxCandidates = 20) {
|
|
2881
|
+
if (!this.bm25Index) {
|
|
2882
|
+
return Array.from(this.fileSummaries.keys());
|
|
2423
2883
|
}
|
|
2424
|
-
|
|
2425
|
-
|
|
2884
|
+
const results = this.bm25Index.search(query, maxCandidates);
|
|
2885
|
+
return results.map((r) => r.id);
|
|
2426
2886
|
}
|
|
2427
|
-
|
|
2428
|
-
|
|
2429
|
-
const reversedSegments = [...dirSegments].reverse();
|
|
2430
|
-
for (const segment of reversedSegments) {
|
|
2431
|
-
const lower = segment.toLowerCase();
|
|
2432
|
-
if (["src", "lib", "app", "packages", "modules"].includes(lower))
|
|
2433
|
-
continue;
|
|
2434
|
-
if (layerPatternSet.has(lower))
|
|
2435
|
-
continue;
|
|
2436
|
-
if (lower.length > 2) {
|
|
2437
|
-
domain = lower;
|
|
2438
|
-
break;
|
|
2439
|
-
}
|
|
2440
|
-
}
|
|
2441
|
-
return {
|
|
2442
|
-
segments: dirSegments,
|
|
2443
|
-
layer,
|
|
2444
|
-
domain,
|
|
2445
|
-
depth: dirSegments.length,
|
|
2446
|
-
keywords
|
|
2447
|
-
};
|
|
2448
|
-
}
|
|
2449
|
-
function formatPathContextForEmbedding(pathContext) {
|
|
2450
|
-
const parts = [];
|
|
2451
|
-
if (pathContext.domain) {
|
|
2452
|
-
parts.push(pathContext.domain);
|
|
2453
|
-
}
|
|
2454
|
-
if (pathContext.layer) {
|
|
2455
|
-
parts.push(pathContext.layer);
|
|
2456
|
-
}
|
|
2457
|
-
const significantSegments = pathContext.segments.slice(-3).filter((s) => s.length > 2 && !["src", "lib", "app"].includes(s.toLowerCase()));
|
|
2458
|
-
if (significantSegments.length > 0) {
|
|
2459
|
-
parts.push(...significantSegments.map((s) => s.toLowerCase()));
|
|
2460
|
-
}
|
|
2461
|
-
if (parts.length === 0)
|
|
2462
|
-
return "";
|
|
2463
|
-
const unique = [...new Set(parts)];
|
|
2464
|
-
return `[${unique.join(" ")}]`;
|
|
2465
|
-
}
|
|
2466
|
-
var COMMON_KEYWORDS, LAYER_PATTERNS2;
|
|
2467
|
-
var init_keywords = __esm(() => {
|
|
2468
|
-
COMMON_KEYWORDS = new Set([
|
|
2469
|
-
"const",
|
|
2470
|
-
"let",
|
|
2471
|
-
"var",
|
|
2472
|
-
"function",
|
|
2473
|
-
"class",
|
|
2474
|
-
"interface",
|
|
2475
|
-
"type",
|
|
2476
|
-
"enum",
|
|
2477
|
-
"export",
|
|
2478
|
-
"import",
|
|
2479
|
-
"from",
|
|
2480
|
-
"return",
|
|
2481
|
-
"async",
|
|
2482
|
-
"await",
|
|
2483
|
-
"new",
|
|
2484
|
-
"this",
|
|
2485
|
-
"true",
|
|
2486
|
-
"false",
|
|
2487
|
-
"null",
|
|
2488
|
-
"undefined",
|
|
2489
|
-
"if",
|
|
2490
|
-
"else",
|
|
2491
|
-
"for",
|
|
2492
|
-
"while",
|
|
2493
|
-
"switch",
|
|
2494
|
-
"case",
|
|
2495
|
-
"break",
|
|
2496
|
-
"continue",
|
|
2497
|
-
"try",
|
|
2498
|
-
"catch",
|
|
2499
|
-
"finally",
|
|
2500
|
-
"throw",
|
|
2501
|
-
"typeof",
|
|
2502
|
-
"instanceof",
|
|
2503
|
-
"void",
|
|
2504
|
-
"delete",
|
|
2505
|
-
"in",
|
|
2506
|
-
"of",
|
|
2507
|
-
"string",
|
|
2508
|
-
"number",
|
|
2509
|
-
"boolean",
|
|
2510
|
-
"any",
|
|
2511
|
-
"unknown",
|
|
2512
|
-
"never",
|
|
2513
|
-
"object",
|
|
2514
|
-
"public",
|
|
2515
|
-
"private",
|
|
2516
|
-
"protected",
|
|
2517
|
-
"static",
|
|
2518
|
-
"readonly",
|
|
2519
|
-
"abstract",
|
|
2520
|
-
"implements",
|
|
2521
|
-
"extends",
|
|
2522
|
-
"super",
|
|
2523
|
-
"get",
|
|
2524
|
-
"set",
|
|
2525
|
-
"constructor",
|
|
2526
|
-
"the",
|
|
2527
|
-
"and",
|
|
2528
|
-
"for",
|
|
2529
|
-
"not",
|
|
2530
|
-
"with",
|
|
2531
|
-
"are",
|
|
2532
|
-
"was",
|
|
2533
|
-
"has",
|
|
2534
|
-
"have"
|
|
2535
|
-
]);
|
|
2536
|
-
LAYER_PATTERNS2 = {
|
|
2537
|
-
controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
|
|
2538
|
-
service: ["service", "services", "usecase", "usecases", "application"],
|
|
2539
|
-
repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
|
|
2540
|
-
model: ["model", "models", "entity", "entities", "schema", "schemas"],
|
|
2541
|
-
util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
|
|
2542
|
-
config: ["config", "configs", "configuration", "settings"],
|
|
2543
|
-
middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
|
|
2544
|
-
domain: ["domain", "core", "business"],
|
|
2545
|
-
infrastructure: ["infrastructure", "infra", "external", "adapters"],
|
|
2546
|
-
presentation: ["presentation", "view", "views", "component", "components", "ui"],
|
|
2547
|
-
test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
|
|
2548
|
-
};
|
|
2549
|
-
});
|
|
2550
|
-
|
|
2551
|
-
// src/infrastructure/storage/symbolicIndex.ts
|
|
2552
|
-
import * as fs3 from "fs/promises";
|
|
2553
|
-
import * as path7 from "path";
|
|
2554
|
-
|
|
2555
|
-
class SymbolicIndex {
|
|
2556
|
-
meta = null;
|
|
2557
|
-
fileSummaries = new Map;
|
|
2558
|
-
bm25Index = null;
|
|
2559
|
-
symbolicPath;
|
|
2560
|
-
moduleId;
|
|
2561
|
-
constructor(indexDir, moduleId) {
|
|
2562
|
-
this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
|
|
2563
|
-
this.moduleId = moduleId;
|
|
2564
|
-
}
|
|
2565
|
-
async initialize() {
|
|
2566
|
-
try {
|
|
2567
|
-
await this.load();
|
|
2568
|
-
} catch {
|
|
2569
|
-
this.meta = {
|
|
2570
|
-
version: "1.0.0",
|
|
2571
|
-
lastUpdated: new Date().toISOString(),
|
|
2572
|
-
moduleId: this.moduleId,
|
|
2573
|
-
fileCount: 0,
|
|
2574
|
-
bm25Data: {
|
|
2575
|
-
avgDocLength: 0,
|
|
2576
|
-
documentFrequencies: {},
|
|
2577
|
-
totalDocs: 0
|
|
2578
|
-
}
|
|
2579
|
-
};
|
|
2580
|
-
this.bm25Index = new BM25Index;
|
|
2581
|
-
}
|
|
2582
|
-
}
|
|
2583
|
-
addFile(summary) {
|
|
2584
|
-
this.fileSummaries.set(summary.filepath, summary);
|
|
2585
|
-
}
|
|
2586
|
-
removeFile(filepath) {
|
|
2587
|
-
return this.fileSummaries.delete(filepath);
|
|
2588
|
-
}
|
|
2589
|
-
buildBM25Index() {
|
|
2590
|
-
this.bm25Index = new BM25Index;
|
|
2591
|
-
for (const [filepath, summary] of this.fileSummaries) {
|
|
2592
|
-
const content = [
|
|
2593
|
-
...summary.keywords,
|
|
2594
|
-
...summary.exports,
|
|
2595
|
-
...extractPathKeywords(filepath)
|
|
2596
|
-
].join(" ");
|
|
2597
|
-
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
2598
|
-
}
|
|
2599
|
-
if (this.meta) {
|
|
2600
|
-
this.meta.fileCount = this.fileSummaries.size;
|
|
2601
|
-
this.meta.bm25Data.totalDocs = this.fileSummaries.size;
|
|
2602
|
-
}
|
|
2603
|
-
}
|
|
2604
|
-
findCandidates(query, maxCandidates = 20) {
|
|
2605
|
-
if (!this.bm25Index) {
|
|
2606
|
-
return Array.from(this.fileSummaries.keys());
|
|
2607
|
-
}
|
|
2608
|
-
const results = this.bm25Index.search(query, maxCandidates);
|
|
2609
|
-
return results.map((r) => r.id);
|
|
2610
|
-
}
|
|
2611
|
-
getAllFiles() {
|
|
2612
|
-
return Array.from(this.fileSummaries.keys());
|
|
2887
|
+
getAllFiles() {
|
|
2888
|
+
return Array.from(this.fileSummaries.keys());
|
|
2613
2889
|
}
|
|
2614
2890
|
getFileSummary(filepath) {
|
|
2615
2891
|
return this.fileSummaries.get(filepath);
|
|
@@ -2620,16 +2896,16 @@ class SymbolicIndex {
|
|
|
2620
2896
|
this.meta.lastUpdated = new Date().toISOString();
|
|
2621
2897
|
this.meta.fileCount = this.fileSummaries.size;
|
|
2622
2898
|
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
2623
|
-
const metaPath =
|
|
2899
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2624
2900
|
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
2625
2901
|
for (const [filepath, summary] of this.fileSummaries) {
|
|
2626
2902
|
const summaryPath = this.getFileSummaryPath(filepath);
|
|
2627
|
-
await fs3.mkdir(
|
|
2903
|
+
await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
|
|
2628
2904
|
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
2629
2905
|
}
|
|
2630
2906
|
}
|
|
2631
2907
|
async load() {
|
|
2632
|
-
const metaPath =
|
|
2908
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2633
2909
|
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
2634
2910
|
this.meta = JSON.parse(metaContent);
|
|
2635
2911
|
this.fileSummaries.clear();
|
|
@@ -2640,7 +2916,7 @@ class SymbolicIndex {
|
|
|
2640
2916
|
try {
|
|
2641
2917
|
const entries = await fs3.readdir(dir, { withFileTypes: true });
|
|
2642
2918
|
for (const entry of entries) {
|
|
2643
|
-
const fullPath =
|
|
2919
|
+
const fullPath = path8.join(dir, entry.name);
|
|
2644
2920
|
if (entry.isDirectory()) {
|
|
2645
2921
|
await this.loadFileSummariesRecursive(fullPath);
|
|
2646
2922
|
} else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
|
|
@@ -2657,7 +2933,7 @@ class SymbolicIndex {
|
|
|
2657
2933
|
}
|
|
2658
2934
|
getFileSummaryPath(filepath) {
|
|
2659
2935
|
const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
|
|
2660
|
-
return
|
|
2936
|
+
return path8.join(this.symbolicPath, jsonPath);
|
|
2661
2937
|
}
|
|
2662
2938
|
async deleteFileSummary(filepath) {
|
|
2663
2939
|
try {
|
|
@@ -2667,7 +2943,7 @@ class SymbolicIndex {
|
|
|
2667
2943
|
}
|
|
2668
2944
|
async exists() {
|
|
2669
2945
|
try {
|
|
2670
|
-
const metaPath =
|
|
2946
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2671
2947
|
await fs3.access(metaPath);
|
|
2672
2948
|
return true;
|
|
2673
2949
|
} catch {
|
|
@@ -2703,40 +2979,16 @@ var init_storage = __esm(() => {
|
|
|
2703
2979
|
// src/modules/language/typescript/index.ts
|
|
2704
2980
|
var exports_typescript = {};
|
|
2705
2981
|
__export(exports_typescript, {
|
|
2982
|
+
isTypeScriptFile: () => isTypeScriptFile,
|
|
2706
2983
|
TypeScriptModule: () => TypeScriptModule,
|
|
2984
|
+
TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
|
|
2707
2985
|
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
2708
2986
|
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
2709
2987
|
});
|
|
2710
|
-
import * as
|
|
2711
|
-
function
|
|
2712
|
-
const
|
|
2713
|
-
|
|
2714
|
-
if (hasDocumentationTerm) {
|
|
2715
|
-
return "documentation";
|
|
2716
|
-
}
|
|
2717
|
-
if (hasImplementationTerm) {
|
|
2718
|
-
return "implementation";
|
|
2719
|
-
}
|
|
2720
|
-
return "neutral";
|
|
2721
|
-
}
|
|
2722
|
-
function calculateFileTypeBoost(filepath, queryTerms) {
|
|
2723
|
-
const ext = path8.extname(filepath).toLowerCase();
|
|
2724
|
-
const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
|
|
2725
|
-
const isDoc = DOC_EXTENSIONS.includes(ext);
|
|
2726
|
-
const intent = detectQueryIntent(queryTerms);
|
|
2727
|
-
if (intent === "implementation") {
|
|
2728
|
-
if (isSourceCode) {
|
|
2729
|
-
return 0.06;
|
|
2730
|
-
}
|
|
2731
|
-
return 0;
|
|
2732
|
-
}
|
|
2733
|
-
if (intent === "documentation") {
|
|
2734
|
-
if (isDoc) {
|
|
2735
|
-
return 0.08;
|
|
2736
|
-
}
|
|
2737
|
-
return 0;
|
|
2738
|
-
}
|
|
2739
|
-
return 0;
|
|
2988
|
+
import * as path9 from "path";
|
|
2989
|
+
/**
 * Tells whether a path belongs to the TypeScript module.
 *
 * The check looks only at the lower-cased extension of `filepath` and
 * whether it is listed in TYPESCRIPT_EXTENSIONS.
 *
 * @param {string} filepath - Path to classify.
 * @returns {boolean} True when the extension is listed.
 */
function isTypeScriptFile(filepath) {
  const normalizedExt = path9.extname(filepath).toLowerCase();
  const isSupported = TYPESCRIPT_EXTENSIONS.includes(normalizedExt);
  return isSupported;
}
|
|
2741
2993
|
function calculateChunkTypeBoost(chunk) {
|
|
2742
2994
|
switch (chunk.type) {
|
|
@@ -2756,85 +3008,659 @@ function calculateChunkTypeBoost(chunk) {
|
|
|
2756
3008
|
return 0;
|
|
2757
3009
|
}
|
|
2758
3010
|
}
|
|
2759
|
-
function calculateExportBoost(chunk) {
|
|
2760
|
-
return chunk.isExported ? 0.03 : 0;
|
|
3011
|
+
/**
 * Additive score bonus for exported chunks.
 *
 * @param {{ isExported?: boolean }} chunk - Chunk whose `isExported` flag is read.
 * @returns {number} 0.03 when `chunk.isExported` is truthy, otherwise 0.
 */
function calculateExportBoost(chunk) {
  if (chunk.isExported) {
    return 0.03;
  }
  return 0;
}
|
|
3014
|
+
|
|
3015
|
+
/**
 * Index/search module registered under the id "language/typescript".
 *
 * - `indexFile` parses a file into chunks, embeds them and queues a file summary.
 * - `finalize` writes the queued summaries through a SymbolicIndex.
 * - `search` ranks stored chunks by a weighted blend of embedding similarity
 *   and BM25, plus additive path, file-type, chunk-type and export boosts.
 */
class TypeScriptModule {
  id = "language/typescript";
  name = "TypeScript Search";
  description = "TypeScript-aware code search with AST parsing and semantic embeddings";
  version = "1.0.0";
  embeddingConfig = null;
  symbolicIndex = null;
  pendingSummaries = new Map;
  rootDir = "";
  logger = undefined;

  /**
   * Derives the embedding configuration from `config`, attaches
   * `config.options.logger` to it when one is supplied, applies it via
   * `configureEmbeddings`, and discards any queued summaries.
   *
   * @param {object} config - Module configuration.
   */
  async initialize(config) {
    const baseConfig = getEmbeddingConfigFromModule(config);
    const logger = config.options?.logger;
    this.logger = logger;
    this.embeddingConfig = logger ? { ...baseConfig, logger } : baseConfig;
    configureEmbeddings(this.embeddingConfig);
    this.pendingSummaries.clear();
  }

  /**
   * Indexes one file.
   *
   * @param {string} filepath - Path of the file being indexed.
   * @param {string} content - File contents.
   * @param {object} ctx - Indexing context; `rootDir` and `getFileStats` are used.
   * @returns {Promise<object|null>} The file index entry, or null when the
   *   extension is not handled or parsing produced no chunks.
   */
  async indexFile(filepath, content, ctx) {
    if (!isTypeScriptFile(filepath)) {
      return null;
    }
    this.rootDir = ctx.rootDir;

    const parsedChunks = parseTypeScriptCode(content, filepath);
    if (parsedChunks.length === 0) {
      return null;
    }

    // Each embedding input is prefixed with the formatted path context and,
    // when the chunk is named, with "<name>: ".
    const pathContext = parsePathContext(filepath);
    const pathPrefix = formatPathContextForEmbedding(pathContext);
    const embeddingInputs = parsedChunks.map((parsed) => {
      const label = parsed.name ? `${parsed.name}: ` : "";
      return `${pathPrefix} ${label}${parsed.content}`;
    });
    const embeddings = await getEmbeddings(embeddingInputs);

    const chunks = parsedChunks.map((parsed) => {
      return {
        id: generateChunkId2(filepath, parsed.startLine, parsed.endLine),
        content: parsed.content,
        startLine: parsed.startLine,
        endLine: parsed.endLine,
        type: parsed.type,
        name: parsed.name,
        isExported: parsed.isExported,
        jsDoc: parsed.jsDoc
      };
    });

    const references = this.extractReferences(content, filepath);
    const stats = await ctx.getFileStats(filepath);
    const activeConfig = getEmbeddingConfig();
    const moduleData = {
      embeddings,
      embeddingModel: activeConfig.model
    };

    const chunkTypes = Array.from(new Set(parsedChunks.map((parsed) => parsed.type)));
    const exportedNames = parsedChunks
      .filter((parsed) => parsed.isExported && parsed.name)
      .map((parsed) => parsed.name);

    // Keywords come from every chunk first, then from the path context.
    const keywordSet = new Set;
    for (const parsed of parsedChunks) {
      extractKeywords(parsed.content, parsed.name).forEach((word) => keywordSet.add(word));
    }
    pathContext.keywords.forEach((word) => keywordSet.add(word));

    this.pendingSummaries.set(filepath, {
      filepath,
      chunkCount: chunks.length,
      chunkTypes,
      keywords: Array.from(keywordSet),
      exports: exportedNames,
      lastModified: stats.lastModified,
      pathContext: {
        segments: pathContext.segments,
        layer: pathContext.layer,
        domain: pathContext.domain,
        depth: pathContext.depth
      }
    });

    return {
      filepath,
      lastModified: stats.lastModified,
      chunks,
      moduleData,
      references
    };
  }

  /**
   * Adds every queued summary to a SymbolicIndex for this module, rebuilds its
   * BM25 index, saves it, and empties the queue.
   *
   * @param {object} ctx - Context providing `rootDir` and `config`.
   */
  async finalize(ctx) {
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
    await this.symbolicIndex.initialize();
    for (const summary of this.pendingSummaries.values()) {
      this.symbolicIndex.addFile(summary);
    }
    this.symbolicIndex.buildBM25Index();
    await this.symbolicIndex.save();
    this.pendingSummaries.clear();
  }

  /**
   * Ranks indexed chunks against `query`.
   *
   * A chunk is kept when its hybrid score reaches `minScore` or its
   * normalized BM25 score exceeds 0.3. Results are sorted by descending score
   * and cut to `topK`.
   *
   * @param {string} query - Search text.
   * @param {object} ctx - Search context (`rootDir`, `config`, `loadFileIndex`, `listIndexedFiles`).
   * @param {object} [options] - Optional `topK`, `minScore`, `filePatterns`.
   * @returns {Promise<object[]>} Ranked results.
   */
  async search(query, ctx, options = {}) {
    const {
      topK = DEFAULT_TOP_K2,
      minScore = DEFAULT_MIN_SCORE2,
      filePatterns
    } = options;

    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    const symbolicIndex = new SymbolicIndex(indexDir, this.id);
    let allFiles;
    try {
      await symbolicIndex.initialize();
      allFiles = symbolicIndex.getAllFiles();
    } catch {
      allFiles = await ctx.listIndexedFiles();
    }

    // "*.ext" patterns match by suffix; anything else matches as a substring.
    let filesToSearch = allFiles;
    if (filePatterns && filePatterns.length > 0) {
      const matchesPattern = (candidate, pattern) => {
        return pattern.startsWith("*.") ? candidate.endsWith(pattern.slice(1)) : candidate.includes(pattern);
      };
      filesToSearch = allFiles.filter((candidate) => filePatterns.some((pattern) => matchesPattern(candidate, pattern)));
    }

    const queryEmbedding = await getEmbedding(query);
    const bm25Index = new BM25Index;
    const allChunksData = [];
    for (const filepath of filesToSearch) {
      const fileIndex = await ctx.loadFileIndex(filepath);
      const storedEmbeddings = fileIndex?.moduleData?.embeddings;
      if (!storedEmbeddings) {
        continue;
      }
      for (const [position, chunk] of fileIndex.chunks.entries()) {
        const embedding = storedEmbeddings[position];
        if (!embedding) {
          continue;
        }
        allChunksData.push({ filepath: fileIndex.filepath, chunk, embedding });
        bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
      }
    }

    const bm25Scores = new Map;
    for (const hit of bm25Index.search(query, topK * 3)) {
      bm25Scores.set(hit.id, normalizeScore(hit.score, 3));
    }

    // Path boosts: +0.1 for a domain overlap, +0.05 for a layer overlap,
    // +0.05 when any path segment overlaps a query term.
    const queryTerms = extractQueryTerms(query);
    const overlapsQuery = (text) => queryTerms.some((term) => text.includes(term) || term.includes(text));
    const pathBoosts = new Map;
    for (const filepath of filesToSearch) {
      const pathInfo = symbolicIndex.getFileSummary(filepath)?.pathContext;
      if (!pathInfo) {
        continue;
      }
      let boost = 0;
      if (pathInfo.domain && overlapsQuery(pathInfo.domain)) {
        boost += 0.1;
      }
      if (pathInfo.layer && overlapsQuery(pathInfo.layer)) {
        boost += 0.05;
      }
      if (pathInfo.segments.some((segment) => overlapsQuery(segment.toLowerCase()))) {
        boost += 0.05;
      }
      pathBoosts.set(filepath, boost);
    }

    const results = [];
    for (const entry of allChunksData) {
      const { filepath, chunk, embedding } = entry;
      const semanticScore = cosineSimilarity(queryEmbedding, embedding);
      const bm25Score = bm25Scores.get(chunk.id) || 0;
      const pathBoost = pathBoosts.get(filepath) || 0;
      const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
      const chunkTypeBoost = calculateChunkTypeBoost(chunk);
      const exportBoost = calculateExportBoost(chunk);
      const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
      const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
      const qualifies = hybridScore >= minScore || bm25Score > 0.3;
      if (!qualifies) {
        continue;
      }
      results.push({
        filepath,
        chunk,
        score: hybridScore,
        moduleId: this.id,
        context: {
          semanticScore,
          bm25Score,
          pathBoost,
          fileTypeBoost,
          chunkTypeBoost,
          exportBoost
        }
      });
    }

    results.sort((left, right) => right.score - left.score);
    return results.slice(0, topK);
  }

  /**
   * Lists relative module specifiers found in `content`, resolved against the
   * directory of `filepath`. `import ... from "x"` matches come first, then
   * `require("x")` matches; specifiers not starting with "." are ignored.
   *
   * @param {string} content - Source text to scan.
   * @param {string} filepath - Path of the file the text came from.
   * @returns {string[]} Normalized paths, in match order.
   */
  extractReferences(content, filepath) {
    const specifierPatterns = [
      /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g,
      /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g
    ];
    const references = [];
    for (const pattern of specifierPatterns) {
      for (const found of content.matchAll(pattern)) {
        const specifier = found[1];
        if (!specifier.startsWith(".")) {
          continue;
        }
        const baseDir = path9.dirname(filepath);
        references.push(path9.normalize(path9.join(baseDir, specifier)));
      }
    }
    return references;
  }
}
|
|
3243
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS;
|
|
3244
|
+
var init_typescript = __esm(() => {
|
|
3245
|
+
init_embeddings();
|
|
3246
|
+
init_services();
|
|
3247
|
+
init_config2();
|
|
3248
|
+
init_parseCode();
|
|
3249
|
+
init_storage();
|
|
3250
|
+
TYPESCRIPT_EXTENSIONS = [
|
|
3251
|
+
".ts",
|
|
3252
|
+
".tsx",
|
|
3253
|
+
".js",
|
|
3254
|
+
".jsx",
|
|
3255
|
+
".mjs",
|
|
3256
|
+
".cjs",
|
|
3257
|
+
".mts",
|
|
3258
|
+
".cts"
|
|
3259
|
+
];
|
|
3260
|
+
});
|
|
3261
|
+
|
|
3262
|
+
// src/modules/data/json/index.ts
|
|
3263
|
+
var exports_json = {};
|
|
3264
|
+
__export(exports_json, {
|
|
3265
|
+
isJsonFile: () => isJsonFile,
|
|
3266
|
+
JsonModule: () => JsonModule,
|
|
3267
|
+
JSON_EXTENSIONS: () => JSON_EXTENSIONS,
|
|
3268
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
|
|
3269
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
|
|
3270
|
+
});
|
|
3271
|
+
import * as path10 from "path";
|
|
3272
|
+
/**
 * Tells whether a path belongs to the JSON module.
 *
 * The check looks only at the lower-cased extension of `filepath` and
 * whether it is listed in JSON_EXTENSIONS.
 *
 * @param {string} filepath - Path to classify.
 * @returns {boolean} True when the extension is listed.
 */
function isJsonFile(filepath) {
  const normalizedExt = path10.extname(filepath).toLowerCase();
  const isSupported = JSON_EXTENSIONS.includes(normalizedExt);
  return isSupported;
}
|
|
3276
|
+
/**
 * Collects every property name found anywhere inside a parsed JSON value.
 *
 * Names are returned in depth-first order: each key is followed by the keys
 * found beneath its value. Duplicates are kept. Array elements are visited
 * in order but contribute no names of their own. `null`, `undefined` and
 * primitives yield an empty list.
 *
 * Names are appended to a single accumulator rather than spread into
 * `push(...)`, so very large documents cannot exceed the engine's
 * call-argument limit.
 *
 * @param {*} obj - Parsed JSON value to walk.
 * @param {string} [prefix=""] - Kept for backward compatibility. The result
 *   contains bare key names only, so this value does not affect the output.
 * @returns {string[]} Property names in traversal order.
 */
function extractJsonKeys(obj, prefix = "") {
  const keys = [];
  const visit = (value) => {
    if (value === null || value === undefined) {
      return;
    }
    if (Array.isArray(value)) {
      for (const item of value) {
        visit(item);
      }
      return;
    }
    if (typeof value === "object") {
      for (const [key, child] of Object.entries(value)) {
        keys.push(key);
        visit(child);
      }
    }
  };
  visit(obj);
  return keys;
}
|
|
3294
|
+
/**
 * Builds a de-duplicated keyword list for a JSON document.
 *
 * The list holds every property name reported by `extractJsonKeys`, followed
 * by words taken from string values. String values are split at
 * lower-to-upper case boundaries, lower-cased, split on whitespace, and
 * words of two characters or fewer are dropped.
 *
 * @param {string} content - Raw JSON text.
 * @returns {string[]} Unique keywords in first-seen order, or an empty array
 *   when parsing or extraction throws.
 */
function extractJsonKeywords(content) {
  try {
    const parsed = JSON.parse(content);
    const keys = extractJsonKeys(parsed);
    const stringValues = [];
    const collectWords = (node) => {
      if (typeof node === "string") {
        const spaced = node.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
        for (const word of spaced.split(/\s+/)) {
          if (word.length > 2) {
            stringValues.push(word);
          }
        }
        return;
      }
      if (Array.isArray(node)) {
        for (const item of node) {
          collectWords(item);
        }
        return;
      }
      if (node && typeof node === "object") {
        for (const value of Object.values(node)) {
          collectWords(value);
        }
      }
    };
    collectWords(parsed);
    return Array.from(new Set(keys.concat(stringValues)));
  } catch {
    // Best effort: unparsable input simply has no keywords.
    return [];
  }
}
|
|
3315
|
+
|
|
3316
|
+
/**
 * Index/search module registered under the id "data/json".
 *
 * - `indexFile` cuts a JSON file into line-based chunks, embeds them and
 *   queues a file summary.
 * - `finalize` writes the queued summaries through a SymbolicIndex.
 * - `search` ranks stored chunks by a weighted blend of embedding similarity
 *   and BM25.
 */
class JsonModule {
  id = "data/json";
  name = "JSON Search";
  description = "JSON file search with structure-aware indexing";
  version = "1.0.0";
  embeddingConfig = null;
  symbolicIndex = null;
  pendingSummaries = new Map;
  rootDir = "";
  logger = undefined;

  /**
   * Derives the embedding configuration from `config`, attaches
   * `config.options.logger` to it when one is supplied, applies it via
   * `configureEmbeddings`, and discards any queued summaries.
   *
   * @param {object} config - Module configuration.
   */
  async initialize(config) {
    this.embeddingConfig = getEmbeddingConfigFromModule(config);
    this.logger = config.options?.logger;
    if (this.logger) {
      this.embeddingConfig = {
        ...this.embeddingConfig,
        logger: this.logger
      };
    }
    configureEmbeddings(this.embeddingConfig);
    this.pendingSummaries.clear();
  }

  /**
   * Indexes one JSON file.
   *
   * @param {string} filepath - Path of the file being indexed.
   * @param {string} content - File contents.
   * @param {object} ctx - Indexing context; `rootDir` and `getFileStats` are used.
   * @returns {Promise<object|null>} The file index entry, or null when the
   *   extension is not handled or chunking produced nothing.
   */
  async indexFile(filepath, content, ctx) {
    if (!isJsonFile(filepath)) {
      return null;
    }
    this.rootDir = ctx.rootDir;
    const textChunks = createLineBasedChunks(content, {
      chunkSize: 50,
      overlap: 10
    });
    if (textChunks.length === 0) {
      return null;
    }

    // The base name is the same for every chunk, so compute it once.
    const filename = path10.basename(filepath);
    const chunkContents = textChunks.map((c) => `${filename}: ${c.content}`);
    const embeddings = await getEmbeddings(chunkContents);
    const chunks = textChunks.map((tc) => ({
      id: generateChunkId(filepath, tc.startLine, tc.endLine),
      content: tc.content,
      startLine: tc.startLine,
      endLine: tc.endLine,
      type: tc.type
    }));

    // Unparsable JSON is still indexed as text; it just has no key list.
    let parsedJson;
    try {
      parsedJson = JSON.parse(content);
    } catch {
      parsedJson = {};
    }
    const jsonKeys = extractJsonKeys(parsedJson);

    const stats = await ctx.getFileStats(filepath);
    const currentConfig = getEmbeddingConfig();
    const moduleData = {
      embeddings,
      embeddingModel: currentConfig.model,
      jsonKeys
    };
    const keywords = extractJsonKeywords(content);
    const fileSummary = {
      filepath,
      chunkCount: chunks.length,
      chunkTypes: ["file"],
      keywords,
      exports: [],
      lastModified: stats.lastModified
    };
    this.pendingSummaries.set(filepath, fileSummary);
    return {
      filepath,
      lastModified: stats.lastModified,
      chunks,
      moduleData
    };
  }

  /**
   * Adds every queued summary to a SymbolicIndex for this module, rebuilds its
   * BM25 index, saves it, and empties the queue.
   *
   * @param {object} ctx - Context providing `rootDir` and `config`.
   */
  async finalize(ctx) {
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
    await this.symbolicIndex.initialize();
    for (const summary of this.pendingSummaries.values()) {
      this.symbolicIndex.addFile(summary);
    }
    this.symbolicIndex.buildBM25Index();
    await this.symbolicIndex.save();
    this.pendingSummaries.clear();
  }

  /**
   * Ranks indexed JSON chunks against `query`.
   *
   * Only files accepted by `isJsonFile` are considered. A chunk is kept when
   * its hybrid score reaches `minScore` or its normalized BM25 score exceeds
   * 0.3. Results are sorted by descending score and cut to `topK`.
   *
   * @param {string} query - Search text.
   * @param {object} ctx - Search context (`rootDir`, `config`, `loadFileIndex`, `listIndexedFiles`).
   * @param {object} [options] - Optional `topK`, `minScore`, `filePatterns`.
   * @returns {Promise<object[]>} Ranked results.
   */
  async search(query, ctx, options = {}) {
    const {
      topK = DEFAULT_TOP_K3,
      minScore = DEFAULT_MIN_SCORE3,
      filePatterns
    } = options;
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    const symbolicIndex = new SymbolicIndex(indexDir, this.id);
    let allFiles;
    try {
      await symbolicIndex.initialize();
      allFiles = symbolicIndex.getAllFiles();
    } catch {
      allFiles = await ctx.listIndexedFiles();
    }

    let filesToSearch = allFiles.filter((f) => isJsonFile(f));
    if (filePatterns && filePatterns.length > 0) {
      // "*.ext" patterns match by suffix; anything else matches as a substring.
      filesToSearch = filesToSearch.filter((filepath) => {
        return filePatterns.some((pattern) => {
          if (pattern.startsWith("*.")) {
            return filepath.endsWith(pattern.slice(1));
          }
          return filepath.includes(pattern);
        });
      });
    }

    const queryEmbedding = await getEmbedding(query);
    const bm25Index = new BM25Index;
    const allChunksData = [];
    for (const filepath of filesToSearch) {
      const fileIndex = await ctx.loadFileIndex(filepath);
      if (!fileIndex) {
        continue;
      }
      const moduleData = fileIndex.moduleData;
      if (!moduleData?.embeddings) {
        continue;
      }
      for (let i = 0; i < fileIndex.chunks.length; i++) {
        const chunk = fileIndex.chunks[i];
        const embedding = moduleData.embeddings[i];
        if (!embedding) {
          continue;
        }
        allChunksData.push({
          filepath: fileIndex.filepath,
          chunk,
          embedding
        });
        bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
      }
    }

    const bm25Results = bm25Index.search(query, topK * 3);
    const bm25Scores = new Map;
    for (const result of bm25Results) {
      bm25Scores.set(result.id, normalizeScore(result.score, 3));
    }

    const results = [];
    for (const { filepath, chunk, embedding } of allChunksData) {
      const semanticScore = cosineSimilarity(queryEmbedding, embedding);
      const bm25Score = bm25Scores.get(chunk.id) || 0;
      const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
      if (hybridScore >= minScore || bm25Score > 0.3) {
        results.push({
          filepath,
          chunk,
          score: hybridScore,
          moduleId: this.id,
          context: {
            semanticScore,
            bm25Score
          }
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }
}
|
|
3482
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS;
|
|
3483
|
+
var init_json = __esm(() => {
|
|
3484
|
+
init_embeddings();
|
|
3485
|
+
init_services();
|
|
3486
|
+
init_config2();
|
|
3487
|
+
init_storage();
|
|
3488
|
+
JSON_EXTENSIONS = [".json"];
|
|
3489
|
+
});
|
|
3490
|
+
|
|
3491
|
+
// src/modules/docs/markdown/index.ts
|
|
3492
|
+
var exports_markdown = {};
|
|
3493
|
+
__export(exports_markdown, {
|
|
3494
|
+
isMarkdownFile: () => isMarkdownFile,
|
|
3495
|
+
MarkdownModule: () => MarkdownModule,
|
|
3496
|
+
MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
|
|
3497
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
|
|
3498
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
|
|
3499
|
+
});
|
|
3500
|
+
import * as path11 from "path";
|
|
3501
|
+
/**
 * Tells whether a path belongs to the Markdown module.
 *
 * The check looks only at the lower-cased extension of `filepath` and
 * whether it is listed in MARKDOWN_EXTENSIONS.
 *
 * @param {string} filepath - Path to classify.
 * @returns {boolean} True when the extension is listed.
 */
function isMarkdownFile(filepath) {
  const normalizedExt = path11.extname(filepath).toLowerCase();
  const isSupported = MARKDOWN_EXTENSIONS.includes(normalizedExt);
  return isSupported;
}
|
|
3505
|
+
/**
 * Splits a Markdown document into heading-delimited sections.
 *
 * Every ATX heading (`#` to `######` followed by whitespace and text) starts
 * a new section that runs up to the line before the next heading. Text
 * before the first heading becomes a section with `heading: ""` and
 * `level: 0`, but only when it is not blank.
 *
 * Differences from a plain line-by-line heading scan:
 * - Lines are split on `\n` or `\r\n`, so headings are recognised in files
 *   with Windows line endings.
 * - Lines inside fenced code blocks (three or more backticks or tildes) are
 *   never treated as headings, so `# comment` lines in code samples stay in
 *   their section. An unclosed fence extends to the end of the document.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{heading: string, level: number, content: string, startLine: number, endLine: number}>}
 *   Sections in document order. `startLine` and `endLine` are 1-based and
 *   inclusive; `content` is trimmed and excludes the heading line.
 */
function parseMarkdownSections(content) {
  const lines = content.split(/\r?\n/);
  const sections = [];
  let currentSection = null;
  let currentContent = [];
  // Fence that is currently open, if any: { marker: "`" | "~", length: number }.
  let openFence = null;

  // Closes whatever is being accumulated, ending on 1-based line `endLine`.
  const flush = (endLine) => {
    const body = currentContent.join("\n").trim();
    if (currentSection) {
      currentSection.content = body;
      currentSection.endLine = endLine;
      if (body || currentSection.heading) {
        sections.push(currentSection);
      }
    } else if (body) {
      // Preamble before the first heading; a blank preamble is dropped.
      sections.push({
        heading: "",
        level: 0,
        content: body,
        startLine: 1,
        endLine
      });
    }
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);

    if (openFence) {
      // A closing fence uses the same marker, is at least as long as the
      // opening one, and is followed only by whitespace.
      if (fenceMatch && fenceMatch[1][0] === openFence.marker && fenceMatch[1].length >= openFence.length && fenceMatch[2].trim() === "") {
        openFence = null;
      }
      currentContent.push(line);
      continue;
    }

    // A backtick fence's info string may not itself contain backticks.
    if (fenceMatch && !(fenceMatch[1][0] === "`" && fenceMatch[2].includes("`"))) {
      openFence = { marker: fenceMatch[1][0], length: fenceMatch[1].length };
      currentContent.push(line);
      continue;
    }

    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (!headingMatch) {
      currentContent.push(line);
      continue;
    }

    // `i` is the 0-based index of the heading line, i.e. the 1-based number
    // of the line just before it.
    flush(i);
    currentSection = {
      heading: headingMatch[2],
      level: headingMatch[1].length,
      content: "",
      startLine: i + 1,
      endLine: lines.length
    };
    currentContent = [];
  }

  flush(lines.length);
  return sections;
}
|
|
3564
|
+
/**
 * Gathers lower-cased keywords from the marked-up parts of a Markdown text.
 *
 * Sources are read in this order, and the first occurrence of a keyword
 * fixes its position in the result:
 *   1. heading text, word by word;
 *   2. `**strong**` and `*emphasised*` text, word by word;
 *   3. inline code spans, kept whole when 3 to 49 characters long;
 *   4. text inside square brackets, word by word.
 * Words are whitespace-separated and must be longer than two characters.
 *
 * @param {string} content - Markdown text.
 * @returns {string[]} Unique keywords in first-seen order.
 */
function extractMarkdownKeywords(content) {
  const seen = new Set();
  const addWords = (text) => {
    for (const word of text.toLowerCase().split(/\s+/)) {
      if (word.length > 2) {
        seen.add(word);
      }
    }
  };

  for (const [, headingText] of content.matchAll(/^#{1,6}\s+(.+)$/gm)) {
    addWords(headingText);
  }
  for (const [, strongText, emphasisText] of content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g)) {
    addWords(strongText || emphasisText || "");
  }
  for (const [, codeText] of content.matchAll(/`([^`]+)`/g)) {
    const lowered = codeText.toLowerCase();
    if (lowered.length > 2 && lowered.length < 50) {
      seen.add(lowered);
    }
  }
  for (const [, bracketText] of content.matchAll(/\[([^\]]+)\]/g)) {
    addWords(bracketText);
  }
  return Array.from(seen);
}
|
|
2762
3593
|
|
|
2763
|
-
class
|
|
2764
|
-
id = "
|
|
2765
|
-
name = "
|
|
2766
|
-
description = "
|
|
3594
|
+
class MarkdownModule {
|
|
3595
|
+
id = "docs/markdown";
|
|
3596
|
+
name = "Markdown Search";
|
|
3597
|
+
description = "Markdown documentation search with section-aware indexing";
|
|
2767
3598
|
version = "1.0.0";
|
|
2768
3599
|
embeddingConfig = null;
|
|
2769
3600
|
symbolicIndex = null;
|
|
2770
3601
|
pendingSummaries = new Map;
|
|
2771
3602
|
rootDir = "";
|
|
3603
|
+
logger = undefined;
|
|
2772
3604
|
async initialize(config) {
|
|
2773
3605
|
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3606
|
+
this.logger = config.options?.logger;
|
|
3607
|
+
if (this.logger) {
|
|
3608
|
+
this.embeddingConfig = {
|
|
3609
|
+
...this.embeddingConfig,
|
|
3610
|
+
logger: this.logger
|
|
3611
|
+
};
|
|
3612
|
+
}
|
|
2774
3613
|
configureEmbeddings(this.embeddingConfig);
|
|
2775
3614
|
this.pendingSummaries.clear();
|
|
2776
3615
|
}
|
|
2777
3616
|
async indexFile(filepath, content, ctx) {
|
|
3617
|
+
if (!isMarkdownFile(filepath)) {
|
|
3618
|
+
return null;
|
|
3619
|
+
}
|
|
2778
3620
|
this.rootDir = ctx.rootDir;
|
|
2779
|
-
const
|
|
2780
|
-
if (
|
|
3621
|
+
const sections = parseMarkdownSections(content);
|
|
3622
|
+
if (sections.length === 0) {
|
|
2781
3623
|
return null;
|
|
2782
3624
|
}
|
|
2783
|
-
const
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3625
|
+
const chunkContents = sections.map((s) => {
|
|
3626
|
+
const filename = path11.basename(filepath);
|
|
3627
|
+
const headingContext = s.heading ? `${s.heading}: ` : "";
|
|
3628
|
+
return `${filename} ${headingContext}${s.content}`;
|
|
2788
3629
|
});
|
|
2789
3630
|
const embeddings = await getEmbeddings(chunkContents);
|
|
2790
|
-
const chunks =
|
|
2791
|
-
id: generateChunkId(filepath,
|
|
2792
|
-
content:
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
3631
|
+
const chunks = sections.map((section, i) => ({
|
|
3632
|
+
id: generateChunkId(filepath, section.startLine, section.endLine),
|
|
3633
|
+
content: section.heading ? `## ${section.heading}
|
|
3634
|
+
|
|
3635
|
+
${section.content}` : section.content,
|
|
3636
|
+
startLine: section.startLine,
|
|
3637
|
+
endLine: section.endLine,
|
|
3638
|
+
type: "block",
|
|
3639
|
+
name: section.heading || undefined
|
|
2799
3640
|
}));
|
|
2800
|
-
const
|
|
3641
|
+
const headings = sections.filter((s) => s.heading).map((s) => s.heading);
|
|
2801
3642
|
const stats = await ctx.getFileStats(filepath);
|
|
2802
3643
|
const currentConfig = getEmbeddingConfig();
|
|
2803
3644
|
const moduleData = {
|
|
2804
3645
|
embeddings,
|
|
2805
|
-
embeddingModel: currentConfig.model
|
|
3646
|
+
embeddingModel: currentConfig.model,
|
|
3647
|
+
headings
|
|
2806
3648
|
};
|
|
2807
|
-
const
|
|
2808
|
-
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2809
|
-
];
|
|
2810
|
-
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2811
|
-
const allKeywords = new Set;
|
|
2812
|
-
for (const pc of parsedChunks) {
|
|
2813
|
-
const keywords = extractKeywords(pc.content, pc.name);
|
|
2814
|
-
keywords.forEach((k) => allKeywords.add(k));
|
|
2815
|
-
}
|
|
2816
|
-
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3649
|
+
const keywords = extractMarkdownKeywords(content);
|
|
2817
3650
|
const fileSummary = {
|
|
2818
3651
|
filepath,
|
|
2819
3652
|
chunkCount: chunks.length,
|
|
2820
|
-
chunkTypes,
|
|
2821
|
-
keywords
|
|
2822
|
-
exports,
|
|
2823
|
-
lastModified: stats.lastModified
|
|
2824
|
-
pathContext: {
|
|
2825
|
-
segments: pathContext.segments,
|
|
2826
|
-
layer: pathContext.layer,
|
|
2827
|
-
domain: pathContext.domain,
|
|
2828
|
-
depth: pathContext.depth
|
|
2829
|
-
}
|
|
3653
|
+
chunkTypes: ["block"],
|
|
3654
|
+
keywords,
|
|
3655
|
+
exports: headings,
|
|
3656
|
+
lastModified: stats.lastModified
|
|
2830
3657
|
};
|
|
2831
3658
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
2832
3659
|
return {
|
|
2833
3660
|
filepath,
|
|
2834
3661
|
lastModified: stats.lastModified,
|
|
2835
3662
|
chunks,
|
|
2836
|
-
moduleData
|
|
2837
|
-
references
|
|
3663
|
+
moduleData
|
|
2838
3664
|
};
|
|
2839
3665
|
}
|
|
2840
3666
|
async finalize(ctx) {
|
|
@@ -2850,8 +3676,8 @@ class TypeScriptModule {
|
|
|
2850
3676
|
}
|
|
2851
3677
|
async search(query, ctx, options = {}) {
|
|
2852
3678
|
const {
|
|
2853
|
-
topK =
|
|
2854
|
-
minScore =
|
|
3679
|
+
topK = DEFAULT_TOP_K4,
|
|
3680
|
+
minScore = DEFAULT_MIN_SCORE4,
|
|
2855
3681
|
filePatterns
|
|
2856
3682
|
} = options;
|
|
2857
3683
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
@@ -2863,9 +3689,9 @@ class TypeScriptModule {
|
|
|
2863
3689
|
} catch {
|
|
2864
3690
|
allFiles = await ctx.listIndexedFiles();
|
|
2865
3691
|
}
|
|
2866
|
-
let filesToSearch = allFiles;
|
|
3692
|
+
let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
|
|
2867
3693
|
if (filePatterns && filePatterns.length > 0) {
|
|
2868
|
-
filesToSearch =
|
|
3694
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
2869
3695
|
return filePatterns.some((pattern) => {
|
|
2870
3696
|
if (pattern.startsWith("*.")) {
|
|
2871
3697
|
const ext = pattern.slice(1);
|
|
@@ -2903,36 +3729,24 @@ class TypeScriptModule {
|
|
|
2903
3729
|
for (const result of bm25Results) {
|
|
2904
3730
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
2905
3731
|
}
|
|
2906
|
-
const queryTerms = query
|
|
2907
|
-
const pathBoosts = new Map;
|
|
2908
|
-
for (const filepath of filesToSearch) {
|
|
2909
|
-
const summary = symbolicIndex.getFileSummary(filepath);
|
|
2910
|
-
if (summary?.pathContext) {
|
|
2911
|
-
let boost = 0;
|
|
2912
|
-
const ctx2 = summary.pathContext;
|
|
2913
|
-
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
2914
|
-
boost += 0.1;
|
|
2915
|
-
}
|
|
2916
|
-
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
2917
|
-
boost += 0.05;
|
|
2918
|
-
}
|
|
2919
|
-
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
2920
|
-
if (segmentMatch) {
|
|
2921
|
-
boost += 0.05;
|
|
2922
|
-
}
|
|
2923
|
-
pathBoosts.set(filepath, boost);
|
|
2924
|
-
}
|
|
2925
|
-
}
|
|
3732
|
+
const queryTerms = extractQueryTerms(query);
|
|
2926
3733
|
const results = [];
|
|
2927
3734
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
2928
3735
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
2929
3736
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
2930
|
-
|
|
2931
|
-
|
|
2932
|
-
|
|
2933
|
-
|
|
2934
|
-
|
|
2935
|
-
|
|
3737
|
+
let docBoost = 0;
|
|
3738
|
+
if (queryTerms.some((t) => [
|
|
3739
|
+
"docs",
|
|
3740
|
+
"documentation",
|
|
3741
|
+
"readme",
|
|
3742
|
+
"guide",
|
|
3743
|
+
"how",
|
|
3744
|
+
"what",
|
|
3745
|
+
"explain"
|
|
3746
|
+
].includes(t))) {
|
|
3747
|
+
docBoost = 0.05;
|
|
3748
|
+
}
|
|
3749
|
+
const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
2936
3750
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
2937
3751
|
results.push({
|
|
2938
3752
|
filepath,
|
|
@@ -2942,10 +3756,7 @@ class TypeScriptModule {
|
|
|
2942
3756
|
context: {
|
|
2943
3757
|
semanticScore,
|
|
2944
3758
|
bm25Score,
|
|
2945
|
-
|
|
2946
|
-
fileTypeBoost,
|
|
2947
|
-
chunkTypeBoost,
|
|
2948
|
-
exportBoost
|
|
3759
|
+
docBoost
|
|
2949
3760
|
}
|
|
2950
3761
|
});
|
|
2951
3762
|
}
|
|
@@ -2953,84 +3764,14 @@ class TypeScriptModule {
|
|
|
2953
3764
|
results.sort((a, b) => b.score - a.score);
|
|
2954
3765
|
return results.slice(0, topK);
|
|
2955
3766
|
}
|
|
2956
|
-
extractReferences(content, filepath) {
|
|
2957
|
-
const references = [];
|
|
2958
|
-
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
2959
|
-
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
2960
|
-
let match;
|
|
2961
|
-
while ((match = importRegex.exec(content)) !== null) {
|
|
2962
|
-
const importPath = match[1];
|
|
2963
|
-
if (importPath.startsWith(".")) {
|
|
2964
|
-
const dir = path8.dirname(filepath);
|
|
2965
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
2966
|
-
references.push(resolved);
|
|
2967
|
-
}
|
|
2968
|
-
}
|
|
2969
|
-
while ((match = requireRegex.exec(content)) !== null) {
|
|
2970
|
-
const importPath = match[1];
|
|
2971
|
-
if (importPath.startsWith(".")) {
|
|
2972
|
-
const dir = path8.dirname(filepath);
|
|
2973
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
2974
|
-
references.push(resolved);
|
|
2975
|
-
}
|
|
2976
|
-
}
|
|
2977
|
-
return references;
|
|
2978
|
-
}
|
|
2979
3767
|
}
|
|
2980
|
-
var
|
|
2981
|
-
var
|
|
3768
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
|
|
3769
|
+
var init_markdown = __esm(() => {
|
|
2982
3770
|
init_embeddings();
|
|
3771
|
+
init_services();
|
|
2983
3772
|
init_config2();
|
|
2984
|
-
init_parseCode();
|
|
2985
3773
|
init_storage();
|
|
2986
|
-
|
|
2987
|
-
init_keywords();
|
|
2988
|
-
IMPLEMENTATION_TERMS = [
|
|
2989
|
-
"function",
|
|
2990
|
-
"method",
|
|
2991
|
-
"class",
|
|
2992
|
-
"interface",
|
|
2993
|
-
"implement",
|
|
2994
|
-
"implementation",
|
|
2995
|
-
"endpoint",
|
|
2996
|
-
"route",
|
|
2997
|
-
"handler",
|
|
2998
|
-
"controller",
|
|
2999
|
-
"module",
|
|
3000
|
-
"code"
|
|
3001
|
-
];
|
|
3002
|
-
DOCUMENTATION_TERMS = [
|
|
3003
|
-
"documentation",
|
|
3004
|
-
"docs",
|
|
3005
|
-
"guide",
|
|
3006
|
-
"tutorial",
|
|
3007
|
-
"readme",
|
|
3008
|
-
"how",
|
|
3009
|
-
"what",
|
|
3010
|
-
"why",
|
|
3011
|
-
"explain",
|
|
3012
|
-
"overview",
|
|
3013
|
-
"getting",
|
|
3014
|
-
"started",
|
|
3015
|
-
"requirements",
|
|
3016
|
-
"setup",
|
|
3017
|
-
"install",
|
|
3018
|
-
"configure",
|
|
3019
|
-
"configuration"
|
|
3020
|
-
];
|
|
3021
|
-
SOURCE_CODE_EXTENSIONS = [
|
|
3022
|
-
".ts",
|
|
3023
|
-
".tsx",
|
|
3024
|
-
".js",
|
|
3025
|
-
".jsx",
|
|
3026
|
-
".mjs",
|
|
3027
|
-
".cjs",
|
|
3028
|
-
".py",
|
|
3029
|
-
".go",
|
|
3030
|
-
".rs",
|
|
3031
|
-
".java"
|
|
3032
|
-
];
|
|
3033
|
-
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
3774
|
+
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
3034
3775
|
});
|
|
3035
3776
|
|
|
3036
3777
|
// src/modules/registry.ts
|
|
@@ -3055,8 +3796,12 @@ class ModuleRegistryImpl {
|
|
|
3055
3796
|
async function registerBuiltInModules() {
|
|
3056
3797
|
const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
|
|
3057
3798
|
const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
|
|
3799
|
+
const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
|
|
3800
|
+
const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
|
|
3058
3801
|
registry.register(new CoreModule2);
|
|
3059
3802
|
registry.register(new TypeScriptModule2);
|
|
3803
|
+
registry.register(new JsonModule2);
|
|
3804
|
+
registry.register(new MarkdownModule2);
|
|
3060
3805
|
}
|
|
3061
3806
|
var registry;
|
|
3062
3807
|
var init_registry = __esm(() => {
|
|
@@ -3064,13 +3809,13 @@ var init_registry = __esm(() => {
|
|
|
3064
3809
|
});
|
|
3065
3810
|
|
|
3066
3811
|
// src/infrastructure/introspection/projectDetector.ts
|
|
3067
|
-
import * as
|
|
3812
|
+
import * as path12 from "path";
|
|
3068
3813
|
import * as fs4 from "fs/promises";
|
|
3069
3814
|
async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
3070
3815
|
if (depth > MAX_SCAN_DEPTH)
|
|
3071
3816
|
return [];
|
|
3072
3817
|
const results = [];
|
|
3073
|
-
const fullDir = currentDir ?
|
|
3818
|
+
const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
|
|
3074
3819
|
try {
|
|
3075
3820
|
const entries = await fs4.readdir(fullDir, { withFileTypes: true });
|
|
3076
3821
|
const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
|
|
@@ -3093,10 +3838,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3093
3838
|
}
|
|
3094
3839
|
async function parsePackageJson(rootDir, relativePath) {
|
|
3095
3840
|
try {
|
|
3096
|
-
const packageJsonPath =
|
|
3841
|
+
const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
|
|
3097
3842
|
const content = await fs4.readFile(packageJsonPath, "utf-8");
|
|
3098
3843
|
const pkg = JSON.parse(content);
|
|
3099
|
-
const name = pkg.name ||
|
|
3844
|
+
const name = pkg.name || path12.basename(relativePath);
|
|
3100
3845
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
3101
3846
|
let type = "unknown";
|
|
3102
3847
|
if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
|
|
@@ -3141,7 +3886,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3141
3886
|
for (const pattern of monorepoPatterns) {
|
|
3142
3887
|
if (!dirNames.includes(pattern))
|
|
3143
3888
|
continue;
|
|
3144
|
-
const patternDir =
|
|
3889
|
+
const patternDir = path12.join(rootDir, pattern);
|
|
3145
3890
|
try {
|
|
3146
3891
|
const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
|
|
3147
3892
|
for (const subDir of subDirs) {
|
|
@@ -3172,7 +3917,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3172
3917
|
}
|
|
3173
3918
|
let rootType = "unknown";
|
|
3174
3919
|
try {
|
|
3175
|
-
const rootPkgPath =
|
|
3920
|
+
const rootPkgPath = path12.join(rootDir, "package.json");
|
|
3176
3921
|
const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
|
|
3177
3922
|
if (rootPkg.workspaces)
|
|
3178
3923
|
isMonorepo = true;
|
|
@@ -3212,7 +3957,7 @@ var init_projectDetector = __esm(() => {
|
|
|
3212
3957
|
});
|
|
3213
3958
|
|
|
3214
3959
|
// src/infrastructure/introspection/IntrospectionIndex.ts
|
|
3215
|
-
import * as
|
|
3960
|
+
import * as path13 from "path";
|
|
3216
3961
|
import * as fs5 from "fs/promises";
|
|
3217
3962
|
|
|
3218
3963
|
class IntrospectionIndex {
|
|
@@ -3226,7 +3971,7 @@ class IntrospectionIndex {
|
|
|
3226
3971
|
async initialize() {
|
|
3227
3972
|
this.structure = await detectProjectStructure(this.rootDir);
|
|
3228
3973
|
try {
|
|
3229
|
-
const configPath =
|
|
3974
|
+
const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
|
|
3230
3975
|
const configContent = await fs5.readFile(configPath, "utf-8");
|
|
3231
3976
|
const config = JSON.parse(configContent);
|
|
3232
3977
|
this.config = config.introspection || {};
|
|
@@ -3266,28 +4011,28 @@ class IntrospectionIndex {
|
|
|
3266
4011
|
}
|
|
3267
4012
|
}
|
|
3268
4013
|
async save(config) {
|
|
3269
|
-
const introDir =
|
|
4014
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3270
4015
|
await fs5.mkdir(introDir, { recursive: true });
|
|
3271
|
-
const projectPath =
|
|
4016
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3272
4017
|
await fs5.writeFile(projectPath, JSON.stringify({
|
|
3273
4018
|
version: "1.0.0",
|
|
3274
4019
|
lastUpdated: new Date().toISOString(),
|
|
3275
4020
|
structure: this.structure
|
|
3276
4021
|
}, null, 2));
|
|
3277
4022
|
for (const [filepath, intro] of this.files) {
|
|
3278
|
-
const introFilePath =
|
|
3279
|
-
await fs5.mkdir(
|
|
4023
|
+
const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
|
|
4024
|
+
await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
|
|
3280
4025
|
await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
|
|
3281
4026
|
}
|
|
3282
4027
|
}
|
|
3283
4028
|
async load(config) {
|
|
3284
|
-
const introDir =
|
|
4029
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3285
4030
|
try {
|
|
3286
|
-
const projectPath =
|
|
4031
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3287
4032
|
const projectContent = await fs5.readFile(projectPath, "utf-8");
|
|
3288
4033
|
const projectData = JSON.parse(projectContent);
|
|
3289
4034
|
this.structure = projectData.structure;
|
|
3290
|
-
await this.loadFilesRecursive(
|
|
4035
|
+
await this.loadFilesRecursive(path13.join(introDir, "files"), "");
|
|
3291
4036
|
} catch {
|
|
3292
4037
|
this.structure = null;
|
|
3293
4038
|
this.files.clear();
|
|
@@ -3297,7 +4042,7 @@ class IntrospectionIndex {
|
|
|
3297
4042
|
try {
|
|
3298
4043
|
const entries = await fs5.readdir(basePath, { withFileTypes: true });
|
|
3299
4044
|
for (const entry of entries) {
|
|
3300
|
-
const entryPath =
|
|
4045
|
+
const entryPath = path13.join(basePath, entry.name);
|
|
3301
4046
|
const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
3302
4047
|
if (entry.isDirectory()) {
|
|
3303
4048
|
await this.loadFilesRecursive(entryPath, relativePath);
|
|
@@ -3328,7 +4073,7 @@ var init_introspection2 = __esm(() => {
|
|
|
3328
4073
|
|
|
3329
4074
|
// src/app/indexer/watcher.ts
|
|
3330
4075
|
import { watch } from "chokidar";
|
|
3331
|
-
import * as
|
|
4076
|
+
import * as path14 from "path";
|
|
3332
4077
|
async function watchDirectory(rootDir, options = {}) {
|
|
3333
4078
|
const {
|
|
3334
4079
|
debounceMs = DEFAULT_DEBOUNCE_MS,
|
|
@@ -3339,7 +4084,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3339
4084
|
onFileChange,
|
|
3340
4085
|
onError
|
|
3341
4086
|
} = options;
|
|
3342
|
-
rootDir =
|
|
4087
|
+
rootDir = path14.resolve(rootDir);
|
|
3343
4088
|
const config = await loadConfig(rootDir);
|
|
3344
4089
|
const indexLocation = getIndexLocation(rootDir);
|
|
3345
4090
|
const validExtensions = new Set(config.extensions);
|
|
@@ -3349,7 +4094,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3349
4094
|
"**/.git/**"
|
|
3350
4095
|
];
|
|
3351
4096
|
function shouldWatchFile(filepath) {
|
|
3352
|
-
const ext =
|
|
4097
|
+
const ext = path14.extname(filepath);
|
|
3353
4098
|
return validExtensions.has(ext);
|
|
3354
4099
|
}
|
|
3355
4100
|
let isRunning = true;
|
|
@@ -3431,7 +4176,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3431
4176
|
function handleFileEvent(event, filepath) {
|
|
3432
4177
|
if (!isRunning)
|
|
3433
4178
|
return;
|
|
3434
|
-
const relativePath =
|
|
4179
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
3435
4180
|
if (!shouldWatchFile(filepath)) {
|
|
3436
4181
|
return;
|
|
3437
4182
|
}
|
|
@@ -3502,6 +4247,7 @@ var init_watcher = __esm(() => {
|
|
|
3502
4247
|
var exports_indexer = {};
|
|
3503
4248
|
__export(exports_indexer, {
|
|
3504
4249
|
watchDirectory: () => watchDirectory,
|
|
4250
|
+
resetIndex: () => resetIndex,
|
|
3505
4251
|
indexDirectory: () => indexDirectory,
|
|
3506
4252
|
getIndexStatus: () => getIndexStatus,
|
|
3507
4253
|
ensureIndexFresh: () => ensureIndexFresh,
|
|
@@ -3509,46 +4255,56 @@ __export(exports_indexer, {
|
|
|
3509
4255
|
});
|
|
3510
4256
|
import { glob } from "glob";
|
|
3511
4257
|
import * as fs6 from "fs/promises";
|
|
3512
|
-
import * as
|
|
4258
|
+
import * as path15 from "path";
|
|
4259
|
+
async function parallelMap(items, processor, concurrency) {
|
|
4260
|
+
const results = new Array(items.length);
|
|
4261
|
+
let nextIndex = 0;
|
|
4262
|
+
async function worker() {
|
|
4263
|
+
while (nextIndex < items.length) {
|
|
4264
|
+
const index = nextIndex++;
|
|
4265
|
+
const item = items[index];
|
|
4266
|
+
try {
|
|
4267
|
+
const value = await processor(item, index);
|
|
4268
|
+
results[index] = { success: true, value };
|
|
4269
|
+
} catch (error) {
|
|
4270
|
+
results[index] = { success: false, error };
|
|
4271
|
+
}
|
|
4272
|
+
}
|
|
4273
|
+
}
|
|
4274
|
+
const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
|
|
4275
|
+
await Promise.all(workers);
|
|
4276
|
+
return results;
|
|
4277
|
+
}
|
|
3513
4278
|
async function indexDirectory(rootDir, options = {}) {
|
|
3514
4279
|
const verbose = options.verbose ?? false;
|
|
3515
4280
|
const quiet = options.quiet ?? false;
|
|
3516
|
-
|
|
4281
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
|
|
4282
|
+
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
4283
|
+
rootDir = path15.resolve(rootDir);
|
|
3517
4284
|
const location = getIndexLocation(rootDir);
|
|
3518
|
-
|
|
3519
|
-
|
|
3520
|
-
|
|
3521
|
-
}
|
|
4285
|
+
logger.info(`Indexing directory: ${rootDir}`);
|
|
4286
|
+
logger.info(`Index location: ${location.indexDir}`);
|
|
4287
|
+
logger.debug(`Concurrency: ${concurrency}`);
|
|
3522
4288
|
const config = await loadConfig(rootDir);
|
|
3523
4289
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3524
4290
|
await introspection.initialize();
|
|
3525
|
-
|
|
3526
|
-
|
|
3527
|
-
|
|
3528
|
-
console.log(`Detected monorepo with ${structure.projects.length} projects`);
|
|
3529
|
-
}
|
|
4291
|
+
const structure = introspection.getStructure();
|
|
4292
|
+
if (structure?.isMonorepo) {
|
|
4293
|
+
logger.debug(`Detected monorepo with ${structure.projects.length} projects`);
|
|
3530
4294
|
}
|
|
3531
4295
|
await registerBuiltInModules();
|
|
3532
4296
|
const enabledModules = registry.getEnabled(config);
|
|
3533
4297
|
if (enabledModules.length === 0) {
|
|
3534
|
-
|
|
3535
|
-
console.log("No modules enabled. Check your configuration.");
|
|
3536
|
-
}
|
|
4298
|
+
logger.info("No modules enabled. Check your configuration.");
|
|
3537
4299
|
return [];
|
|
3538
4300
|
}
|
|
3539
|
-
|
|
3540
|
-
console.log(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
|
|
3541
|
-
}
|
|
4301
|
+
logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
|
|
3542
4302
|
const files = await findFiles(rootDir, config);
|
|
3543
|
-
|
|
3544
|
-
console.log(`Found ${files.length} files to index`);
|
|
3545
|
-
}
|
|
4303
|
+
logger.info(`Found ${files.length} files to index`);
|
|
3546
4304
|
const results = [];
|
|
3547
4305
|
for (const module of enabledModules) {
|
|
3548
|
-
|
|
3549
|
-
console.log(`
|
|
4306
|
+
logger.info(`
|
|
3550
4307
|
[${module.name}] Starting indexing...`);
|
|
3551
|
-
}
|
|
3552
4308
|
const moduleConfig = getModuleConfig(config, module.id);
|
|
3553
4309
|
if (module.initialize && moduleConfig) {
|
|
3554
4310
|
const configWithOverrides = { ...moduleConfig };
|
|
@@ -3558,32 +4314,32 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3558
4314
|
embeddingModel: options.model
|
|
3559
4315
|
};
|
|
3560
4316
|
}
|
|
4317
|
+
configWithOverrides.options = {
|
|
4318
|
+
...configWithOverrides.options,
|
|
4319
|
+
logger
|
|
4320
|
+
};
|
|
3561
4321
|
await module.initialize(configWithOverrides);
|
|
3562
4322
|
}
|
|
3563
|
-
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
|
|
4323
|
+
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
|
|
3564
4324
|
results.push(result);
|
|
3565
4325
|
if (module.finalize) {
|
|
3566
|
-
|
|
3567
|
-
console.log(`[${module.name}] Building secondary indexes...`);
|
|
3568
|
-
}
|
|
4326
|
+
logger.info(`[${module.name}] Building secondary indexes...`);
|
|
3569
4327
|
const ctx = {
|
|
3570
4328
|
rootDir,
|
|
3571
4329
|
config,
|
|
3572
4330
|
readFile: async (filepath) => {
|
|
3573
|
-
const fullPath =
|
|
4331
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3574
4332
|
return fs6.readFile(fullPath, "utf-8");
|
|
3575
4333
|
},
|
|
3576
4334
|
getFileStats: async (filepath) => {
|
|
3577
|
-
const fullPath =
|
|
4335
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3578
4336
|
const stats = await fs6.stat(fullPath);
|
|
3579
4337
|
return { lastModified: stats.mtime.toISOString() };
|
|
3580
4338
|
}
|
|
3581
4339
|
};
|
|
3582
4340
|
await module.finalize(ctx);
|
|
3583
4341
|
}
|
|
3584
|
-
|
|
3585
|
-
console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
3586
|
-
}
|
|
4342
|
+
logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
3587
4343
|
}
|
|
3588
4344
|
await introspection.save(config);
|
|
3589
4345
|
await updateGlobalManifest(rootDir, enabledModules, config);
|
|
@@ -3606,28 +4362,37 @@ async function deleteIndex(rootDir) {
|
|
|
3606
4362
|
await fs6.rm(indexDir, { recursive: true, force: true });
|
|
3607
4363
|
} catch {}
|
|
3608
4364
|
}
|
|
4365
|
+
async function resetIndex(rootDir) {
|
|
4366
|
+
rootDir = path15.resolve(rootDir);
|
|
4367
|
+
const status = await getIndexStatus(rootDir);
|
|
4368
|
+
if (!status.exists) {
|
|
4369
|
+
throw new Error(`No index found for ${rootDir}`);
|
|
4370
|
+
}
|
|
4371
|
+
await deleteIndex(rootDir);
|
|
4372
|
+
return {
|
|
4373
|
+
success: true,
|
|
4374
|
+
indexDir: status.indexDir
|
|
4375
|
+
};
|
|
4376
|
+
}
|
|
3609
4377
|
async function ensureIndexFresh(rootDir, options = {}) {
|
|
3610
4378
|
const verbose = options.verbose ?? false;
|
|
3611
4379
|
const quiet = options.quiet ?? false;
|
|
3612
|
-
|
|
4380
|
+
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
4381
|
+
rootDir = path15.resolve(rootDir);
|
|
3613
4382
|
const status = await getIndexStatus(rootDir);
|
|
3614
4383
|
if (!status.exists) {
|
|
3615
|
-
|
|
3616
|
-
console.log(`No index found. Creating index...
|
|
4384
|
+
logger.info(`No index found. Creating index...
|
|
3617
4385
|
`);
|
|
3618
|
-
}
|
|
3619
|
-
const results = await indexDirectory(rootDir, { ...options, quiet });
|
|
4386
|
+
const results = await indexDirectory(rootDir, { ...options, logger });
|
|
3620
4387
|
const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
|
|
3621
4388
|
return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
|
|
3622
4389
|
}
|
|
3623
4390
|
const versionCompatible = await isIndexVersionCompatible(rootDir);
|
|
3624
4391
|
if (!versionCompatible) {
|
|
3625
|
-
|
|
3626
|
-
console.log(`Index version incompatible. Rebuilding...
|
|
4392
|
+
logger.info(`Index version incompatible. Rebuilding...
|
|
3627
4393
|
`);
|
|
3628
|
-
}
|
|
3629
4394
|
await deleteIndex(rootDir);
|
|
3630
|
-
const results = await indexDirectory(rootDir, { ...options,
|
|
4395
|
+
const results = await indexDirectory(rootDir, { ...options, logger });
|
|
3631
4396
|
const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
|
|
3632
4397
|
return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
|
|
3633
4398
|
}
|
|
@@ -3640,7 +4405,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3640
4405
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3641
4406
|
await introspection.initialize();
|
|
3642
4407
|
const currentFiles = await findFiles(rootDir, config);
|
|
3643
|
-
const currentFileSet = new Set(currentFiles.map((f) =>
|
|
4408
|
+
const currentFileSet = new Set(currentFiles.map((f) => path15.relative(rootDir, f)));
|
|
3644
4409
|
let totalIndexed = 0;
|
|
3645
4410
|
let totalRemoved = 0;
|
|
3646
4411
|
let totalUnchanged = 0;
|
|
@@ -3654,6 +4419,10 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3654
4419
|
embeddingModel: options.model
|
|
3655
4420
|
};
|
|
3656
4421
|
}
|
|
4422
|
+
configWithOverrides.options = {
|
|
4423
|
+
...configWithOverrides.options,
|
|
4424
|
+
logger
|
|
4425
|
+
};
|
|
3657
4426
|
await module.initialize(configWithOverrides);
|
|
3658
4427
|
}
|
|
3659
4428
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
@@ -3665,14 +4434,12 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3665
4434
|
}
|
|
3666
4435
|
}
|
|
3667
4436
|
for (const filepath of filesToRemove) {
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
}
|
|
3671
|
-
const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4437
|
+
logger.debug(` Removing stale: ${filepath}`);
|
|
4438
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3672
4439
|
try {
|
|
3673
4440
|
await fs6.unlink(indexFilePath);
|
|
3674
4441
|
} catch {}
|
|
3675
|
-
const symbolicFilePath =
|
|
4442
|
+
const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3676
4443
|
try {
|
|
3677
4444
|
await fs6.unlink(symbolicFilePath);
|
|
3678
4445
|
} catch {}
|
|
@@ -3683,18 +4450,21 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3683
4450
|
rootDir,
|
|
3684
4451
|
config,
|
|
3685
4452
|
readFile: async (filepath) => {
|
|
3686
|
-
const fullPath =
|
|
4453
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3687
4454
|
return fs6.readFile(fullPath, "utf-8");
|
|
3688
4455
|
},
|
|
3689
4456
|
getFileStats: async (filepath) => {
|
|
3690
|
-
const fullPath =
|
|
4457
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3691
4458
|
const stats = await fs6.stat(fullPath);
|
|
3692
4459
|
return { lastModified: stats.mtime.toISOString() };
|
|
3693
4460
|
},
|
|
3694
4461
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3695
4462
|
};
|
|
3696
|
-
|
|
3697
|
-
|
|
4463
|
+
const totalFiles = currentFiles.length;
|
|
4464
|
+
for (let i = 0;i < currentFiles.length; i++) {
|
|
4465
|
+
const filepath = currentFiles[i];
|
|
4466
|
+
const relativePath = path15.relative(rootDir, filepath);
|
|
4467
|
+
const progress = `[${i + 1}/${totalFiles}]`;
|
|
3698
4468
|
try {
|
|
3699
4469
|
const stats = await fs6.stat(filepath);
|
|
3700
4470
|
const lastModified = stats.mtime.toISOString();
|
|
@@ -3703,9 +4473,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3703
4473
|
totalUnchanged++;
|
|
3704
4474
|
continue;
|
|
3705
4475
|
}
|
|
3706
|
-
|
|
3707
|
-
console.log(` Indexing: ${relativePath}`);
|
|
3708
|
-
}
|
|
4476
|
+
logger.progress(` ${progress} Indexing: ${relativePath}`);
|
|
3709
4477
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3710
4478
|
introspection.addFile(relativePath, content);
|
|
3711
4479
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
@@ -3718,11 +4486,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3718
4486
|
totalIndexed++;
|
|
3719
4487
|
}
|
|
3720
4488
|
} catch (error) {
|
|
3721
|
-
|
|
3722
|
-
|
|
3723
|
-
}
|
|
4489
|
+
logger.clearProgress();
|
|
4490
|
+
logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
|
|
3724
4491
|
}
|
|
3725
4492
|
}
|
|
4493
|
+
logger.clearProgress();
|
|
3726
4494
|
if (totalIndexed > 0 || totalRemoved > 0) {
|
|
3727
4495
|
manifest.lastUpdated = new Date().toISOString();
|
|
3728
4496
|
await writeModuleManifest(rootDir, module.id, manifest, config);
|
|
@@ -3746,7 +4514,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3746
4514
|
unchanged: totalUnchanged
|
|
3747
4515
|
};
|
|
3748
4516
|
}
|
|
3749
|
-
async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
|
|
4517
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
|
|
3750
4518
|
const result = {
|
|
3751
4519
|
moduleId: module.id,
|
|
3752
4520
|
indexed: 0,
|
|
@@ -3754,55 +4522,102 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3754
4522
|
errors: 0
|
|
3755
4523
|
};
|
|
3756
4524
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
4525
|
+
const indexPath = getModuleIndexPath(rootDir, module.id, config);
|
|
4526
|
+
const currentFileSet = new Set(files.map((f) => path15.relative(rootDir, f)));
|
|
4527
|
+
const filesToRemove = [];
|
|
4528
|
+
for (const filepath of Object.keys(manifest.files)) {
|
|
4529
|
+
if (!currentFileSet.has(filepath)) {
|
|
4530
|
+
filesToRemove.push(filepath);
|
|
4531
|
+
}
|
|
4532
|
+
}
|
|
4533
|
+
if (filesToRemove.length > 0) {
|
|
4534
|
+
logger.info(` Removing ${filesToRemove.length} stale entries...`);
|
|
4535
|
+
for (const filepath of filesToRemove) {
|
|
4536
|
+
logger.debug(` Removing: ${filepath}`);
|
|
4537
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4538
|
+
try {
|
|
4539
|
+
await fs6.unlink(indexFilePath);
|
|
4540
|
+
} catch {}
|
|
4541
|
+
const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
4542
|
+
try {
|
|
4543
|
+
await fs6.unlink(symbolicFilePath);
|
|
4544
|
+
} catch {}
|
|
4545
|
+
delete manifest.files[filepath];
|
|
4546
|
+
}
|
|
4547
|
+
await cleanupEmptyDirectories(indexPath);
|
|
4548
|
+
}
|
|
3757
4549
|
const ctx = {
|
|
3758
4550
|
rootDir,
|
|
3759
4551
|
config,
|
|
3760
4552
|
readFile: async (filepath) => {
|
|
3761
|
-
const fullPath =
|
|
4553
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3762
4554
|
return fs6.readFile(fullPath, "utf-8");
|
|
3763
4555
|
},
|
|
3764
4556
|
getFileStats: async (filepath) => {
|
|
3765
|
-
const fullPath =
|
|
4557
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3766
4558
|
const stats = await fs6.stat(fullPath);
|
|
3767
4559
|
return { lastModified: stats.mtime.toISOString() };
|
|
3768
4560
|
},
|
|
3769
4561
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3770
4562
|
};
|
|
3771
|
-
|
|
3772
|
-
|
|
4563
|
+
const totalFiles = files.length;
|
|
4564
|
+
let completedCount = 0;
|
|
4565
|
+
const processFile = async (filepath, _index) => {
|
|
4566
|
+
const relativePath = path15.relative(rootDir, filepath);
|
|
3773
4567
|
try {
|
|
3774
4568
|
const stats = await fs6.stat(filepath);
|
|
3775
4569
|
const lastModified = stats.mtime.toISOString();
|
|
3776
4570
|
const existingEntry = manifest.files[relativePath];
|
|
3777
4571
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
3778
|
-
|
|
3779
|
-
|
|
3780
|
-
}
|
|
3781
|
-
result.skipped++;
|
|
3782
|
-
continue;
|
|
4572
|
+
completedCount++;
|
|
4573
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
|
|
4574
|
+
return { relativePath, status: "skipped" };
|
|
3783
4575
|
}
|
|
3784
4576
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3785
4577
|
introspection.addFile(relativePath, content);
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
}
|
|
4578
|
+
completedCount++;
|
|
4579
|
+
logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
|
|
3789
4580
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
3790
4581
|
if (!fileIndex) {
|
|
3791
|
-
|
|
3792
|
-
|
|
3793
|
-
}
|
|
3794
|
-
result.skipped++;
|
|
3795
|
-
continue;
|
|
4582
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
|
|
4583
|
+
return { relativePath, status: "skipped" };
|
|
3796
4584
|
}
|
|
3797
4585
|
await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
|
|
3798
|
-
|
|
4586
|
+
return {
|
|
4587
|
+
relativePath,
|
|
4588
|
+
status: "indexed",
|
|
3799
4589
|
lastModified,
|
|
3800
4590
|
chunkCount: fileIndex.chunks.length
|
|
3801
4591
|
};
|
|
3802
|
-
result.indexed++;
|
|
3803
4592
|
} catch (error) {
|
|
3804
|
-
|
|
4593
|
+
completedCount++;
|
|
4594
|
+
return { relativePath, status: "error", error };
|
|
4595
|
+
}
|
|
4596
|
+
};
|
|
4597
|
+
logger.debug(` Using concurrency: ${concurrency}`);
|
|
4598
|
+
const results = await parallelMap(files, processFile, concurrency);
|
|
4599
|
+
logger.clearProgress();
|
|
4600
|
+
for (const item of results) {
|
|
4601
|
+
if (!item.success) {
|
|
3805
4602
|
result.errors++;
|
|
4603
|
+
continue;
|
|
4604
|
+
}
|
|
4605
|
+
const fileResult = item.value;
|
|
4606
|
+
switch (fileResult.status) {
|
|
4607
|
+
case "indexed":
|
|
4608
|
+
manifest.files[fileResult.relativePath] = {
|
|
4609
|
+
lastModified: fileResult.lastModified,
|
|
4610
|
+
chunkCount: fileResult.chunkCount
|
|
4611
|
+
};
|
|
4612
|
+
result.indexed++;
|
|
4613
|
+
break;
|
|
4614
|
+
case "skipped":
|
|
4615
|
+
result.skipped++;
|
|
4616
|
+
break;
|
|
4617
|
+
case "error":
|
|
4618
|
+
logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
|
|
4619
|
+
result.errors++;
|
|
4620
|
+
break;
|
|
3806
4621
|
}
|
|
3807
4622
|
}
|
|
3808
4623
|
manifest.lastUpdated = new Date().toISOString();
|
|
@@ -3839,13 +4654,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
3839
4654
|
}
|
|
3840
4655
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
3841
4656
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
3842
|
-
await fs6.mkdir(
|
|
4657
|
+
await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
|
|
3843
4658
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
3844
4659
|
}
|
|
3845
4660
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
3846
4661
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
3847
|
-
const indexFilePath =
|
|
3848
|
-
await fs6.mkdir(
|
|
4662
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4663
|
+
await fs6.mkdir(path15.dirname(indexFilePath), { recursive: true });
|
|
3849
4664
|
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
3850
4665
|
}
|
|
3851
4666
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
@@ -3855,31 +4670,32 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
3855
4670
|
lastUpdated: new Date().toISOString(),
|
|
3856
4671
|
modules: modules.map((m) => m.id)
|
|
3857
4672
|
};
|
|
3858
|
-
await fs6.mkdir(
|
|
4673
|
+
await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
|
|
3859
4674
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
3860
4675
|
}
|
|
3861
4676
|
async function cleanupIndex(rootDir, options = {}) {
|
|
3862
4677
|
const verbose = options.verbose ?? false;
|
|
3863
|
-
|
|
3864
|
-
|
|
4678
|
+
const logger = options.logger ?? createLogger({ verbose });
|
|
4679
|
+
rootDir = path15.resolve(rootDir);
|
|
4680
|
+
logger.info(`Cleaning up index in: ${rootDir}`);
|
|
3865
4681
|
const config = await loadConfig(rootDir);
|
|
3866
4682
|
await registerBuiltInModules();
|
|
3867
4683
|
const enabledModules = registry.getEnabled(config);
|
|
3868
4684
|
if (enabledModules.length === 0) {
|
|
3869
|
-
|
|
4685
|
+
logger.info("No modules enabled.");
|
|
3870
4686
|
return [];
|
|
3871
4687
|
}
|
|
3872
4688
|
const results = [];
|
|
3873
4689
|
for (const module of enabledModules) {
|
|
3874
|
-
|
|
4690
|
+
logger.info(`
|
|
3875
4691
|
[${module.name}] Checking for stale entries...`);
|
|
3876
|
-
const result = await cleanupModuleIndex(rootDir, module.id, config,
|
|
4692
|
+
const result = await cleanupModuleIndex(rootDir, module.id, config, logger);
|
|
3877
4693
|
results.push(result);
|
|
3878
|
-
|
|
4694
|
+
logger.info(`[${module.name}] Removed ${result.removed} stale entries, kept ${result.kept} valid entries`);
|
|
3879
4695
|
}
|
|
3880
4696
|
return results;
|
|
3881
4697
|
}
|
|
3882
|
-
async function cleanupModuleIndex(rootDir, moduleId, config,
|
|
4698
|
+
async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
3883
4699
|
const result = {
|
|
3884
4700
|
moduleId,
|
|
3885
4701
|
removed: 0,
|
|
@@ -3890,7 +4706,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
3890
4706
|
const filesToRemove = [];
|
|
3891
4707
|
const updatedFiles = {};
|
|
3892
4708
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
3893
|
-
const fullPath =
|
|
4709
|
+
const fullPath = path15.join(rootDir, filepath);
|
|
3894
4710
|
try {
|
|
3895
4711
|
await fs6.access(fullPath);
|
|
3896
4712
|
updatedFiles[filepath] = entry;
|
|
@@ -3898,13 +4714,11 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
3898
4714
|
} catch {
|
|
3899
4715
|
filesToRemove.push(filepath);
|
|
3900
4716
|
result.removed++;
|
|
3901
|
-
|
|
3902
|
-
console.log(` Removing stale entry: ${filepath}`);
|
|
3903
|
-
}
|
|
4717
|
+
logger.debug(` Removing stale entry: ${filepath}`);
|
|
3904
4718
|
}
|
|
3905
4719
|
}
|
|
3906
4720
|
for (const filepath of filesToRemove) {
|
|
3907
|
-
const indexFilePath =
|
|
4721
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3908
4722
|
try {
|
|
3909
4723
|
await fs6.unlink(indexFilePath);
|
|
3910
4724
|
} catch {}
|
|
@@ -3920,7 +4734,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
3920
4734
|
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
3921
4735
|
for (const entry of entries) {
|
|
3922
4736
|
if (entry.isDirectory()) {
|
|
3923
|
-
const subDir =
|
|
4737
|
+
const subDir = path15.join(dir, entry.name);
|
|
3924
4738
|
await cleanupEmptyDirectories(subDir);
|
|
3925
4739
|
}
|
|
3926
4740
|
}
|
|
@@ -3935,7 +4749,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
3935
4749
|
}
|
|
3936
4750
|
}
|
|
3937
4751
|
async function getIndexStatus(rootDir) {
|
|
3938
|
-
rootDir =
|
|
4752
|
+
rootDir = path15.resolve(rootDir);
|
|
3939
4753
|
const config = await loadConfig(rootDir);
|
|
3940
4754
|
const location = getIndexLocation(rootDir);
|
|
3941
4755
|
const indexDir = location.indexDir;
|
|
@@ -3971,7 +4785,7 @@ async function getIndexStatus(rootDir) {
|
|
|
3971
4785
|
}
|
|
3972
4786
|
} catch {
|
|
3973
4787
|
try {
|
|
3974
|
-
const entries = await fs6.readdir(
|
|
4788
|
+
const entries = await fs6.readdir(path15.join(indexDir, "index"));
|
|
3975
4789
|
if (entries.length > 0) {
|
|
3976
4790
|
status.exists = true;
|
|
3977
4791
|
for (const entry of entries) {
|
|
@@ -3991,11 +4805,12 @@ async function getIndexStatus(rootDir) {
|
|
|
3991
4805
|
}
|
|
3992
4806
|
return status;
|
|
3993
4807
|
}
|
|
3994
|
-
var INDEX_SCHEMA_VERSION = "1.0.0";
|
|
4808
|
+
var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
|
|
3995
4809
|
var init_indexer = __esm(() => {
|
|
3996
4810
|
init_config2();
|
|
3997
4811
|
init_registry();
|
|
3998
4812
|
init_introspection2();
|
|
4813
|
+
init_logger();
|
|
3999
4814
|
init_watcher();
|
|
4000
4815
|
});
|
|
4001
4816
|
|
|
@@ -4011,9 +4826,9 @@ __export(exports_search, {
|
|
|
4011
4826
|
formatSearchResults: () => formatSearchResults
|
|
4012
4827
|
});
|
|
4013
4828
|
import * as fs7 from "fs/promises";
|
|
4014
|
-
import * as
|
|
4829
|
+
import * as path16 from "path";
|
|
4015
4830
|
async function search(rootDir, query, options = {}) {
|
|
4016
|
-
rootDir =
|
|
4831
|
+
rootDir = path16.resolve(rootDir);
|
|
4017
4832
|
const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
|
|
4018
4833
|
if (ensureFresh) {
|
|
4019
4834
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
@@ -4058,7 +4873,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4058
4873
|
config,
|
|
4059
4874
|
loadFileIndex: async (filepath) => {
|
|
4060
4875
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
4061
|
-
const indexFilePath = hasExtension ?
|
|
4876
|
+
const indexFilePath = hasExtension ? path16.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path16.join(indexPath, filepath + ".json");
|
|
4062
4877
|
try {
|
|
4063
4878
|
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
4064
4879
|
return JSON.parse(content);
|
|
@@ -4070,7 +4885,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4070
4885
|
const files = [];
|
|
4071
4886
|
await traverseDirectory(indexPath, files, indexPath);
|
|
4072
4887
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
4073
|
-
const relative4 =
|
|
4888
|
+
const relative4 = path16.relative(indexPath, f);
|
|
4074
4889
|
return relative4.replace(/\.json$/, "");
|
|
4075
4890
|
});
|
|
4076
4891
|
}
|
|
@@ -4080,7 +4895,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
4080
4895
|
try {
|
|
4081
4896
|
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
4082
4897
|
for (const entry of entries) {
|
|
4083
|
-
const fullPath =
|
|
4898
|
+
const fullPath = path16.join(dir, entry.name);
|
|
4084
4899
|
if (entry.isDirectory()) {
|
|
4085
4900
|
await traverseDirectory(fullPath, files, basePath);
|
|
4086
4901
|
} else if (entry.isFile()) {
|
|
@@ -4154,10 +4969,11 @@ var init_search = __esm(() => {
|
|
|
4154
4969
|
|
|
4155
4970
|
// src/app/cli/main.ts
|
|
4156
4971
|
init_embeddings();
|
|
4972
|
+
init_logger();
|
|
4157
4973
|
// package.json
|
|
4158
4974
|
var package_default = {
|
|
4159
4975
|
name: "raggrep",
|
|
4160
|
-
version: "0.
|
|
4976
|
+
version: "0.5.0",
|
|
4161
4977
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
4162
4978
|
type: "module",
|
|
4163
4979
|
main: "./dist/index.js",
|
|
@@ -4293,6 +5109,14 @@ function parseFlags(args2) {
|
|
|
4293
5109
|
console.error("--type requires a file extension (e.g., ts, tsx, js)");
|
|
4294
5110
|
process.exit(1);
|
|
4295
5111
|
}
|
|
5112
|
+
} else if (arg === "--concurrency" || arg === "-c") {
|
|
5113
|
+
const c = parseInt(args2[++i], 10);
|
|
5114
|
+
if (!isNaN(c) && c > 0) {
|
|
5115
|
+
flags.concurrency = c;
|
|
5116
|
+
} else {
|
|
5117
|
+
console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
|
|
5118
|
+
process.exit(1);
|
|
5119
|
+
}
|
|
4296
5120
|
} else if (!arg.startsWith("-")) {
|
|
4297
5121
|
flags.remaining.push(arg);
|
|
4298
5122
|
}
|
|
@@ -4312,10 +5136,11 @@ Usage:
|
|
|
4312
5136
|
raggrep index [options]
|
|
4313
5137
|
|
|
4314
5138
|
Options:
|
|
4315
|
-
-w, --watch
|
|
4316
|
-
-m, --model <name>
|
|
4317
|
-
-
|
|
4318
|
-
-
|
|
5139
|
+
-w, --watch Watch for file changes and re-index automatically
|
|
5140
|
+
-m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
|
|
5141
|
+
-c, --concurrency <n> Number of files to process in parallel (default: 4)
|
|
5142
|
+
-v, --verbose Show detailed progress
|
|
5143
|
+
-h, --help Show this help message
|
|
4319
5144
|
|
|
4320
5145
|
Available Models:
|
|
4321
5146
|
${models}
|
|
@@ -4326,18 +5151,22 @@ Examples:
|
|
|
4326
5151
|
raggrep index
|
|
4327
5152
|
raggrep index --watch
|
|
4328
5153
|
raggrep index --model bge-small-en-v1.5
|
|
5154
|
+
raggrep index --concurrency 8
|
|
4329
5155
|
raggrep index --verbose
|
|
4330
5156
|
`);
|
|
4331
5157
|
process.exit(0);
|
|
4332
5158
|
}
|
|
4333
5159
|
const { indexDirectory: indexDirectory2, watchDirectory: watchDirectory2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
|
|
5160
|
+
const logger = createInlineLogger({ verbose: flags.verbose });
|
|
4334
5161
|
console.log("RAGgrep Indexer");
|
|
4335
5162
|
console.log(`================
|
|
4336
5163
|
`);
|
|
4337
5164
|
try {
|
|
4338
5165
|
const results = await indexDirectory2(process.cwd(), {
|
|
4339
5166
|
model: flags.model,
|
|
4340
|
-
verbose: flags.verbose
|
|
5167
|
+
verbose: flags.verbose,
|
|
5168
|
+
concurrency: flags.concurrency,
|
|
5169
|
+
logger
|
|
4341
5170
|
});
|
|
4342
5171
|
console.log(`
|
|
4343
5172
|
================`);
|
|
@@ -4422,9 +5251,11 @@ Examples:
|
|
|
4422
5251
|
process.exit(1);
|
|
4423
5252
|
}
|
|
4424
5253
|
try {
|
|
5254
|
+
const silentLogger = createSilentLogger();
|
|
4425
5255
|
const freshStats = await ensureIndexFresh2(process.cwd(), {
|
|
4426
5256
|
model: flags.model,
|
|
4427
|
-
quiet: true
|
|
5257
|
+
quiet: true,
|
|
5258
|
+
logger: silentLogger
|
|
4428
5259
|
});
|
|
4429
5260
|
console.log("RAGgrep Search");
|
|
4430
5261
|
console.log(`==============
|
|
@@ -4457,44 +5288,37 @@ Examples:
|
|
|
4457
5288
|
}
|
|
4458
5289
|
break;
|
|
4459
5290
|
}
|
|
4460
|
-
case "
|
|
5291
|
+
case "reset": {
|
|
4461
5292
|
if (flags.help) {
|
|
4462
5293
|
console.log(`
|
|
4463
|
-
raggrep
|
|
5294
|
+
raggrep reset - Clear the index for the current directory
|
|
4464
5295
|
|
|
4465
5296
|
Usage:
|
|
4466
|
-
raggrep
|
|
5297
|
+
raggrep reset [options]
|
|
4467
5298
|
|
|
4468
5299
|
Options:
|
|
4469
|
-
-v, --verbose Show detailed progress
|
|
4470
5300
|
-h, --help Show this help message
|
|
4471
5301
|
|
|
4472
5302
|
Description:
|
|
4473
|
-
|
|
4474
|
-
|
|
5303
|
+
Completely removes the index for the current directory.
|
|
5304
|
+
The next 'raggrep index' or 'raggrep query' will rebuild from scratch.
|
|
4475
5305
|
|
|
4476
5306
|
Examples:
|
|
4477
|
-
raggrep
|
|
4478
|
-
raggrep cleanup --verbose
|
|
5307
|
+
raggrep reset
|
|
4479
5308
|
`);
|
|
4480
5309
|
process.exit(0);
|
|
4481
5310
|
}
|
|
4482
|
-
const {
|
|
4483
|
-
console.log("RAGgrep Cleanup");
|
|
4484
|
-
console.log(`===============
|
|
4485
|
-
`);
|
|
5311
|
+
const { resetIndex: resetIndex2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
|
|
4486
5312
|
try {
|
|
4487
|
-
const
|
|
4488
|
-
|
|
4489
|
-
});
|
|
4490
|
-
console.log(`
|
|
4491
|
-
===============`);
|
|
4492
|
-
console.log("Summary:");
|
|
4493
|
-
for (const result of results) {
|
|
4494
|
-
console.log(` ${result.moduleId}: ${result.removed} removed, ${result.kept} kept`);
|
|
4495
|
-
}
|
|
5313
|
+
const result = await resetIndex2(process.cwd());
|
|
5314
|
+
console.log("Index cleared successfully.");
|
|
5315
|
+
console.log(` Removed: ${result.indexDir}`);
|
|
4496
5316
|
} catch (error) {
|
|
4497
|
-
|
|
5317
|
+
if (error instanceof Error && error.message.includes("No index found")) {
|
|
5318
|
+
console.error("Error: No index found for this directory.");
|
|
5319
|
+
process.exit(1);
|
|
5320
|
+
}
|
|
5321
|
+
console.error("Error during reset:", error);
|
|
4498
5322
|
process.exit(1);
|
|
4499
5323
|
}
|
|
4500
5324
|
break;
|
|
@@ -4573,7 +5397,7 @@ Commands:
|
|
|
4573
5397
|
index Index the current directory
|
|
4574
5398
|
query Search the indexed codebase
|
|
4575
5399
|
status Show the current state of the index
|
|
4576
|
-
|
|
5400
|
+
reset Clear the index for the current directory
|
|
4577
5401
|
|
|
4578
5402
|
Options:
|
|
4579
5403
|
-h, --help Show help for a command
|
|
@@ -4583,7 +5407,7 @@ Examples:
|
|
|
4583
5407
|
raggrep index
|
|
4584
5408
|
raggrep query "user login"
|
|
4585
5409
|
raggrep status
|
|
4586
|
-
raggrep
|
|
5410
|
+
raggrep reset
|
|
4587
5411
|
|
|
4588
5412
|
Run 'raggrep <command> --help' for more information.
|
|
4589
5413
|
`);
|
|
@@ -4595,4 +5419,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
4595
5419
|
}
|
|
4596
5420
|
main();
|
|
4597
5421
|
|
|
4598
|
-
//# debugId=
|
|
5422
|
+
//# debugId=5CA623D9974ACF4364756E2164756E21
|