raggrep 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +4 -0
- package/dist/cli/main.js +1323 -610
- package/dist/cli/main.js.map +19 -14
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/entities/searchResult.d.ts +5 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.js +1301 -613
- package/dist/index.js.map +18 -13
- package/dist/modules/core/index.d.ts +4 -0
- package/dist/modules/data/json/index.d.ts +49 -0
- package/dist/modules/docs/markdown/index.d.ts +49 -0
- package/dist/modules/language/typescript/index.d.ts +11 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/dist/types.d.ts +6 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -33,6 +33,7 @@ var init_searchResult = __esm(() => {
|
|
|
33
33
|
topK: 10,
|
|
34
34
|
minScore: 0.15,
|
|
35
35
|
filePatterns: [],
|
|
36
|
+
pathFilter: [],
|
|
36
37
|
ensureFresh: true
|
|
37
38
|
};
|
|
38
39
|
});
|
|
@@ -56,6 +57,20 @@ function createDefaultConfig() {
|
|
|
56
57
|
options: {
|
|
57
58
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
58
59
|
}
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
id: "data/json",
|
|
63
|
+
enabled: true,
|
|
64
|
+
options: {
|
|
65
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
id: "docs/markdown",
|
|
70
|
+
enabled: true,
|
|
71
|
+
options: {
|
|
72
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
73
|
+
}
|
|
59
74
|
}
|
|
60
75
|
]
|
|
61
76
|
};
|
|
@@ -99,16 +114,18 @@ var init_config = __esm(() => {
|
|
|
99
114
|
".jsx",
|
|
100
115
|
".mjs",
|
|
101
116
|
".cjs",
|
|
117
|
+
".mts",
|
|
118
|
+
".cts",
|
|
119
|
+
".json",
|
|
120
|
+
".md",
|
|
102
121
|
".py",
|
|
103
122
|
".go",
|
|
104
123
|
".rs",
|
|
105
124
|
".java",
|
|
106
|
-
".json",
|
|
107
125
|
".yaml",
|
|
108
126
|
".yml",
|
|
109
127
|
".toml",
|
|
110
128
|
".sql",
|
|
111
|
-
".md",
|
|
112
129
|
".txt"
|
|
113
130
|
];
|
|
114
131
|
});
|
|
@@ -1779,6 +1796,9 @@ class CoreModule {
|
|
|
1779
1796
|
name = "Core Search";
|
|
1780
1797
|
description = "Language-agnostic text search with symbol extraction";
|
|
1781
1798
|
version = "1.0.0";
|
|
1799
|
+
supportsFile(_filepath) {
|
|
1800
|
+
return true;
|
|
1801
|
+
}
|
|
1782
1802
|
symbolIndex = new Map;
|
|
1783
1803
|
bm25Index = null;
|
|
1784
1804
|
rootDir = "";
|
|
@@ -2198,221 +2218,6 @@ var init_embeddings = __esm(() => {
|
|
|
2198
2218
|
init_transformersEmbedding();
|
|
2199
2219
|
});
|
|
2200
2220
|
|
|
2201
|
-
// src/domain/services/similarity.ts
|
|
2202
|
-
function cosineSimilarity(a, b) {
|
|
2203
|
-
if (a.length !== b.length) {
|
|
2204
|
-
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
2205
|
-
}
|
|
2206
|
-
let dotProduct = 0;
|
|
2207
|
-
let normA = 0;
|
|
2208
|
-
let normB = 0;
|
|
2209
|
-
for (let i = 0;i < a.length; i++) {
|
|
2210
|
-
dotProduct += a[i] * b[i];
|
|
2211
|
-
normA += a[i] * a[i];
|
|
2212
|
-
normB += b[i] * b[i];
|
|
2213
|
-
}
|
|
2214
|
-
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
2215
|
-
if (magnitude === 0)
|
|
2216
|
-
return 0;
|
|
2217
|
-
return dotProduct / magnitude;
|
|
2218
|
-
}
|
|
2219
|
-
|
|
2220
|
-
// src/modules/language/typescript/parseCode.ts
|
|
2221
|
-
import * as ts from "typescript";
|
|
2222
|
-
function parseCode(content, filepath) {
|
|
2223
|
-
const ext = filepath.split(".").pop()?.toLowerCase();
|
|
2224
|
-
if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
|
|
2225
|
-
return parseTypeScript(content, filepath);
|
|
2226
|
-
}
|
|
2227
|
-
return parseGenericCode(content);
|
|
2228
|
-
}
|
|
2229
|
-
function parseTypeScript(content, filepath) {
|
|
2230
|
-
const chunks = [];
|
|
2231
|
-
const lines = content.split(`
|
|
2232
|
-
`);
|
|
2233
|
-
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
2234
|
-
function getLineNumbers(node) {
|
|
2235
|
-
const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
|
|
2236
|
-
const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
|
|
2237
|
-
return {
|
|
2238
|
-
startLine: start.line + 1,
|
|
2239
|
-
endLine: end.line + 1
|
|
2240
|
-
};
|
|
2241
|
-
}
|
|
2242
|
-
function getNodeText(node) {
|
|
2243
|
-
return node.getText(sourceFile);
|
|
2244
|
-
}
|
|
2245
|
-
function isExported(node) {
|
|
2246
|
-
if (!ts.canHaveModifiers(node))
|
|
2247
|
-
return false;
|
|
2248
|
-
const modifiers = ts.getModifiers(node);
|
|
2249
|
-
return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
|
|
2250
|
-
}
|
|
2251
|
-
function getJSDoc(node) {
|
|
2252
|
-
const jsDocNodes = ts.getJSDocCommentsAndTags(node);
|
|
2253
|
-
if (jsDocNodes.length === 0)
|
|
2254
|
-
return;
|
|
2255
|
-
return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
|
|
2256
|
-
`);
|
|
2257
|
-
}
|
|
2258
|
-
function getFunctionName(node) {
|
|
2259
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2260
|
-
return node.name.text;
|
|
2261
|
-
}
|
|
2262
|
-
if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2263
|
-
return node.name.text;
|
|
2264
|
-
}
|
|
2265
|
-
if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2266
|
-
return node.name.text;
|
|
2267
|
-
}
|
|
2268
|
-
return;
|
|
2269
|
-
}
|
|
2270
|
-
function visit(node) {
|
|
2271
|
-
const { startLine, endLine } = getLineNumbers(node);
|
|
2272
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2273
|
-
chunks.push({
|
|
2274
|
-
content: getNodeText(node),
|
|
2275
|
-
startLine,
|
|
2276
|
-
endLine,
|
|
2277
|
-
type: "function",
|
|
2278
|
-
name: node.name.text,
|
|
2279
|
-
isExported: isExported(node),
|
|
2280
|
-
jsDoc: getJSDoc(node)
|
|
2281
|
-
});
|
|
2282
|
-
return;
|
|
2283
|
-
}
|
|
2284
|
-
if (ts.isVariableStatement(node)) {
|
|
2285
|
-
for (const decl of node.declarationList.declarations) {
|
|
2286
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2287
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2288
|
-
chunks.push({
|
|
2289
|
-
content: getNodeText(node),
|
|
2290
|
-
startLine,
|
|
2291
|
-
endLine,
|
|
2292
|
-
type: "function",
|
|
2293
|
-
name,
|
|
2294
|
-
isExported: isExported(node),
|
|
2295
|
-
jsDoc: getJSDoc(node)
|
|
2296
|
-
});
|
|
2297
|
-
return;
|
|
2298
|
-
}
|
|
2299
|
-
}
|
|
2300
|
-
}
|
|
2301
|
-
if (ts.isClassDeclaration(node) && node.name) {
|
|
2302
|
-
chunks.push({
|
|
2303
|
-
content: getNodeText(node),
|
|
2304
|
-
startLine,
|
|
2305
|
-
endLine,
|
|
2306
|
-
type: "class",
|
|
2307
|
-
name: node.name.text,
|
|
2308
|
-
isExported: isExported(node),
|
|
2309
|
-
jsDoc: getJSDoc(node)
|
|
2310
|
-
});
|
|
2311
|
-
return;
|
|
2312
|
-
}
|
|
2313
|
-
if (ts.isInterfaceDeclaration(node)) {
|
|
2314
|
-
chunks.push({
|
|
2315
|
-
content: getNodeText(node),
|
|
2316
|
-
startLine,
|
|
2317
|
-
endLine,
|
|
2318
|
-
type: "interface",
|
|
2319
|
-
name: node.name.text,
|
|
2320
|
-
isExported: isExported(node),
|
|
2321
|
-
jsDoc: getJSDoc(node)
|
|
2322
|
-
});
|
|
2323
|
-
return;
|
|
2324
|
-
}
|
|
2325
|
-
if (ts.isTypeAliasDeclaration(node)) {
|
|
2326
|
-
chunks.push({
|
|
2327
|
-
content: getNodeText(node),
|
|
2328
|
-
startLine,
|
|
2329
|
-
endLine,
|
|
2330
|
-
type: "type",
|
|
2331
|
-
name: node.name.text,
|
|
2332
|
-
isExported: isExported(node),
|
|
2333
|
-
jsDoc: getJSDoc(node)
|
|
2334
|
-
});
|
|
2335
|
-
return;
|
|
2336
|
-
}
|
|
2337
|
-
if (ts.isEnumDeclaration(node)) {
|
|
2338
|
-
chunks.push({
|
|
2339
|
-
content: getNodeText(node),
|
|
2340
|
-
startLine,
|
|
2341
|
-
endLine,
|
|
2342
|
-
type: "enum",
|
|
2343
|
-
name: node.name.text,
|
|
2344
|
-
isExported: isExported(node),
|
|
2345
|
-
jsDoc: getJSDoc(node)
|
|
2346
|
-
});
|
|
2347
|
-
return;
|
|
2348
|
-
}
|
|
2349
|
-
if (ts.isVariableStatement(node) && isExported(node)) {
|
|
2350
|
-
for (const decl of node.declarationList.declarations) {
|
|
2351
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2352
|
-
continue;
|
|
2353
|
-
}
|
|
2354
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2355
|
-
chunks.push({
|
|
2356
|
-
content: getNodeText(node),
|
|
2357
|
-
startLine,
|
|
2358
|
-
endLine,
|
|
2359
|
-
type: "variable",
|
|
2360
|
-
name,
|
|
2361
|
-
isExported: true,
|
|
2362
|
-
jsDoc: getJSDoc(node)
|
|
2363
|
-
});
|
|
2364
|
-
}
|
|
2365
|
-
return;
|
|
2366
|
-
}
|
|
2367
|
-
ts.forEachChild(node, visit);
|
|
2368
|
-
}
|
|
2369
|
-
ts.forEachChild(sourceFile, visit);
|
|
2370
|
-
if (chunks.length === 0) {
|
|
2371
|
-
return parseGenericCode(content);
|
|
2372
|
-
}
|
|
2373
|
-
return chunks;
|
|
2374
|
-
}
|
|
2375
|
-
function parseGenericCode(content) {
|
|
2376
|
-
const chunks = [];
|
|
2377
|
-
const lines = content.split(`
|
|
2378
|
-
`);
|
|
2379
|
-
const CHUNK_SIZE = 30;
|
|
2380
|
-
const OVERLAP = 5;
|
|
2381
|
-
if (lines.length <= CHUNK_SIZE) {
|
|
2382
|
-
return [
|
|
2383
|
-
{
|
|
2384
|
-
content,
|
|
2385
|
-
startLine: 1,
|
|
2386
|
-
endLine: lines.length,
|
|
2387
|
-
type: "file"
|
|
2388
|
-
}
|
|
2389
|
-
];
|
|
2390
|
-
}
|
|
2391
|
-
for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
|
|
2392
|
-
const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
|
|
2393
|
-
chunks.push({
|
|
2394
|
-
content: lines.slice(i, endIdx).join(`
|
|
2395
|
-
`),
|
|
2396
|
-
startLine: i + 1,
|
|
2397
|
-
endLine: endIdx,
|
|
2398
|
-
type: "block"
|
|
2399
|
-
});
|
|
2400
|
-
if (endIdx >= lines.length)
|
|
2401
|
-
break;
|
|
2402
|
-
}
|
|
2403
|
-
return chunks;
|
|
2404
|
-
}
|
|
2405
|
-
function generateChunkId(filepath, startLine, endLine) {
|
|
2406
|
-
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2407
|
-
return `${safePath}-${startLine}-${endLine}`;
|
|
2408
|
-
}
|
|
2409
|
-
var init_parseCode = () => {};
|
|
2410
|
-
|
|
2411
|
-
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2412
|
-
var init_fileIndexStorage = __esm(() => {
|
|
2413
|
-
init_entities();
|
|
2414
|
-
});
|
|
2415
|
-
|
|
2416
2221
|
// src/domain/services/keywords.ts
|
|
2417
2222
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2418
2223
|
const keywords = new Set;
|
|
@@ -2601,223 +2406,1120 @@ var init_keywords = __esm(() => {
|
|
|
2601
2406
|
};
|
|
2602
2407
|
});
|
|
2603
2408
|
|
|
2604
|
-
// src/
|
|
2605
|
-
|
|
2606
|
-
|
|
2409
|
+
// src/domain/services/similarity.ts
|
|
2410
|
+
/**
 * Cosine of the angle between two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector; must have the same length as `a`.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 * @throws {Error} When the two vectors differ in length.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  // Accumulate in ascending index order so floating-point results stay stable.
  while (idx < size) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }
  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  // A zero-length or all-zero vector has no direction; report "no similarity".
  return denominator === 0 ? 0 : dot / denominator;
}
|
|
2607
2427
|
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2428
|
+
// src/domain/services/queryIntent.ts
|
|
2429
|
+
import * as path7 from "path";
|
|
2430
|
+
/**
 * Classify a query by the vocabulary it uses.
 *
 * Documentation vocabulary wins over implementation vocabulary when both
 * appear in the same query.
 *
 * @param {string[]} queryTerms - Query terms to compare against the term lists.
 * @returns {"documentation" | "implementation" | "neutral"} Detected intent.
 */
function detectQueryIntent(queryTerms) {
  const mentionsAny = (vocabulary) => queryTerms.some((term) => vocabulary.includes(term));
  if (mentionsAny(DOCUMENTATION_TERMS)) {
    return "documentation";
  }
  return mentionsAny(IMPLEMENTATION_TERMS) ? "implementation" : "neutral";
}
|
|
2441
|
+
/**
 * Split a query into lowercase, whitespace-separated terms.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Terms of three or more characters, in their original order.
 */
function extractQueryTerms(query) {
  const tokens = query.toLowerCase().split(/\s+/);
  // Tokens of one or two characters (and the empty strings split() yields
  // around leading/trailing whitespace) are dropped.
  return tokens.filter((token) => token.length >= 3);
}
|
|
2444
|
+
/**
 * Whether a path's extension (case-insensitive) is listed in SOURCE_CODE_EXTENSIONS.
 *
 * @param {string} filepath - File path to inspect.
 * @returns {boolean} True when the lowercased extension is in the list.
 */
function isSourceCodeFile(filepath) {
  const extension = path7.extname(filepath).toLowerCase();
  return SOURCE_CODE_EXTENSIONS.some((known) => known === extension);
}
|
|
2448
|
+
/**
 * Whether a path's extension (case-insensitive) is listed in DOC_EXTENSIONS.
 *
 * @param {string} filepath - File path to inspect.
 * @returns {boolean} True when the lowercased extension is in the list.
 */
function isDocFile(filepath) {
  const extension = path7.extname(filepath).toLowerCase();
  return DOC_EXTENSIONS.some((known) => known === extension);
}
|
|
2452
|
+
/**
 * Score bonus for a file whose kind matches the intent detected in the query.
 *
 * @param {string} filepath - Path of the candidate file.
 * @param {string[]} queryTerms - Terms passed on to detectQueryIntent.
 * @returns {number} 0.06 for a source-code file under "implementation" intent,
 *   0.08 for a documentation file under "documentation" intent, otherwise 0.
 */
function calculateFileTypeBoost(filepath, queryTerms) {
  const sourceCode = isSourceCodeFile(filepath);
  const documentation = isDocFile(filepath);
  switch (detectQueryIntent(queryTerms)) {
    case "implementation":
      return sourceCode ? 0.06 : 0;
    case "documentation":
      return documentation ? 0.08 : 0;
    default:
      return 0;
  }
}
|
|
2470
|
+
// Lookup tables for the query-intent helpers. They are declared here and
// assigned inside the initializer below.
// NOTE(review): `__esm` is defined outside this view; presumably it defers the
// callback until `init_queryIntent()` is first called — confirm.
var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
var init_queryIntent = __esm(() => {
  // Query terms that mark a search as looking for code.
  IMPLEMENTATION_TERMS = [
    "function",
    "method",
    "class",
    "interface",
    "implement",
    "implementation",
    "endpoint",
    "route",
    "handler",
    "controller",
    "module",
    "code"
  ];
  // Query terms that mark a search as looking for prose documentation.
  DOCUMENTATION_TERMS = [
    "documentation",
    "docs",
    "guide",
    "tutorial",
    "readme",
    "how",
    "what",
    "why",
    "explain",
    "overview",
    "getting",
    "started",
    "requirements",
    "setup",
    "install",
    "configure",
    "configuration"
  ];
  // Extensions treated as source code. ".mts" and ".cts" are included so this
  // list agrees with the indexer's extension list, which also accepts them.
  SOURCE_CODE_EXTENSIONS = [
    ".ts",
    ".tsx",
    ".mts",
    ".cts",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
    ".py",
    ".go",
    ".rs",
    ".java"
  ];
  // Extensions treated as documentation.
  DOC_EXTENSIONS = [".md", ".txt", ".rst"];
});
|
|
2519
|
+
|
|
2520
|
+
// src/domain/services/chunking.ts
|
|
2521
|
+
/**
 * Split text into overlapping, fixed-size windows of lines.
 *
 * @param {string} content - Text to split; lines are separated by "\n".
 * @param {object} [options]
 * @param {number} [options.chunkSize=DEFAULT_CHUNK_SIZE] - Maximum lines per chunk.
 * @param {number} [options.overlap=DEFAULT_OVERLAP] - Lines shared by consecutive chunks.
 * @param {number} [options.minLinesForMultipleChunks=chunkSize] - Content with at
 *   most this many lines is returned as a single "file" chunk.
 * @returns {{content: string, startLine: number, endLine: number, type: string}[]}
 *   Chunks with 1-based, inclusive line numbers; `type` is "file" for the
 *   single-chunk case and "block" otherwise.
 * @throws {RangeError} When the content must be windowed but `chunkSize` is not
 *   at least 1.
 */
function createLineBasedChunks(content, options = {}) {
  const {
    chunkSize = DEFAULT_CHUNK_SIZE,
    overlap = DEFAULT_OVERLAP,
    minLinesForMultipleChunks = chunkSize
  } = options;
  const lines = content.split("\n");
  if (lines.length <= minLinesForMultipleChunks) {
    return [
      {
        content,
        startLine: 1,
        endLine: lines.length,
        type: "file"
      }
    ];
  }
  // A window of fewer than one line can never reach the end of the content.
  if (!(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be at least 1, got ${chunkSize}`);
  }
  // With overlap >= chunkSize the stride would be zero or negative and the
  // loop would never finish, so always advance by at least one line.
  const stride = Math.max(1, chunkSize - overlap);
  const chunks = [];
  for (let start = 0; start < lines.length; start += stride) {
    const end = Math.min(start + chunkSize, lines.length);
    chunks.push({
      content: lines.slice(start, end).join("\n"),
      startLine: start + 1,
      endLine: end,
      type: "block"
    });
    // Stop once a window reaches the last line; a further window would only
    // repeat the overlap tail.
    if (end >= lines.length) {
      break;
    }
  }
  return chunks;
}
|
|
2554
|
+
/**
 * Build a chunk identifier from a file path and a line range.
 *
 * @param {string} filepath - Path of the file the chunk came from.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} The path with "/" and "\" replaced by "-" and "." by "_",
 *   followed by "-<startLine>-<endLine>".
 */
function generateChunkId(filepath, startLine, endLine) {
  const withoutSeparators = filepath.split(/[/\\]/).join("-");
  const flattened = withoutSeparators.split(".").join("_");
  return `${flattened}-${startLine}-${endLine}`;
}
|
|
2558
|
+
// Defaults used by createLineBasedChunks when the caller omits the options.
// Lines per chunk:
var DEFAULT_CHUNK_SIZE = 30;
// Lines shared between consecutive chunks:
var DEFAULT_OVERLAP = 5;
|
|
2559
|
+
|
|
2560
|
+
// src/domain/services/index.ts
|
|
2561
|
+
var init_services = __esm(() => {
|
|
2562
|
+
init_keywords();
|
|
2563
|
+
init_queryIntent();
|
|
2564
|
+
});
|
|
2565
|
+
|
|
2566
|
+
// src/modules/language/typescript/parseCode.ts
|
|
2567
|
+
import * as ts from "typescript";
|
|
2568
|
+
/**
 * Parse TypeScript/JavaScript source into semantic chunks.
 *
 * @param {string} content - Source text.
 * @param {string} filepath - Path of the source file; used to pick the script kind.
 * @returns {object[]} Chunks as produced by parseTypeScript.
 */
function parseTypeScriptCode(content, filepath) {
  return parseTypeScript(content, filepath);
}

/**
 * Walk the TypeScript AST of `content` and emit one chunk per recognised
 * declaration: named functions, variable statements holding an arrow function
 * or function expression, named classes, interfaces, type aliases, enums, and
 * exported variable statements. Children of a recognised declaration are not
 * visited; every other node is descended into.
 *
 * @param {string} content - Source text.
 * @param {string} filepath - Passed to `ts.createSourceFile`; ".tsx" and ".jsx"
 *   select the TSX script kind, every other path the TS script kind.
 * @returns {{content: string, startLine: number, endLine: number, type: string,
 *   name?: string, isExported?: boolean, jsDoc?: string}[]} Chunks in visit
 *   order with 1-based line numbers. When nothing is recognised, a single
 *   "file" chunk covering the whole content is returned.
 */
function parseTypeScript(content, filepath) {
  const chunks = [];
  const usesJsx = filepath.endsWith(".tsx") || filepath.endsWith(".jsx");
  const sourceFile = ts.createSourceFile(
    filepath,
    content,
    ts.ScriptTarget.Latest,
    true,
    usesJsx ? ts.ScriptKind.TSX : ts.ScriptKind.TS
  );

  // True when the node carries an `export` modifier.
  function isExported(node) {
    if (!ts.canHaveModifiers(node)) {
      return false;
    }
    const modifiers = ts.getModifiers(node);
    return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
  }

  // JSDoc text attached to the node, newline-joined, or undefined when absent.
  function getJSDoc(node) {
    const jsDocNodes = ts.getJSDocCommentsAndTags(node);
    if (jsDocNodes.length === 0) {
      return undefined;
    }
    return jsDocNodes.map((doc) => doc.getText(sourceFile)).join("\n");
  }

  // True when a variable declaration is initialised with a function value.
  function isFunctionValued(decl) {
    const init = decl.initializer;
    return Boolean(init) && (ts.isArrowFunction(init) || ts.isFunctionExpression(init));
  }

  // Identifier name of a declaration; undefined for destructuring patterns.
  function declaredName(decl) {
    return ts.isIdentifier(decl.name) ? decl.name.text : undefined;
  }

  // Record `node` as a chunk. Line numbers are computed only for nodes that
  // actually become chunks.
  function addChunk(node, type, name, exported = isExported(node)) {
    const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
    const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
    chunks.push({
      content: node.getText(sourceFile),
      startLine: start.line + 1,
      endLine: end.line + 1,
      type,
      name,
      isExported: exported,
      jsDoc: getJSDoc(node)
    });
  }

  function visit(node) {
    if (ts.isFunctionDeclaration(node) && node.name) {
      addChunk(node, "function", node.name.text);
      return;
    }
    if (ts.isVariableStatement(node)) {
      const declarations = node.declarationList.declarations;
      // The first function-valued declaration claims the whole statement.
      const functionDecl = declarations.find(isFunctionValued);
      if (functionDecl) {
        addChunk(node, "function", declaredName(functionDecl));
        return;
      }
      if (isExported(node)) {
        // One chunk per declared name; each carries the full statement text.
        for (const decl of declarations) {
          addChunk(node, "variable", declaredName(decl), true);
        }
        return;
      }
      // Non-exported, non-function variables fall through to the generic
      // descent below.
    }
    if (ts.isClassDeclaration(node) && node.name) {
      addChunk(node, "class", node.name.text);
      return;
    }
    if (ts.isInterfaceDeclaration(node)) {
      addChunk(node, "interface", node.name.text);
      return;
    }
    if (ts.isTypeAliasDeclaration(node)) {
      addChunk(node, "type", node.name.text);
      return;
    }
    if (ts.isEnumDeclaration(node)) {
      addChunk(node, "enum", node.name.text);
      return;
    }
    ts.forEachChild(node, visit);
  }

  ts.forEachChild(sourceFile, visit);
  if (chunks.length === 0) {
    // Nothing recognisable: index the file as one chunk so it stays searchable.
    return [
      {
        content,
        startLine: 1,
        endLine: content.split("\n").length,
        type: "file"
      }
    ];
  }
  return chunks;
}
|
|
2726
|
+
/**
 * Build a chunk identifier from a file path and a line range.
 *
 * @param {string} filepath - Path of the file the chunk came from.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} The path with "/" and "\" replaced by "-" and "." by "_",
 *   followed by "-<startLine>-<endLine>".
 */
function generateChunkId2(filepath, startLine, endLine) {
  const withoutSeparators = filepath.split(/[/\\]/).join("-");
  const flattened = withoutSeparators.split(".").join("_");
  return `${flattened}-${startLine}-${endLine}`;
}
|
|
2730
|
+
var init_parseCode = () => {};
|
|
2731
|
+
|
|
2732
|
+
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2733
|
+
var init_fileIndexStorage = __esm(() => {
|
|
2734
|
+
init_entities();
|
|
2735
|
+
});
|
|
2736
|
+
|
|
2737
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
2738
|
+
import * as fs3 from "fs/promises";
|
|
2739
|
+
import * as path8 from "path";
|
|
2740
|
+
|
|
2741
|
+
/**
 * Per-module index of file summaries, persisted as JSON files under
 * `<indexDir>/index/<moduleId>/symbolic`, with an in-memory BM25 index built
 * from each summary's keywords, exports and path keywords.
 */
class SymbolicIndex {
  // Contents of `_meta.json`; null until initialize() or load() has run.
  meta = null;
  // filepath -> file summary object.
  fileSummaries = new Map();
  // BM25 index over the summaries; null until built.
  bm25Index = null;
  symbolicPath;
  moduleId;

  /**
   * @param {string} indexDir - Root directory of the index on disk.
   * @param {string} moduleId - Module identifier; also a path segment on disk.
   */
  constructor(indexDir, moduleId) {
    this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
    this.moduleId = moduleId;
  }

  /** Load the index from disk; on any failure start from an empty index. */
  async initialize() {
    try {
      await this.load();
      return;
    } catch {
      // Missing or unreadable metadata: fall through and start fresh.
    }
    this.meta = {
      version: "1.0.0",
      lastUpdated: new Date().toISOString(),
      moduleId: this.moduleId,
      fileCount: 0,
      bm25Data: {
        avgDocLength: 0,
        documentFrequencies: {},
        totalDocs: 0
      }
    };
    this.bm25Index = new BM25Index();
  }

  /** Register or replace the summary stored under `summary.filepath`. */
  addFile(summary) {
    this.fileSummaries.set(summary.filepath, summary);
  }

  /** Drop a file from memory only. @returns {boolean} Whether it was present. */
  removeFile(filepath) {
    return this.fileSummaries.delete(filepath);
  }

  /** Rebuild the BM25 index from every summary and refresh the meta counters. */
  buildBM25Index() {
    const index = new BM25Index();
    this.bm25Index = index;
    this.fileSummaries.forEach((summary, filepath) => {
      const terms = [
        ...summary.keywords,
        ...summary.exports,
        ...extractPathKeywords(filepath)
      ];
      index.addDocuments([{ id: filepath, content: terms.join(" ") }]);
    });
    if (!this.meta) {
      return;
    }
    const total = this.fileSummaries.size;
    this.meta.fileCount = total;
    this.meta.bm25Data.totalDocs = total;
  }

  /**
   * File paths matching `query`, best first as ranked by the BM25 index.
   * Returns every known file when no BM25 index has been built.
   */
  findCandidates(query, maxCandidates = 20) {
    if (this.bm25Index) {
      const hits = this.bm25Index.search(query, maxCandidates);
      return hits.map((hit) => hit.id);
    }
    return Array.from(this.fileSummaries.keys());
  }

  /** @returns {string[]} Every file path currently held in memory. */
  getAllFiles() {
    return Array.from(this.fileSummaries.keys());
  }

  /** @returns {object | undefined} The summary stored for `filepath`, if any. */
  getFileSummary(filepath) {
    return this.fileSummaries.get(filepath);
  }

  /**
   * Write `_meta.json` and one JSON file per summary.
   * @throws {Error} When the index has not been initialized.
   */
  async save() {
    if (!this.meta) {
      throw new Error("Index not initialized");
    }
    this.meta.lastUpdated = new Date().toISOString();
    this.meta.fileCount = this.fileSummaries.size;
    await fs3.mkdir(this.symbolicPath, { recursive: true });
    const metaFile = path8.join(this.symbolicPath, "_meta.json");
    await fs3.writeFile(metaFile, JSON.stringify(this.meta, null, 2));
    // Summaries are written one at a time, in insertion order.
    for (const [filepath, summary] of this.fileSummaries.entries()) {
      const target = this.getFileSummaryPath(filepath);
      await fs3.mkdir(path8.dirname(target), { recursive: true });
      await fs3.writeFile(target, JSON.stringify(summary, null, 2));
    }
  }

  /** Read `_meta.json` and every summary file, then rebuild the BM25 index. */
  async load() {
    const metaFile = path8.join(this.symbolicPath, "_meta.json");
    const rawMeta = await fs3.readFile(metaFile, "utf-8");
    this.meta = JSON.parse(rawMeta);
    this.fileSummaries.clear();
    await this.loadFileSummariesRecursive(this.symbolicPath);
    this.buildBM25Index();
  }

  /**
   * Collect summaries from `dir` and its subdirectories. Unreadable
   * directories and unparseable files are skipped silently.
   */
  async loadFileSummariesRecursive(dir) {
    try {
      const entries = await fs3.readdir(dir, { withFileTypes: true });
      for (const entry of entries) {
        const entryPath = path8.join(dir, entry.name);
        if (entry.isDirectory()) {
          await this.loadFileSummariesRecursive(entryPath);
          continue;
        }
        const isSummaryFile = entry.name.endsWith(".json") && entry.name !== "_meta.json";
        if (!isSummaryFile) {
          continue;
        }
        try {
          const summary = JSON.parse(await fs3.readFile(entryPath, "utf-8"));
          if (summary.filepath) {
            this.fileSummaries.set(summary.filepath, summary);
          }
        } catch {}
      }
    } catch {}
  }

  /**
   * On-disk location of a file's summary: the source path with its final
   * extension swapped for ".json", under the symbolic directory.
   */
  getFileSummaryPath(filepath) {
    return path8.join(this.symbolicPath, filepath.replace(/\.[^.]+$/, ".json"));
  }

  /** Remove a summary from disk (ignoring unlink failures) and from memory. */
  async deleteFileSummary(filepath) {
    try {
      await fs3.unlink(this.getFileSummaryPath(filepath));
    } catch {}
    this.fileSummaries.delete(filepath);
  }

  /** @returns {Promise<boolean>} Whether `_meta.json` is accessible on disk. */
  async exists() {
    try {
      await fs3.access(path8.join(this.symbolicPath, "_meta.json"));
      return true;
    } catch {
      return false;
    }
  }

  /** Number of summaries held in memory. */
  get size() {
    return this.fileSummaries.size;
  }

  /** Empty the in-memory index and reset the meta counters. */
  clear() {
    this.fileSummaries.clear();
    if (this.meta) {
      this.meta.fileCount = 0;
      this.meta.bm25Data = {
        avgDocLength: 0,
        documentFrequencies: {},
        totalDocs: 0
      };
    }
    this.bm25Index = new BM25Index();
  }
}
|
|
2879
|
+
var init_symbolicIndex = __esm(() => {
|
|
2880
|
+
init_keywords();
|
|
2881
|
+
});
|
|
2882
|
+
|
|
2883
|
+
// src/infrastructure/storage/index.ts
|
|
2884
|
+
var init_storage = __esm(() => {
|
|
2885
|
+
init_fileIndexStorage();
|
|
2886
|
+
init_symbolicIndex();
|
|
2887
|
+
});
|
|
2888
|
+
|
|
2889
|
+
// src/modules/language/typescript/index.ts
|
|
2890
|
+
var exports_typescript = {};
|
|
2891
|
+
__export(exports_typescript, {
|
|
2892
|
+
supportsFile: () => supportsFile,
|
|
2893
|
+
isTypeScriptFile: () => isTypeScriptFile,
|
|
2894
|
+
TypeScriptModule: () => TypeScriptModule,
|
|
2895
|
+
TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
|
|
2896
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
2897
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
2898
|
+
});
|
|
2899
|
+
import * as path9 from "path";
|
|
2900
|
+
/**
 * Whether a path's extension (case-insensitive) is listed in TYPESCRIPT_EXTENSIONS.
 *
 * @param {string} filepath - File path to inspect.
 * @returns {boolean} True when the lowercased extension is in the list.
 */
function isTypeScriptFile(filepath) {
  const extension = path9.extname(filepath).toLowerCase();
  return TYPESCRIPT_EXTENSIONS.some((known) => known === extension);
}
|
|
2904
|
+
/**
 * Score bonus based on the kind of declaration a chunk holds.
 *
 * @param {{type: string}} chunk - Chunk whose `type` is inspected.
 * @returns {number} 0.05 for "function", 0.04 for "class"/"interface",
 *   0.03 for "type"/"enum", 0.02 for "variable", and 0 for anything else
 *   (including "file" and "block").
 */
function calculateChunkTypeBoost(chunk) {
  const kind = chunk.type;
  if (kind === "function") {
    return 0.05;
  }
  if (kind === "class" || kind === "interface") {
    return 0.04;
  }
  if (kind === "type" || kind === "enum") {
    return 0.03;
  }
  if (kind === "variable") {
    return 0.02;
  }
  return 0;
}
|
|
2922
|
+
/**
 * Score bonus for exported declarations.
 *
 * @param {{isExported?: boolean}} chunk - Chunk whose `isExported` flag is inspected.
 * @returns {number} 0.03 when `chunk.isExported` is truthy, otherwise 0.
 */
function calculateExportBoost(chunk) {
  if (chunk.isExported) {
    return 0.03;
  }
  return 0;
}
|
|
2925
|
+
|
|
2926
|
+
class TypeScriptModule {
|
|
2927
|
+
id = "language/typescript";
|
|
2928
|
+
name = "TypeScript Search";
|
|
2929
|
+
description = "TypeScript-aware code search with AST parsing and semantic embeddings";
|
|
2930
|
+
version = "1.0.0";
|
|
2931
|
+
supportsFile(filepath) {
|
|
2932
|
+
return isTypeScriptFile(filepath);
|
|
2933
|
+
}
|
|
2934
|
+
embeddingConfig = null;
|
|
2935
|
+
symbolicIndex = null;
|
|
2936
|
+
pendingSummaries = new Map;
|
|
2937
|
+
rootDir = "";
|
|
2938
|
+
logger = undefined;
|
|
2939
|
+
async initialize(config) {
|
|
2940
|
+
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
2941
|
+
this.logger = config.options?.logger;
|
|
2942
|
+
if (this.logger) {
|
|
2943
|
+
this.embeddingConfig = {
|
|
2944
|
+
...this.embeddingConfig,
|
|
2945
|
+
logger: this.logger
|
|
2946
|
+
};
|
|
2947
|
+
}
|
|
2948
|
+
configureEmbeddings(this.embeddingConfig);
|
|
2949
|
+
this.pendingSummaries.clear();
|
|
2950
|
+
}
|
|
2951
|
+
async indexFile(filepath, content, ctx) {
|
|
2952
|
+
if (!isTypeScriptFile(filepath)) {
|
|
2953
|
+
return null;
|
|
2954
|
+
}
|
|
2955
|
+
this.rootDir = ctx.rootDir;
|
|
2956
|
+
const parsedChunks = parseTypeScriptCode(content, filepath);
|
|
2957
|
+
if (parsedChunks.length === 0) {
|
|
2958
|
+
return null;
|
|
2959
|
+
}
|
|
2960
|
+
const pathContext = parsePathContext(filepath);
|
|
2961
|
+
const pathPrefix = formatPathContextForEmbedding(pathContext);
|
|
2962
|
+
const chunkContents = parsedChunks.map((c) => {
|
|
2963
|
+
const namePrefix = c.name ? `${c.name}: ` : "";
|
|
2964
|
+
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
2965
|
+
});
|
|
2966
|
+
const embeddings = await getEmbeddings(chunkContents);
|
|
2967
|
+
const chunks = parsedChunks.map((pc) => ({
|
|
2968
|
+
id: generateChunkId2(filepath, pc.startLine, pc.endLine),
|
|
2969
|
+
content: pc.content,
|
|
2970
|
+
startLine: pc.startLine,
|
|
2971
|
+
endLine: pc.endLine,
|
|
2972
|
+
type: pc.type,
|
|
2973
|
+
name: pc.name,
|
|
2974
|
+
isExported: pc.isExported,
|
|
2975
|
+
jsDoc: pc.jsDoc
|
|
2976
|
+
}));
|
|
2977
|
+
const references = this.extractReferences(content, filepath);
|
|
2978
|
+
const stats = await ctx.getFileStats(filepath);
|
|
2979
|
+
const currentConfig = getEmbeddingConfig();
|
|
2980
|
+
const moduleData = {
|
|
2981
|
+
embeddings,
|
|
2982
|
+
embeddingModel: currentConfig.model
|
|
2983
|
+
};
|
|
2984
|
+
const chunkTypes = [
|
|
2985
|
+
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2986
|
+
];
|
|
2987
|
+
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2988
|
+
const allKeywords = new Set;
|
|
2989
|
+
for (const pc of parsedChunks) {
|
|
2990
|
+
const keywords = extractKeywords(pc.content, pc.name);
|
|
2991
|
+
keywords.forEach((k) => allKeywords.add(k));
|
|
2992
|
+
}
|
|
2993
|
+
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
2994
|
+
const fileSummary = {
|
|
2995
|
+
filepath,
|
|
2996
|
+
chunkCount: chunks.length,
|
|
2997
|
+
chunkTypes,
|
|
2998
|
+
keywords: Array.from(allKeywords),
|
|
2999
|
+
exports,
|
|
3000
|
+
lastModified: stats.lastModified,
|
|
3001
|
+
pathContext: {
|
|
3002
|
+
segments: pathContext.segments,
|
|
3003
|
+
layer: pathContext.layer,
|
|
3004
|
+
domain: pathContext.domain,
|
|
3005
|
+
depth: pathContext.depth
|
|
3006
|
+
}
|
|
3007
|
+
};
|
|
3008
|
+
this.pendingSummaries.set(filepath, fileSummary);
|
|
3009
|
+
return {
|
|
3010
|
+
filepath,
|
|
3011
|
+
lastModified: stats.lastModified,
|
|
3012
|
+
chunks,
|
|
3013
|
+
moduleData,
|
|
3014
|
+
references
|
|
3015
|
+
};
|
|
3016
|
+
}
|
|
3017
|
+
async finalize(ctx) {
|
|
3018
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3019
|
+
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3020
|
+
await this.symbolicIndex.initialize();
|
|
3021
|
+
for (const [filepath, summary] of this.pendingSummaries) {
|
|
3022
|
+
this.symbolicIndex.addFile(summary);
|
|
3023
|
+
}
|
|
3024
|
+
this.symbolicIndex.buildBM25Index();
|
|
3025
|
+
await this.symbolicIndex.save();
|
|
3026
|
+
this.pendingSummaries.clear();
|
|
3027
|
+
}
|
|
3028
|
+
async search(query, ctx, options = {}) {
|
|
3029
|
+
const {
|
|
3030
|
+
topK = DEFAULT_TOP_K2,
|
|
3031
|
+
minScore = DEFAULT_MIN_SCORE2,
|
|
3032
|
+
filePatterns
|
|
3033
|
+
} = options;
|
|
3034
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3035
|
+
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3036
|
+
let allFiles;
|
|
3037
|
+
try {
|
|
3038
|
+
await symbolicIndex.initialize();
|
|
3039
|
+
allFiles = symbolicIndex.getAllFiles();
|
|
2621
3040
|
} catch {
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
3041
|
+
allFiles = await ctx.listIndexedFiles();
|
|
3042
|
+
}
|
|
3043
|
+
let filesToSearch = allFiles;
|
|
3044
|
+
if (filePatterns && filePatterns.length > 0) {
|
|
3045
|
+
filesToSearch = allFiles.filter((filepath) => {
|
|
3046
|
+
return filePatterns.some((pattern) => {
|
|
3047
|
+
if (pattern.startsWith("*.")) {
|
|
3048
|
+
const ext = pattern.slice(1);
|
|
3049
|
+
return filepath.endsWith(ext);
|
|
3050
|
+
}
|
|
3051
|
+
return filepath.includes(pattern);
|
|
3052
|
+
});
|
|
3053
|
+
});
|
|
3054
|
+
}
|
|
3055
|
+
const queryEmbedding = await getEmbedding(query);
|
|
3056
|
+
const bm25Index = new BM25Index;
|
|
3057
|
+
const allChunksData = [];
|
|
3058
|
+
for (const filepath of filesToSearch) {
|
|
3059
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3060
|
+
if (!fileIndex)
|
|
3061
|
+
continue;
|
|
3062
|
+
const moduleData = fileIndex.moduleData;
|
|
3063
|
+
if (!moduleData?.embeddings)
|
|
3064
|
+
continue;
|
|
3065
|
+
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3066
|
+
const chunk = fileIndex.chunks[i];
|
|
3067
|
+
const embedding = moduleData.embeddings[i];
|
|
3068
|
+
if (!embedding)
|
|
3069
|
+
continue;
|
|
3070
|
+
allChunksData.push({
|
|
3071
|
+
filepath: fileIndex.filepath,
|
|
3072
|
+
chunk,
|
|
3073
|
+
embedding
|
|
3074
|
+
});
|
|
3075
|
+
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3076
|
+
}
|
|
3077
|
+
}
|
|
3078
|
+
const bm25Results = bm25Index.search(query, topK * 3);
|
|
3079
|
+
const bm25Scores = new Map;
|
|
3080
|
+
for (const result of bm25Results) {
|
|
3081
|
+
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3082
|
+
}
|
|
3083
|
+
const queryTerms = extractQueryTerms(query);
|
|
3084
|
+
const pathBoosts = new Map;
|
|
3085
|
+
for (const filepath of filesToSearch) {
|
|
3086
|
+
const summary = symbolicIndex.getFileSummary(filepath);
|
|
3087
|
+
if (summary?.pathContext) {
|
|
3088
|
+
let boost = 0;
|
|
3089
|
+
const ctx2 = summary.pathContext;
|
|
3090
|
+
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
3091
|
+
boost += 0.1;
|
|
2631
3092
|
}
|
|
2632
|
-
|
|
2633
|
-
|
|
3093
|
+
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
3094
|
+
boost += 0.05;
|
|
3095
|
+
}
|
|
3096
|
+
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
3097
|
+
if (segmentMatch) {
|
|
3098
|
+
boost += 0.05;
|
|
3099
|
+
}
|
|
3100
|
+
pathBoosts.set(filepath, boost);
|
|
3101
|
+
}
|
|
3102
|
+
}
|
|
3103
|
+
const results = [];
|
|
3104
|
+
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3105
|
+
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3106
|
+
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3107
|
+
const pathBoost = pathBoosts.get(filepath) || 0;
|
|
3108
|
+
const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
|
|
3109
|
+
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
3110
|
+
const exportBoost = calculateExportBoost(chunk);
|
|
3111
|
+
const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
3112
|
+
const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
|
|
3113
|
+
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3114
|
+
results.push({
|
|
3115
|
+
filepath,
|
|
3116
|
+
chunk,
|
|
3117
|
+
score: hybridScore,
|
|
3118
|
+
moduleId: this.id,
|
|
3119
|
+
context: {
|
|
3120
|
+
semanticScore,
|
|
3121
|
+
bm25Score,
|
|
3122
|
+
pathBoost,
|
|
3123
|
+
fileTypeBoost,
|
|
3124
|
+
chunkTypeBoost,
|
|
3125
|
+
exportBoost
|
|
3126
|
+
}
|
|
3127
|
+
});
|
|
3128
|
+
}
|
|
2634
3129
|
}
|
|
3130
|
+
results.sort((a, b) => b.score - a.score);
|
|
3131
|
+
return results.slice(0, topK);
|
|
2635
3132
|
}
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
2647
|
-
|
|
2648
|
-
...extractPathKeywords(filepath)
|
|
2649
|
-
].join(" ");
|
|
2650
|
-
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
3133
|
+
extractReferences(content, filepath) {
|
|
3134
|
+
const references = [];
|
|
3135
|
+
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
3136
|
+
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
3137
|
+
let match;
|
|
3138
|
+
while ((match = importRegex.exec(content)) !== null) {
|
|
3139
|
+
const importPath = match[1];
|
|
3140
|
+
if (importPath.startsWith(".")) {
|
|
3141
|
+
const dir = path9.dirname(filepath);
|
|
3142
|
+
const resolved = path9.normalize(path9.join(dir, importPath));
|
|
3143
|
+
references.push(resolved);
|
|
3144
|
+
}
|
|
2651
3145
|
}
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
3146
|
+
while ((match = requireRegex.exec(content)) !== null) {
|
|
3147
|
+
const importPath = match[1];
|
|
3148
|
+
if (importPath.startsWith(".")) {
|
|
3149
|
+
const dir = path9.dirname(filepath);
|
|
3150
|
+
const resolved = path9.normalize(path9.join(dir, importPath));
|
|
3151
|
+
references.push(resolved);
|
|
3152
|
+
}
|
|
2655
3153
|
}
|
|
3154
|
+
return references;
|
|
2656
3155
|
}
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
3156
|
+
}
|
|
3157
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS, supportsFile;
|
|
3158
|
+
var init_typescript = __esm(() => {
|
|
3159
|
+
init_embeddings();
|
|
3160
|
+
init_services();
|
|
3161
|
+
init_config2();
|
|
3162
|
+
init_parseCode();
|
|
3163
|
+
init_storage();
|
|
3164
|
+
TYPESCRIPT_EXTENSIONS = [
|
|
3165
|
+
".ts",
|
|
3166
|
+
".tsx",
|
|
3167
|
+
".js",
|
|
3168
|
+
".jsx",
|
|
3169
|
+
".mjs",
|
|
3170
|
+
".cjs",
|
|
3171
|
+
".mts",
|
|
3172
|
+
".cts"
|
|
3173
|
+
];
|
|
3174
|
+
supportsFile = isTypeScriptFile;
|
|
3175
|
+
});
|
|
3176
|
+
|
|
3177
|
+
// src/modules/data/json/index.ts
|
|
3178
|
+
var exports_json = {};
|
|
3179
|
+
__export(exports_json, {
|
|
3180
|
+
supportsFile: () => supportsFile2,
|
|
3181
|
+
isJsonFile: () => isJsonFile,
|
|
3182
|
+
JsonModule: () => JsonModule,
|
|
3183
|
+
JSON_EXTENSIONS: () => JSON_EXTENSIONS,
|
|
3184
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
|
|
3185
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
|
|
3186
|
+
});
|
|
3187
|
+
import * as path10 from "path";
|
|
3188
|
+
function isJsonFile(filepath) {
|
|
3189
|
+
const ext = path10.extname(filepath).toLowerCase();
|
|
3190
|
+
return JSON_EXTENSIONS.includes(ext);
|
|
3191
|
+
}
|
|
3192
|
+
function extractJsonKeys(obj, prefix = "") {
|
|
3193
|
+
const keys = [];
|
|
3194
|
+
if (obj === null || obj === undefined) {
|
|
3195
|
+
return keys;
|
|
3196
|
+
}
|
|
3197
|
+
if (Array.isArray(obj)) {
|
|
3198
|
+
obj.forEach((item, index) => {
|
|
3199
|
+
keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
|
|
3200
|
+
});
|
|
3201
|
+
} else if (typeof obj === "object") {
|
|
3202
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
3203
|
+
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
3204
|
+
keys.push(key);
|
|
3205
|
+
keys.push(...extractJsonKeys(value, fullKey));
|
|
2660
3206
|
}
|
|
2661
|
-
const results = this.bm25Index.search(query, maxCandidates);
|
|
2662
|
-
return results.map((r) => r.id);
|
|
2663
3207
|
}
|
|
2664
|
-
|
|
2665
|
-
|
|
3208
|
+
return keys;
|
|
3209
|
+
}
|
|
3210
|
+
function extractJsonKeywords(content) {
|
|
3211
|
+
try {
|
|
3212
|
+
const parsed = JSON.parse(content);
|
|
3213
|
+
const keys = extractJsonKeys(parsed);
|
|
3214
|
+
const stringValues = [];
|
|
3215
|
+
const extractStrings = (obj) => {
|
|
3216
|
+
if (typeof obj === "string") {
|
|
3217
|
+
const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
|
|
3218
|
+
stringValues.push(...words);
|
|
3219
|
+
} else if (Array.isArray(obj)) {
|
|
3220
|
+
obj.forEach(extractStrings);
|
|
3221
|
+
} else if (obj && typeof obj === "object") {
|
|
3222
|
+
Object.values(obj).forEach(extractStrings);
|
|
3223
|
+
}
|
|
3224
|
+
};
|
|
3225
|
+
extractStrings(parsed);
|
|
3226
|
+
return [...new Set([...keys, ...stringValues])];
|
|
3227
|
+
} catch {
|
|
3228
|
+
return [];
|
|
2666
3229
|
}
|
|
2667
|
-
|
|
2668
|
-
|
|
3230
|
+
}
|
|
3231
|
+
|
|
3232
|
+
class JsonModule {
|
|
3233
|
+
id = "data/json";
|
|
3234
|
+
name = "JSON Search";
|
|
3235
|
+
description = "JSON file search with structure-aware indexing";
|
|
3236
|
+
version = "1.0.0";
|
|
3237
|
+
supportsFile(filepath) {
|
|
3238
|
+
return isJsonFile(filepath);
|
|
2669
3239
|
}
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
3240
|
+
embeddingConfig = null;
|
|
3241
|
+
symbolicIndex = null;
|
|
3242
|
+
pendingSummaries = new Map;
|
|
3243
|
+
rootDir = "";
|
|
3244
|
+
logger = undefined;
|
|
3245
|
+
async initialize(config) {
|
|
3246
|
+
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3247
|
+
this.logger = config.options?.logger;
|
|
3248
|
+
if (this.logger) {
|
|
3249
|
+
this.embeddingConfig = {
|
|
3250
|
+
...this.embeddingConfig,
|
|
3251
|
+
logger: this.logger
|
|
3252
|
+
};
|
|
2682
3253
|
}
|
|
3254
|
+
configureEmbeddings(this.embeddingConfig);
|
|
3255
|
+
this.pendingSummaries.clear();
|
|
2683
3256
|
}
|
|
2684
|
-
async
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
this.
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
3257
|
+
async indexFile(filepath, content, ctx) {
|
|
3258
|
+
if (!isJsonFile(filepath)) {
|
|
3259
|
+
return null;
|
|
3260
|
+
}
|
|
3261
|
+
this.rootDir = ctx.rootDir;
|
|
3262
|
+
const textChunks = createLineBasedChunks(content, {
|
|
3263
|
+
chunkSize: 50,
|
|
3264
|
+
overlap: 10
|
|
3265
|
+
});
|
|
3266
|
+
if (textChunks.length === 0) {
|
|
3267
|
+
return null;
|
|
3268
|
+
}
|
|
3269
|
+
const chunkContents = textChunks.map((c) => {
|
|
3270
|
+
const filename = path10.basename(filepath);
|
|
3271
|
+
return `${filename}: ${c.content}`;
|
|
3272
|
+
});
|
|
3273
|
+
const embeddings = await getEmbeddings(chunkContents);
|
|
3274
|
+
const chunks = textChunks.map((tc, i) => ({
|
|
3275
|
+
id: generateChunkId(filepath, tc.startLine, tc.endLine),
|
|
3276
|
+
content: tc.content,
|
|
3277
|
+
startLine: tc.startLine,
|
|
3278
|
+
endLine: tc.endLine,
|
|
3279
|
+
type: tc.type
|
|
3280
|
+
}));
|
|
3281
|
+
const jsonKeys = extractJsonKeys((() => {
|
|
3282
|
+
try {
|
|
3283
|
+
return JSON.parse(content);
|
|
3284
|
+
} catch {
|
|
3285
|
+
return {};
|
|
2708
3286
|
}
|
|
2709
|
-
}
|
|
2710
|
-
|
|
2711
|
-
|
|
2712
|
-
const
|
|
2713
|
-
|
|
3287
|
+
})());
|
|
3288
|
+
const stats = await ctx.getFileStats(filepath);
|
|
3289
|
+
const currentConfig = getEmbeddingConfig();
|
|
3290
|
+
const moduleData = {
|
|
3291
|
+
embeddings,
|
|
3292
|
+
embeddingModel: currentConfig.model,
|
|
3293
|
+
jsonKeys
|
|
3294
|
+
};
|
|
3295
|
+
const keywords = extractJsonKeywords(content);
|
|
3296
|
+
const fileSummary = {
|
|
3297
|
+
filepath,
|
|
3298
|
+
chunkCount: chunks.length,
|
|
3299
|
+
chunkTypes: ["file"],
|
|
3300
|
+
keywords,
|
|
3301
|
+
exports: [],
|
|
3302
|
+
lastModified: stats.lastModified
|
|
3303
|
+
};
|
|
3304
|
+
this.pendingSummaries.set(filepath, fileSummary);
|
|
3305
|
+
return {
|
|
3306
|
+
filepath,
|
|
3307
|
+
lastModified: stats.lastModified,
|
|
3308
|
+
chunks,
|
|
3309
|
+
moduleData
|
|
3310
|
+
};
|
|
2714
3311
|
}
|
|
2715
|
-
async
|
|
2716
|
-
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
this.
|
|
3312
|
+
async finalize(ctx) {
|
|
3313
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3314
|
+
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3315
|
+
await this.symbolicIndex.initialize();
|
|
3316
|
+
for (const [filepath, summary] of this.pendingSummaries) {
|
|
3317
|
+
this.symbolicIndex.addFile(summary);
|
|
3318
|
+
}
|
|
3319
|
+
this.symbolicIndex.buildBM25Index();
|
|
3320
|
+
await this.symbolicIndex.save();
|
|
3321
|
+
this.pendingSummaries.clear();
|
|
2720
3322
|
}
|
|
2721
|
-
async
|
|
3323
|
+
async search(query, ctx, options = {}) {
|
|
3324
|
+
const {
|
|
3325
|
+
topK = DEFAULT_TOP_K3,
|
|
3326
|
+
minScore = DEFAULT_MIN_SCORE3,
|
|
3327
|
+
filePatterns
|
|
3328
|
+
} = options;
|
|
3329
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3330
|
+
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3331
|
+
let allFiles;
|
|
2722
3332
|
try {
|
|
2723
|
-
|
|
2724
|
-
|
|
2725
|
-
return true;
|
|
3333
|
+
await symbolicIndex.initialize();
|
|
3334
|
+
allFiles = symbolicIndex.getAllFiles();
|
|
2726
3335
|
} catch {
|
|
2727
|
-
|
|
3336
|
+
allFiles = await ctx.listIndexedFiles();
|
|
2728
3337
|
}
|
|
2729
|
-
|
|
2730
|
-
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
|
|
2734
|
-
|
|
2735
|
-
|
|
2736
|
-
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
totalDocs: 0
|
|
2741
|
-
};
|
|
3338
|
+
let filesToSearch = allFiles.filter((f) => isJsonFile(f));
|
|
3339
|
+
if (filePatterns && filePatterns.length > 0) {
|
|
3340
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
3341
|
+
return filePatterns.some((pattern) => {
|
|
3342
|
+
if (pattern.startsWith("*.")) {
|
|
3343
|
+
const ext = pattern.slice(1);
|
|
3344
|
+
return filepath.endsWith(ext);
|
|
3345
|
+
}
|
|
3346
|
+
return filepath.includes(pattern);
|
|
3347
|
+
});
|
|
3348
|
+
});
|
|
2742
3349
|
}
|
|
2743
|
-
|
|
3350
|
+
const queryEmbedding = await getEmbedding(query);
|
|
3351
|
+
const bm25Index = new BM25Index;
|
|
3352
|
+
const allChunksData = [];
|
|
3353
|
+
for (const filepath of filesToSearch) {
|
|
3354
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3355
|
+
if (!fileIndex)
|
|
3356
|
+
continue;
|
|
3357
|
+
const moduleData = fileIndex.moduleData;
|
|
3358
|
+
if (!moduleData?.embeddings)
|
|
3359
|
+
continue;
|
|
3360
|
+
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3361
|
+
const chunk = fileIndex.chunks[i];
|
|
3362
|
+
const embedding = moduleData.embeddings[i];
|
|
3363
|
+
if (!embedding)
|
|
3364
|
+
continue;
|
|
3365
|
+
allChunksData.push({
|
|
3366
|
+
filepath: fileIndex.filepath,
|
|
3367
|
+
chunk,
|
|
3368
|
+
embedding
|
|
3369
|
+
});
|
|
3370
|
+
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3371
|
+
}
|
|
3372
|
+
}
|
|
3373
|
+
const bm25Results = bm25Index.search(query, topK * 3);
|
|
3374
|
+
const bm25Scores = new Map;
|
|
3375
|
+
for (const result of bm25Results) {
|
|
3376
|
+
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3377
|
+
}
|
|
3378
|
+
const queryTerms = extractQueryTerms(query);
|
|
3379
|
+
const results = [];
|
|
3380
|
+
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3381
|
+
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3382
|
+
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3383
|
+
const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
|
|
3384
|
+
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3385
|
+
results.push({
|
|
3386
|
+
filepath,
|
|
3387
|
+
chunk,
|
|
3388
|
+
score: hybridScore,
|
|
3389
|
+
moduleId: this.id,
|
|
3390
|
+
context: {
|
|
3391
|
+
semanticScore,
|
|
3392
|
+
bm25Score
|
|
3393
|
+
}
|
|
3394
|
+
});
|
|
3395
|
+
}
|
|
3396
|
+
}
|
|
3397
|
+
results.sort((a, b) => b.score - a.score);
|
|
3398
|
+
return results.slice(0, topK);
|
|
2744
3399
|
}
|
|
2745
3400
|
}
|
|
2746
|
-
var
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
3401
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS, supportsFile2;
|
|
3402
|
+
var init_json = __esm(() => {
|
|
3403
|
+
init_embeddings();
|
|
3404
|
+
init_services();
|
|
3405
|
+
init_config2();
|
|
3406
|
+
init_storage();
|
|
3407
|
+
JSON_EXTENSIONS = [".json"];
|
|
3408
|
+
supportsFile2 = isJsonFile;
|
|
2754
3409
|
});
|
|
2755
3410
|
|
|
2756
|
-
// src/modules/
|
|
2757
|
-
var
|
|
2758
|
-
__export(
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
3411
|
+
// src/modules/docs/markdown/index.ts
|
|
3412
|
+
var exports_markdown = {};
|
|
3413
|
+
__export(exports_markdown, {
|
|
3414
|
+
supportsFile: () => supportsFile3,
|
|
3415
|
+
isMarkdownFile: () => isMarkdownFile,
|
|
3416
|
+
MarkdownModule: () => MarkdownModule,
|
|
3417
|
+
MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
|
|
3418
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
|
|
3419
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
|
|
2762
3420
|
});
|
|
2763
|
-
import * as
|
|
2764
|
-
function
|
|
2765
|
-
const
|
|
2766
|
-
|
|
2767
|
-
if (hasDocumentationTerm) {
|
|
2768
|
-
return "documentation";
|
|
2769
|
-
}
|
|
2770
|
-
if (hasImplementationTerm) {
|
|
2771
|
-
return "implementation";
|
|
2772
|
-
}
|
|
2773
|
-
return "neutral";
|
|
3421
|
+
import * as path11 from "path";
|
|
3422
|
+
function isMarkdownFile(filepath) {
|
|
3423
|
+
const ext = path11.extname(filepath).toLowerCase();
|
|
3424
|
+
return MARKDOWN_EXTENSIONS.includes(ext);
|
|
2774
3425
|
}
|
|
2775
|
-
function
|
|
2776
|
-
const
|
|
2777
|
-
|
|
2778
|
-
const
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
3426
|
+
function parseMarkdownSections(content) {
|
|
3427
|
+
const lines = content.split(`
|
|
3428
|
+
`);
|
|
3429
|
+
const sections = [];
|
|
3430
|
+
let currentSection = null;
|
|
3431
|
+
let currentContent = [];
|
|
3432
|
+
let startLine = 1;
|
|
3433
|
+
for (let i = 0;i < lines.length; i++) {
|
|
3434
|
+
const line = lines[i];
|
|
3435
|
+
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
|
|
3436
|
+
if (headingMatch) {
|
|
3437
|
+
if (currentSection) {
|
|
3438
|
+
currentSection.content = currentContent.join(`
|
|
3439
|
+
`).trim();
|
|
3440
|
+
currentSection.endLine = i;
|
|
3441
|
+
if (currentSection.content || currentSection.heading) {
|
|
3442
|
+
sections.push(currentSection);
|
|
3443
|
+
}
|
|
3444
|
+
} else if (currentContent.length > 0) {
|
|
3445
|
+
sections.push({
|
|
3446
|
+
heading: "",
|
|
3447
|
+
level: 0,
|
|
3448
|
+
content: currentContent.join(`
|
|
3449
|
+
`).trim(),
|
|
3450
|
+
startLine: 1,
|
|
3451
|
+
endLine: i
|
|
3452
|
+
});
|
|
3453
|
+
}
|
|
3454
|
+
currentSection = {
|
|
3455
|
+
heading: headingMatch[2],
|
|
3456
|
+
level: headingMatch[1].length,
|
|
3457
|
+
content: "",
|
|
3458
|
+
startLine: i + 1,
|
|
3459
|
+
endLine: lines.length
|
|
3460
|
+
};
|
|
3461
|
+
currentContent = [];
|
|
3462
|
+
} else {
|
|
3463
|
+
currentContent.push(line);
|
|
2783
3464
|
}
|
|
2784
|
-
return 0;
|
|
2785
3465
|
}
|
|
2786
|
-
if (
|
|
2787
|
-
|
|
2788
|
-
|
|
3466
|
+
if (currentSection) {
|
|
3467
|
+
currentSection.content = currentContent.join(`
|
|
3468
|
+
`).trim();
|
|
3469
|
+
currentSection.endLine = lines.length;
|
|
3470
|
+
if (currentSection.content || currentSection.heading) {
|
|
3471
|
+
sections.push(currentSection);
|
|
2789
3472
|
}
|
|
2790
|
-
|
|
3473
|
+
} else if (currentContent.length > 0) {
|
|
3474
|
+
sections.push({
|
|
3475
|
+
heading: "",
|
|
3476
|
+
level: 0,
|
|
3477
|
+
content: currentContent.join(`
|
|
3478
|
+
`).trim(),
|
|
3479
|
+
startLine: 1,
|
|
3480
|
+
endLine: lines.length
|
|
3481
|
+
});
|
|
2791
3482
|
}
|
|
2792
|
-
return
|
|
3483
|
+
return sections;
|
|
2793
3484
|
}
|
|
2794
|
-
function
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
3485
|
+
function extractMarkdownKeywords(content) {
|
|
3486
|
+
const keywords = [];
|
|
3487
|
+
const headingMatches = content.matchAll(/^#{1,6}\s+(.+)$/gm);
|
|
3488
|
+
for (const match of headingMatches) {
|
|
3489
|
+
const heading = match[1].toLowerCase();
|
|
3490
|
+
const words = heading.split(/\s+/).filter((w) => w.length > 2);
|
|
3491
|
+
keywords.push(...words);
|
|
3492
|
+
}
|
|
3493
|
+
const emphasisMatches = content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g);
|
|
3494
|
+
for (const match of emphasisMatches) {
|
|
3495
|
+
const text = (match[1] || match[2] || "").toLowerCase();
|
|
3496
|
+
const words = text.split(/\s+/).filter((w) => w.length > 2);
|
|
3497
|
+
keywords.push(...words);
|
|
3498
|
+
}
|
|
3499
|
+
const codeMatches = content.matchAll(/`([^`]+)`/g);
|
|
3500
|
+
for (const match of codeMatches) {
|
|
3501
|
+
const code = match[1].toLowerCase();
|
|
3502
|
+
if (code.length > 2 && code.length < 50) {
|
|
3503
|
+
keywords.push(code);
|
|
3504
|
+
}
|
|
2810
3505
|
}
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
3506
|
+
const linkMatches = content.matchAll(/\[([^\]]+)\]/g);
|
|
3507
|
+
for (const match of linkMatches) {
|
|
3508
|
+
const text = match[1].toLowerCase();
|
|
3509
|
+
const words = text.split(/\s+/).filter((w) => w.length > 2);
|
|
3510
|
+
keywords.push(...words);
|
|
3511
|
+
}
|
|
3512
|
+
return [...new Set(keywords)];
|
|
2814
3513
|
}
|
|
2815
3514
|
|
|
2816
|
-
class
|
|
2817
|
-
id = "
|
|
2818
|
-
name = "
|
|
2819
|
-
description = "
|
|
3515
|
+
class MarkdownModule {
|
|
3516
|
+
id = "docs/markdown";
|
|
3517
|
+
name = "Markdown Search";
|
|
3518
|
+
description = "Markdown documentation search with section-aware indexing";
|
|
2820
3519
|
version = "1.0.0";
|
|
3520
|
+
supportsFile(filepath) {
|
|
3521
|
+
return isMarkdownFile(filepath);
|
|
3522
|
+
}
|
|
2821
3523
|
embeddingConfig = null;
|
|
2822
3524
|
symbolicIndex = null;
|
|
2823
3525
|
pendingSummaries = new Map;
|
|
@@ -2836,66 +3538,53 @@ class TypeScriptModule {
|
|
|
2836
3538
|
this.pendingSummaries.clear();
|
|
2837
3539
|
}
|
|
2838
3540
|
async indexFile(filepath, content, ctx) {
|
|
3541
|
+
if (!isMarkdownFile(filepath)) {
|
|
3542
|
+
return null;
|
|
3543
|
+
}
|
|
2839
3544
|
this.rootDir = ctx.rootDir;
|
|
2840
|
-
const
|
|
2841
|
-
if (
|
|
3545
|
+
const sections = parseMarkdownSections(content);
|
|
3546
|
+
if (sections.length === 0) {
|
|
2842
3547
|
return null;
|
|
2843
3548
|
}
|
|
2844
|
-
const
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3549
|
+
const chunkContents = sections.map((s) => {
|
|
3550
|
+
const filename = path11.basename(filepath);
|
|
3551
|
+
const headingContext = s.heading ? `${s.heading}: ` : "";
|
|
3552
|
+
return `${filename} ${headingContext}${s.content}`;
|
|
2849
3553
|
});
|
|
2850
3554
|
const embeddings = await getEmbeddings(chunkContents);
|
|
2851
|
-
const chunks =
|
|
2852
|
-
id: generateChunkId(filepath,
|
|
2853
|
-
content:
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
3555
|
+
const chunks = sections.map((section, i) => ({
|
|
3556
|
+
id: generateChunkId(filepath, section.startLine, section.endLine),
|
|
3557
|
+
content: section.heading ? `## ${section.heading}
|
|
3558
|
+
|
|
3559
|
+
${section.content}` : section.content,
|
|
3560
|
+
startLine: section.startLine,
|
|
3561
|
+
endLine: section.endLine,
|
|
3562
|
+
type: "block",
|
|
3563
|
+
name: section.heading || undefined
|
|
2860
3564
|
}));
|
|
2861
|
-
const
|
|
3565
|
+
const headings = sections.filter((s) => s.heading).map((s) => s.heading);
|
|
2862
3566
|
const stats = await ctx.getFileStats(filepath);
|
|
2863
3567
|
const currentConfig = getEmbeddingConfig();
|
|
2864
3568
|
const moduleData = {
|
|
2865
3569
|
embeddings,
|
|
2866
|
-
embeddingModel: currentConfig.model
|
|
3570
|
+
embeddingModel: currentConfig.model,
|
|
3571
|
+
headings
|
|
2867
3572
|
};
|
|
2868
|
-
const
|
|
2869
|
-
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2870
|
-
];
|
|
2871
|
-
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2872
|
-
const allKeywords = new Set;
|
|
2873
|
-
for (const pc of parsedChunks) {
|
|
2874
|
-
const keywords = extractKeywords(pc.content, pc.name);
|
|
2875
|
-
keywords.forEach((k) => allKeywords.add(k));
|
|
2876
|
-
}
|
|
2877
|
-
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3573
|
+
const keywords = extractMarkdownKeywords(content);
|
|
2878
3574
|
const fileSummary = {
|
|
2879
3575
|
filepath,
|
|
2880
3576
|
chunkCount: chunks.length,
|
|
2881
|
-
chunkTypes,
|
|
2882
|
-
keywords
|
|
2883
|
-
exports,
|
|
2884
|
-
lastModified: stats.lastModified
|
|
2885
|
-
pathContext: {
|
|
2886
|
-
segments: pathContext.segments,
|
|
2887
|
-
layer: pathContext.layer,
|
|
2888
|
-
domain: pathContext.domain,
|
|
2889
|
-
depth: pathContext.depth
|
|
2890
|
-
}
|
|
3577
|
+
chunkTypes: ["block"],
|
|
3578
|
+
keywords,
|
|
3579
|
+
exports: headings,
|
|
3580
|
+
lastModified: stats.lastModified
|
|
2891
3581
|
};
|
|
2892
3582
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
2893
3583
|
return {
|
|
2894
3584
|
filepath,
|
|
2895
3585
|
lastModified: stats.lastModified,
|
|
2896
3586
|
chunks,
|
|
2897
|
-
moduleData
|
|
2898
|
-
references
|
|
3587
|
+
moduleData
|
|
2899
3588
|
};
|
|
2900
3589
|
}
|
|
2901
3590
|
async finalize(ctx) {
|
|
@@ -2911,8 +3600,8 @@ class TypeScriptModule {
|
|
|
2911
3600
|
}
|
|
2912
3601
|
async search(query, ctx, options = {}) {
|
|
2913
3602
|
const {
|
|
2914
|
-
topK =
|
|
2915
|
-
minScore =
|
|
3603
|
+
topK = DEFAULT_TOP_K4,
|
|
3604
|
+
minScore = DEFAULT_MIN_SCORE4,
|
|
2916
3605
|
filePatterns
|
|
2917
3606
|
} = options;
|
|
2918
3607
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
@@ -2924,9 +3613,9 @@ class TypeScriptModule {
|
|
|
2924
3613
|
} catch {
|
|
2925
3614
|
allFiles = await ctx.listIndexedFiles();
|
|
2926
3615
|
}
|
|
2927
|
-
let filesToSearch = allFiles;
|
|
3616
|
+
let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
|
|
2928
3617
|
if (filePatterns && filePatterns.length > 0) {
|
|
2929
|
-
filesToSearch =
|
|
3618
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
2930
3619
|
return filePatterns.some((pattern) => {
|
|
2931
3620
|
if (pattern.startsWith("*.")) {
|
|
2932
3621
|
const ext = pattern.slice(1);
|
|
@@ -2964,36 +3653,24 @@ class TypeScriptModule {
|
|
|
2964
3653
|
for (const result of bm25Results) {
|
|
2965
3654
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
2966
3655
|
}
|
|
2967
|
-
const queryTerms = query
|
|
2968
|
-
const pathBoosts = new Map;
|
|
2969
|
-
for (const filepath of filesToSearch) {
|
|
2970
|
-
const summary = symbolicIndex.getFileSummary(filepath);
|
|
2971
|
-
if (summary?.pathContext) {
|
|
2972
|
-
let boost = 0;
|
|
2973
|
-
const ctx2 = summary.pathContext;
|
|
2974
|
-
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
2975
|
-
boost += 0.1;
|
|
2976
|
-
}
|
|
2977
|
-
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
2978
|
-
boost += 0.05;
|
|
2979
|
-
}
|
|
2980
|
-
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
2981
|
-
if (segmentMatch) {
|
|
2982
|
-
boost += 0.05;
|
|
2983
|
-
}
|
|
2984
|
-
pathBoosts.set(filepath, boost);
|
|
2985
|
-
}
|
|
2986
|
-
}
|
|
3656
|
+
const queryTerms = extractQueryTerms(query);
|
|
2987
3657
|
const results = [];
|
|
2988
3658
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
2989
3659
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
2990
3660
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
3661
|
+
let docBoost = 0;
|
|
3662
|
+
if (queryTerms.some((t) => [
|
|
3663
|
+
"docs",
|
|
3664
|
+
"documentation",
|
|
3665
|
+
"readme",
|
|
3666
|
+
"guide",
|
|
3667
|
+
"how",
|
|
3668
|
+
"what",
|
|
3669
|
+
"explain"
|
|
3670
|
+
].includes(t))) {
|
|
3671
|
+
docBoost = 0.05;
|
|
3672
|
+
}
|
|
3673
|
+
const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
2997
3674
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
2998
3675
|
results.push({
|
|
2999
3676
|
filepath,
|
|
@@ -3003,10 +3680,7 @@ class TypeScriptModule {
|
|
|
3003
3680
|
context: {
|
|
3004
3681
|
semanticScore,
|
|
3005
3682
|
bm25Score,
|
|
3006
|
-
|
|
3007
|
-
fileTypeBoost,
|
|
3008
|
-
chunkTypeBoost,
|
|
3009
|
-
exportBoost
|
|
3683
|
+
docBoost
|
|
3010
3684
|
}
|
|
3011
3685
|
});
|
|
3012
3686
|
}
|
|
@@ -3014,91 +3688,22 @@ class TypeScriptModule {
|
|
|
3014
3688
|
results.sort((a, b) => b.score - a.score);
|
|
3015
3689
|
return results.slice(0, topK);
|
|
3016
3690
|
}
|
|
3017
|
-
extractReferences(content, filepath) {
|
|
3018
|
-
const references = [];
|
|
3019
|
-
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
3020
|
-
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
3021
|
-
let match;
|
|
3022
|
-
while ((match = importRegex.exec(content)) !== null) {
|
|
3023
|
-
const importPath = match[1];
|
|
3024
|
-
if (importPath.startsWith(".")) {
|
|
3025
|
-
const dir = path8.dirname(filepath);
|
|
3026
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
3027
|
-
references.push(resolved);
|
|
3028
|
-
}
|
|
3029
|
-
}
|
|
3030
|
-
while ((match = requireRegex.exec(content)) !== null) {
|
|
3031
|
-
const importPath = match[1];
|
|
3032
|
-
if (importPath.startsWith(".")) {
|
|
3033
|
-
const dir = path8.dirname(filepath);
|
|
3034
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
3035
|
-
references.push(resolved);
|
|
3036
|
-
}
|
|
3037
|
-
}
|
|
3038
|
-
return references;
|
|
3039
|
-
}
|
|
3040
3691
|
}
|
|
3041
|
-
var
|
|
3042
|
-
var
|
|
3692
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
|
|
3693
|
+
var init_markdown = __esm(() => {
|
|
3043
3694
|
init_embeddings();
|
|
3695
|
+
init_services();
|
|
3044
3696
|
init_config2();
|
|
3045
|
-
init_parseCode();
|
|
3046
3697
|
init_storage();
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
IMPLEMENTATION_TERMS = [
|
|
3050
|
-
"function",
|
|
3051
|
-
"method",
|
|
3052
|
-
"class",
|
|
3053
|
-
"interface",
|
|
3054
|
-
"implement",
|
|
3055
|
-
"implementation",
|
|
3056
|
-
"endpoint",
|
|
3057
|
-
"route",
|
|
3058
|
-
"handler",
|
|
3059
|
-
"controller",
|
|
3060
|
-
"module",
|
|
3061
|
-
"code"
|
|
3062
|
-
];
|
|
3063
|
-
DOCUMENTATION_TERMS = [
|
|
3064
|
-
"documentation",
|
|
3065
|
-
"docs",
|
|
3066
|
-
"guide",
|
|
3067
|
-
"tutorial",
|
|
3068
|
-
"readme",
|
|
3069
|
-
"how",
|
|
3070
|
-
"what",
|
|
3071
|
-
"why",
|
|
3072
|
-
"explain",
|
|
3073
|
-
"overview",
|
|
3074
|
-
"getting",
|
|
3075
|
-
"started",
|
|
3076
|
-
"requirements",
|
|
3077
|
-
"setup",
|
|
3078
|
-
"install",
|
|
3079
|
-
"configure",
|
|
3080
|
-
"configuration"
|
|
3081
|
-
];
|
|
3082
|
-
SOURCE_CODE_EXTENSIONS = [
|
|
3083
|
-
".ts",
|
|
3084
|
-
".tsx",
|
|
3085
|
-
".js",
|
|
3086
|
-
".jsx",
|
|
3087
|
-
".mjs",
|
|
3088
|
-
".cjs",
|
|
3089
|
-
".py",
|
|
3090
|
-
".go",
|
|
3091
|
-
".rs",
|
|
3092
|
-
".java"
|
|
3093
|
-
];
|
|
3094
|
-
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
3698
|
+
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
3699
|
+
supportsFile3 = isMarkdownFile;
|
|
3095
3700
|
});
|
|
3096
3701
|
|
|
3097
3702
|
// src/app/indexer/index.ts
|
|
3098
3703
|
init_config2();
|
|
3099
3704
|
import { glob } from "glob";
|
|
3100
3705
|
import * as fs6 from "fs/promises";
|
|
3101
|
-
import * as
|
|
3706
|
+
import * as path14 from "path";
|
|
3102
3707
|
|
|
3103
3708
|
// src/modules/registry.ts
|
|
3104
3709
|
class ModuleRegistryImpl {
|
|
@@ -3123,16 +3728,20 @@ var registry = new ModuleRegistryImpl;
|
|
|
3123
3728
|
async function registerBuiltInModules() {
|
|
3124
3729
|
const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
|
|
3125
3730
|
const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
|
|
3731
|
+
const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
|
|
3732
|
+
const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
|
|
3126
3733
|
registry.register(new CoreModule2);
|
|
3127
3734
|
registry.register(new TypeScriptModule2);
|
|
3735
|
+
registry.register(new JsonModule2);
|
|
3736
|
+
registry.register(new MarkdownModule2);
|
|
3128
3737
|
}
|
|
3129
3738
|
|
|
3130
3739
|
// src/infrastructure/introspection/IntrospectionIndex.ts
|
|
3131
|
-
import * as
|
|
3740
|
+
import * as path13 from "path";
|
|
3132
3741
|
import * as fs5 from "fs/promises";
|
|
3133
3742
|
|
|
3134
3743
|
// src/infrastructure/introspection/projectDetector.ts
|
|
3135
|
-
import * as
|
|
3744
|
+
import * as path12 from "path";
|
|
3136
3745
|
import * as fs4 from "fs/promises";
|
|
3137
3746
|
var MAX_SCAN_DEPTH = 4;
|
|
3138
3747
|
var SKIP_DIRS = new Set([
|
|
@@ -3149,7 +3758,7 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3149
3758
|
if (depth > MAX_SCAN_DEPTH)
|
|
3150
3759
|
return [];
|
|
3151
3760
|
const results = [];
|
|
3152
|
-
const fullDir = currentDir ?
|
|
3761
|
+
const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
|
|
3153
3762
|
try {
|
|
3154
3763
|
const entries = await fs4.readdir(fullDir, { withFileTypes: true });
|
|
3155
3764
|
const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
|
|
@@ -3172,10 +3781,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3172
3781
|
}
|
|
3173
3782
|
async function parsePackageJson(rootDir, relativePath) {
|
|
3174
3783
|
try {
|
|
3175
|
-
const packageJsonPath =
|
|
3784
|
+
const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
|
|
3176
3785
|
const content = await fs4.readFile(packageJsonPath, "utf-8");
|
|
3177
3786
|
const pkg = JSON.parse(content);
|
|
3178
|
-
const name = pkg.name ||
|
|
3787
|
+
const name = pkg.name || path12.basename(relativePath);
|
|
3179
3788
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
3180
3789
|
let type = "unknown";
|
|
3181
3790
|
if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
|
|
@@ -3220,7 +3829,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3220
3829
|
for (const pattern of monorepoPatterns) {
|
|
3221
3830
|
if (!dirNames.includes(pattern))
|
|
3222
3831
|
continue;
|
|
3223
|
-
const patternDir =
|
|
3832
|
+
const patternDir = path12.join(rootDir, pattern);
|
|
3224
3833
|
try {
|
|
3225
3834
|
const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
|
|
3226
3835
|
for (const subDir of subDirs) {
|
|
@@ -3251,7 +3860,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3251
3860
|
}
|
|
3252
3861
|
let rootType = "unknown";
|
|
3253
3862
|
try {
|
|
3254
|
-
const rootPkgPath =
|
|
3863
|
+
const rootPkgPath = path12.join(rootDir, "package.json");
|
|
3255
3864
|
const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
|
|
3256
3865
|
if (rootPkg.workspaces)
|
|
3257
3866
|
isMonorepo = true;
|
|
@@ -3292,7 +3901,7 @@ class IntrospectionIndex {
|
|
|
3292
3901
|
async initialize() {
|
|
3293
3902
|
this.structure = await detectProjectStructure(this.rootDir);
|
|
3294
3903
|
try {
|
|
3295
|
-
const configPath =
|
|
3904
|
+
const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
|
|
3296
3905
|
const configContent = await fs5.readFile(configPath, "utf-8");
|
|
3297
3906
|
const config = JSON.parse(configContent);
|
|
3298
3907
|
this.config = config.introspection || {};
|
|
@@ -3332,28 +3941,28 @@ class IntrospectionIndex {
|
|
|
3332
3941
|
}
|
|
3333
3942
|
}
|
|
3334
3943
|
async save(config) {
|
|
3335
|
-
const introDir =
|
|
3944
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3336
3945
|
await fs5.mkdir(introDir, { recursive: true });
|
|
3337
|
-
const projectPath =
|
|
3946
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3338
3947
|
await fs5.writeFile(projectPath, JSON.stringify({
|
|
3339
3948
|
version: "1.0.0",
|
|
3340
3949
|
lastUpdated: new Date().toISOString(),
|
|
3341
3950
|
structure: this.structure
|
|
3342
3951
|
}, null, 2));
|
|
3343
3952
|
for (const [filepath, intro] of this.files) {
|
|
3344
|
-
const introFilePath =
|
|
3345
|
-
await fs5.mkdir(
|
|
3953
|
+
const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3954
|
+
await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
|
|
3346
3955
|
await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
|
|
3347
3956
|
}
|
|
3348
3957
|
}
|
|
3349
3958
|
async load(config) {
|
|
3350
|
-
const introDir =
|
|
3959
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3351
3960
|
try {
|
|
3352
|
-
const projectPath =
|
|
3961
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3353
3962
|
const projectContent = await fs5.readFile(projectPath, "utf-8");
|
|
3354
3963
|
const projectData = JSON.parse(projectContent);
|
|
3355
3964
|
this.structure = projectData.structure;
|
|
3356
|
-
await this.loadFilesRecursive(
|
|
3965
|
+
await this.loadFilesRecursive(path13.join(introDir, "files"), "");
|
|
3357
3966
|
} catch {
|
|
3358
3967
|
this.structure = null;
|
|
3359
3968
|
this.files.clear();
|
|
@@ -3363,7 +3972,7 @@ class IntrospectionIndex {
|
|
|
3363
3972
|
try {
|
|
3364
3973
|
const entries = await fs5.readdir(basePath, { withFileTypes: true });
|
|
3365
3974
|
for (const entry of entries) {
|
|
3366
|
-
const entryPath =
|
|
3975
|
+
const entryPath = path13.join(basePath, entry.name);
|
|
3367
3976
|
const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
3368
3977
|
if (entry.isDirectory()) {
|
|
3369
3978
|
await this.loadFilesRecursive(entryPath, relativePath);
|
|
@@ -3471,15 +4080,49 @@ import { watch } from "chokidar";
|
|
|
3471
4080
|
init_config2();
|
|
3472
4081
|
|
|
3473
4082
|
// src/app/indexer/index.ts
|
|
4083
|
+
async function parallelMap(items, processor, concurrency) {
|
|
4084
|
+
const results = new Array(items.length);
|
|
4085
|
+
let nextIndex = 0;
|
|
4086
|
+
async function worker() {
|
|
4087
|
+
while (nextIndex < items.length) {
|
|
4088
|
+
const index = nextIndex++;
|
|
4089
|
+
const item = items[index];
|
|
4090
|
+
try {
|
|
4091
|
+
const value = await processor(item, index);
|
|
4092
|
+
results[index] = { success: true, value };
|
|
4093
|
+
} catch (error) {
|
|
4094
|
+
results[index] = { success: false, error };
|
|
4095
|
+
}
|
|
4096
|
+
}
|
|
4097
|
+
}
|
|
4098
|
+
const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
|
|
4099
|
+
await Promise.all(workers);
|
|
4100
|
+
return results;
|
|
4101
|
+
}
|
|
3474
4102
|
var INDEX_SCHEMA_VERSION = "1.0.0";
|
|
4103
|
+
function formatDuration(ms) {
|
|
4104
|
+
if (ms < 1000) {
|
|
4105
|
+
return `${ms}ms`;
|
|
4106
|
+
}
|
|
4107
|
+
const seconds = ms / 1000;
|
|
4108
|
+
if (seconds < 60) {
|
|
4109
|
+
return `${seconds.toFixed(1)}s`;
|
|
4110
|
+
}
|
|
4111
|
+
const minutes = Math.floor(seconds / 60);
|
|
4112
|
+
const remainingSeconds = seconds % 60;
|
|
4113
|
+
return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
|
|
4114
|
+
}
|
|
4115
|
+
var DEFAULT_CONCURRENCY = 4;
|
|
3475
4116
|
async function indexDirectory(rootDir, options = {}) {
|
|
3476
4117
|
const verbose = options.verbose ?? false;
|
|
3477
4118
|
const quiet = options.quiet ?? false;
|
|
4119
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
|
|
3478
4120
|
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
3479
|
-
rootDir =
|
|
4121
|
+
rootDir = path14.resolve(rootDir);
|
|
3480
4122
|
const location = getIndexLocation(rootDir);
|
|
3481
4123
|
logger.info(`Indexing directory: ${rootDir}`);
|
|
3482
4124
|
logger.info(`Index location: ${location.indexDir}`);
|
|
4125
|
+
logger.debug(`Concurrency: ${concurrency}`);
|
|
3483
4126
|
const config = await loadConfig(rootDir);
|
|
3484
4127
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3485
4128
|
await introspection.initialize();
|
|
@@ -3496,8 +4139,10 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3496
4139
|
logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
|
|
3497
4140
|
const files = await findFiles(rootDir, config);
|
|
3498
4141
|
logger.info(`Found ${files.length} files to index`);
|
|
4142
|
+
const overallStart = Date.now();
|
|
3499
4143
|
const results = [];
|
|
3500
4144
|
for (const module of enabledModules) {
|
|
4145
|
+
const moduleStart = Date.now();
|
|
3501
4146
|
logger.info(`
|
|
3502
4147
|
[${module.name}] Starting indexing...`);
|
|
3503
4148
|
const moduleConfig = getModuleConfig(config, module.id);
|
|
@@ -3515,7 +4160,9 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3515
4160
|
};
|
|
3516
4161
|
await module.initialize(configWithOverrides);
|
|
3517
4162
|
}
|
|
3518
|
-
const
|
|
4163
|
+
const moduleFiles = module.supportsFile ? files.filter((f) => module.supportsFile(f)) : files;
|
|
4164
|
+
logger.info(` Processing ${moduleFiles.length} files...`);
|
|
4165
|
+
const result = await indexWithModule(rootDir, moduleFiles, module, config, verbose, introspection, logger, concurrency);
|
|
3519
4166
|
results.push(result);
|
|
3520
4167
|
if (module.finalize) {
|
|
3521
4168
|
logger.info(`[${module.name}] Building secondary indexes...`);
|
|
@@ -3523,20 +4170,29 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3523
4170
|
rootDir,
|
|
3524
4171
|
config,
|
|
3525
4172
|
readFile: async (filepath) => {
|
|
3526
|
-
const fullPath =
|
|
4173
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3527
4174
|
return fs6.readFile(fullPath, "utf-8");
|
|
3528
4175
|
},
|
|
3529
4176
|
getFileStats: async (filepath) => {
|
|
3530
|
-
const fullPath =
|
|
4177
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3531
4178
|
const stats = await fs6.stat(fullPath);
|
|
3532
4179
|
return { lastModified: stats.mtime.toISOString() };
|
|
3533
4180
|
}
|
|
3534
4181
|
};
|
|
3535
4182
|
await module.finalize(ctx);
|
|
3536
4183
|
}
|
|
3537
|
-
|
|
4184
|
+
const moduleDuration = Date.now() - moduleStart;
|
|
4185
|
+
result.durationMs = moduleDuration;
|
|
4186
|
+
logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors (${formatDuration(moduleDuration)})`);
|
|
3538
4187
|
}
|
|
3539
4188
|
await introspection.save(config);
|
|
4189
|
+
const overallDuration = Date.now() - overallStart;
|
|
4190
|
+
logger.info(`
|
|
4191
|
+
Indexing complete in ${formatDuration(overallDuration)}`);
|
|
4192
|
+
const totalIndexed = results.reduce((sum, r) => sum + r.indexed, 0);
|
|
4193
|
+
const totalSkipped = results.reduce((sum, r) => sum + r.skipped, 0);
|
|
4194
|
+
const totalErrors = results.reduce((sum, r) => sum + r.errors, 0);
|
|
4195
|
+
logger.info(`Total: ${totalIndexed} indexed, ${totalSkipped} skipped, ${totalErrors} errors`);
|
|
3540
4196
|
await updateGlobalManifest(rootDir, enabledModules, config);
|
|
3541
4197
|
return results;
|
|
3542
4198
|
}
|
|
@@ -3558,7 +4214,7 @@ async function deleteIndex(rootDir) {
|
|
|
3558
4214
|
} catch {}
|
|
3559
4215
|
}
|
|
3560
4216
|
async function resetIndex(rootDir) {
|
|
3561
|
-
rootDir =
|
|
4217
|
+
rootDir = path14.resolve(rootDir);
|
|
3562
4218
|
const status = await getIndexStatus(rootDir);
|
|
3563
4219
|
if (!status.exists) {
|
|
3564
4220
|
throw new Error(`No index found for ${rootDir}`);
|
|
@@ -3573,7 +4229,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3573
4229
|
const verbose = options.verbose ?? false;
|
|
3574
4230
|
const quiet = options.quiet ?? false;
|
|
3575
4231
|
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
3576
|
-
rootDir =
|
|
4232
|
+
rootDir = path14.resolve(rootDir);
|
|
3577
4233
|
const status = await getIndexStatus(rootDir);
|
|
3578
4234
|
if (!status.exists) {
|
|
3579
4235
|
logger.info(`No index found. Creating index...
|
|
@@ -3600,7 +4256,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3600
4256
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3601
4257
|
await introspection.initialize();
|
|
3602
4258
|
const currentFiles = await findFiles(rootDir, config);
|
|
3603
|
-
const currentFileSet = new Set(currentFiles.map((f) =>
|
|
4259
|
+
const currentFileSet = new Set(currentFiles.map((f) => path14.relative(rootDir, f)));
|
|
3604
4260
|
let totalIndexed = 0;
|
|
3605
4261
|
let totalRemoved = 0;
|
|
3606
4262
|
let totalUnchanged = 0;
|
|
@@ -3630,11 +4286,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3630
4286
|
}
|
|
3631
4287
|
for (const filepath of filesToRemove) {
|
|
3632
4288
|
logger.debug(` Removing stale: ${filepath}`);
|
|
3633
|
-
const indexFilePath =
|
|
4289
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3634
4290
|
try {
|
|
3635
4291
|
await fs6.unlink(indexFilePath);
|
|
3636
4292
|
} catch {}
|
|
3637
|
-
const symbolicFilePath =
|
|
4293
|
+
const symbolicFilePath = path14.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3638
4294
|
try {
|
|
3639
4295
|
await fs6.unlink(symbolicFilePath);
|
|
3640
4296
|
} catch {}
|
|
@@ -3645,11 +4301,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3645
4301
|
rootDir,
|
|
3646
4302
|
config,
|
|
3647
4303
|
readFile: async (filepath) => {
|
|
3648
|
-
const fullPath =
|
|
4304
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3649
4305
|
return fs6.readFile(fullPath, "utf-8");
|
|
3650
4306
|
},
|
|
3651
4307
|
getFileStats: async (filepath) => {
|
|
3652
|
-
const fullPath =
|
|
4308
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3653
4309
|
const stats = await fs6.stat(fullPath);
|
|
3654
4310
|
return { lastModified: stats.mtime.toISOString() };
|
|
3655
4311
|
},
|
|
@@ -3658,7 +4314,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3658
4314
|
const totalFiles = currentFiles.length;
|
|
3659
4315
|
for (let i = 0;i < currentFiles.length; i++) {
|
|
3660
4316
|
const filepath = currentFiles[i];
|
|
3661
|
-
const relativePath =
|
|
4317
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
3662
4318
|
const progress = `[${i + 1}/${totalFiles}]`;
|
|
3663
4319
|
try {
|
|
3664
4320
|
const stats = await fs6.stat(filepath);
|
|
@@ -3709,7 +4365,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3709
4365
|
unchanged: totalUnchanged
|
|
3710
4366
|
};
|
|
3711
4367
|
}
|
|
3712
|
-
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger) {
|
|
4368
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
|
|
3713
4369
|
const result = {
|
|
3714
4370
|
moduleId: module.id,
|
|
3715
4371
|
indexed: 0,
|
|
@@ -3718,7 +4374,7 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3718
4374
|
};
|
|
3719
4375
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
3720
4376
|
const indexPath = getModuleIndexPath(rootDir, module.id, config);
|
|
3721
|
-
const currentFileSet = new Set(files.map((f) =>
|
|
4377
|
+
const currentFileSet = new Set(files.map((f) => path14.relative(rootDir, f)));
|
|
3722
4378
|
const filesToRemove = [];
|
|
3723
4379
|
for (const filepath of Object.keys(manifest.files)) {
|
|
3724
4380
|
if (!currentFileSet.has(filepath)) {
|
|
@@ -3729,11 +4385,11 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3729
4385
|
logger.info(` Removing ${filesToRemove.length} stale entries...`);
|
|
3730
4386
|
for (const filepath of filesToRemove) {
|
|
3731
4387
|
logger.debug(` Removing: ${filepath}`);
|
|
3732
|
-
const indexFilePath =
|
|
4388
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3733
4389
|
try {
|
|
3734
4390
|
await fs6.unlink(indexFilePath);
|
|
3735
4391
|
} catch {}
|
|
3736
|
-
const symbolicFilePath =
|
|
4392
|
+
const symbolicFilePath = path14.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3737
4393
|
try {
|
|
3738
4394
|
await fs6.unlink(symbolicFilePath);
|
|
3739
4395
|
} catch {}
|
|
@@ -3745,52 +4401,76 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3745
4401
|
rootDir,
|
|
3746
4402
|
config,
|
|
3747
4403
|
readFile: async (filepath) => {
|
|
3748
|
-
const fullPath =
|
|
4404
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3749
4405
|
return fs6.readFile(fullPath, "utf-8");
|
|
3750
4406
|
},
|
|
3751
4407
|
getFileStats: async (filepath) => {
|
|
3752
|
-
const fullPath =
|
|
4408
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3753
4409
|
const stats = await fs6.stat(fullPath);
|
|
3754
4410
|
return { lastModified: stats.mtime.toISOString() };
|
|
3755
4411
|
},
|
|
3756
4412
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3757
4413
|
};
|
|
3758
4414
|
const totalFiles = files.length;
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
const relativePath =
|
|
3762
|
-
const progress = `[${i + 1}/${totalFiles}]`;
|
|
4415
|
+
let completedCount = 0;
|
|
4416
|
+
const processFile = async (filepath, _index) => {
|
|
4417
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
3763
4418
|
try {
|
|
3764
4419
|
const stats = await fs6.stat(filepath);
|
|
3765
4420
|
const lastModified = stats.mtime.toISOString();
|
|
3766
4421
|
const existingEntry = manifest.files[relativePath];
|
|
3767
4422
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
3768
|
-
|
|
3769
|
-
|
|
3770
|
-
|
|
4423
|
+
completedCount++;
|
|
4424
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
|
|
4425
|
+
return { relativePath, status: "skipped" };
|
|
3771
4426
|
}
|
|
3772
4427
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3773
4428
|
introspection.addFile(relativePath, content);
|
|
3774
|
-
|
|
4429
|
+
completedCount++;
|
|
4430
|
+
logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
|
|
3775
4431
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
3776
4432
|
if (!fileIndex) {
|
|
3777
|
-
logger.debug(` ${
|
|
3778
|
-
|
|
3779
|
-
continue;
|
|
4433
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
|
|
4434
|
+
return { relativePath, status: "skipped" };
|
|
3780
4435
|
}
|
|
3781
4436
|
await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
|
|
3782
|
-
|
|
4437
|
+
return {
|
|
4438
|
+
relativePath,
|
|
4439
|
+
status: "indexed",
|
|
3783
4440
|
lastModified,
|
|
3784
4441
|
chunkCount: fileIndex.chunks.length
|
|
3785
4442
|
};
|
|
3786
|
-
result.indexed++;
|
|
3787
4443
|
} catch (error) {
|
|
3788
|
-
|
|
3789
|
-
|
|
4444
|
+
completedCount++;
|
|
4445
|
+
return { relativePath, status: "error", error };
|
|
4446
|
+
}
|
|
4447
|
+
};
|
|
4448
|
+
logger.debug(` Using concurrency: ${concurrency}`);
|
|
4449
|
+
const results = await parallelMap(files, processFile, concurrency);
|
|
4450
|
+
logger.clearProgress();
|
|
4451
|
+
for (const item of results) {
|
|
4452
|
+
if (!item.success) {
|
|
3790
4453
|
result.errors++;
|
|
4454
|
+
continue;
|
|
4455
|
+
}
|
|
4456
|
+
const fileResult = item.value;
|
|
4457
|
+
switch (fileResult.status) {
|
|
4458
|
+
case "indexed":
|
|
4459
|
+
manifest.files[fileResult.relativePath] = {
|
|
4460
|
+
lastModified: fileResult.lastModified,
|
|
4461
|
+
chunkCount: fileResult.chunkCount
|
|
4462
|
+
};
|
|
4463
|
+
result.indexed++;
|
|
4464
|
+
break;
|
|
4465
|
+
case "skipped":
|
|
4466
|
+
result.skipped++;
|
|
4467
|
+
break;
|
|
4468
|
+
case "error":
|
|
4469
|
+
logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
|
|
4470
|
+
result.errors++;
|
|
4471
|
+
break;
|
|
3791
4472
|
}
|
|
3792
4473
|
}
|
|
3793
|
-
logger.clearProgress();
|
|
3794
4474
|
manifest.lastUpdated = new Date().toISOString();
|
|
3795
4475
|
await writeModuleManifest(rootDir, module.id, manifest, config);
|
|
3796
4476
|
return result;
|
|
@@ -3825,13 +4505,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
3825
4505
|
}
|
|
3826
4506
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
3827
4507
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
3828
|
-
await fs6.mkdir(
|
|
4508
|
+
await fs6.mkdir(path14.dirname(manifestPath), { recursive: true });
|
|
3829
4509
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
3830
4510
|
}
|
|
3831
4511
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
3832
4512
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
3833
|
-
const indexFilePath =
|
|
3834
|
-
await fs6.mkdir(
|
|
4513
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4514
|
+
await fs6.mkdir(path14.dirname(indexFilePath), { recursive: true });
|
|
3835
4515
|
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
3836
4516
|
}
|
|
3837
4517
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
@@ -3841,13 +4521,13 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
3841
4521
|
lastUpdated: new Date().toISOString(),
|
|
3842
4522
|
modules: modules.map((m) => m.id)
|
|
3843
4523
|
};
|
|
3844
|
-
await fs6.mkdir(
|
|
4524
|
+
await fs6.mkdir(path14.dirname(manifestPath), { recursive: true });
|
|
3845
4525
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
3846
4526
|
}
|
|
3847
4527
|
async function cleanupIndex(rootDir, options = {}) {
|
|
3848
4528
|
const verbose = options.verbose ?? false;
|
|
3849
4529
|
const logger = options.logger ?? createLogger({ verbose });
|
|
3850
|
-
rootDir =
|
|
4530
|
+
rootDir = path14.resolve(rootDir);
|
|
3851
4531
|
logger.info(`Cleaning up index in: ${rootDir}`);
|
|
3852
4532
|
const config = await loadConfig(rootDir);
|
|
3853
4533
|
await registerBuiltInModules();
|
|
@@ -3877,7 +4557,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
|
3877
4557
|
const filesToRemove = [];
|
|
3878
4558
|
const updatedFiles = {};
|
|
3879
4559
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
3880
|
-
const fullPath =
|
|
4560
|
+
const fullPath = path14.join(rootDir, filepath);
|
|
3881
4561
|
try {
|
|
3882
4562
|
await fs6.access(fullPath);
|
|
3883
4563
|
updatedFiles[filepath] = entry;
|
|
@@ -3889,7 +4569,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
|
3889
4569
|
}
|
|
3890
4570
|
}
|
|
3891
4571
|
for (const filepath of filesToRemove) {
|
|
3892
|
-
const indexFilePath =
|
|
4572
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3893
4573
|
try {
|
|
3894
4574
|
await fs6.unlink(indexFilePath);
|
|
3895
4575
|
} catch {}
|
|
@@ -3905,7 +4585,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
3905
4585
|
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
3906
4586
|
for (const entry of entries) {
|
|
3907
4587
|
if (entry.isDirectory()) {
|
|
3908
|
-
const subDir =
|
|
4588
|
+
const subDir = path14.join(dir, entry.name);
|
|
3909
4589
|
await cleanupEmptyDirectories(subDir);
|
|
3910
4590
|
}
|
|
3911
4591
|
}
|
|
@@ -3920,7 +4600,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
3920
4600
|
}
|
|
3921
4601
|
}
|
|
3922
4602
|
async function getIndexStatus(rootDir) {
|
|
3923
|
-
rootDir =
|
|
4603
|
+
rootDir = path14.resolve(rootDir);
|
|
3924
4604
|
const config = await loadConfig(rootDir);
|
|
3925
4605
|
const location = getIndexLocation(rootDir);
|
|
3926
4606
|
const indexDir = location.indexDir;
|
|
@@ -3956,7 +4636,7 @@ async function getIndexStatus(rootDir) {
|
|
|
3956
4636
|
}
|
|
3957
4637
|
} catch {
|
|
3958
4638
|
try {
|
|
3959
|
-
const entries = await fs6.readdir(
|
|
4639
|
+
const entries = await fs6.readdir(path14.join(indexDir, "index"));
|
|
3960
4640
|
if (entries.length > 0) {
|
|
3961
4641
|
status.exists = true;
|
|
3962
4642
|
for (const entry of entries) {
|
|
@@ -3979,7 +4659,7 @@ async function getIndexStatus(rootDir) {
|
|
|
3979
4659
|
|
|
3980
4660
|
// src/app/search/index.ts
|
|
3981
4661
|
import * as fs7 from "fs/promises";
|
|
3982
|
-
import * as
|
|
4662
|
+
import * as path15 from "path";
|
|
3983
4663
|
|
|
3984
4664
|
// src/types.ts
|
|
3985
4665
|
init_entities();
|
|
@@ -3987,7 +4667,7 @@ init_entities();
|
|
|
3987
4667
|
// src/app/search/index.ts
|
|
3988
4668
|
init_config2();
|
|
3989
4669
|
async function search(rootDir, query, options = {}) {
|
|
3990
|
-
rootDir =
|
|
4670
|
+
rootDir = path15.resolve(rootDir);
|
|
3991
4671
|
const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
|
|
3992
4672
|
if (ensureFresh) {
|
|
3993
4673
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
@@ -4021,9 +4701,17 @@ async function search(rootDir, query, options = {}) {
|
|
|
4021
4701
|
const moduleResults = await module.search(query, ctx, options);
|
|
4022
4702
|
allResults.push(...moduleResults);
|
|
4023
4703
|
}
|
|
4024
|
-
|
|
4704
|
+
let filteredResults = allResults;
|
|
4705
|
+
if (options.pathFilter && options.pathFilter.length > 0) {
|
|
4706
|
+
const normalizedFilters = options.pathFilter.map((p) => p.replace(/\\/g, "/").replace(/^\//, "").replace(/\/$/, ""));
|
|
4707
|
+
filteredResults = allResults.filter((result) => {
|
|
4708
|
+
const normalizedPath = result.filepath.replace(/\\/g, "/");
|
|
4709
|
+
return normalizedFilters.some((filter) => normalizedPath.startsWith(filter + "/") || normalizedPath === filter || normalizedPath.startsWith("./" + filter + "/") || normalizedPath === "./" + filter);
|
|
4710
|
+
});
|
|
4711
|
+
}
|
|
4712
|
+
filteredResults.sort((a, b) => b.score - a.score);
|
|
4025
4713
|
const topK = options.topK ?? 10;
|
|
4026
|
-
return
|
|
4714
|
+
return filteredResults.slice(0, topK);
|
|
4027
4715
|
}
|
|
4028
4716
|
function createSearchContext(rootDir, moduleId, config) {
|
|
4029
4717
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
@@ -4032,7 +4720,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4032
4720
|
config,
|
|
4033
4721
|
loadFileIndex: async (filepath) => {
|
|
4034
4722
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
4035
|
-
const indexFilePath = hasExtension ?
|
|
4723
|
+
const indexFilePath = hasExtension ? path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path15.join(indexPath, filepath + ".json");
|
|
4036
4724
|
try {
|
|
4037
4725
|
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
4038
4726
|
return JSON.parse(content);
|
|
@@ -4044,7 +4732,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4044
4732
|
const files = [];
|
|
4045
4733
|
await traverseDirectory(indexPath, files, indexPath);
|
|
4046
4734
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
4047
|
-
const relative3 =
|
|
4735
|
+
const relative3 = path15.relative(indexPath, f);
|
|
4048
4736
|
return relative3.replace(/\.json$/, "");
|
|
4049
4737
|
});
|
|
4050
4738
|
}
|
|
@@ -4054,7 +4742,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
4054
4742
|
try {
|
|
4055
4743
|
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
4056
4744
|
for (const entry of entries) {
|
|
4057
|
-
const fullPath =
|
|
4745
|
+
const fullPath = path15.join(dir, entry.name);
|
|
4058
4746
|
if (entry.isDirectory()) {
|
|
4059
4747
|
await traverseDirectory(fullPath, files, basePath);
|
|
4060
4748
|
} else if (entry.isFile()) {
|
|
@@ -4156,4 +4844,4 @@ export {
|
|
|
4156
4844
|
ConsoleLogger
|
|
4157
4845
|
};
|
|
4158
4846
|
|
|
4159
|
-
//# debugId=
|
|
4847
|
+
//# debugId=C1C754EB0A3147DB64756E2164756E21
|