raggrep 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +2 -0
- package/dist/cli/main.js +1268 -622
- package/dist/cli/main.js.map +15 -10
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.js +1248 -612
- package/dist/index.js.map +14 -9
- package/dist/modules/data/json/index.d.ts +47 -0
- package/dist/modules/docs/markdown/index.d.ts +47 -0
- package/dist/modules/language/typescript/index.d.ts +9 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/package.json +1 -1
package/dist/cli/main.js
CHANGED
|
@@ -348,6 +348,20 @@ function createDefaultConfig() {
|
|
|
348
348
|
options: {
|
|
349
349
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
350
350
|
}
|
|
351
|
+
},
|
|
352
|
+
{
|
|
353
|
+
id: "data/json",
|
|
354
|
+
enabled: true,
|
|
355
|
+
options: {
|
|
356
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
357
|
+
}
|
|
358
|
+
},
|
|
359
|
+
{
|
|
360
|
+
id: "docs/markdown",
|
|
361
|
+
enabled: true,
|
|
362
|
+
options: {
|
|
363
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
364
|
+
}
|
|
351
365
|
}
|
|
352
366
|
]
|
|
353
367
|
};
|
|
@@ -391,16 +405,18 @@ var init_config = __esm(() => {
|
|
|
391
405
|
".jsx",
|
|
392
406
|
".mjs",
|
|
393
407
|
".cjs",
|
|
408
|
+
".mts",
|
|
409
|
+
".cts",
|
|
410
|
+
".json",
|
|
411
|
+
".md",
|
|
394
412
|
".py",
|
|
395
413
|
".go",
|
|
396
414
|
".rs",
|
|
397
415
|
".java",
|
|
398
|
-
".json",
|
|
399
416
|
".yaml",
|
|
400
417
|
".yml",
|
|
401
418
|
".toml",
|
|
402
419
|
".sql",
|
|
403
|
-
".md",
|
|
404
420
|
".txt"
|
|
405
421
|
];
|
|
406
422
|
});
|
|
@@ -2292,221 +2308,6 @@ var init_core = __esm(() => {
|
|
|
2292
2308
|
init_symbols();
|
|
2293
2309
|
});
|
|
2294
2310
|
|
|
2295
|
-
// src/domain/services/similarity.ts
|
|
2296
|
-
function cosineSimilarity(a, b) {
|
|
2297
|
-
if (a.length !== b.length) {
|
|
2298
|
-
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
2299
|
-
}
|
|
2300
|
-
let dotProduct = 0;
|
|
2301
|
-
let normA = 0;
|
|
2302
|
-
let normB = 0;
|
|
2303
|
-
for (let i = 0;i < a.length; i++) {
|
|
2304
|
-
dotProduct += a[i] * b[i];
|
|
2305
|
-
normA += a[i] * a[i];
|
|
2306
|
-
normB += b[i] * b[i];
|
|
2307
|
-
}
|
|
2308
|
-
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
2309
|
-
if (magnitude === 0)
|
|
2310
|
-
return 0;
|
|
2311
|
-
return dotProduct / magnitude;
|
|
2312
|
-
}
|
|
2313
|
-
|
|
2314
|
-
// src/modules/language/typescript/parseCode.ts
|
|
2315
|
-
import * as ts from "typescript";
|
|
2316
|
-
function parseCode(content, filepath) {
|
|
2317
|
-
const ext = filepath.split(".").pop()?.toLowerCase();
|
|
2318
|
-
if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
|
|
2319
|
-
return parseTypeScript(content, filepath);
|
|
2320
|
-
}
|
|
2321
|
-
return parseGenericCode(content);
|
|
2322
|
-
}
|
|
2323
|
-
function parseTypeScript(content, filepath) {
|
|
2324
|
-
const chunks = [];
|
|
2325
|
-
const lines = content.split(`
|
|
2326
|
-
`);
|
|
2327
|
-
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
2328
|
-
function getLineNumbers(node) {
|
|
2329
|
-
const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
|
|
2330
|
-
const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
|
|
2331
|
-
return {
|
|
2332
|
-
startLine: start.line + 1,
|
|
2333
|
-
endLine: end.line + 1
|
|
2334
|
-
};
|
|
2335
|
-
}
|
|
2336
|
-
function getNodeText(node) {
|
|
2337
|
-
return node.getText(sourceFile);
|
|
2338
|
-
}
|
|
2339
|
-
function isExported(node) {
|
|
2340
|
-
if (!ts.canHaveModifiers(node))
|
|
2341
|
-
return false;
|
|
2342
|
-
const modifiers = ts.getModifiers(node);
|
|
2343
|
-
return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
|
|
2344
|
-
}
|
|
2345
|
-
function getJSDoc(node) {
|
|
2346
|
-
const jsDocNodes = ts.getJSDocCommentsAndTags(node);
|
|
2347
|
-
if (jsDocNodes.length === 0)
|
|
2348
|
-
return;
|
|
2349
|
-
return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
|
|
2350
|
-
`);
|
|
2351
|
-
}
|
|
2352
|
-
function getFunctionName(node) {
|
|
2353
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2354
|
-
return node.name.text;
|
|
2355
|
-
}
|
|
2356
|
-
if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2357
|
-
return node.name.text;
|
|
2358
|
-
}
|
|
2359
|
-
if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2360
|
-
return node.name.text;
|
|
2361
|
-
}
|
|
2362
|
-
return;
|
|
2363
|
-
}
|
|
2364
|
-
function visit(node) {
|
|
2365
|
-
const { startLine, endLine } = getLineNumbers(node);
|
|
2366
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2367
|
-
chunks.push({
|
|
2368
|
-
content: getNodeText(node),
|
|
2369
|
-
startLine,
|
|
2370
|
-
endLine,
|
|
2371
|
-
type: "function",
|
|
2372
|
-
name: node.name.text,
|
|
2373
|
-
isExported: isExported(node),
|
|
2374
|
-
jsDoc: getJSDoc(node)
|
|
2375
|
-
});
|
|
2376
|
-
return;
|
|
2377
|
-
}
|
|
2378
|
-
if (ts.isVariableStatement(node)) {
|
|
2379
|
-
for (const decl of node.declarationList.declarations) {
|
|
2380
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2381
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2382
|
-
chunks.push({
|
|
2383
|
-
content: getNodeText(node),
|
|
2384
|
-
startLine,
|
|
2385
|
-
endLine,
|
|
2386
|
-
type: "function",
|
|
2387
|
-
name,
|
|
2388
|
-
isExported: isExported(node),
|
|
2389
|
-
jsDoc: getJSDoc(node)
|
|
2390
|
-
});
|
|
2391
|
-
return;
|
|
2392
|
-
}
|
|
2393
|
-
}
|
|
2394
|
-
}
|
|
2395
|
-
if (ts.isClassDeclaration(node) && node.name) {
|
|
2396
|
-
chunks.push({
|
|
2397
|
-
content: getNodeText(node),
|
|
2398
|
-
startLine,
|
|
2399
|
-
endLine,
|
|
2400
|
-
type: "class",
|
|
2401
|
-
name: node.name.text,
|
|
2402
|
-
isExported: isExported(node),
|
|
2403
|
-
jsDoc: getJSDoc(node)
|
|
2404
|
-
});
|
|
2405
|
-
return;
|
|
2406
|
-
}
|
|
2407
|
-
if (ts.isInterfaceDeclaration(node)) {
|
|
2408
|
-
chunks.push({
|
|
2409
|
-
content: getNodeText(node),
|
|
2410
|
-
startLine,
|
|
2411
|
-
endLine,
|
|
2412
|
-
type: "interface",
|
|
2413
|
-
name: node.name.text,
|
|
2414
|
-
isExported: isExported(node),
|
|
2415
|
-
jsDoc: getJSDoc(node)
|
|
2416
|
-
});
|
|
2417
|
-
return;
|
|
2418
|
-
}
|
|
2419
|
-
if (ts.isTypeAliasDeclaration(node)) {
|
|
2420
|
-
chunks.push({
|
|
2421
|
-
content: getNodeText(node),
|
|
2422
|
-
startLine,
|
|
2423
|
-
endLine,
|
|
2424
|
-
type: "type",
|
|
2425
|
-
name: node.name.text,
|
|
2426
|
-
isExported: isExported(node),
|
|
2427
|
-
jsDoc: getJSDoc(node)
|
|
2428
|
-
});
|
|
2429
|
-
return;
|
|
2430
|
-
}
|
|
2431
|
-
if (ts.isEnumDeclaration(node)) {
|
|
2432
|
-
chunks.push({
|
|
2433
|
-
content: getNodeText(node),
|
|
2434
|
-
startLine,
|
|
2435
|
-
endLine,
|
|
2436
|
-
type: "enum",
|
|
2437
|
-
name: node.name.text,
|
|
2438
|
-
isExported: isExported(node),
|
|
2439
|
-
jsDoc: getJSDoc(node)
|
|
2440
|
-
});
|
|
2441
|
-
return;
|
|
2442
|
-
}
|
|
2443
|
-
if (ts.isVariableStatement(node) && isExported(node)) {
|
|
2444
|
-
for (const decl of node.declarationList.declarations) {
|
|
2445
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2446
|
-
continue;
|
|
2447
|
-
}
|
|
2448
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2449
|
-
chunks.push({
|
|
2450
|
-
content: getNodeText(node),
|
|
2451
|
-
startLine,
|
|
2452
|
-
endLine,
|
|
2453
|
-
type: "variable",
|
|
2454
|
-
name,
|
|
2455
|
-
isExported: true,
|
|
2456
|
-
jsDoc: getJSDoc(node)
|
|
2457
|
-
});
|
|
2458
|
-
}
|
|
2459
|
-
return;
|
|
2460
|
-
}
|
|
2461
|
-
ts.forEachChild(node, visit);
|
|
2462
|
-
}
|
|
2463
|
-
ts.forEachChild(sourceFile, visit);
|
|
2464
|
-
if (chunks.length === 0) {
|
|
2465
|
-
return parseGenericCode(content);
|
|
2466
|
-
}
|
|
2467
|
-
return chunks;
|
|
2468
|
-
}
|
|
2469
|
-
function parseGenericCode(content) {
|
|
2470
|
-
const chunks = [];
|
|
2471
|
-
const lines = content.split(`
|
|
2472
|
-
`);
|
|
2473
|
-
const CHUNK_SIZE = 30;
|
|
2474
|
-
const OVERLAP = 5;
|
|
2475
|
-
if (lines.length <= CHUNK_SIZE) {
|
|
2476
|
-
return [
|
|
2477
|
-
{
|
|
2478
|
-
content,
|
|
2479
|
-
startLine: 1,
|
|
2480
|
-
endLine: lines.length,
|
|
2481
|
-
type: "file"
|
|
2482
|
-
}
|
|
2483
|
-
];
|
|
2484
|
-
}
|
|
2485
|
-
for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
|
|
2486
|
-
const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
|
|
2487
|
-
chunks.push({
|
|
2488
|
-
content: lines.slice(i, endIdx).join(`
|
|
2489
|
-
`),
|
|
2490
|
-
startLine: i + 1,
|
|
2491
|
-
endLine: endIdx,
|
|
2492
|
-
type: "block"
|
|
2493
|
-
});
|
|
2494
|
-
if (endIdx >= lines.length)
|
|
2495
|
-
break;
|
|
2496
|
-
}
|
|
2497
|
-
return chunks;
|
|
2498
|
-
}
|
|
2499
|
-
function generateChunkId(filepath, startLine, endLine) {
|
|
2500
|
-
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2501
|
-
return `${safePath}-${startLine}-${endLine}`;
|
|
2502
|
-
}
|
|
2503
|
-
var init_parseCode = () => {};
|
|
2504
|
-
|
|
2505
|
-
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2506
|
-
var init_fileIndexStorage = __esm(() => {
|
|
2507
|
-
init_entities();
|
|
2508
|
-
});
|
|
2509
|
-
|
|
2510
2311
|
// src/domain/services/keywords.ts
|
|
2511
2312
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2512
2313
|
const keywords = new Set;
|
|
@@ -2695,222 +2496,1105 @@ var init_keywords = __esm(() => {
|
|
|
2695
2496
|
};
|
|
2696
2497
|
});
|
|
2697
2498
|
|
|
2698
|
-
// src/
|
|
2699
|
-
|
|
2700
|
-
|
|
2499
|
+
// src/domain/services/similarity.ts
|
|
2500
|
+
function cosineSimilarity(a, b) {
|
|
2501
|
+
if (a.length !== b.length) {
|
|
2502
|
+
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
2503
|
+
}
|
|
2504
|
+
let dotProduct = 0;
|
|
2505
|
+
let normA = 0;
|
|
2506
|
+
let normB = 0;
|
|
2507
|
+
for (let i = 0;i < a.length; i++) {
|
|
2508
|
+
dotProduct += a[i] * b[i];
|
|
2509
|
+
normA += a[i] * a[i];
|
|
2510
|
+
normB += b[i] * b[i];
|
|
2511
|
+
}
|
|
2512
|
+
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
2513
|
+
if (magnitude === 0)
|
|
2514
|
+
return 0;
|
|
2515
|
+
return dotProduct / magnitude;
|
|
2516
|
+
}
|
|
2701
2517
|
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
|
|
2710
|
-
this.moduleId = moduleId;
|
|
2518
|
+
// src/domain/services/queryIntent.ts
|
|
2519
|
+
import * as path7 from "path";
|
|
2520
|
+
function detectQueryIntent(queryTerms) {
|
|
2521
|
+
const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
|
|
2522
|
+
const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
|
|
2523
|
+
if (hasDocumentationTerm) {
|
|
2524
|
+
return "documentation";
|
|
2711
2525
|
}
|
|
2712
|
-
|
|
2713
|
-
|
|
2526
|
+
if (hasImplementationTerm) {
|
|
2527
|
+
return "implementation";
|
|
2528
|
+
}
|
|
2529
|
+
return "neutral";
|
|
2530
|
+
}
|
|
2531
|
+
function extractQueryTerms(query) {
|
|
2532
|
+
return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
|
|
2533
|
+
}
|
|
2534
|
+
function isSourceCodeFile(filepath) {
|
|
2535
|
+
const ext = path7.extname(filepath).toLowerCase();
|
|
2536
|
+
return SOURCE_CODE_EXTENSIONS.includes(ext);
|
|
2537
|
+
}
|
|
2538
|
+
function isDocFile(filepath) {
|
|
2539
|
+
const ext = path7.extname(filepath).toLowerCase();
|
|
2540
|
+
return DOC_EXTENSIONS.includes(ext);
|
|
2541
|
+
}
|
|
2542
|
+
function calculateFileTypeBoost(filepath, queryTerms) {
|
|
2543
|
+
const isSourceCode = isSourceCodeFile(filepath);
|
|
2544
|
+
const isDoc = isDocFile(filepath);
|
|
2545
|
+
const intent = detectQueryIntent(queryTerms);
|
|
2546
|
+
if (intent === "implementation") {
|
|
2547
|
+
if (isSourceCode) {
|
|
2548
|
+
return 0.06;
|
|
2549
|
+
}
|
|
2550
|
+
return 0;
|
|
2551
|
+
}
|
|
2552
|
+
if (intent === "documentation") {
|
|
2553
|
+
if (isDoc) {
|
|
2554
|
+
return 0.08;
|
|
2555
|
+
}
|
|
2556
|
+
return 0;
|
|
2557
|
+
}
|
|
2558
|
+
return 0;
|
|
2559
|
+
}
|
|
2560
|
+
var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
|
|
2561
|
+
var init_queryIntent = __esm(() => {
|
|
2562
|
+
IMPLEMENTATION_TERMS = [
|
|
2563
|
+
"function",
|
|
2564
|
+
"method",
|
|
2565
|
+
"class",
|
|
2566
|
+
"interface",
|
|
2567
|
+
"implement",
|
|
2568
|
+
"implementation",
|
|
2569
|
+
"endpoint",
|
|
2570
|
+
"route",
|
|
2571
|
+
"handler",
|
|
2572
|
+
"controller",
|
|
2573
|
+
"module",
|
|
2574
|
+
"code"
|
|
2575
|
+
];
|
|
2576
|
+
DOCUMENTATION_TERMS = [
|
|
2577
|
+
"documentation",
|
|
2578
|
+
"docs",
|
|
2579
|
+
"guide",
|
|
2580
|
+
"tutorial",
|
|
2581
|
+
"readme",
|
|
2582
|
+
"how",
|
|
2583
|
+
"what",
|
|
2584
|
+
"why",
|
|
2585
|
+
"explain",
|
|
2586
|
+
"overview",
|
|
2587
|
+
"getting",
|
|
2588
|
+
"started",
|
|
2589
|
+
"requirements",
|
|
2590
|
+
"setup",
|
|
2591
|
+
"install",
|
|
2592
|
+
"configure",
|
|
2593
|
+
"configuration"
|
|
2594
|
+
];
|
|
2595
|
+
SOURCE_CODE_EXTENSIONS = [
|
|
2596
|
+
".ts",
|
|
2597
|
+
".tsx",
|
|
2598
|
+
".js",
|
|
2599
|
+
".jsx",
|
|
2600
|
+
".mjs",
|
|
2601
|
+
".cjs",
|
|
2602
|
+
".py",
|
|
2603
|
+
".go",
|
|
2604
|
+
".rs",
|
|
2605
|
+
".java"
|
|
2606
|
+
];
|
|
2607
|
+
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
2608
|
+
});
|
|
2609
|
+
|
|
2610
|
+
// src/domain/services/chunking.ts
|
|
2611
|
+
function createLineBasedChunks(content, options = {}) {
|
|
2612
|
+
const {
|
|
2613
|
+
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
2614
|
+
overlap = DEFAULT_OVERLAP,
|
|
2615
|
+
minLinesForMultipleChunks = chunkSize
|
|
2616
|
+
} = options;
|
|
2617
|
+
const lines = content.split(`
|
|
2618
|
+
`);
|
|
2619
|
+
const chunks = [];
|
|
2620
|
+
if (lines.length <= minLinesForMultipleChunks) {
|
|
2621
|
+
return [
|
|
2622
|
+
{
|
|
2623
|
+
content,
|
|
2624
|
+
startLine: 1,
|
|
2625
|
+
endLine: lines.length,
|
|
2626
|
+
type: "file"
|
|
2627
|
+
}
|
|
2628
|
+
];
|
|
2629
|
+
}
|
|
2630
|
+
for (let i = 0;i < lines.length; i += chunkSize - overlap) {
|
|
2631
|
+
const endIdx = Math.min(i + chunkSize, lines.length);
|
|
2632
|
+
chunks.push({
|
|
2633
|
+
content: lines.slice(i, endIdx).join(`
|
|
2634
|
+
`),
|
|
2635
|
+
startLine: i + 1,
|
|
2636
|
+
endLine: endIdx,
|
|
2637
|
+
type: "block"
|
|
2638
|
+
});
|
|
2639
|
+
if (endIdx >= lines.length)
|
|
2640
|
+
break;
|
|
2641
|
+
}
|
|
2642
|
+
return chunks;
|
|
2643
|
+
}
|
|
2644
|
+
function generateChunkId(filepath, startLine, endLine) {
|
|
2645
|
+
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2646
|
+
return `${safePath}-${startLine}-${endLine}`;
|
|
2647
|
+
}
|
|
2648
|
+
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2649
|
+
|
|
2650
|
+
// src/domain/services/index.ts
|
|
2651
|
+
var init_services = __esm(() => {
|
|
2652
|
+
init_keywords();
|
|
2653
|
+
init_queryIntent();
|
|
2654
|
+
});
|
|
2655
|
+
|
|
2656
|
+
// src/modules/language/typescript/parseCode.ts
|
|
2657
|
+
import * as ts from "typescript";
|
|
2658
|
+
function parseTypeScriptCode(content, filepath) {
|
|
2659
|
+
return parseTypeScript(content, filepath);
|
|
2660
|
+
}
|
|
2661
|
+
function parseTypeScript(content, filepath) {
|
|
2662
|
+
const chunks = [];
|
|
2663
|
+
const lines = content.split(`
|
|
2664
|
+
`);
|
|
2665
|
+
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
2666
|
+
function getLineNumbers(node) {
|
|
2667
|
+
const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
|
|
2668
|
+
const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
|
|
2669
|
+
return {
|
|
2670
|
+
startLine: start.line + 1,
|
|
2671
|
+
endLine: end.line + 1
|
|
2672
|
+
};
|
|
2673
|
+
}
|
|
2674
|
+
function getNodeText(node) {
|
|
2675
|
+
return node.getText(sourceFile);
|
|
2676
|
+
}
|
|
2677
|
+
function isExported(node) {
|
|
2678
|
+
if (!ts.canHaveModifiers(node))
|
|
2679
|
+
return false;
|
|
2680
|
+
const modifiers = ts.getModifiers(node);
|
|
2681
|
+
return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
|
|
2682
|
+
}
|
|
2683
|
+
function getJSDoc(node) {
|
|
2684
|
+
const jsDocNodes = ts.getJSDocCommentsAndTags(node);
|
|
2685
|
+
if (jsDocNodes.length === 0)
|
|
2686
|
+
return;
|
|
2687
|
+
return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
|
|
2688
|
+
`);
|
|
2689
|
+
}
|
|
2690
|
+
function getFunctionName(node) {
|
|
2691
|
+
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2692
|
+
return node.name.text;
|
|
2693
|
+
}
|
|
2694
|
+
if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2695
|
+
return node.name.text;
|
|
2696
|
+
}
|
|
2697
|
+
if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2698
|
+
return node.name.text;
|
|
2699
|
+
}
|
|
2700
|
+
return;
|
|
2701
|
+
}
|
|
2702
|
+
function visit(node) {
|
|
2703
|
+
const { startLine, endLine } = getLineNumbers(node);
|
|
2704
|
+
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2705
|
+
chunks.push({
|
|
2706
|
+
content: getNodeText(node),
|
|
2707
|
+
startLine,
|
|
2708
|
+
endLine,
|
|
2709
|
+
type: "function",
|
|
2710
|
+
name: node.name.text,
|
|
2711
|
+
isExported: isExported(node),
|
|
2712
|
+
jsDoc: getJSDoc(node)
|
|
2713
|
+
});
|
|
2714
|
+
return;
|
|
2715
|
+
}
|
|
2716
|
+
if (ts.isVariableStatement(node)) {
|
|
2717
|
+
for (const decl of node.declarationList.declarations) {
|
|
2718
|
+
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2719
|
+
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2720
|
+
chunks.push({
|
|
2721
|
+
content: getNodeText(node),
|
|
2722
|
+
startLine,
|
|
2723
|
+
endLine,
|
|
2724
|
+
type: "function",
|
|
2725
|
+
name,
|
|
2726
|
+
isExported: isExported(node),
|
|
2727
|
+
jsDoc: getJSDoc(node)
|
|
2728
|
+
});
|
|
2729
|
+
return;
|
|
2730
|
+
}
|
|
2731
|
+
}
|
|
2732
|
+
}
|
|
2733
|
+
if (ts.isClassDeclaration(node) && node.name) {
|
|
2734
|
+
chunks.push({
|
|
2735
|
+
content: getNodeText(node),
|
|
2736
|
+
startLine,
|
|
2737
|
+
endLine,
|
|
2738
|
+
type: "class",
|
|
2739
|
+
name: node.name.text,
|
|
2740
|
+
isExported: isExported(node),
|
|
2741
|
+
jsDoc: getJSDoc(node)
|
|
2742
|
+
});
|
|
2743
|
+
return;
|
|
2744
|
+
}
|
|
2745
|
+
if (ts.isInterfaceDeclaration(node)) {
|
|
2746
|
+
chunks.push({
|
|
2747
|
+
content: getNodeText(node),
|
|
2748
|
+
startLine,
|
|
2749
|
+
endLine,
|
|
2750
|
+
type: "interface",
|
|
2751
|
+
name: node.name.text,
|
|
2752
|
+
isExported: isExported(node),
|
|
2753
|
+
jsDoc: getJSDoc(node)
|
|
2754
|
+
});
|
|
2755
|
+
return;
|
|
2756
|
+
}
|
|
2757
|
+
if (ts.isTypeAliasDeclaration(node)) {
|
|
2758
|
+
chunks.push({
|
|
2759
|
+
content: getNodeText(node),
|
|
2760
|
+
startLine,
|
|
2761
|
+
endLine,
|
|
2762
|
+
type: "type",
|
|
2763
|
+
name: node.name.text,
|
|
2764
|
+
isExported: isExported(node),
|
|
2765
|
+
jsDoc: getJSDoc(node)
|
|
2766
|
+
});
|
|
2767
|
+
return;
|
|
2768
|
+
}
|
|
2769
|
+
if (ts.isEnumDeclaration(node)) {
|
|
2770
|
+
chunks.push({
|
|
2771
|
+
content: getNodeText(node),
|
|
2772
|
+
startLine,
|
|
2773
|
+
endLine,
|
|
2774
|
+
type: "enum",
|
|
2775
|
+
name: node.name.text,
|
|
2776
|
+
isExported: isExported(node),
|
|
2777
|
+
jsDoc: getJSDoc(node)
|
|
2778
|
+
});
|
|
2779
|
+
return;
|
|
2780
|
+
}
|
|
2781
|
+
if (ts.isVariableStatement(node) && isExported(node)) {
|
|
2782
|
+
for (const decl of node.declarationList.declarations) {
|
|
2783
|
+
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2784
|
+
continue;
|
|
2785
|
+
}
|
|
2786
|
+
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2787
|
+
chunks.push({
|
|
2788
|
+
content: getNodeText(node),
|
|
2789
|
+
startLine,
|
|
2790
|
+
endLine,
|
|
2791
|
+
type: "variable",
|
|
2792
|
+
name,
|
|
2793
|
+
isExported: true,
|
|
2794
|
+
jsDoc: getJSDoc(node)
|
|
2795
|
+
});
|
|
2796
|
+
}
|
|
2797
|
+
return;
|
|
2798
|
+
}
|
|
2799
|
+
ts.forEachChild(node, visit);
|
|
2800
|
+
}
|
|
2801
|
+
ts.forEachChild(sourceFile, visit);
|
|
2802
|
+
if (chunks.length === 0) {
|
|
2803
|
+
const lines2 = content.split(`
|
|
2804
|
+
`);
|
|
2805
|
+
return [
|
|
2806
|
+
{
|
|
2807
|
+
content,
|
|
2808
|
+
startLine: 1,
|
|
2809
|
+
endLine: lines2.length,
|
|
2810
|
+
type: "file"
|
|
2811
|
+
}
|
|
2812
|
+
];
|
|
2813
|
+
}
|
|
2814
|
+
return chunks;
|
|
2815
|
+
}
|
|
2816
|
+
function generateChunkId2(filepath, startLine, endLine) {
|
|
2817
|
+
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2818
|
+
return `${safePath}-${startLine}-${endLine}`;
|
|
2819
|
+
}
|
|
2820
|
+
var init_parseCode = () => {};
|
|
2821
|
+
|
|
2822
|
+
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2823
|
+
var init_fileIndexStorage = __esm(() => {
|
|
2824
|
+
init_entities();
|
|
2825
|
+
});
|
|
2826
|
+
|
|
2827
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
2828
|
+
import * as fs3 from "fs/promises";
|
|
2829
|
+
import * as path8 from "path";
|
|
2830
|
+
|
|
2831
|
+
class SymbolicIndex {
|
|
2832
|
+
meta = null;
|
|
2833
|
+
fileSummaries = new Map;
|
|
2834
|
+
bm25Index = null;
|
|
2835
|
+
symbolicPath;
|
|
2836
|
+
moduleId;
|
|
2837
|
+
constructor(indexDir, moduleId) {
|
|
2838
|
+
this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
|
|
2839
|
+
this.moduleId = moduleId;
|
|
2840
|
+
}
|
|
2841
|
+
async initialize() {
|
|
2842
|
+
try {
|
|
2714
2843
|
await this.load();
|
|
2715
2844
|
} catch {
|
|
2716
|
-
this.meta = {
|
|
2717
|
-
version: "1.0.0",
|
|
2718
|
-
lastUpdated: new Date().toISOString(),
|
|
2719
|
-
moduleId: this.moduleId,
|
|
2720
|
-
fileCount: 0,
|
|
2721
|
-
bm25Data: {
|
|
2722
|
-
avgDocLength: 0,
|
|
2723
|
-
documentFrequencies: {},
|
|
2724
|
-
totalDocs: 0
|
|
2845
|
+
this.meta = {
|
|
2846
|
+
version: "1.0.0",
|
|
2847
|
+
lastUpdated: new Date().toISOString(),
|
|
2848
|
+
moduleId: this.moduleId,
|
|
2849
|
+
fileCount: 0,
|
|
2850
|
+
bm25Data: {
|
|
2851
|
+
avgDocLength: 0,
|
|
2852
|
+
documentFrequencies: {},
|
|
2853
|
+
totalDocs: 0
|
|
2854
|
+
}
|
|
2855
|
+
};
|
|
2856
|
+
this.bm25Index = new BM25Index;
|
|
2857
|
+
}
|
|
2858
|
+
}
|
|
2859
|
+
addFile(summary) {
|
|
2860
|
+
this.fileSummaries.set(summary.filepath, summary);
|
|
2861
|
+
}
|
|
2862
|
+
removeFile(filepath) {
|
|
2863
|
+
return this.fileSummaries.delete(filepath);
|
|
2864
|
+
}
|
|
2865
|
+
buildBM25Index() {
|
|
2866
|
+
this.bm25Index = new BM25Index;
|
|
2867
|
+
for (const [filepath, summary] of this.fileSummaries) {
|
|
2868
|
+
const content = [
|
|
2869
|
+
...summary.keywords,
|
|
2870
|
+
...summary.exports,
|
|
2871
|
+
...extractPathKeywords(filepath)
|
|
2872
|
+
].join(" ");
|
|
2873
|
+
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
2874
|
+
}
|
|
2875
|
+
if (this.meta) {
|
|
2876
|
+
this.meta.fileCount = this.fileSummaries.size;
|
|
2877
|
+
this.meta.bm25Data.totalDocs = this.fileSummaries.size;
|
|
2878
|
+
}
|
|
2879
|
+
}
|
|
2880
|
+
findCandidates(query, maxCandidates = 20) {
|
|
2881
|
+
if (!this.bm25Index) {
|
|
2882
|
+
return Array.from(this.fileSummaries.keys());
|
|
2883
|
+
}
|
|
2884
|
+
const results = this.bm25Index.search(query, maxCandidates);
|
|
2885
|
+
return results.map((r) => r.id);
|
|
2886
|
+
}
|
|
2887
|
+
getAllFiles() {
|
|
2888
|
+
return Array.from(this.fileSummaries.keys());
|
|
2889
|
+
}
|
|
2890
|
+
getFileSummary(filepath) {
|
|
2891
|
+
return this.fileSummaries.get(filepath);
|
|
2892
|
+
}
|
|
2893
|
+
async save() {
|
|
2894
|
+
if (!this.meta)
|
|
2895
|
+
throw new Error("Index not initialized");
|
|
2896
|
+
this.meta.lastUpdated = new Date().toISOString();
|
|
2897
|
+
this.meta.fileCount = this.fileSummaries.size;
|
|
2898
|
+
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
2899
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2900
|
+
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
2901
|
+
for (const [filepath, summary] of this.fileSummaries) {
|
|
2902
|
+
const summaryPath = this.getFileSummaryPath(filepath);
|
|
2903
|
+
await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
|
|
2904
|
+
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
2905
|
+
}
|
|
2906
|
+
}
|
|
2907
|
+
async load() {
|
|
2908
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2909
|
+
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
2910
|
+
this.meta = JSON.parse(metaContent);
|
|
2911
|
+
this.fileSummaries.clear();
|
|
2912
|
+
await this.loadFileSummariesRecursive(this.symbolicPath);
|
|
2913
|
+
this.buildBM25Index();
|
|
2914
|
+
}
|
|
2915
|
+
async loadFileSummariesRecursive(dir) {
|
|
2916
|
+
try {
|
|
2917
|
+
const entries = await fs3.readdir(dir, { withFileTypes: true });
|
|
2918
|
+
for (const entry of entries) {
|
|
2919
|
+
const fullPath = path8.join(dir, entry.name);
|
|
2920
|
+
if (entry.isDirectory()) {
|
|
2921
|
+
await this.loadFileSummariesRecursive(fullPath);
|
|
2922
|
+
} else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
|
|
2923
|
+
try {
|
|
2924
|
+
const content = await fs3.readFile(fullPath, "utf-8");
|
|
2925
|
+
const summary = JSON.parse(content);
|
|
2926
|
+
if (summary.filepath) {
|
|
2927
|
+
this.fileSummaries.set(summary.filepath, summary);
|
|
2928
|
+
}
|
|
2929
|
+
} catch {}
|
|
2930
|
+
}
|
|
2931
|
+
}
|
|
2932
|
+
} catch {}
|
|
2933
|
+
}
|
|
2934
|
+
getFileSummaryPath(filepath) {
|
|
2935
|
+
const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
|
|
2936
|
+
return path8.join(this.symbolicPath, jsonPath);
|
|
2937
|
+
}
|
|
2938
|
+
async deleteFileSummary(filepath) {
|
|
2939
|
+
try {
|
|
2940
|
+
await fs3.unlink(this.getFileSummaryPath(filepath));
|
|
2941
|
+
} catch {}
|
|
2942
|
+
this.fileSummaries.delete(filepath);
|
|
2943
|
+
}
|
|
2944
|
+
async exists() {
|
|
2945
|
+
try {
|
|
2946
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2947
|
+
await fs3.access(metaPath);
|
|
2948
|
+
return true;
|
|
2949
|
+
} catch {
|
|
2950
|
+
return false;
|
|
2951
|
+
}
|
|
2952
|
+
}
|
|
2953
|
+
get size() {
|
|
2954
|
+
return this.fileSummaries.size;
|
|
2955
|
+
}
|
|
2956
|
+
clear() {
|
|
2957
|
+
this.fileSummaries.clear();
|
|
2958
|
+
if (this.meta) {
|
|
2959
|
+
this.meta.fileCount = 0;
|
|
2960
|
+
this.meta.bm25Data = {
|
|
2961
|
+
avgDocLength: 0,
|
|
2962
|
+
documentFrequencies: {},
|
|
2963
|
+
totalDocs: 0
|
|
2964
|
+
};
|
|
2965
|
+
}
|
|
2966
|
+
this.bm25Index = new BM25Index;
|
|
2967
|
+
}
|
|
2968
|
+
}
|
|
2969
|
+
var init_symbolicIndex = __esm(() => {
|
|
2970
|
+
init_keywords();
|
|
2971
|
+
});
|
|
2972
|
+
|
|
2973
|
+
// src/infrastructure/storage/index.ts
|
|
2974
|
+
var init_storage = __esm(() => {
|
|
2975
|
+
init_fileIndexStorage();
|
|
2976
|
+
init_symbolicIndex();
|
|
2977
|
+
});
|
|
2978
|
+
|
|
2979
|
+
// src/modules/language/typescript/index.ts
|
|
2980
|
+
var exports_typescript = {};
|
|
2981
|
+
__export(exports_typescript, {
|
|
2982
|
+
isTypeScriptFile: () => isTypeScriptFile,
|
|
2983
|
+
TypeScriptModule: () => TypeScriptModule,
|
|
2984
|
+
TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
|
|
2985
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
2986
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
2987
|
+
});
|
|
2988
|
+
import * as path9 from "path";
|
|
2989
|
+
function isTypeScriptFile(filepath) {
|
|
2990
|
+
const ext = path9.extname(filepath).toLowerCase();
|
|
2991
|
+
return TYPESCRIPT_EXTENSIONS.includes(ext);
|
|
2992
|
+
}
|
|
2993
|
+
function calculateChunkTypeBoost(chunk) {
|
|
2994
|
+
switch (chunk.type) {
|
|
2995
|
+
case "function":
|
|
2996
|
+
return 0.05;
|
|
2997
|
+
case "class":
|
|
2998
|
+
case "interface":
|
|
2999
|
+
return 0.04;
|
|
3000
|
+
case "type":
|
|
3001
|
+
case "enum":
|
|
3002
|
+
return 0.03;
|
|
3003
|
+
case "variable":
|
|
3004
|
+
return 0.02;
|
|
3005
|
+
case "file":
|
|
3006
|
+
case "block":
|
|
3007
|
+
default:
|
|
3008
|
+
return 0;
|
|
3009
|
+
}
|
|
3010
|
+
}
|
|
3011
|
+
function calculateExportBoost(chunk) {
|
|
3012
|
+
return chunk.isExported ? 0.03 : 0;
|
|
3013
|
+
}
|
|
3014
|
+
|
|
3015
|
+
class TypeScriptModule {
|
|
3016
|
+
id = "language/typescript";
|
|
3017
|
+
name = "TypeScript Search";
|
|
3018
|
+
description = "TypeScript-aware code search with AST parsing and semantic embeddings";
|
|
3019
|
+
version = "1.0.0";
|
|
3020
|
+
embeddingConfig = null;
|
|
3021
|
+
symbolicIndex = null;
|
|
3022
|
+
pendingSummaries = new Map;
|
|
3023
|
+
rootDir = "";
|
|
3024
|
+
logger = undefined;
|
|
3025
|
+
async initialize(config) {
|
|
3026
|
+
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3027
|
+
this.logger = config.options?.logger;
|
|
3028
|
+
if (this.logger) {
|
|
3029
|
+
this.embeddingConfig = {
|
|
3030
|
+
...this.embeddingConfig,
|
|
3031
|
+
logger: this.logger
|
|
3032
|
+
};
|
|
3033
|
+
}
|
|
3034
|
+
configureEmbeddings(this.embeddingConfig);
|
|
3035
|
+
this.pendingSummaries.clear();
|
|
3036
|
+
}
|
|
3037
|
+
async indexFile(filepath, content, ctx) {
|
|
3038
|
+
if (!isTypeScriptFile(filepath)) {
|
|
3039
|
+
return null;
|
|
3040
|
+
}
|
|
3041
|
+
this.rootDir = ctx.rootDir;
|
|
3042
|
+
const parsedChunks = parseTypeScriptCode(content, filepath);
|
|
3043
|
+
if (parsedChunks.length === 0) {
|
|
3044
|
+
return null;
|
|
3045
|
+
}
|
|
3046
|
+
const pathContext = parsePathContext(filepath);
|
|
3047
|
+
const pathPrefix = formatPathContextForEmbedding(pathContext);
|
|
3048
|
+
const chunkContents = parsedChunks.map((c) => {
|
|
3049
|
+
const namePrefix = c.name ? `${c.name}: ` : "";
|
|
3050
|
+
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3051
|
+
});
|
|
3052
|
+
const embeddings = await getEmbeddings(chunkContents);
|
|
3053
|
+
const chunks = parsedChunks.map((pc) => ({
|
|
3054
|
+
id: generateChunkId2(filepath, pc.startLine, pc.endLine),
|
|
3055
|
+
content: pc.content,
|
|
3056
|
+
startLine: pc.startLine,
|
|
3057
|
+
endLine: pc.endLine,
|
|
3058
|
+
type: pc.type,
|
|
3059
|
+
name: pc.name,
|
|
3060
|
+
isExported: pc.isExported,
|
|
3061
|
+
jsDoc: pc.jsDoc
|
|
3062
|
+
}));
|
|
3063
|
+
const references = this.extractReferences(content, filepath);
|
|
3064
|
+
const stats = await ctx.getFileStats(filepath);
|
|
3065
|
+
const currentConfig = getEmbeddingConfig();
|
|
3066
|
+
const moduleData = {
|
|
3067
|
+
embeddings,
|
|
3068
|
+
embeddingModel: currentConfig.model
|
|
3069
|
+
};
|
|
3070
|
+
const chunkTypes = [
|
|
3071
|
+
...new Set(parsedChunks.map((pc) => pc.type))
|
|
3072
|
+
];
|
|
3073
|
+
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
3074
|
+
const allKeywords = new Set;
|
|
3075
|
+
for (const pc of parsedChunks) {
|
|
3076
|
+
const keywords = extractKeywords(pc.content, pc.name);
|
|
3077
|
+
keywords.forEach((k) => allKeywords.add(k));
|
|
3078
|
+
}
|
|
3079
|
+
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3080
|
+
const fileSummary = {
|
|
3081
|
+
filepath,
|
|
3082
|
+
chunkCount: chunks.length,
|
|
3083
|
+
chunkTypes,
|
|
3084
|
+
keywords: Array.from(allKeywords),
|
|
3085
|
+
exports,
|
|
3086
|
+
lastModified: stats.lastModified,
|
|
3087
|
+
pathContext: {
|
|
3088
|
+
segments: pathContext.segments,
|
|
3089
|
+
layer: pathContext.layer,
|
|
3090
|
+
domain: pathContext.domain,
|
|
3091
|
+
depth: pathContext.depth
|
|
3092
|
+
}
|
|
3093
|
+
};
|
|
3094
|
+
this.pendingSummaries.set(filepath, fileSummary);
|
|
3095
|
+
return {
|
|
3096
|
+
filepath,
|
|
3097
|
+
lastModified: stats.lastModified,
|
|
3098
|
+
chunks,
|
|
3099
|
+
moduleData,
|
|
3100
|
+
references
|
|
3101
|
+
};
|
|
3102
|
+
}
|
|
3103
|
+
async finalize(ctx) {
|
|
3104
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3105
|
+
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3106
|
+
await this.symbolicIndex.initialize();
|
|
3107
|
+
for (const [filepath, summary] of this.pendingSummaries) {
|
|
3108
|
+
this.symbolicIndex.addFile(summary);
|
|
3109
|
+
}
|
|
3110
|
+
this.symbolicIndex.buildBM25Index();
|
|
3111
|
+
await this.symbolicIndex.save();
|
|
3112
|
+
this.pendingSummaries.clear();
|
|
3113
|
+
}
|
|
3114
|
+
async search(query, ctx, options = {}) {
|
|
3115
|
+
const {
|
|
3116
|
+
topK = DEFAULT_TOP_K2,
|
|
3117
|
+
minScore = DEFAULT_MIN_SCORE2,
|
|
3118
|
+
filePatterns
|
|
3119
|
+
} = options;
|
|
3120
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3121
|
+
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3122
|
+
let allFiles;
|
|
3123
|
+
try {
|
|
3124
|
+
await symbolicIndex.initialize();
|
|
3125
|
+
allFiles = symbolicIndex.getAllFiles();
|
|
3126
|
+
} catch {
|
|
3127
|
+
allFiles = await ctx.listIndexedFiles();
|
|
3128
|
+
}
|
|
3129
|
+
let filesToSearch = allFiles;
|
|
3130
|
+
if (filePatterns && filePatterns.length > 0) {
|
|
3131
|
+
filesToSearch = allFiles.filter((filepath) => {
|
|
3132
|
+
return filePatterns.some((pattern) => {
|
|
3133
|
+
if (pattern.startsWith("*.")) {
|
|
3134
|
+
const ext = pattern.slice(1);
|
|
3135
|
+
return filepath.endsWith(ext);
|
|
3136
|
+
}
|
|
3137
|
+
return filepath.includes(pattern);
|
|
3138
|
+
});
|
|
3139
|
+
});
|
|
3140
|
+
}
|
|
3141
|
+
const queryEmbedding = await getEmbedding(query);
|
|
3142
|
+
const bm25Index = new BM25Index;
|
|
3143
|
+
const allChunksData = [];
|
|
3144
|
+
for (const filepath of filesToSearch) {
|
|
3145
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3146
|
+
if (!fileIndex)
|
|
3147
|
+
continue;
|
|
3148
|
+
const moduleData = fileIndex.moduleData;
|
|
3149
|
+
if (!moduleData?.embeddings)
|
|
3150
|
+
continue;
|
|
3151
|
+
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3152
|
+
const chunk = fileIndex.chunks[i];
|
|
3153
|
+
const embedding = moduleData.embeddings[i];
|
|
3154
|
+
if (!embedding)
|
|
3155
|
+
continue;
|
|
3156
|
+
allChunksData.push({
|
|
3157
|
+
filepath: fileIndex.filepath,
|
|
3158
|
+
chunk,
|
|
3159
|
+
embedding
|
|
3160
|
+
});
|
|
3161
|
+
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3162
|
+
}
|
|
3163
|
+
}
|
|
3164
|
+
const bm25Results = bm25Index.search(query, topK * 3);
|
|
3165
|
+
const bm25Scores = new Map;
|
|
3166
|
+
for (const result of bm25Results) {
|
|
3167
|
+
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3168
|
+
}
|
|
3169
|
+
const queryTerms = extractQueryTerms(query);
|
|
3170
|
+
const pathBoosts = new Map;
|
|
3171
|
+
for (const filepath of filesToSearch) {
|
|
3172
|
+
const summary = symbolicIndex.getFileSummary(filepath);
|
|
3173
|
+
if (summary?.pathContext) {
|
|
3174
|
+
let boost = 0;
|
|
3175
|
+
const ctx2 = summary.pathContext;
|
|
3176
|
+
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
3177
|
+
boost += 0.1;
|
|
2725
3178
|
}
|
|
2726
|
-
|
|
2727
|
-
|
|
3179
|
+
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
3180
|
+
boost += 0.05;
|
|
3181
|
+
}
|
|
3182
|
+
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
3183
|
+
if (segmentMatch) {
|
|
3184
|
+
boost += 0.05;
|
|
3185
|
+
}
|
|
3186
|
+
pathBoosts.set(filepath, boost);
|
|
3187
|
+
}
|
|
3188
|
+
}
|
|
3189
|
+
const results = [];
|
|
3190
|
+
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3191
|
+
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3192
|
+
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3193
|
+
const pathBoost = pathBoosts.get(filepath) || 0;
|
|
3194
|
+
const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
|
|
3195
|
+
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
3196
|
+
const exportBoost = calculateExportBoost(chunk);
|
|
3197
|
+
const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
3198
|
+
const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
|
|
3199
|
+
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3200
|
+
results.push({
|
|
3201
|
+
filepath,
|
|
3202
|
+
chunk,
|
|
3203
|
+
score: hybridScore,
|
|
3204
|
+
moduleId: this.id,
|
|
3205
|
+
context: {
|
|
3206
|
+
semanticScore,
|
|
3207
|
+
bm25Score,
|
|
3208
|
+
pathBoost,
|
|
3209
|
+
fileTypeBoost,
|
|
3210
|
+
chunkTypeBoost,
|
|
3211
|
+
exportBoost
|
|
3212
|
+
}
|
|
3213
|
+
});
|
|
3214
|
+
}
|
|
2728
3215
|
}
|
|
3216
|
+
results.sort((a, b) => b.score - a.score);
|
|
3217
|
+
return results.slice(0, topK);
|
|
2729
3218
|
}
|
|
2730
|
-
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
|
|
2734
|
-
|
|
2735
|
-
|
|
2736
|
-
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
...extractPathKeywords(filepath)
|
|
2743
|
-
].join(" ");
|
|
2744
|
-
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
3219
|
+
extractReferences(content, filepath) {
|
|
3220
|
+
const references = [];
|
|
3221
|
+
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
3222
|
+
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
3223
|
+
let match;
|
|
3224
|
+
while ((match = importRegex.exec(content)) !== null) {
|
|
3225
|
+
const importPath = match[1];
|
|
3226
|
+
if (importPath.startsWith(".")) {
|
|
3227
|
+
const dir = path9.dirname(filepath);
|
|
3228
|
+
const resolved = path9.normalize(path9.join(dir, importPath));
|
|
3229
|
+
references.push(resolved);
|
|
3230
|
+
}
|
|
2745
3231
|
}
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
3232
|
+
while ((match = requireRegex.exec(content)) !== null) {
|
|
3233
|
+
const importPath = match[1];
|
|
3234
|
+
if (importPath.startsWith(".")) {
|
|
3235
|
+
const dir = path9.dirname(filepath);
|
|
3236
|
+
const resolved = path9.normalize(path9.join(dir, importPath));
|
|
3237
|
+
references.push(resolved);
|
|
3238
|
+
}
|
|
2749
3239
|
}
|
|
3240
|
+
return references;
|
|
2750
3241
|
}
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
3242
|
+
}
|
|
3243
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS;
|
|
3244
|
+
var init_typescript = __esm(() => {
|
|
3245
|
+
init_embeddings();
|
|
3246
|
+
init_services();
|
|
3247
|
+
init_config2();
|
|
3248
|
+
init_parseCode();
|
|
3249
|
+
init_storage();
|
|
3250
|
+
TYPESCRIPT_EXTENSIONS = [
|
|
3251
|
+
".ts",
|
|
3252
|
+
".tsx",
|
|
3253
|
+
".js",
|
|
3254
|
+
".jsx",
|
|
3255
|
+
".mjs",
|
|
3256
|
+
".cjs",
|
|
3257
|
+
".mts",
|
|
3258
|
+
".cts"
|
|
3259
|
+
];
|
|
3260
|
+
});
|
|
3261
|
+
|
|
3262
|
+
// src/modules/data/json/index.ts
|
|
3263
|
+
var exports_json = {};
|
|
3264
|
+
__export(exports_json, {
|
|
3265
|
+
isJsonFile: () => isJsonFile,
|
|
3266
|
+
JsonModule: () => JsonModule,
|
|
3267
|
+
JSON_EXTENSIONS: () => JSON_EXTENSIONS,
|
|
3268
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
|
|
3269
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
|
|
3270
|
+
});
|
|
3271
|
+
import * as path10 from "path";
|
|
3272
|
+
function isJsonFile(filepath) {
|
|
3273
|
+
const ext = path10.extname(filepath).toLowerCase();
|
|
3274
|
+
return JSON_EXTENSIONS.includes(ext);
|
|
3275
|
+
}
|
|
3276
|
+
function extractJsonKeys(obj, prefix = "") {
|
|
3277
|
+
const keys = [];
|
|
3278
|
+
if (obj === null || obj === undefined) {
|
|
3279
|
+
return keys;
|
|
3280
|
+
}
|
|
3281
|
+
if (Array.isArray(obj)) {
|
|
3282
|
+
obj.forEach((item, index) => {
|
|
3283
|
+
keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
|
|
3284
|
+
});
|
|
3285
|
+
} else if (typeof obj === "object") {
|
|
3286
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
3287
|
+
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
3288
|
+
keys.push(key);
|
|
3289
|
+
keys.push(...extractJsonKeys(value, fullKey));
|
|
2754
3290
|
}
|
|
2755
|
-
const results = this.bm25Index.search(query, maxCandidates);
|
|
2756
|
-
return results.map((r) => r.id);
|
|
2757
|
-
}
|
|
2758
|
-
getAllFiles() {
|
|
2759
|
-
return Array.from(this.fileSummaries.keys());
|
|
2760
3291
|
}
|
|
2761
|
-
|
|
2762
|
-
|
|
3292
|
+
return keys;
|
|
3293
|
+
}
|
|
3294
|
+
function extractJsonKeywords(content) {
|
|
3295
|
+
try {
|
|
3296
|
+
const parsed = JSON.parse(content);
|
|
3297
|
+
const keys = extractJsonKeys(parsed);
|
|
3298
|
+
const stringValues = [];
|
|
3299
|
+
const extractStrings = (obj) => {
|
|
3300
|
+
if (typeof obj === "string") {
|
|
3301
|
+
const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
|
|
3302
|
+
stringValues.push(...words);
|
|
3303
|
+
} else if (Array.isArray(obj)) {
|
|
3304
|
+
obj.forEach(extractStrings);
|
|
3305
|
+
} else if (obj && typeof obj === "object") {
|
|
3306
|
+
Object.values(obj).forEach(extractStrings);
|
|
3307
|
+
}
|
|
3308
|
+
};
|
|
3309
|
+
extractStrings(parsed);
|
|
3310
|
+
return [...new Set([...keys, ...stringValues])];
|
|
3311
|
+
} catch {
|
|
3312
|
+
return [];
|
|
2763
3313
|
}
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
3314
|
+
}
|
|
3315
|
+
|
|
3316
|
+
class JsonModule {
|
|
3317
|
+
id = "data/json";
|
|
3318
|
+
name = "JSON Search";
|
|
3319
|
+
description = "JSON file search with structure-aware indexing";
|
|
3320
|
+
version = "1.0.0";
|
|
3321
|
+
embeddingConfig = null;
|
|
3322
|
+
symbolicIndex = null;
|
|
3323
|
+
pendingSummaries = new Map;
|
|
3324
|
+
rootDir = "";
|
|
3325
|
+
logger = undefined;
|
|
3326
|
+
async initialize(config) {
|
|
3327
|
+
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3328
|
+
this.logger = config.options?.logger;
|
|
3329
|
+
if (this.logger) {
|
|
3330
|
+
this.embeddingConfig = {
|
|
3331
|
+
...this.embeddingConfig,
|
|
3332
|
+
logger: this.logger
|
|
3333
|
+
};
|
|
2776
3334
|
}
|
|
3335
|
+
configureEmbeddings(this.embeddingConfig);
|
|
3336
|
+
this.pendingSummaries.clear();
|
|
2777
3337
|
}
|
|
2778
|
-
async
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
this.
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
3338
|
+
async indexFile(filepath, content, ctx) {
|
|
3339
|
+
if (!isJsonFile(filepath)) {
|
|
3340
|
+
return null;
|
|
3341
|
+
}
|
|
3342
|
+
this.rootDir = ctx.rootDir;
|
|
3343
|
+
const textChunks = createLineBasedChunks(content, {
|
|
3344
|
+
chunkSize: 50,
|
|
3345
|
+
overlap: 10
|
|
3346
|
+
});
|
|
3347
|
+
if (textChunks.length === 0) {
|
|
3348
|
+
return null;
|
|
3349
|
+
}
|
|
3350
|
+
const chunkContents = textChunks.map((c) => {
|
|
3351
|
+
const filename = path10.basename(filepath);
|
|
3352
|
+
return `${filename}: ${c.content}`;
|
|
3353
|
+
});
|
|
3354
|
+
const embeddings = await getEmbeddings(chunkContents);
|
|
3355
|
+
const chunks = textChunks.map((tc, i) => ({
|
|
3356
|
+
id: generateChunkId(filepath, tc.startLine, tc.endLine),
|
|
3357
|
+
content: tc.content,
|
|
3358
|
+
startLine: tc.startLine,
|
|
3359
|
+
endLine: tc.endLine,
|
|
3360
|
+
type: tc.type
|
|
3361
|
+
}));
|
|
3362
|
+
const jsonKeys = extractJsonKeys((() => {
|
|
3363
|
+
try {
|
|
3364
|
+
return JSON.parse(content);
|
|
3365
|
+
} catch {
|
|
3366
|
+
return {};
|
|
2802
3367
|
}
|
|
2803
|
-
}
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
const
|
|
2807
|
-
|
|
3368
|
+
})());
|
|
3369
|
+
const stats = await ctx.getFileStats(filepath);
|
|
3370
|
+
const currentConfig = getEmbeddingConfig();
|
|
3371
|
+
const moduleData = {
|
|
3372
|
+
embeddings,
|
|
3373
|
+
embeddingModel: currentConfig.model,
|
|
3374
|
+
jsonKeys
|
|
3375
|
+
};
|
|
3376
|
+
const keywords = extractJsonKeywords(content);
|
|
3377
|
+
const fileSummary = {
|
|
3378
|
+
filepath,
|
|
3379
|
+
chunkCount: chunks.length,
|
|
3380
|
+
chunkTypes: ["file"],
|
|
3381
|
+
keywords,
|
|
3382
|
+
exports: [],
|
|
3383
|
+
lastModified: stats.lastModified
|
|
3384
|
+
};
|
|
3385
|
+
this.pendingSummaries.set(filepath, fileSummary);
|
|
3386
|
+
return {
|
|
3387
|
+
filepath,
|
|
3388
|
+
lastModified: stats.lastModified,
|
|
3389
|
+
chunks,
|
|
3390
|
+
moduleData
|
|
3391
|
+
};
|
|
2808
3392
|
}
|
|
2809
|
-
async
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
this.
|
|
3393
|
+
async finalize(ctx) {
|
|
3394
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3395
|
+
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3396
|
+
await this.symbolicIndex.initialize();
|
|
3397
|
+
for (const [filepath, summary] of this.pendingSummaries) {
|
|
3398
|
+
this.symbolicIndex.addFile(summary);
|
|
3399
|
+
}
|
|
3400
|
+
this.symbolicIndex.buildBM25Index();
|
|
3401
|
+
await this.symbolicIndex.save();
|
|
3402
|
+
this.pendingSummaries.clear();
|
|
2814
3403
|
}
|
|
2815
|
-
async
|
|
3404
|
+
async search(query, ctx, options = {}) {
|
|
3405
|
+
const {
|
|
3406
|
+
topK = DEFAULT_TOP_K3,
|
|
3407
|
+
minScore = DEFAULT_MIN_SCORE3,
|
|
3408
|
+
filePatterns
|
|
3409
|
+
} = options;
|
|
3410
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3411
|
+
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3412
|
+
let allFiles;
|
|
2816
3413
|
try {
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
return true;
|
|
3414
|
+
await symbolicIndex.initialize();
|
|
3415
|
+
allFiles = symbolicIndex.getAllFiles();
|
|
2820
3416
|
} catch {
|
|
2821
|
-
|
|
3417
|
+
allFiles = await ctx.listIndexedFiles();
|
|
2822
3418
|
}
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
totalDocs: 0
|
|
2835
|
-
};
|
|
3419
|
+
let filesToSearch = allFiles.filter((f) => isJsonFile(f));
|
|
3420
|
+
if (filePatterns && filePatterns.length > 0) {
|
|
3421
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
3422
|
+
return filePatterns.some((pattern) => {
|
|
3423
|
+
if (pattern.startsWith("*.")) {
|
|
3424
|
+
const ext = pattern.slice(1);
|
|
3425
|
+
return filepath.endsWith(ext);
|
|
3426
|
+
}
|
|
3427
|
+
return filepath.includes(pattern);
|
|
3428
|
+
});
|
|
3429
|
+
});
|
|
2836
3430
|
}
|
|
2837
|
-
|
|
3431
|
+
const queryEmbedding = await getEmbedding(query);
|
|
3432
|
+
const bm25Index = new BM25Index;
|
|
3433
|
+
const allChunksData = [];
|
|
3434
|
+
for (const filepath of filesToSearch) {
|
|
3435
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3436
|
+
if (!fileIndex)
|
|
3437
|
+
continue;
|
|
3438
|
+
const moduleData = fileIndex.moduleData;
|
|
3439
|
+
if (!moduleData?.embeddings)
|
|
3440
|
+
continue;
|
|
3441
|
+
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3442
|
+
const chunk = fileIndex.chunks[i];
|
|
3443
|
+
const embedding = moduleData.embeddings[i];
|
|
3444
|
+
if (!embedding)
|
|
3445
|
+
continue;
|
|
3446
|
+
allChunksData.push({
|
|
3447
|
+
filepath: fileIndex.filepath,
|
|
3448
|
+
chunk,
|
|
3449
|
+
embedding
|
|
3450
|
+
});
|
|
3451
|
+
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3452
|
+
}
|
|
3453
|
+
}
|
|
3454
|
+
const bm25Results = bm25Index.search(query, topK * 3);
|
|
3455
|
+
const bm25Scores = new Map;
|
|
3456
|
+
for (const result of bm25Results) {
|
|
3457
|
+
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3458
|
+
}
|
|
3459
|
+
const queryTerms = extractQueryTerms(query);
|
|
3460
|
+
const results = [];
|
|
3461
|
+
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3462
|
+
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3463
|
+
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3464
|
+
const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
|
|
3465
|
+
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3466
|
+
results.push({
|
|
3467
|
+
filepath,
|
|
3468
|
+
chunk,
|
|
3469
|
+
score: hybridScore,
|
|
3470
|
+
moduleId: this.id,
|
|
3471
|
+
context: {
|
|
3472
|
+
semanticScore,
|
|
3473
|
+
bm25Score
|
|
3474
|
+
}
|
|
3475
|
+
});
|
|
3476
|
+
}
|
|
3477
|
+
}
|
|
3478
|
+
results.sort((a, b) => b.score - a.score);
|
|
3479
|
+
return results.slice(0, topK);
|
|
2838
3480
|
}
|
|
2839
3481
|
}
|
|
2840
|
-
var
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
init_symbolicIndex();
|
|
3482
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS;
|
|
3483
|
+
var init_json = __esm(() => {
|
|
3484
|
+
init_embeddings();
|
|
3485
|
+
init_services();
|
|
3486
|
+
init_config2();
|
|
3487
|
+
init_storage();
|
|
3488
|
+
JSON_EXTENSIONS = [".json"];
|
|
2848
3489
|
});
|
|
2849
3490
|
|
|
2850
|
-
// src/modules/
|
|
2851
|
-
var
|
|
2852
|
-
__export(
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
3491
|
+
// src/modules/docs/markdown/index.ts
|
|
3492
|
+
var exports_markdown = {};
|
|
3493
|
+
__export(exports_markdown, {
|
|
3494
|
+
isMarkdownFile: () => isMarkdownFile,
|
|
3495
|
+
MarkdownModule: () => MarkdownModule,
|
|
3496
|
+
MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
|
|
3497
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
|
|
3498
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
|
|
2856
3499
|
});
|
|
2857
|
-
import * as
|
|
2858
|
-
function
|
|
2859
|
-
const
|
|
2860
|
-
|
|
2861
|
-
if (hasDocumentationTerm) {
|
|
2862
|
-
return "documentation";
|
|
2863
|
-
}
|
|
2864
|
-
if (hasImplementationTerm) {
|
|
2865
|
-
return "implementation";
|
|
2866
|
-
}
|
|
2867
|
-
return "neutral";
|
|
3500
|
+
import * as path11 from "path";
|
|
3501
|
+
function isMarkdownFile(filepath) {
|
|
3502
|
+
const ext = path11.extname(filepath).toLowerCase();
|
|
3503
|
+
return MARKDOWN_EXTENSIONS.includes(ext);
|
|
2868
3504
|
}
|
|
2869
|
-
function
|
|
2870
|
-
const
|
|
2871
|
-
|
|
2872
|
-
const
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
3505
|
+
function parseMarkdownSections(content) {
|
|
3506
|
+
const lines = content.split(`
|
|
3507
|
+
`);
|
|
3508
|
+
const sections = [];
|
|
3509
|
+
let currentSection = null;
|
|
3510
|
+
let currentContent = [];
|
|
3511
|
+
let startLine = 1;
|
|
3512
|
+
for (let i = 0;i < lines.length; i++) {
|
|
3513
|
+
const line = lines[i];
|
|
3514
|
+
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
|
|
3515
|
+
if (headingMatch) {
|
|
3516
|
+
if (currentSection) {
|
|
3517
|
+
currentSection.content = currentContent.join(`
|
|
3518
|
+
`).trim();
|
|
3519
|
+
currentSection.endLine = i;
|
|
3520
|
+
if (currentSection.content || currentSection.heading) {
|
|
3521
|
+
sections.push(currentSection);
|
|
3522
|
+
}
|
|
3523
|
+
} else if (currentContent.length > 0) {
|
|
3524
|
+
sections.push({
|
|
3525
|
+
heading: "",
|
|
3526
|
+
level: 0,
|
|
3527
|
+
content: currentContent.join(`
|
|
3528
|
+
`).trim(),
|
|
3529
|
+
startLine: 1,
|
|
3530
|
+
endLine: i
|
|
3531
|
+
});
|
|
3532
|
+
}
|
|
3533
|
+
currentSection = {
|
|
3534
|
+
heading: headingMatch[2],
|
|
3535
|
+
level: headingMatch[1].length,
|
|
3536
|
+
content: "",
|
|
3537
|
+
startLine: i + 1,
|
|
3538
|
+
endLine: lines.length
|
|
3539
|
+
};
|
|
3540
|
+
currentContent = [];
|
|
3541
|
+
} else {
|
|
3542
|
+
currentContent.push(line);
|
|
2877
3543
|
}
|
|
2878
|
-
return 0;
|
|
2879
3544
|
}
|
|
2880
|
-
if (
|
|
2881
|
-
|
|
2882
|
-
|
|
3545
|
+
if (currentSection) {
|
|
3546
|
+
currentSection.content = currentContent.join(`
|
|
3547
|
+
`).trim();
|
|
3548
|
+
currentSection.endLine = lines.length;
|
|
3549
|
+
if (currentSection.content || currentSection.heading) {
|
|
3550
|
+
sections.push(currentSection);
|
|
2883
3551
|
}
|
|
2884
|
-
|
|
3552
|
+
} else if (currentContent.length > 0) {
|
|
3553
|
+
sections.push({
|
|
3554
|
+
heading: "",
|
|
3555
|
+
level: 0,
|
|
3556
|
+
content: currentContent.join(`
|
|
3557
|
+
`).trim(),
|
|
3558
|
+
startLine: 1,
|
|
3559
|
+
endLine: lines.length
|
|
3560
|
+
});
|
|
2885
3561
|
}
|
|
2886
|
-
return
|
|
3562
|
+
return sections;
|
|
2887
3563
|
}
|
|
2888
|
-
function
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
2892
|
-
|
|
2893
|
-
|
|
2894
|
-
|
|
2895
|
-
|
|
2896
|
-
|
|
2897
|
-
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
|
|
3564
|
+
function extractMarkdownKeywords(content) {
|
|
3565
|
+
const keywords = [];
|
|
3566
|
+
const headingMatches = content.matchAll(/^#{1,6}\s+(.+)$/gm);
|
|
3567
|
+
for (const match of headingMatches) {
|
|
3568
|
+
const heading = match[1].toLowerCase();
|
|
3569
|
+
const words = heading.split(/\s+/).filter((w) => w.length > 2);
|
|
3570
|
+
keywords.push(...words);
|
|
3571
|
+
}
|
|
3572
|
+
const emphasisMatches = content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g);
|
|
3573
|
+
for (const match of emphasisMatches) {
|
|
3574
|
+
const text = (match[1] || match[2] || "").toLowerCase();
|
|
3575
|
+
const words = text.split(/\s+/).filter((w) => w.length > 2);
|
|
3576
|
+
keywords.push(...words);
|
|
3577
|
+
}
|
|
3578
|
+
const codeMatches = content.matchAll(/`([^`]+)`/g);
|
|
3579
|
+
for (const match of codeMatches) {
|
|
3580
|
+
const code = match[1].toLowerCase();
|
|
3581
|
+
if (code.length > 2 && code.length < 50) {
|
|
3582
|
+
keywords.push(code);
|
|
3583
|
+
}
|
|
2904
3584
|
}
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
3585
|
+
const linkMatches = content.matchAll(/\[([^\]]+)\]/g);
|
|
3586
|
+
for (const match of linkMatches) {
|
|
3587
|
+
const text = match[1].toLowerCase();
|
|
3588
|
+
const words = text.split(/\s+/).filter((w) => w.length > 2);
|
|
3589
|
+
keywords.push(...words);
|
|
3590
|
+
}
|
|
3591
|
+
return [...new Set(keywords)];
|
|
2908
3592
|
}
|
|
2909
3593
|
|
|
2910
|
-
class
|
|
2911
|
-
id = "
|
|
2912
|
-
name = "
|
|
2913
|
-
description = "
|
|
3594
|
+
class MarkdownModule {
|
|
3595
|
+
id = "docs/markdown";
|
|
3596
|
+
name = "Markdown Search";
|
|
3597
|
+
description = "Markdown documentation search with section-aware indexing";
|
|
2914
3598
|
version = "1.0.0";
|
|
2915
3599
|
embeddingConfig = null;
|
|
2916
3600
|
symbolicIndex = null;
|
|
@@ -2930,66 +3614,53 @@ class TypeScriptModule {
|
|
|
2930
3614
|
this.pendingSummaries.clear();
|
|
2931
3615
|
}
|
|
2932
3616
|
async indexFile(filepath, content, ctx) {
|
|
3617
|
+
if (!isMarkdownFile(filepath)) {
|
|
3618
|
+
return null;
|
|
3619
|
+
}
|
|
2933
3620
|
this.rootDir = ctx.rootDir;
|
|
2934
|
-
const
|
|
2935
|
-
if (
|
|
3621
|
+
const sections = parseMarkdownSections(content);
|
|
3622
|
+
if (sections.length === 0) {
|
|
2936
3623
|
return null;
|
|
2937
3624
|
}
|
|
2938
|
-
const
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3625
|
+
const chunkContents = sections.map((s) => {
|
|
3626
|
+
const filename = path11.basename(filepath);
|
|
3627
|
+
const headingContext = s.heading ? `${s.heading}: ` : "";
|
|
3628
|
+
return `${filename} ${headingContext}${s.content}`;
|
|
2943
3629
|
});
|
|
2944
3630
|
const embeddings = await getEmbeddings(chunkContents);
|
|
2945
|
-
const chunks =
|
|
2946
|
-
id: generateChunkId(filepath,
|
|
2947
|
-
content:
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
|
|
3631
|
+
const chunks = sections.map((section, i) => ({
|
|
3632
|
+
id: generateChunkId(filepath, section.startLine, section.endLine),
|
|
3633
|
+
content: section.heading ? `## ${section.heading}
|
|
3634
|
+
|
|
3635
|
+
${section.content}` : section.content,
|
|
3636
|
+
startLine: section.startLine,
|
|
3637
|
+
endLine: section.endLine,
|
|
3638
|
+
type: "block",
|
|
3639
|
+
name: section.heading || undefined
|
|
2954
3640
|
}));
|
|
2955
|
-
const
|
|
3641
|
+
const headings = sections.filter((s) => s.heading).map((s) => s.heading);
|
|
2956
3642
|
const stats = await ctx.getFileStats(filepath);
|
|
2957
3643
|
const currentConfig = getEmbeddingConfig();
|
|
2958
3644
|
const moduleData = {
|
|
2959
3645
|
embeddings,
|
|
2960
|
-
embeddingModel: currentConfig.model
|
|
3646
|
+
embeddingModel: currentConfig.model,
|
|
3647
|
+
headings
|
|
2961
3648
|
};
|
|
2962
|
-
const
|
|
2963
|
-
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2964
|
-
];
|
|
2965
|
-
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2966
|
-
const allKeywords = new Set;
|
|
2967
|
-
for (const pc of parsedChunks) {
|
|
2968
|
-
const keywords = extractKeywords(pc.content, pc.name);
|
|
2969
|
-
keywords.forEach((k) => allKeywords.add(k));
|
|
2970
|
-
}
|
|
2971
|
-
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3649
|
+
const keywords = extractMarkdownKeywords(content);
|
|
2972
3650
|
const fileSummary = {
|
|
2973
3651
|
filepath,
|
|
2974
3652
|
chunkCount: chunks.length,
|
|
2975
|
-
chunkTypes,
|
|
2976
|
-
keywords
|
|
2977
|
-
exports,
|
|
2978
|
-
lastModified: stats.lastModified
|
|
2979
|
-
pathContext: {
|
|
2980
|
-
segments: pathContext.segments,
|
|
2981
|
-
layer: pathContext.layer,
|
|
2982
|
-
domain: pathContext.domain,
|
|
2983
|
-
depth: pathContext.depth
|
|
2984
|
-
}
|
|
3653
|
+
chunkTypes: ["block"],
|
|
3654
|
+
keywords,
|
|
3655
|
+
exports: headings,
|
|
3656
|
+
lastModified: stats.lastModified
|
|
2985
3657
|
};
|
|
2986
3658
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
2987
3659
|
return {
|
|
2988
3660
|
filepath,
|
|
2989
3661
|
lastModified: stats.lastModified,
|
|
2990
3662
|
chunks,
|
|
2991
|
-
moduleData
|
|
2992
|
-
references
|
|
3663
|
+
moduleData
|
|
2993
3664
|
};
|
|
2994
3665
|
}
|
|
2995
3666
|
async finalize(ctx) {
|
|
@@ -3005,8 +3676,8 @@ class TypeScriptModule {
|
|
|
3005
3676
|
}
|
|
3006
3677
|
async search(query, ctx, options = {}) {
|
|
3007
3678
|
const {
|
|
3008
|
-
topK =
|
|
3009
|
-
minScore =
|
|
3679
|
+
topK = DEFAULT_TOP_K4,
|
|
3680
|
+
minScore = DEFAULT_MIN_SCORE4,
|
|
3010
3681
|
filePatterns
|
|
3011
3682
|
} = options;
|
|
3012
3683
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
@@ -3018,9 +3689,9 @@ class TypeScriptModule {
|
|
|
3018
3689
|
} catch {
|
|
3019
3690
|
allFiles = await ctx.listIndexedFiles();
|
|
3020
3691
|
}
|
|
3021
|
-
let filesToSearch = allFiles;
|
|
3692
|
+
let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
|
|
3022
3693
|
if (filePatterns && filePatterns.length > 0) {
|
|
3023
|
-
filesToSearch =
|
|
3694
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
3024
3695
|
return filePatterns.some((pattern) => {
|
|
3025
3696
|
if (pattern.startsWith("*.")) {
|
|
3026
3697
|
const ext = pattern.slice(1);
|
|
@@ -3058,36 +3729,24 @@ class TypeScriptModule {
|
|
|
3058
3729
|
for (const result of bm25Results) {
|
|
3059
3730
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3060
3731
|
}
|
|
3061
|
-
const queryTerms = query
|
|
3062
|
-
const pathBoosts = new Map;
|
|
3063
|
-
for (const filepath of filesToSearch) {
|
|
3064
|
-
const summary = symbolicIndex.getFileSummary(filepath);
|
|
3065
|
-
if (summary?.pathContext) {
|
|
3066
|
-
let boost = 0;
|
|
3067
|
-
const ctx2 = summary.pathContext;
|
|
3068
|
-
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
3069
|
-
boost += 0.1;
|
|
3070
|
-
}
|
|
3071
|
-
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
3072
|
-
boost += 0.05;
|
|
3073
|
-
}
|
|
3074
|
-
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
3075
|
-
if (segmentMatch) {
|
|
3076
|
-
boost += 0.05;
|
|
3077
|
-
}
|
|
3078
|
-
pathBoosts.set(filepath, boost);
|
|
3079
|
-
}
|
|
3080
|
-
}
|
|
3732
|
+
const queryTerms = extractQueryTerms(query);
|
|
3081
3733
|
const results = [];
|
|
3082
3734
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3083
3735
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3084
3736
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3085
|
-
|
|
3086
|
-
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
|
|
3090
|
-
|
|
3737
|
+
let docBoost = 0;
|
|
3738
|
+
if (queryTerms.some((t) => [
|
|
3739
|
+
"docs",
|
|
3740
|
+
"documentation",
|
|
3741
|
+
"readme",
|
|
3742
|
+
"guide",
|
|
3743
|
+
"how",
|
|
3744
|
+
"what",
|
|
3745
|
+
"explain"
|
|
3746
|
+
].includes(t))) {
|
|
3747
|
+
docBoost = 0.05;
|
|
3748
|
+
}
|
|
3749
|
+
const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
3091
3750
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3092
3751
|
results.push({
|
|
3093
3752
|
filepath,
|
|
@@ -3097,10 +3756,7 @@ class TypeScriptModule {
|
|
|
3097
3756
|
context: {
|
|
3098
3757
|
semanticScore,
|
|
3099
3758
|
bm25Score,
|
|
3100
|
-
|
|
3101
|
-
fileTypeBoost,
|
|
3102
|
-
chunkTypeBoost,
|
|
3103
|
-
exportBoost
|
|
3759
|
+
docBoost
|
|
3104
3760
|
}
|
|
3105
3761
|
});
|
|
3106
3762
|
}
|
|
@@ -3108,84 +3764,14 @@ class TypeScriptModule {
|
|
|
3108
3764
|
results.sort((a, b) => b.score - a.score);
|
|
3109
3765
|
return results.slice(0, topK);
|
|
3110
3766
|
}
|
|
3111
|
-
extractReferences(content, filepath) {
|
|
3112
|
-
const references = [];
|
|
3113
|
-
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
3114
|
-
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
3115
|
-
let match;
|
|
3116
|
-
while ((match = importRegex.exec(content)) !== null) {
|
|
3117
|
-
const importPath = match[1];
|
|
3118
|
-
if (importPath.startsWith(".")) {
|
|
3119
|
-
const dir = path8.dirname(filepath);
|
|
3120
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
3121
|
-
references.push(resolved);
|
|
3122
|
-
}
|
|
3123
|
-
}
|
|
3124
|
-
while ((match = requireRegex.exec(content)) !== null) {
|
|
3125
|
-
const importPath = match[1];
|
|
3126
|
-
if (importPath.startsWith(".")) {
|
|
3127
|
-
const dir = path8.dirname(filepath);
|
|
3128
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
3129
|
-
references.push(resolved);
|
|
3130
|
-
}
|
|
3131
|
-
}
|
|
3132
|
-
return references;
|
|
3133
|
-
}
|
|
3134
3767
|
}
|
|
3135
|
-
var
|
|
3136
|
-
var
|
|
3768
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
|
|
3769
|
+
var init_markdown = __esm(() => {
|
|
3137
3770
|
init_embeddings();
|
|
3771
|
+
init_services();
|
|
3138
3772
|
init_config2();
|
|
3139
|
-
init_parseCode();
|
|
3140
3773
|
init_storage();
|
|
3141
|
-
|
|
3142
|
-
init_keywords();
|
|
3143
|
-
IMPLEMENTATION_TERMS = [
|
|
3144
|
-
"function",
|
|
3145
|
-
"method",
|
|
3146
|
-
"class",
|
|
3147
|
-
"interface",
|
|
3148
|
-
"implement",
|
|
3149
|
-
"implementation",
|
|
3150
|
-
"endpoint",
|
|
3151
|
-
"route",
|
|
3152
|
-
"handler",
|
|
3153
|
-
"controller",
|
|
3154
|
-
"module",
|
|
3155
|
-
"code"
|
|
3156
|
-
];
|
|
3157
|
-
DOCUMENTATION_TERMS = [
|
|
3158
|
-
"documentation",
|
|
3159
|
-
"docs",
|
|
3160
|
-
"guide",
|
|
3161
|
-
"tutorial",
|
|
3162
|
-
"readme",
|
|
3163
|
-
"how",
|
|
3164
|
-
"what",
|
|
3165
|
-
"why",
|
|
3166
|
-
"explain",
|
|
3167
|
-
"overview",
|
|
3168
|
-
"getting",
|
|
3169
|
-
"started",
|
|
3170
|
-
"requirements",
|
|
3171
|
-
"setup",
|
|
3172
|
-
"install",
|
|
3173
|
-
"configure",
|
|
3174
|
-
"configuration"
|
|
3175
|
-
];
|
|
3176
|
-
SOURCE_CODE_EXTENSIONS = [
|
|
3177
|
-
".ts",
|
|
3178
|
-
".tsx",
|
|
3179
|
-
".js",
|
|
3180
|
-
".jsx",
|
|
3181
|
-
".mjs",
|
|
3182
|
-
".cjs",
|
|
3183
|
-
".py",
|
|
3184
|
-
".go",
|
|
3185
|
-
".rs",
|
|
3186
|
-
".java"
|
|
3187
|
-
];
|
|
3188
|
-
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
3774
|
+
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
3189
3775
|
});
|
|
3190
3776
|
|
|
3191
3777
|
// src/modules/registry.ts
|
|
@@ -3210,8 +3796,12 @@ class ModuleRegistryImpl {
|
|
|
3210
3796
|
async function registerBuiltInModules() {
|
|
3211
3797
|
const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
|
|
3212
3798
|
const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
|
|
3799
|
+
const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
|
|
3800
|
+
const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
|
|
3213
3801
|
registry.register(new CoreModule2);
|
|
3214
3802
|
registry.register(new TypeScriptModule2);
|
|
3803
|
+
registry.register(new JsonModule2);
|
|
3804
|
+
registry.register(new MarkdownModule2);
|
|
3215
3805
|
}
|
|
3216
3806
|
var registry;
|
|
3217
3807
|
var init_registry = __esm(() => {
|
|
@@ -3219,13 +3809,13 @@ var init_registry = __esm(() => {
|
|
|
3219
3809
|
});
|
|
3220
3810
|
|
|
3221
3811
|
// src/infrastructure/introspection/projectDetector.ts
|
|
3222
|
-
import * as
|
|
3812
|
+
import * as path12 from "path";
|
|
3223
3813
|
import * as fs4 from "fs/promises";
|
|
3224
3814
|
async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
3225
3815
|
if (depth > MAX_SCAN_DEPTH)
|
|
3226
3816
|
return [];
|
|
3227
3817
|
const results = [];
|
|
3228
|
-
const fullDir = currentDir ?
|
|
3818
|
+
const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
|
|
3229
3819
|
try {
|
|
3230
3820
|
const entries = await fs4.readdir(fullDir, { withFileTypes: true });
|
|
3231
3821
|
const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
|
|
@@ -3248,10 +3838,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3248
3838
|
}
|
|
3249
3839
|
async function parsePackageJson(rootDir, relativePath) {
|
|
3250
3840
|
try {
|
|
3251
|
-
const packageJsonPath =
|
|
3841
|
+
const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
|
|
3252
3842
|
const content = await fs4.readFile(packageJsonPath, "utf-8");
|
|
3253
3843
|
const pkg = JSON.parse(content);
|
|
3254
|
-
const name = pkg.name ||
|
|
3844
|
+
const name = pkg.name || path12.basename(relativePath);
|
|
3255
3845
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
3256
3846
|
let type = "unknown";
|
|
3257
3847
|
if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
|
|
@@ -3296,7 +3886,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3296
3886
|
for (const pattern of monorepoPatterns) {
|
|
3297
3887
|
if (!dirNames.includes(pattern))
|
|
3298
3888
|
continue;
|
|
3299
|
-
const patternDir =
|
|
3889
|
+
const patternDir = path12.join(rootDir, pattern);
|
|
3300
3890
|
try {
|
|
3301
3891
|
const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
|
|
3302
3892
|
for (const subDir of subDirs) {
|
|
@@ -3327,7 +3917,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3327
3917
|
}
|
|
3328
3918
|
let rootType = "unknown";
|
|
3329
3919
|
try {
|
|
3330
|
-
const rootPkgPath =
|
|
3920
|
+
const rootPkgPath = path12.join(rootDir, "package.json");
|
|
3331
3921
|
const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
|
|
3332
3922
|
if (rootPkg.workspaces)
|
|
3333
3923
|
isMonorepo = true;
|
|
@@ -3367,7 +3957,7 @@ var init_projectDetector = __esm(() => {
|
|
|
3367
3957
|
});
|
|
3368
3958
|
|
|
3369
3959
|
// src/infrastructure/introspection/IntrospectionIndex.ts
|
|
3370
|
-
import * as
|
|
3960
|
+
import * as path13 from "path";
|
|
3371
3961
|
import * as fs5 from "fs/promises";
|
|
3372
3962
|
|
|
3373
3963
|
class IntrospectionIndex {
|
|
@@ -3381,7 +3971,7 @@ class IntrospectionIndex {
|
|
|
3381
3971
|
async initialize() {
|
|
3382
3972
|
this.structure = await detectProjectStructure(this.rootDir);
|
|
3383
3973
|
try {
|
|
3384
|
-
const configPath =
|
|
3974
|
+
const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
|
|
3385
3975
|
const configContent = await fs5.readFile(configPath, "utf-8");
|
|
3386
3976
|
const config = JSON.parse(configContent);
|
|
3387
3977
|
this.config = config.introspection || {};
|
|
@@ -3421,28 +4011,28 @@ class IntrospectionIndex {
|
|
|
3421
4011
|
}
|
|
3422
4012
|
}
|
|
3423
4013
|
async save(config) {
|
|
3424
|
-
const introDir =
|
|
4014
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3425
4015
|
await fs5.mkdir(introDir, { recursive: true });
|
|
3426
|
-
const projectPath =
|
|
4016
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3427
4017
|
await fs5.writeFile(projectPath, JSON.stringify({
|
|
3428
4018
|
version: "1.0.0",
|
|
3429
4019
|
lastUpdated: new Date().toISOString(),
|
|
3430
4020
|
structure: this.structure
|
|
3431
4021
|
}, null, 2));
|
|
3432
4022
|
for (const [filepath, intro] of this.files) {
|
|
3433
|
-
const introFilePath =
|
|
3434
|
-
await fs5.mkdir(
|
|
4023
|
+
const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
|
|
4024
|
+
await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
|
|
3435
4025
|
await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
|
|
3436
4026
|
}
|
|
3437
4027
|
}
|
|
3438
4028
|
async load(config) {
|
|
3439
|
-
const introDir =
|
|
4029
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3440
4030
|
try {
|
|
3441
|
-
const projectPath =
|
|
4031
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3442
4032
|
const projectContent = await fs5.readFile(projectPath, "utf-8");
|
|
3443
4033
|
const projectData = JSON.parse(projectContent);
|
|
3444
4034
|
this.structure = projectData.structure;
|
|
3445
|
-
await this.loadFilesRecursive(
|
|
4035
|
+
await this.loadFilesRecursive(path13.join(introDir, "files"), "");
|
|
3446
4036
|
} catch {
|
|
3447
4037
|
this.structure = null;
|
|
3448
4038
|
this.files.clear();
|
|
@@ -3452,7 +4042,7 @@ class IntrospectionIndex {
|
|
|
3452
4042
|
try {
|
|
3453
4043
|
const entries = await fs5.readdir(basePath, { withFileTypes: true });
|
|
3454
4044
|
for (const entry of entries) {
|
|
3455
|
-
const entryPath =
|
|
4045
|
+
const entryPath = path13.join(basePath, entry.name);
|
|
3456
4046
|
const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
3457
4047
|
if (entry.isDirectory()) {
|
|
3458
4048
|
await this.loadFilesRecursive(entryPath, relativePath);
|
|
@@ -3483,7 +4073,7 @@ var init_introspection2 = __esm(() => {
|
|
|
3483
4073
|
|
|
3484
4074
|
// src/app/indexer/watcher.ts
|
|
3485
4075
|
import { watch } from "chokidar";
|
|
3486
|
-
import * as
|
|
4076
|
+
import * as path14 from "path";
|
|
3487
4077
|
async function watchDirectory(rootDir, options = {}) {
|
|
3488
4078
|
const {
|
|
3489
4079
|
debounceMs = DEFAULT_DEBOUNCE_MS,
|
|
@@ -3494,7 +4084,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3494
4084
|
onFileChange,
|
|
3495
4085
|
onError
|
|
3496
4086
|
} = options;
|
|
3497
|
-
rootDir =
|
|
4087
|
+
rootDir = path14.resolve(rootDir);
|
|
3498
4088
|
const config = await loadConfig(rootDir);
|
|
3499
4089
|
const indexLocation = getIndexLocation(rootDir);
|
|
3500
4090
|
const validExtensions = new Set(config.extensions);
|
|
@@ -3504,7 +4094,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3504
4094
|
"**/.git/**"
|
|
3505
4095
|
];
|
|
3506
4096
|
function shouldWatchFile(filepath) {
|
|
3507
|
-
const ext =
|
|
4097
|
+
const ext = path14.extname(filepath);
|
|
3508
4098
|
return validExtensions.has(ext);
|
|
3509
4099
|
}
|
|
3510
4100
|
let isRunning = true;
|
|
@@ -3586,7 +4176,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3586
4176
|
function handleFileEvent(event, filepath) {
|
|
3587
4177
|
if (!isRunning)
|
|
3588
4178
|
return;
|
|
3589
|
-
const relativePath =
|
|
4179
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
3590
4180
|
if (!shouldWatchFile(filepath)) {
|
|
3591
4181
|
return;
|
|
3592
4182
|
}
|
|
@@ -3665,15 +4255,36 @@ __export(exports_indexer, {
|
|
|
3665
4255
|
});
|
|
3666
4256
|
import { glob } from "glob";
|
|
3667
4257
|
import * as fs6 from "fs/promises";
|
|
3668
|
-
import * as
|
|
4258
|
+
import * as path15 from "path";
|
|
4259
|
+
async function parallelMap(items, processor, concurrency) {
|
|
4260
|
+
const results = new Array(items.length);
|
|
4261
|
+
let nextIndex = 0;
|
|
4262
|
+
async function worker() {
|
|
4263
|
+
while (nextIndex < items.length) {
|
|
4264
|
+
const index = nextIndex++;
|
|
4265
|
+
const item = items[index];
|
|
4266
|
+
try {
|
|
4267
|
+
const value = await processor(item, index);
|
|
4268
|
+
results[index] = { success: true, value };
|
|
4269
|
+
} catch (error) {
|
|
4270
|
+
results[index] = { success: false, error };
|
|
4271
|
+
}
|
|
4272
|
+
}
|
|
4273
|
+
}
|
|
4274
|
+
const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
|
|
4275
|
+
await Promise.all(workers);
|
|
4276
|
+
return results;
|
|
4277
|
+
}
|
|
3669
4278
|
async function indexDirectory(rootDir, options = {}) {
|
|
3670
4279
|
const verbose = options.verbose ?? false;
|
|
3671
4280
|
const quiet = options.quiet ?? false;
|
|
4281
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
|
|
3672
4282
|
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
3673
|
-
rootDir =
|
|
4283
|
+
rootDir = path15.resolve(rootDir);
|
|
3674
4284
|
const location = getIndexLocation(rootDir);
|
|
3675
4285
|
logger.info(`Indexing directory: ${rootDir}`);
|
|
3676
4286
|
logger.info(`Index location: ${location.indexDir}`);
|
|
4287
|
+
logger.debug(`Concurrency: ${concurrency}`);
|
|
3677
4288
|
const config = await loadConfig(rootDir);
|
|
3678
4289
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3679
4290
|
await introspection.initialize();
|
|
@@ -3709,7 +4320,7 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3709
4320
|
};
|
|
3710
4321
|
await module.initialize(configWithOverrides);
|
|
3711
4322
|
}
|
|
3712
|
-
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger);
|
|
4323
|
+
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
|
|
3713
4324
|
results.push(result);
|
|
3714
4325
|
if (module.finalize) {
|
|
3715
4326
|
logger.info(`[${module.name}] Building secondary indexes...`);
|
|
@@ -3717,11 +4328,11 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3717
4328
|
rootDir,
|
|
3718
4329
|
config,
|
|
3719
4330
|
readFile: async (filepath) => {
|
|
3720
|
-
const fullPath =
|
|
4331
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3721
4332
|
return fs6.readFile(fullPath, "utf-8");
|
|
3722
4333
|
},
|
|
3723
4334
|
getFileStats: async (filepath) => {
|
|
3724
|
-
const fullPath =
|
|
4335
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3725
4336
|
const stats = await fs6.stat(fullPath);
|
|
3726
4337
|
return { lastModified: stats.mtime.toISOString() };
|
|
3727
4338
|
}
|
|
@@ -3752,7 +4363,7 @@ async function deleteIndex(rootDir) {
|
|
|
3752
4363
|
} catch {}
|
|
3753
4364
|
}
|
|
3754
4365
|
async function resetIndex(rootDir) {
|
|
3755
|
-
rootDir =
|
|
4366
|
+
rootDir = path15.resolve(rootDir);
|
|
3756
4367
|
const status = await getIndexStatus(rootDir);
|
|
3757
4368
|
if (!status.exists) {
|
|
3758
4369
|
throw new Error(`No index found for ${rootDir}`);
|
|
@@ -3767,7 +4378,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3767
4378
|
const verbose = options.verbose ?? false;
|
|
3768
4379
|
const quiet = options.quiet ?? false;
|
|
3769
4380
|
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
3770
|
-
rootDir =
|
|
4381
|
+
rootDir = path15.resolve(rootDir);
|
|
3771
4382
|
const status = await getIndexStatus(rootDir);
|
|
3772
4383
|
if (!status.exists) {
|
|
3773
4384
|
logger.info(`No index found. Creating index...
|
|
@@ -3794,7 +4405,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3794
4405
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3795
4406
|
await introspection.initialize();
|
|
3796
4407
|
const currentFiles = await findFiles(rootDir, config);
|
|
3797
|
-
const currentFileSet = new Set(currentFiles.map((f) =>
|
|
4408
|
+
const currentFileSet = new Set(currentFiles.map((f) => path15.relative(rootDir, f)));
|
|
3798
4409
|
let totalIndexed = 0;
|
|
3799
4410
|
let totalRemoved = 0;
|
|
3800
4411
|
let totalUnchanged = 0;
|
|
@@ -3824,11 +4435,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3824
4435
|
}
|
|
3825
4436
|
for (const filepath of filesToRemove) {
|
|
3826
4437
|
logger.debug(` Removing stale: ${filepath}`);
|
|
3827
|
-
const indexFilePath =
|
|
4438
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3828
4439
|
try {
|
|
3829
4440
|
await fs6.unlink(indexFilePath);
|
|
3830
4441
|
} catch {}
|
|
3831
|
-
const symbolicFilePath =
|
|
4442
|
+
const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3832
4443
|
try {
|
|
3833
4444
|
await fs6.unlink(symbolicFilePath);
|
|
3834
4445
|
} catch {}
|
|
@@ -3839,11 +4450,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3839
4450
|
rootDir,
|
|
3840
4451
|
config,
|
|
3841
4452
|
readFile: async (filepath) => {
|
|
3842
|
-
const fullPath =
|
|
4453
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3843
4454
|
return fs6.readFile(fullPath, "utf-8");
|
|
3844
4455
|
},
|
|
3845
4456
|
getFileStats: async (filepath) => {
|
|
3846
|
-
const fullPath =
|
|
4457
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3847
4458
|
const stats = await fs6.stat(fullPath);
|
|
3848
4459
|
return { lastModified: stats.mtime.toISOString() };
|
|
3849
4460
|
},
|
|
@@ -3852,7 +4463,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3852
4463
|
const totalFiles = currentFiles.length;
|
|
3853
4464
|
for (let i = 0;i < currentFiles.length; i++) {
|
|
3854
4465
|
const filepath = currentFiles[i];
|
|
3855
|
-
const relativePath =
|
|
4466
|
+
const relativePath = path15.relative(rootDir, filepath);
|
|
3856
4467
|
const progress = `[${i + 1}/${totalFiles}]`;
|
|
3857
4468
|
try {
|
|
3858
4469
|
const stats = await fs6.stat(filepath);
|
|
@@ -3903,7 +4514,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3903
4514
|
unchanged: totalUnchanged
|
|
3904
4515
|
};
|
|
3905
4516
|
}
|
|
3906
|
-
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger) {
|
|
4517
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
|
|
3907
4518
|
const result = {
|
|
3908
4519
|
moduleId: module.id,
|
|
3909
4520
|
indexed: 0,
|
|
@@ -3912,7 +4523,7 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3912
4523
|
};
|
|
3913
4524
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
3914
4525
|
const indexPath = getModuleIndexPath(rootDir, module.id, config);
|
|
3915
|
-
const currentFileSet = new Set(files.map((f) =>
|
|
4526
|
+
const currentFileSet = new Set(files.map((f) => path15.relative(rootDir, f)));
|
|
3916
4527
|
const filesToRemove = [];
|
|
3917
4528
|
for (const filepath of Object.keys(manifest.files)) {
|
|
3918
4529
|
if (!currentFileSet.has(filepath)) {
|
|
@@ -3923,11 +4534,11 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3923
4534
|
logger.info(` Removing ${filesToRemove.length} stale entries...`);
|
|
3924
4535
|
for (const filepath of filesToRemove) {
|
|
3925
4536
|
logger.debug(` Removing: ${filepath}`);
|
|
3926
|
-
const indexFilePath =
|
|
4537
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3927
4538
|
try {
|
|
3928
4539
|
await fs6.unlink(indexFilePath);
|
|
3929
4540
|
} catch {}
|
|
3930
|
-
const symbolicFilePath =
|
|
4541
|
+
const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3931
4542
|
try {
|
|
3932
4543
|
await fs6.unlink(symbolicFilePath);
|
|
3933
4544
|
} catch {}
|
|
@@ -3939,52 +4550,76 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3939
4550
|
rootDir,
|
|
3940
4551
|
config,
|
|
3941
4552
|
readFile: async (filepath) => {
|
|
3942
|
-
const fullPath =
|
|
4553
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3943
4554
|
return fs6.readFile(fullPath, "utf-8");
|
|
3944
4555
|
},
|
|
3945
4556
|
getFileStats: async (filepath) => {
|
|
3946
|
-
const fullPath =
|
|
4557
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3947
4558
|
const stats = await fs6.stat(fullPath);
|
|
3948
4559
|
return { lastModified: stats.mtime.toISOString() };
|
|
3949
4560
|
},
|
|
3950
4561
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3951
4562
|
};
|
|
3952
4563
|
const totalFiles = files.length;
|
|
3953
|
-
|
|
3954
|
-
|
|
3955
|
-
const relativePath =
|
|
3956
|
-
const progress = `[${i + 1}/${totalFiles}]`;
|
|
4564
|
+
let completedCount = 0;
|
|
4565
|
+
const processFile = async (filepath, _index) => {
|
|
4566
|
+
const relativePath = path15.relative(rootDir, filepath);
|
|
3957
4567
|
try {
|
|
3958
4568
|
const stats = await fs6.stat(filepath);
|
|
3959
4569
|
const lastModified = stats.mtime.toISOString();
|
|
3960
4570
|
const existingEntry = manifest.files[relativePath];
|
|
3961
4571
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
3962
|
-
|
|
3963
|
-
|
|
3964
|
-
|
|
4572
|
+
completedCount++;
|
|
4573
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
|
|
4574
|
+
return { relativePath, status: "skipped" };
|
|
3965
4575
|
}
|
|
3966
4576
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3967
4577
|
introspection.addFile(relativePath, content);
|
|
3968
|
-
|
|
4578
|
+
completedCount++;
|
|
4579
|
+
logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
|
|
3969
4580
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
3970
4581
|
if (!fileIndex) {
|
|
3971
|
-
logger.debug(` ${
|
|
3972
|
-
|
|
3973
|
-
continue;
|
|
4582
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
|
|
4583
|
+
return { relativePath, status: "skipped" };
|
|
3974
4584
|
}
|
|
3975
4585
|
await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
|
|
3976
|
-
|
|
4586
|
+
return {
|
|
4587
|
+
relativePath,
|
|
4588
|
+
status: "indexed",
|
|
3977
4589
|
lastModified,
|
|
3978
4590
|
chunkCount: fileIndex.chunks.length
|
|
3979
4591
|
};
|
|
3980
|
-
result.indexed++;
|
|
3981
4592
|
} catch (error) {
|
|
3982
|
-
|
|
3983
|
-
|
|
4593
|
+
completedCount++;
|
|
4594
|
+
return { relativePath, status: "error", error };
|
|
4595
|
+
}
|
|
4596
|
+
};
|
|
4597
|
+
logger.debug(` Using concurrency: ${concurrency}`);
|
|
4598
|
+
const results = await parallelMap(files, processFile, concurrency);
|
|
4599
|
+
logger.clearProgress();
|
|
4600
|
+
for (const item of results) {
|
|
4601
|
+
if (!item.success) {
|
|
3984
4602
|
result.errors++;
|
|
4603
|
+
continue;
|
|
4604
|
+
}
|
|
4605
|
+
const fileResult = item.value;
|
|
4606
|
+
switch (fileResult.status) {
|
|
4607
|
+
case "indexed":
|
|
4608
|
+
manifest.files[fileResult.relativePath] = {
|
|
4609
|
+
lastModified: fileResult.lastModified,
|
|
4610
|
+
chunkCount: fileResult.chunkCount
|
|
4611
|
+
};
|
|
4612
|
+
result.indexed++;
|
|
4613
|
+
break;
|
|
4614
|
+
case "skipped":
|
|
4615
|
+
result.skipped++;
|
|
4616
|
+
break;
|
|
4617
|
+
case "error":
|
|
4618
|
+
logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
|
|
4619
|
+
result.errors++;
|
|
4620
|
+
break;
|
|
3985
4621
|
}
|
|
3986
4622
|
}
|
|
3987
|
-
logger.clearProgress();
|
|
3988
4623
|
manifest.lastUpdated = new Date().toISOString();
|
|
3989
4624
|
await writeModuleManifest(rootDir, module.id, manifest, config);
|
|
3990
4625
|
return result;
|
|
@@ -4019,13 +4654,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
4019
4654
|
}
|
|
4020
4655
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
4021
4656
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
4022
|
-
await fs6.mkdir(
|
|
4657
|
+
await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
|
|
4023
4658
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
4024
4659
|
}
|
|
4025
4660
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
4026
4661
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
4027
|
-
const indexFilePath =
|
|
4028
|
-
await fs6.mkdir(
|
|
4662
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4663
|
+
await fs6.mkdir(path15.dirname(indexFilePath), { recursive: true });
|
|
4029
4664
|
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
4030
4665
|
}
|
|
4031
4666
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
@@ -4035,13 +4670,13 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
4035
4670
|
lastUpdated: new Date().toISOString(),
|
|
4036
4671
|
modules: modules.map((m) => m.id)
|
|
4037
4672
|
};
|
|
4038
|
-
await fs6.mkdir(
|
|
4673
|
+
await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
|
|
4039
4674
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
4040
4675
|
}
|
|
4041
4676
|
async function cleanupIndex(rootDir, options = {}) {
|
|
4042
4677
|
const verbose = options.verbose ?? false;
|
|
4043
4678
|
const logger = options.logger ?? createLogger({ verbose });
|
|
4044
|
-
rootDir =
|
|
4679
|
+
rootDir = path15.resolve(rootDir);
|
|
4045
4680
|
logger.info(`Cleaning up index in: ${rootDir}`);
|
|
4046
4681
|
const config = await loadConfig(rootDir);
|
|
4047
4682
|
await registerBuiltInModules();
|
|
@@ -4071,7 +4706,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
|
4071
4706
|
const filesToRemove = [];
|
|
4072
4707
|
const updatedFiles = {};
|
|
4073
4708
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
4074
|
-
const fullPath =
|
|
4709
|
+
const fullPath = path15.join(rootDir, filepath);
|
|
4075
4710
|
try {
|
|
4076
4711
|
await fs6.access(fullPath);
|
|
4077
4712
|
updatedFiles[filepath] = entry;
|
|
@@ -4083,7 +4718,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
|
4083
4718
|
}
|
|
4084
4719
|
}
|
|
4085
4720
|
for (const filepath of filesToRemove) {
|
|
4086
|
-
const indexFilePath =
|
|
4721
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4087
4722
|
try {
|
|
4088
4723
|
await fs6.unlink(indexFilePath);
|
|
4089
4724
|
} catch {}
|
|
@@ -4099,7 +4734,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
4099
4734
|
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
4100
4735
|
for (const entry of entries) {
|
|
4101
4736
|
if (entry.isDirectory()) {
|
|
4102
|
-
const subDir =
|
|
4737
|
+
const subDir = path15.join(dir, entry.name);
|
|
4103
4738
|
await cleanupEmptyDirectories(subDir);
|
|
4104
4739
|
}
|
|
4105
4740
|
}
|
|
@@ -4114,7 +4749,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
4114
4749
|
}
|
|
4115
4750
|
}
|
|
4116
4751
|
async function getIndexStatus(rootDir) {
|
|
4117
|
-
rootDir =
|
|
4752
|
+
rootDir = path15.resolve(rootDir);
|
|
4118
4753
|
const config = await loadConfig(rootDir);
|
|
4119
4754
|
const location = getIndexLocation(rootDir);
|
|
4120
4755
|
const indexDir = location.indexDir;
|
|
@@ -4150,7 +4785,7 @@ async function getIndexStatus(rootDir) {
|
|
|
4150
4785
|
}
|
|
4151
4786
|
} catch {
|
|
4152
4787
|
try {
|
|
4153
|
-
const entries = await fs6.readdir(
|
|
4788
|
+
const entries = await fs6.readdir(path15.join(indexDir, "index"));
|
|
4154
4789
|
if (entries.length > 0) {
|
|
4155
4790
|
status.exists = true;
|
|
4156
4791
|
for (const entry of entries) {
|
|
@@ -4170,7 +4805,7 @@ async function getIndexStatus(rootDir) {
|
|
|
4170
4805
|
}
|
|
4171
4806
|
return status;
|
|
4172
4807
|
}
|
|
4173
|
-
var INDEX_SCHEMA_VERSION = "1.0.0";
|
|
4808
|
+
var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
|
|
4174
4809
|
var init_indexer = __esm(() => {
|
|
4175
4810
|
init_config2();
|
|
4176
4811
|
init_registry();
|
|
@@ -4191,9 +4826,9 @@ __export(exports_search, {
|
|
|
4191
4826
|
formatSearchResults: () => formatSearchResults
|
|
4192
4827
|
});
|
|
4193
4828
|
import * as fs7 from "fs/promises";
|
|
4194
|
-
import * as
|
|
4829
|
+
import * as path16 from "path";
|
|
4195
4830
|
async function search(rootDir, query, options = {}) {
|
|
4196
|
-
rootDir =
|
|
4831
|
+
rootDir = path16.resolve(rootDir);
|
|
4197
4832
|
const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
|
|
4198
4833
|
if (ensureFresh) {
|
|
4199
4834
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
@@ -4238,7 +4873,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4238
4873
|
config,
|
|
4239
4874
|
loadFileIndex: async (filepath) => {
|
|
4240
4875
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
4241
|
-
const indexFilePath = hasExtension ?
|
|
4876
|
+
const indexFilePath = hasExtension ? path16.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path16.join(indexPath, filepath + ".json");
|
|
4242
4877
|
try {
|
|
4243
4878
|
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
4244
4879
|
return JSON.parse(content);
|
|
@@ -4250,7 +4885,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4250
4885
|
const files = [];
|
|
4251
4886
|
await traverseDirectory(indexPath, files, indexPath);
|
|
4252
4887
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
4253
|
-
const relative4 =
|
|
4888
|
+
const relative4 = path16.relative(indexPath, f);
|
|
4254
4889
|
return relative4.replace(/\.json$/, "");
|
|
4255
4890
|
});
|
|
4256
4891
|
}
|
|
@@ -4260,7 +4895,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
4260
4895
|
try {
|
|
4261
4896
|
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
4262
4897
|
for (const entry of entries) {
|
|
4263
|
-
const fullPath =
|
|
4898
|
+
const fullPath = path16.join(dir, entry.name);
|
|
4264
4899
|
if (entry.isDirectory()) {
|
|
4265
4900
|
await traverseDirectory(fullPath, files, basePath);
|
|
4266
4901
|
} else if (entry.isFile()) {
|
|
@@ -4338,7 +4973,7 @@ init_logger();
|
|
|
4338
4973
|
// package.json
|
|
4339
4974
|
var package_default = {
|
|
4340
4975
|
name: "raggrep",
|
|
4341
|
-
version: "0.
|
|
4976
|
+
version: "0.5.0",
|
|
4342
4977
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
4343
4978
|
type: "module",
|
|
4344
4979
|
main: "./dist/index.js",
|
|
@@ -4474,6 +5109,14 @@ function parseFlags(args2) {
|
|
|
4474
5109
|
console.error("--type requires a file extension (e.g., ts, tsx, js)");
|
|
4475
5110
|
process.exit(1);
|
|
4476
5111
|
}
|
|
5112
|
+
} else if (arg === "--concurrency" || arg === "-c") {
|
|
5113
|
+
const c = parseInt(args2[++i], 10);
|
|
5114
|
+
if (!isNaN(c) && c > 0) {
|
|
5115
|
+
flags.concurrency = c;
|
|
5116
|
+
} else {
|
|
5117
|
+
console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
|
|
5118
|
+
process.exit(1);
|
|
5119
|
+
}
|
|
4477
5120
|
} else if (!arg.startsWith("-")) {
|
|
4478
5121
|
flags.remaining.push(arg);
|
|
4479
5122
|
}
|
|
@@ -4493,10 +5136,11 @@ Usage:
|
|
|
4493
5136
|
raggrep index [options]
|
|
4494
5137
|
|
|
4495
5138
|
Options:
|
|
4496
|
-
-w, --watch
|
|
4497
|
-
-m, --model <name>
|
|
4498
|
-
-
|
|
4499
|
-
-
|
|
5139
|
+
-w, --watch Watch for file changes and re-index automatically
|
|
5140
|
+
-m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
|
|
5141
|
+
-c, --concurrency <n> Number of files to process in parallel (default: 4)
|
|
5142
|
+
-v, --verbose Show detailed progress
|
|
5143
|
+
-h, --help Show this help message
|
|
4500
5144
|
|
|
4501
5145
|
Available Models:
|
|
4502
5146
|
${models}
|
|
@@ -4507,6 +5151,7 @@ Examples:
|
|
|
4507
5151
|
raggrep index
|
|
4508
5152
|
raggrep index --watch
|
|
4509
5153
|
raggrep index --model bge-small-en-v1.5
|
|
5154
|
+
raggrep index --concurrency 8
|
|
4510
5155
|
raggrep index --verbose
|
|
4511
5156
|
`);
|
|
4512
5157
|
process.exit(0);
|
|
@@ -4520,6 +5165,7 @@ Examples:
|
|
|
4520
5165
|
const results = await indexDirectory2(process.cwd(), {
|
|
4521
5166
|
model: flags.model,
|
|
4522
5167
|
verbose: flags.verbose,
|
|
5168
|
+
concurrency: flags.concurrency,
|
|
4523
5169
|
logger
|
|
4524
5170
|
});
|
|
4525
5171
|
console.log(`
|
|
@@ -4773,4 +5419,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
4773
5419
|
}
|
|
4774
5420
|
main();
|
|
4775
5421
|
|
|
4776
|
-
//# debugId=
|
|
5422
|
+
//# debugId=5CA623D9974ACF4364756E2164756E21
|