raggrep 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +4 -0
- package/dist/cli/main.js +1323 -610
- package/dist/cli/main.js.map +19 -14
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/entities/searchResult.d.ts +5 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.js +1301 -613
- package/dist/index.js.map +18 -13
- package/dist/modules/core/index.d.ts +4 -0
- package/dist/modules/data/json/index.d.ts +49 -0
- package/dist/modules/docs/markdown/index.d.ts +49 -0
- package/dist/modules/language/typescript/index.d.ts +11 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/dist/types.d.ts +6 -0
- package/package.json +1 -1
package/dist/cli/main.js
CHANGED
|
@@ -325,6 +325,7 @@ var init_searchResult = __esm(() => {
|
|
|
325
325
|
topK: 10,
|
|
326
326
|
minScore: 0.15,
|
|
327
327
|
filePatterns: [],
|
|
328
|
+
pathFilter: [],
|
|
328
329
|
ensureFresh: true
|
|
329
330
|
};
|
|
330
331
|
});
|
|
@@ -348,6 +349,20 @@ function createDefaultConfig() {
|
|
|
348
349
|
options: {
|
|
349
350
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
350
351
|
}
|
|
352
|
+
},
|
|
353
|
+
{
|
|
354
|
+
id: "data/json",
|
|
355
|
+
enabled: true,
|
|
356
|
+
options: {
|
|
357
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
358
|
+
}
|
|
359
|
+
},
|
|
360
|
+
{
|
|
361
|
+
id: "docs/markdown",
|
|
362
|
+
enabled: true,
|
|
363
|
+
options: {
|
|
364
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
365
|
+
}
|
|
351
366
|
}
|
|
352
367
|
]
|
|
353
368
|
};
|
|
@@ -391,16 +406,18 @@ var init_config = __esm(() => {
|
|
|
391
406
|
".jsx",
|
|
392
407
|
".mjs",
|
|
393
408
|
".cjs",
|
|
409
|
+
".mts",
|
|
410
|
+
".cts",
|
|
411
|
+
".json",
|
|
412
|
+
".md",
|
|
394
413
|
".py",
|
|
395
414
|
".go",
|
|
396
415
|
".rs",
|
|
397
416
|
".java",
|
|
398
|
-
".json",
|
|
399
417
|
".yaml",
|
|
400
418
|
".yml",
|
|
401
419
|
".toml",
|
|
402
420
|
".sql",
|
|
403
|
-
".md",
|
|
404
421
|
".txt"
|
|
405
422
|
];
|
|
406
423
|
});
|
|
@@ -2071,6 +2088,9 @@ class CoreModule {
|
|
|
2071
2088
|
name = "Core Search";
|
|
2072
2089
|
description = "Language-agnostic text search with symbol extraction";
|
|
2073
2090
|
version = "1.0.0";
|
|
2091
|
+
supportsFile(_filepath) {
|
|
2092
|
+
return true;
|
|
2093
|
+
}
|
|
2074
2094
|
symbolIndex = new Map;
|
|
2075
2095
|
bm25Index = null;
|
|
2076
2096
|
rootDir = "";
|
|
@@ -2292,221 +2312,6 @@ var init_core = __esm(() => {
|
|
|
2292
2312
|
init_symbols();
|
|
2293
2313
|
});
|
|
2294
2314
|
|
|
2295
|
-
// src/domain/services/similarity.ts
|
|
2296
|
-
function cosineSimilarity(a, b) {
|
|
2297
|
-
if (a.length !== b.length) {
|
|
2298
|
-
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
2299
|
-
}
|
|
2300
|
-
let dotProduct = 0;
|
|
2301
|
-
let normA = 0;
|
|
2302
|
-
let normB = 0;
|
|
2303
|
-
for (let i = 0;i < a.length; i++) {
|
|
2304
|
-
dotProduct += a[i] * b[i];
|
|
2305
|
-
normA += a[i] * a[i];
|
|
2306
|
-
normB += b[i] * b[i];
|
|
2307
|
-
}
|
|
2308
|
-
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
2309
|
-
if (magnitude === 0)
|
|
2310
|
-
return 0;
|
|
2311
|
-
return dotProduct / magnitude;
|
|
2312
|
-
}
|
|
2313
|
-
|
|
2314
|
-
// src/modules/language/typescript/parseCode.ts
|
|
2315
|
-
import * as ts from "typescript";
|
|
2316
|
-
function parseCode(content, filepath) {
|
|
2317
|
-
const ext = filepath.split(".").pop()?.toLowerCase();
|
|
2318
|
-
if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
|
|
2319
|
-
return parseTypeScript(content, filepath);
|
|
2320
|
-
}
|
|
2321
|
-
return parseGenericCode(content);
|
|
2322
|
-
}
|
|
2323
|
-
function parseTypeScript(content, filepath) {
|
|
2324
|
-
const chunks = [];
|
|
2325
|
-
const lines = content.split(`
|
|
2326
|
-
`);
|
|
2327
|
-
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
2328
|
-
function getLineNumbers(node) {
|
|
2329
|
-
const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
|
|
2330
|
-
const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
|
|
2331
|
-
return {
|
|
2332
|
-
startLine: start.line + 1,
|
|
2333
|
-
endLine: end.line + 1
|
|
2334
|
-
};
|
|
2335
|
-
}
|
|
2336
|
-
function getNodeText(node) {
|
|
2337
|
-
return node.getText(sourceFile);
|
|
2338
|
-
}
|
|
2339
|
-
function isExported(node) {
|
|
2340
|
-
if (!ts.canHaveModifiers(node))
|
|
2341
|
-
return false;
|
|
2342
|
-
const modifiers = ts.getModifiers(node);
|
|
2343
|
-
return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
|
|
2344
|
-
}
|
|
2345
|
-
function getJSDoc(node) {
|
|
2346
|
-
const jsDocNodes = ts.getJSDocCommentsAndTags(node);
|
|
2347
|
-
if (jsDocNodes.length === 0)
|
|
2348
|
-
return;
|
|
2349
|
-
return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
|
|
2350
|
-
`);
|
|
2351
|
-
}
|
|
2352
|
-
function getFunctionName(node) {
|
|
2353
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2354
|
-
return node.name.text;
|
|
2355
|
-
}
|
|
2356
|
-
if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2357
|
-
return node.name.text;
|
|
2358
|
-
}
|
|
2359
|
-
if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2360
|
-
return node.name.text;
|
|
2361
|
-
}
|
|
2362
|
-
return;
|
|
2363
|
-
}
|
|
2364
|
-
function visit(node) {
|
|
2365
|
-
const { startLine, endLine } = getLineNumbers(node);
|
|
2366
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2367
|
-
chunks.push({
|
|
2368
|
-
content: getNodeText(node),
|
|
2369
|
-
startLine,
|
|
2370
|
-
endLine,
|
|
2371
|
-
type: "function",
|
|
2372
|
-
name: node.name.text,
|
|
2373
|
-
isExported: isExported(node),
|
|
2374
|
-
jsDoc: getJSDoc(node)
|
|
2375
|
-
});
|
|
2376
|
-
return;
|
|
2377
|
-
}
|
|
2378
|
-
if (ts.isVariableStatement(node)) {
|
|
2379
|
-
for (const decl of node.declarationList.declarations) {
|
|
2380
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2381
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2382
|
-
chunks.push({
|
|
2383
|
-
content: getNodeText(node),
|
|
2384
|
-
startLine,
|
|
2385
|
-
endLine,
|
|
2386
|
-
type: "function",
|
|
2387
|
-
name,
|
|
2388
|
-
isExported: isExported(node),
|
|
2389
|
-
jsDoc: getJSDoc(node)
|
|
2390
|
-
});
|
|
2391
|
-
return;
|
|
2392
|
-
}
|
|
2393
|
-
}
|
|
2394
|
-
}
|
|
2395
|
-
if (ts.isClassDeclaration(node) && node.name) {
|
|
2396
|
-
chunks.push({
|
|
2397
|
-
content: getNodeText(node),
|
|
2398
|
-
startLine,
|
|
2399
|
-
endLine,
|
|
2400
|
-
type: "class",
|
|
2401
|
-
name: node.name.text,
|
|
2402
|
-
isExported: isExported(node),
|
|
2403
|
-
jsDoc: getJSDoc(node)
|
|
2404
|
-
});
|
|
2405
|
-
return;
|
|
2406
|
-
}
|
|
2407
|
-
if (ts.isInterfaceDeclaration(node)) {
|
|
2408
|
-
chunks.push({
|
|
2409
|
-
content: getNodeText(node),
|
|
2410
|
-
startLine,
|
|
2411
|
-
endLine,
|
|
2412
|
-
type: "interface",
|
|
2413
|
-
name: node.name.text,
|
|
2414
|
-
isExported: isExported(node),
|
|
2415
|
-
jsDoc: getJSDoc(node)
|
|
2416
|
-
});
|
|
2417
|
-
return;
|
|
2418
|
-
}
|
|
2419
|
-
if (ts.isTypeAliasDeclaration(node)) {
|
|
2420
|
-
chunks.push({
|
|
2421
|
-
content: getNodeText(node),
|
|
2422
|
-
startLine,
|
|
2423
|
-
endLine,
|
|
2424
|
-
type: "type",
|
|
2425
|
-
name: node.name.text,
|
|
2426
|
-
isExported: isExported(node),
|
|
2427
|
-
jsDoc: getJSDoc(node)
|
|
2428
|
-
});
|
|
2429
|
-
return;
|
|
2430
|
-
}
|
|
2431
|
-
if (ts.isEnumDeclaration(node)) {
|
|
2432
|
-
chunks.push({
|
|
2433
|
-
content: getNodeText(node),
|
|
2434
|
-
startLine,
|
|
2435
|
-
endLine,
|
|
2436
|
-
type: "enum",
|
|
2437
|
-
name: node.name.text,
|
|
2438
|
-
isExported: isExported(node),
|
|
2439
|
-
jsDoc: getJSDoc(node)
|
|
2440
|
-
});
|
|
2441
|
-
return;
|
|
2442
|
-
}
|
|
2443
|
-
if (ts.isVariableStatement(node) && isExported(node)) {
|
|
2444
|
-
for (const decl of node.declarationList.declarations) {
|
|
2445
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2446
|
-
continue;
|
|
2447
|
-
}
|
|
2448
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2449
|
-
chunks.push({
|
|
2450
|
-
content: getNodeText(node),
|
|
2451
|
-
startLine,
|
|
2452
|
-
endLine,
|
|
2453
|
-
type: "variable",
|
|
2454
|
-
name,
|
|
2455
|
-
isExported: true,
|
|
2456
|
-
jsDoc: getJSDoc(node)
|
|
2457
|
-
});
|
|
2458
|
-
}
|
|
2459
|
-
return;
|
|
2460
|
-
}
|
|
2461
|
-
ts.forEachChild(node, visit);
|
|
2462
|
-
}
|
|
2463
|
-
ts.forEachChild(sourceFile, visit);
|
|
2464
|
-
if (chunks.length === 0) {
|
|
2465
|
-
return parseGenericCode(content);
|
|
2466
|
-
}
|
|
2467
|
-
return chunks;
|
|
2468
|
-
}
|
|
2469
|
-
function parseGenericCode(content) {
|
|
2470
|
-
const chunks = [];
|
|
2471
|
-
const lines = content.split(`
|
|
2472
|
-
`);
|
|
2473
|
-
const CHUNK_SIZE = 30;
|
|
2474
|
-
const OVERLAP = 5;
|
|
2475
|
-
if (lines.length <= CHUNK_SIZE) {
|
|
2476
|
-
return [
|
|
2477
|
-
{
|
|
2478
|
-
content,
|
|
2479
|
-
startLine: 1,
|
|
2480
|
-
endLine: lines.length,
|
|
2481
|
-
type: "file"
|
|
2482
|
-
}
|
|
2483
|
-
];
|
|
2484
|
-
}
|
|
2485
|
-
for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
|
|
2486
|
-
const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
|
|
2487
|
-
chunks.push({
|
|
2488
|
-
content: lines.slice(i, endIdx).join(`
|
|
2489
|
-
`),
|
|
2490
|
-
startLine: i + 1,
|
|
2491
|
-
endLine: endIdx,
|
|
2492
|
-
type: "block"
|
|
2493
|
-
});
|
|
2494
|
-
if (endIdx >= lines.length)
|
|
2495
|
-
break;
|
|
2496
|
-
}
|
|
2497
|
-
return chunks;
|
|
2498
|
-
}
|
|
2499
|
-
function generateChunkId(filepath, startLine, endLine) {
|
|
2500
|
-
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2501
|
-
return `${safePath}-${startLine}-${endLine}`;
|
|
2502
|
-
}
|
|
2503
|
-
var init_parseCode = () => {};
|
|
2504
|
-
|
|
2505
|
-
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2506
|
-
var init_fileIndexStorage = __esm(() => {
|
|
2507
|
-
init_entities();
|
|
2508
|
-
});
|
|
2509
|
-
|
|
2510
2315
|
// src/domain/services/keywords.ts
|
|
2511
2316
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2512
2317
|
const keywords = new Set;
|
|
@@ -2695,19 +2500,347 @@ var init_keywords = __esm(() => {
|
|
|
2695
2500
|
};
|
|
2696
2501
|
});
|
|
2697
2502
|
|
|
2698
|
-
// src/
|
|
2699
|
-
|
|
2700
|
-
|
|
2503
|
+
// src/domain/services/similarity.ts
|
|
2504
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 * @throws {Error} When the two vectors differ in length.
 */
function cosineSimilarity(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let idx = 0; idx < dims; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }
  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  // A zero vector has no direction; report "no similarity" instead of dividing by zero.
  return magnitude === 0 ? 0 : dot / magnitude;
}
|
|
2701
2521
|
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2710
|
-
|
|
2522
|
+
// src/domain/services/queryIntent.ts
|
|
2523
|
+
import * as path7 from "path";
|
|
2524
|
+
/**
 * Classify a query as asking for documentation, implementation, or neither.
 *
 * A term counts only on an exact match against the module-level
 * DOCUMENTATION_TERMS / IMPLEMENTATION_TERMS lists. Documentation wins when
 * both kinds of term are present.
 *
 * @param {string[]} queryTerms - Query terms to inspect.
 * @returns {"documentation"|"implementation"|"neutral"} Detected intent.
 */
function detectQueryIntent(queryTerms) {
  const mentionsAny = (vocabulary) => queryTerms.some((term) => vocabulary.includes(term));
  const wantsImplementation = mentionsAny(IMPLEMENTATION_TERMS);
  const wantsDocumentation = mentionsAny(DOCUMENTATION_TERMS);
  if (wantsDocumentation) {
    return "documentation";
  }
  return wantsImplementation ? "implementation" : "neutral";
}
|
|
2535
|
+
/**
 * Break a query into lower-cased, whitespace-separated terms.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Terms longer than two characters, in query order.
 */
function extractQueryTerms(query) {
  const terms = [];
  for (const token of query.toLowerCase().split(/\s+/)) {
    // Tokens of one or two characters (and empty tokens) are dropped.
    if (token.length > 2) {
      terms.push(token);
    }
  }
  return terms;
}
|
|
2538
|
+
/**
 * Tell whether a path's extension (case-insensitive) is listed in
 * SOURCE_CODE_EXTENSIONS.
 *
 * @param {string} filepath - Path of the file to classify.
 * @returns {boolean} True when the extension is a known source-code extension.
 */
function isSourceCodeFile(filepath) {
  return SOURCE_CODE_EXTENSIONS.includes(path7.extname(filepath).toLowerCase());
}
|
|
2542
|
+
/**
 * Tell whether a path's extension (case-insensitive) is listed in
 * DOC_EXTENSIONS.
 *
 * @param {string} filepath - Path of the file to classify.
 * @returns {boolean} True when the extension is a known documentation extension.
 */
function isDocFile(filepath) {
  return DOC_EXTENSIONS.includes(path7.extname(filepath).toLowerCase());
}
|
|
2546
|
+
/**
 * Score bonus for a file whose type matches what the query is asking for.
 *
 * @param {string} filepath - Path of the candidate file.
 * @param {string[]} queryTerms - Query terms used to detect intent.
 * @returns {number} 0.06 for a source-code file under an "implementation"
 *   query, 0.08 for a documentation file under a "documentation" query,
 *   otherwise 0.
 */
function calculateFileTypeBoost(filepath, queryTerms) {
  const sourceCode = isSourceCodeFile(filepath);
  const documentation = isDocFile(filepath);
  switch (detectQueryIntent(queryTerms)) {
    case "implementation":
      return sourceCode ? 0.06 : 0;
    case "documentation":
      return documentation ? 0.08 : 0;
    default:
      // Neutral queries favour no particular file type.
      return 0;
  }
}
|
|
2564
|
+
var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
|
|
2565
|
+
// Lazy initializer for the query-intent vocabularies and extension lists.
// It assigns the module-level IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS,
// SOURCE_CODE_EXTENSIONS and DOC_EXTENSIONS bindings declared just above.
var init_queryIntent = __esm(() => {
  // Terms that mark a query as looking for code.
  IMPLEMENTATION_TERMS = [
    "function",
    "method",
    "class",
    "interface",
    "implement",
    "implementation",
    "endpoint",
    "route",
    "handler",
    "controller",
    "module",
    "code"
  ];
  // Terms that mark a query as looking for explanatory material.
  DOCUMENTATION_TERMS = [
    "documentation",
    "docs",
    "guide",
    "tutorial",
    "readme",
    "how",
    "what",
    "why",
    "explain",
    "overview",
    "getting",
    "started",
    "requirements",
    "setup",
    "install",
    "configure",
    "configuration"
  ];
  // Extensions treated as source code when boosting "implementation" queries.
  // ".mts" and ".cts" are included because the default indexed-extension list
  // contains them; without them those files would never receive the boost.
  SOURCE_CODE_EXTENSIONS = [
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
    ".mts",
    ".cts",
    ".py",
    ".go",
    ".rs",
    ".java"
  ];
  // Extensions treated as documentation when boosting "documentation" queries.
  DOC_EXTENSIONS = [".md", ".txt", ".rst"];
});
|
|
2613
|
+
|
|
2614
|
+
// src/domain/services/chunking.ts
|
|
2615
|
+
/**
 * Split text into overlapping, line-based chunks.
 *
 * Content with at most `minLinesForMultipleChunks` lines is returned as a
 * single chunk of type "file". Longer content is cut into "block" chunks of
 * up to `chunkSize` lines, each starting `chunkSize - overlap` lines after
 * the previous one.
 *
 * @param {string} content - Text to split; lines are separated by "\n".
 * @param {object} [options]
 * @param {number} [options.chunkSize=DEFAULT_CHUNK_SIZE] - Maximum lines per chunk.
 * @param {number} [options.overlap=DEFAULT_OVERLAP] - Lines shared by consecutive chunks.
 * @param {number} [options.minLinesForMultipleChunks=chunkSize] - Line count
 *   up to which the content is kept as one chunk.
 * @returns {Array<{content: string, startLine: number, endLine: number, type: string}>}
 *   Chunks with 1-based, inclusive line ranges.
 */
function createLineBasedChunks(content, options = {}) {
  const {
    chunkSize = DEFAULT_CHUNK_SIZE,
    overlap = DEFAULT_OVERLAP,
    minLinesForMultipleChunks = chunkSize
  } = options;
  const lines = content.split("\n");
  if (lines.length <= minLinesForMultipleChunks) {
    return [
      {
        content,
        startLine: 1,
        endLine: lines.length,
        type: "file"
      }
    ];
  }
  // The window must hold at least one line and must advance on every
  // iteration. Without these clamps, `overlap >= chunkSize` gives a step of
  // zero or less and the loop below never terminates.
  const windowSize = Math.max(1, chunkSize);
  const step = Math.max(1, windowSize - overlap);
  const chunks = [];
  for (let start = 0; start < lines.length; start += step) {
    const end = Math.min(start + windowSize, lines.length);
    chunks.push({
      content: lines.slice(start, end).join("\n"),
      startLine: start + 1,
      endLine: end,
      type: "block"
    });
    // Stop once a chunk reaches the last line so no redundant tail is emitted.
    if (end >= lines.length) {
      break;
    }
  }
  return chunks;
}
|
|
2648
|
+
/**
 * Build a chunk identifier from a file path and a line range.
 *
 * Path separators ("/" and "\") become "-" and dots become "_".
 *
 * @param {string} filepath - Path of the file the chunk belongs to.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} Identifier of the form "<flattened-path>-<startLine>-<endLine>".
 */
function generateChunkId(filepath, startLine, endLine) {
  const withoutSeparators = filepath.split(/[/\\]/).join("-");
  const flattened = withoutSeparators.split(".").join("_");
  return `${flattened}-${startLine}-${endLine}`;
}
|
|
2652
|
+
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2653
|
+
|
|
2654
|
+
// src/domain/services/index.ts
|
|
2655
|
+
var init_services = __esm(() => {
|
|
2656
|
+
init_keywords();
|
|
2657
|
+
init_queryIntent();
|
|
2658
|
+
});
|
|
2659
|
+
|
|
2660
|
+
// src/modules/language/typescript/parseCode.ts
|
|
2661
|
+
import * as ts from "typescript";
|
|
2662
|
+
/**
 * Public entry point for TypeScript/JavaScript parsing; delegates to
 * parseTypeScript.
 *
 * @param {string} content - Source text.
 * @param {string} filepath - Path of the source file.
 * @returns {object[]} Parsed chunks (see parseTypeScript).
 */
function parseTypeScriptCode(content, filepath) {
  return parseTypeScript(content, filepath);
}
/**
 * Walk the TypeScript AST of `content` and collect one chunk per recognised
 * declaration.
 *
 * Recognised nodes: named function declarations, variable statements (either
 * holding an arrow/function expression, or exported), named classes,
 * interfaces, type aliases and enums. A recognised node is not descended
 * into; every other node is searched recursively. When nothing is recognised
 * the whole file is returned as a single chunk of type "file".
 *
 * @param {string} content - Source text.
 * @param {string} filepath - Path of the source file; a ".tsx" or ".jsx"
 *   suffix selects the TSX script kind, anything else is parsed as TS.
 * @returns {Array<{content: string, startLine: number, endLine: number,
 *   type: string, name?: string, isExported?: boolean, jsDoc?: string}>}
 *   Chunks with 1-based line numbers.
 */
function parseTypeScript(content, filepath) {
  const chunks = [];
  const scriptKind = filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS;
  const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, scriptKind);
  // Convert the node's 0-based start/end positions to 1-based line numbers.
  function getLineNumbers(node) {
    const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
    const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
    return {
      startLine: start.line + 1,
      endLine: end.line + 1
    };
  }
  // True when the node carries an `export` modifier.
  function isExported(node) {
    if (!ts.canHaveModifiers(node))
      return false;
    const modifiers = ts.getModifiers(node);
    return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
  }
  // Text of all JSDoc comments/tags attached to the node, or undefined if none.
  function getJSDoc(node) {
    const jsDocNodes = ts.getJSDocCommentsAndTags(node);
    if (jsDocNodes.length === 0)
      return;
    return jsDocNodes.map((doc) => doc.getText(sourceFile)).join("\n");
  }
  // Name of a declarator when it is a plain identifier (not a destructuring pattern).
  function identifierText(nameNode) {
    return ts.isIdentifier(nameNode) ? nameNode.text : undefined;
  }
  // True when a declarator is initialised with an arrow function or function expression.
  function hasFunctionInitializer(decl) {
    return Boolean(decl.initializer) && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer));
  }
  // Record `node` as a chunk; the key order matches the stored chunk shape.
  function pushChunk(node, type, name, exported) {
    const { startLine, endLine } = getLineNumbers(node);
    chunks.push({
      content: node.getText(sourceFile),
      startLine,
      endLine,
      type,
      name,
      isExported: exported,
      jsDoc: getJSDoc(node)
    });
  }
  function visit(node) {
    if (ts.isFunctionDeclaration(node) && node.name) {
      pushChunk(node, "function", node.name.text, isExported(node));
      return;
    }
    if (ts.isVariableStatement(node)) {
      const declarations = node.declarationList.declarations;
      // A statement holding a function value is one "function" chunk, named
      // after the first function-valued declarator.
      for (const decl of declarations) {
        if (hasFunctionInitializer(decl)) {
          pushChunk(node, "function", identifierText(decl.name), isExported(node));
          return;
        }
      }
      // No declarator holds a function here, so every declarator of an
      // exported statement becomes a "variable" chunk (each carries the full
      // statement text).
      if (isExported(node)) {
        for (const decl of declarations) {
          pushChunk(node, "variable", identifierText(decl.name), true);
        }
        return;
      }
      // Non-exported plain variables fall through and are searched recursively.
    }
    if (ts.isClassDeclaration(node) && node.name) {
      pushChunk(node, "class", node.name.text, isExported(node));
      return;
    }
    if (ts.isInterfaceDeclaration(node)) {
      pushChunk(node, "interface", node.name.text, isExported(node));
      return;
    }
    if (ts.isTypeAliasDeclaration(node)) {
      pushChunk(node, "type", node.name.text, isExported(node));
      return;
    }
    if (ts.isEnumDeclaration(node)) {
      pushChunk(node, "enum", node.name.text, isExported(node));
      return;
    }
    ts.forEachChild(node, visit);
  }
  ts.forEachChild(sourceFile, visit);
  if (chunks.length === 0) {
    // Nothing recognised: fall back to one chunk spanning the whole file.
    return [
      {
        content,
        startLine: 1,
        endLine: content.split("\n").length,
        type: "file"
      }
    ];
  }
  return chunks;
}
|
|
2820
|
+
/**
 * Build a chunk identifier from a file path and a line range.
 *
 * Path separators ("/" and "\") become "-" and dots become "_".
 *
 * @param {string} filepath - Path of the file the chunk belongs to.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} Identifier of the form "<flattened-path>-<startLine>-<endLine>".
 */
function generateChunkId2(filepath, startLine, endLine) {
  // Single pass: the two replacement characters never match the pattern
  // themselves, so this equals replacing separators first and dots second.
  const flattened = filepath.replace(/[/\\.]/g, (ch) => (ch === "." ? "_" : "-"));
  return `${flattened}-${startLine}-${endLine}`;
}
|
|
2824
|
+
var init_parseCode = () => {};
|
|
2825
|
+
|
|
2826
|
+
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2827
|
+
var init_fileIndexStorage = __esm(() => {
|
|
2828
|
+
init_entities();
|
|
2829
|
+
});
|
|
2830
|
+
|
|
2831
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
2832
|
+
import * as fs3 from "fs/promises";
|
|
2833
|
+
import * as path8 from "path";
|
|
2834
|
+
|
|
2835
|
+
class SymbolicIndex {
|
|
2836
|
+
meta = null;
|
|
2837
|
+
fileSummaries = new Map;
|
|
2838
|
+
bm25Index = null;
|
|
2839
|
+
symbolicPath;
|
|
2840
|
+
moduleId;
|
|
2841
|
+
constructor(indexDir, moduleId) {
|
|
2842
|
+
this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
|
|
2843
|
+
this.moduleId = moduleId;
|
|
2711
2844
|
}
|
|
2712
2845
|
async initialize() {
|
|
2713
2846
|
try {
|
|
@@ -2724,194 +2857,763 @@ class SymbolicIndex {
|
|
|
2724
2857
|
totalDocs: 0
|
|
2725
2858
|
}
|
|
2726
2859
|
};
|
|
2727
|
-
this.bm25Index = new BM25Index;
|
|
2860
|
+
this.bm25Index = new BM25Index;
|
|
2861
|
+
}
|
|
2862
|
+
}
|
|
2863
|
+
addFile(summary) {
|
|
2864
|
+
this.fileSummaries.set(summary.filepath, summary);
|
|
2865
|
+
}
|
|
2866
|
+
removeFile(filepath) {
|
|
2867
|
+
return this.fileSummaries.delete(filepath);
|
|
2868
|
+
}
|
|
2869
|
+
buildBM25Index() {
|
|
2870
|
+
this.bm25Index = new BM25Index;
|
|
2871
|
+
for (const [filepath, summary] of this.fileSummaries) {
|
|
2872
|
+
const content = [
|
|
2873
|
+
...summary.keywords,
|
|
2874
|
+
...summary.exports,
|
|
2875
|
+
...extractPathKeywords(filepath)
|
|
2876
|
+
].join(" ");
|
|
2877
|
+
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
2878
|
+
}
|
|
2879
|
+
if (this.meta) {
|
|
2880
|
+
this.meta.fileCount = this.fileSummaries.size;
|
|
2881
|
+
this.meta.bm25Data.totalDocs = this.fileSummaries.size;
|
|
2882
|
+
}
|
|
2883
|
+
}
|
|
2884
|
+
findCandidates(query, maxCandidates = 20) {
|
|
2885
|
+
if (!this.bm25Index) {
|
|
2886
|
+
return Array.from(this.fileSummaries.keys());
|
|
2887
|
+
}
|
|
2888
|
+
const results = this.bm25Index.search(query, maxCandidates);
|
|
2889
|
+
return results.map((r) => r.id);
|
|
2890
|
+
}
|
|
2891
|
+
getAllFiles() {
|
|
2892
|
+
return Array.from(this.fileSummaries.keys());
|
|
2893
|
+
}
|
|
2894
|
+
getFileSummary(filepath) {
|
|
2895
|
+
return this.fileSummaries.get(filepath);
|
|
2896
|
+
}
|
|
2897
|
+
async save() {
|
|
2898
|
+
if (!this.meta)
|
|
2899
|
+
throw new Error("Index not initialized");
|
|
2900
|
+
this.meta.lastUpdated = new Date().toISOString();
|
|
2901
|
+
this.meta.fileCount = this.fileSummaries.size;
|
|
2902
|
+
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
2903
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2904
|
+
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
2905
|
+
for (const [filepath, summary] of this.fileSummaries) {
|
|
2906
|
+
const summaryPath = this.getFileSummaryPath(filepath);
|
|
2907
|
+
await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
|
|
2908
|
+
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
2909
|
+
}
|
|
2910
|
+
}
|
|
2911
|
+
async load() {
|
|
2912
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2913
|
+
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
2914
|
+
this.meta = JSON.parse(metaContent);
|
|
2915
|
+
this.fileSummaries.clear();
|
|
2916
|
+
await this.loadFileSummariesRecursive(this.symbolicPath);
|
|
2917
|
+
this.buildBM25Index();
|
|
2918
|
+
}
|
|
2919
|
+
async loadFileSummariesRecursive(dir) {
|
|
2920
|
+
try {
|
|
2921
|
+
const entries = await fs3.readdir(dir, { withFileTypes: true });
|
|
2922
|
+
for (const entry of entries) {
|
|
2923
|
+
const fullPath = path8.join(dir, entry.name);
|
|
2924
|
+
if (entry.isDirectory()) {
|
|
2925
|
+
await this.loadFileSummariesRecursive(fullPath);
|
|
2926
|
+
} else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
|
|
2927
|
+
try {
|
|
2928
|
+
const content = await fs3.readFile(fullPath, "utf-8");
|
|
2929
|
+
const summary = JSON.parse(content);
|
|
2930
|
+
if (summary.filepath) {
|
|
2931
|
+
this.fileSummaries.set(summary.filepath, summary);
|
|
2932
|
+
}
|
|
2933
|
+
} catch {}
|
|
2934
|
+
}
|
|
2935
|
+
}
|
|
2936
|
+
} catch {}
|
|
2937
|
+
}
|
|
2938
|
+
getFileSummaryPath(filepath) {
|
|
2939
|
+
const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
|
|
2940
|
+
return path8.join(this.symbolicPath, jsonPath);
|
|
2941
|
+
}
|
|
2942
|
+
async deleteFileSummary(filepath) {
|
|
2943
|
+
try {
|
|
2944
|
+
await fs3.unlink(this.getFileSummaryPath(filepath));
|
|
2945
|
+
} catch {}
|
|
2946
|
+
this.fileSummaries.delete(filepath);
|
|
2947
|
+
}
|
|
2948
|
+
async exists() {
|
|
2949
|
+
try {
|
|
2950
|
+
const metaPath = path8.join(this.symbolicPath, "_meta.json");
|
|
2951
|
+
await fs3.access(metaPath);
|
|
2952
|
+
return true;
|
|
2953
|
+
} catch {
|
|
2954
|
+
return false;
|
|
2955
|
+
}
|
|
2956
|
+
}
|
|
2957
|
+
get size() {
|
|
2958
|
+
return this.fileSummaries.size;
|
|
2959
|
+
}
|
|
2960
|
+
clear() {
|
|
2961
|
+
this.fileSummaries.clear();
|
|
2962
|
+
if (this.meta) {
|
|
2963
|
+
this.meta.fileCount = 0;
|
|
2964
|
+
this.meta.bm25Data = {
|
|
2965
|
+
avgDocLength: 0,
|
|
2966
|
+
documentFrequencies: {},
|
|
2967
|
+
totalDocs: 0
|
|
2968
|
+
};
|
|
2728
2969
|
}
|
|
2970
|
+
this.bm25Index = new BM25Index;
|
|
2729
2971
|
}
|
|
2730
|
-
|
|
2731
|
-
|
|
2972
|
+
}
|
|
2973
|
+
var init_symbolicIndex = __esm(() => {
|
|
2974
|
+
init_keywords();
|
|
2975
|
+
});
|
|
2976
|
+
|
|
2977
|
+
// src/infrastructure/storage/index.ts
|
|
2978
|
+
var init_storage = __esm(() => {
|
|
2979
|
+
init_fileIndexStorage();
|
|
2980
|
+
init_symbolicIndex();
|
|
2981
|
+
});
|
|
2982
|
+
|
|
2983
|
+
// src/modules/language/typescript/index.ts
|
|
2984
|
+
var exports_typescript = {};
|
|
2985
|
+
__export(exports_typescript, {
|
|
2986
|
+
supportsFile: () => supportsFile,
|
|
2987
|
+
isTypeScriptFile: () => isTypeScriptFile,
|
|
2988
|
+
TypeScriptModule: () => TypeScriptModule,
|
|
2989
|
+
TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
|
|
2990
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
2991
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
2992
|
+
});
|
|
2993
|
+
import * as path9 from "path";
|
|
2994
|
+
/**
 * Tell whether a path's extension (case-insensitive) is listed in
 * TYPESCRIPT_EXTENSIONS.
 *
 * @param {string} filepath - Path of the file to classify.
 * @returns {boolean} True when the extension is in TYPESCRIPT_EXTENSIONS.
 */
function isTypeScriptFile(filepath) {
  return TYPESCRIPT_EXTENSIONS.includes(path9.extname(filepath).toLowerCase());
}
|
|
2998
|
+
/**
 * Score bonus based on the kind of declaration a chunk holds.
 *
 * @param {{type: string}} chunk - Chunk whose `type` is inspected.
 * @returns {number} 0.05 for "function", 0.04 for "class"/"interface",
 *   0.03 for "type"/"enum", 0.02 for "variable", and 0 for anything else
 *   (including "file" and "block").
 */
function calculateChunkTypeBoost(chunk) {
  const kind = chunk.type;
  if (kind === "function") {
    return 0.05;
  }
  if (kind === "class" || kind === "interface") {
    return 0.04;
  }
  if (kind === "type" || kind === "enum") {
    return 0.03;
  }
  if (kind === "variable") {
    return 0.02;
  }
  return 0;
}
|
|
3016
|
+
/**
 * Score bonus for exported declarations.
 *
 * @param {{isExported?: boolean}} chunk - Chunk to inspect.
 * @returns {number} 0.03 when `chunk.isExported` is truthy, otherwise 0.
 */
function calculateExportBoost(chunk) {
  if (chunk.isExported) {
    return 0.03;
  }
  return 0;
}
|
|
3019
|
+
|
|
3020
|
+
class TypeScriptModule {
|
|
3021
|
+
id = "language/typescript";
|
|
3022
|
+
name = "TypeScript Search";
|
|
3023
|
+
description = "TypeScript-aware code search with AST parsing and semantic embeddings";
|
|
3024
|
+
version = "1.0.0";
|
|
3025
|
+
supportsFile(filepath) {
|
|
3026
|
+
return isTypeScriptFile(filepath);
|
|
3027
|
+
}
|
|
3028
|
+
embeddingConfig = null;
|
|
3029
|
+
symbolicIndex = null;
|
|
3030
|
+
pendingSummaries = new Map;
|
|
3031
|
+
rootDir = "";
|
|
3032
|
+
logger = undefined;
|
|
3033
|
+
async initialize(config) {
|
|
3034
|
+
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3035
|
+
this.logger = config.options?.logger;
|
|
3036
|
+
if (this.logger) {
|
|
3037
|
+
this.embeddingConfig = {
|
|
3038
|
+
...this.embeddingConfig,
|
|
3039
|
+
logger: this.logger
|
|
3040
|
+
};
|
|
3041
|
+
}
|
|
3042
|
+
configureEmbeddings(this.embeddingConfig);
|
|
3043
|
+
this.pendingSummaries.clear();
|
|
3044
|
+
}
|
|
3045
|
+
async indexFile(filepath, content, ctx) {
|
|
3046
|
+
if (!isTypeScriptFile(filepath)) {
|
|
3047
|
+
return null;
|
|
3048
|
+
}
|
|
3049
|
+
this.rootDir = ctx.rootDir;
|
|
3050
|
+
const parsedChunks = parseTypeScriptCode(content, filepath);
|
|
3051
|
+
if (parsedChunks.length === 0) {
|
|
3052
|
+
return null;
|
|
3053
|
+
}
|
|
3054
|
+
const pathContext = parsePathContext(filepath);
|
|
3055
|
+
const pathPrefix = formatPathContextForEmbedding(pathContext);
|
|
3056
|
+
const chunkContents = parsedChunks.map((c) => {
|
|
3057
|
+
const namePrefix = c.name ? `${c.name}: ` : "";
|
|
3058
|
+
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3059
|
+
});
|
|
3060
|
+
const embeddings = await getEmbeddings(chunkContents);
|
|
3061
|
+
const chunks = parsedChunks.map((pc) => ({
|
|
3062
|
+
id: generateChunkId2(filepath, pc.startLine, pc.endLine),
|
|
3063
|
+
content: pc.content,
|
|
3064
|
+
startLine: pc.startLine,
|
|
3065
|
+
endLine: pc.endLine,
|
|
3066
|
+
type: pc.type,
|
|
3067
|
+
name: pc.name,
|
|
3068
|
+
isExported: pc.isExported,
|
|
3069
|
+
jsDoc: pc.jsDoc
|
|
3070
|
+
}));
|
|
3071
|
+
const references = this.extractReferences(content, filepath);
|
|
3072
|
+
const stats = await ctx.getFileStats(filepath);
|
|
3073
|
+
const currentConfig = getEmbeddingConfig();
|
|
3074
|
+
const moduleData = {
|
|
3075
|
+
embeddings,
|
|
3076
|
+
embeddingModel: currentConfig.model
|
|
3077
|
+
};
|
|
3078
|
+
const chunkTypes = [
|
|
3079
|
+
...new Set(parsedChunks.map((pc) => pc.type))
|
|
3080
|
+
];
|
|
3081
|
+
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
3082
|
+
const allKeywords = new Set;
|
|
3083
|
+
for (const pc of parsedChunks) {
|
|
3084
|
+
const keywords = extractKeywords(pc.content, pc.name);
|
|
3085
|
+
keywords.forEach((k) => allKeywords.add(k));
|
|
3086
|
+
}
|
|
3087
|
+
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3088
|
+
const fileSummary = {
|
|
3089
|
+
filepath,
|
|
3090
|
+
chunkCount: chunks.length,
|
|
3091
|
+
chunkTypes,
|
|
3092
|
+
keywords: Array.from(allKeywords),
|
|
3093
|
+
exports,
|
|
3094
|
+
lastModified: stats.lastModified,
|
|
3095
|
+
pathContext: {
|
|
3096
|
+
segments: pathContext.segments,
|
|
3097
|
+
layer: pathContext.layer,
|
|
3098
|
+
domain: pathContext.domain,
|
|
3099
|
+
depth: pathContext.depth
|
|
3100
|
+
}
|
|
3101
|
+
};
|
|
3102
|
+
this.pendingSummaries.set(filepath, fileSummary);
|
|
3103
|
+
return {
|
|
3104
|
+
filepath,
|
|
3105
|
+
lastModified: stats.lastModified,
|
|
3106
|
+
chunks,
|
|
3107
|
+
moduleData,
|
|
3108
|
+
references
|
|
3109
|
+
};
|
|
3110
|
+
}
|
|
3111
|
+
async finalize(ctx) {
|
|
3112
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3113
|
+
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3114
|
+
await this.symbolicIndex.initialize();
|
|
3115
|
+
for (const [filepath, summary] of this.pendingSummaries) {
|
|
3116
|
+
this.symbolicIndex.addFile(summary);
|
|
3117
|
+
}
|
|
3118
|
+
this.symbolicIndex.buildBM25Index();
|
|
3119
|
+
await this.symbolicIndex.save();
|
|
3120
|
+
this.pendingSummaries.clear();
|
|
3121
|
+
}
|
|
3122
|
+
async search(query, ctx, options = {}) {
|
|
3123
|
+
const {
|
|
3124
|
+
topK = DEFAULT_TOP_K2,
|
|
3125
|
+
minScore = DEFAULT_MIN_SCORE2,
|
|
3126
|
+
filePatterns
|
|
3127
|
+
} = options;
|
|
3128
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3129
|
+
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3130
|
+
let allFiles;
|
|
3131
|
+
try {
|
|
3132
|
+
await symbolicIndex.initialize();
|
|
3133
|
+
allFiles = symbolicIndex.getAllFiles();
|
|
3134
|
+
} catch {
|
|
3135
|
+
allFiles = await ctx.listIndexedFiles();
|
|
3136
|
+
}
|
|
3137
|
+
let filesToSearch = allFiles;
|
|
3138
|
+
if (filePatterns && filePatterns.length > 0) {
|
|
3139
|
+
filesToSearch = allFiles.filter((filepath) => {
|
|
3140
|
+
return filePatterns.some((pattern) => {
|
|
3141
|
+
if (pattern.startsWith("*.")) {
|
|
3142
|
+
const ext = pattern.slice(1);
|
|
3143
|
+
return filepath.endsWith(ext);
|
|
3144
|
+
}
|
|
3145
|
+
return filepath.includes(pattern);
|
|
3146
|
+
});
|
|
3147
|
+
});
|
|
3148
|
+
}
|
|
3149
|
+
const queryEmbedding = await getEmbedding(query);
|
|
3150
|
+
const bm25Index = new BM25Index;
|
|
3151
|
+
const allChunksData = [];
|
|
3152
|
+
for (const filepath of filesToSearch) {
|
|
3153
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3154
|
+
if (!fileIndex)
|
|
3155
|
+
continue;
|
|
3156
|
+
const moduleData = fileIndex.moduleData;
|
|
3157
|
+
if (!moduleData?.embeddings)
|
|
3158
|
+
continue;
|
|
3159
|
+
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3160
|
+
const chunk = fileIndex.chunks[i];
|
|
3161
|
+
const embedding = moduleData.embeddings[i];
|
|
3162
|
+
if (!embedding)
|
|
3163
|
+
continue;
|
|
3164
|
+
allChunksData.push({
|
|
3165
|
+
filepath: fileIndex.filepath,
|
|
3166
|
+
chunk,
|
|
3167
|
+
embedding
|
|
3168
|
+
});
|
|
3169
|
+
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3170
|
+
}
|
|
3171
|
+
}
|
|
3172
|
+
const bm25Results = bm25Index.search(query, topK * 3);
|
|
3173
|
+
const bm25Scores = new Map;
|
|
3174
|
+
for (const result of bm25Results) {
|
|
3175
|
+
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3176
|
+
}
|
|
3177
|
+
const queryTerms = extractQueryTerms(query);
|
|
3178
|
+
const pathBoosts = new Map;
|
|
3179
|
+
for (const filepath of filesToSearch) {
|
|
3180
|
+
const summary = symbolicIndex.getFileSummary(filepath);
|
|
3181
|
+
if (summary?.pathContext) {
|
|
3182
|
+
let boost = 0;
|
|
3183
|
+
const ctx2 = summary.pathContext;
|
|
3184
|
+
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
3185
|
+
boost += 0.1;
|
|
3186
|
+
}
|
|
3187
|
+
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
3188
|
+
boost += 0.05;
|
|
3189
|
+
}
|
|
3190
|
+
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
3191
|
+
if (segmentMatch) {
|
|
3192
|
+
boost += 0.05;
|
|
3193
|
+
}
|
|
3194
|
+
pathBoosts.set(filepath, boost);
|
|
3195
|
+
}
|
|
3196
|
+
}
|
|
3197
|
+
const results = [];
|
|
3198
|
+
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3199
|
+
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3200
|
+
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3201
|
+
const pathBoost = pathBoosts.get(filepath) || 0;
|
|
3202
|
+
const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
|
|
3203
|
+
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
3204
|
+
const exportBoost = calculateExportBoost(chunk);
|
|
3205
|
+
const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
3206
|
+
const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
|
|
3207
|
+
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3208
|
+
results.push({
|
|
3209
|
+
filepath,
|
|
3210
|
+
chunk,
|
|
3211
|
+
score: hybridScore,
|
|
3212
|
+
moduleId: this.id,
|
|
3213
|
+
context: {
|
|
3214
|
+
semanticScore,
|
|
3215
|
+
bm25Score,
|
|
3216
|
+
pathBoost,
|
|
3217
|
+
fileTypeBoost,
|
|
3218
|
+
chunkTypeBoost,
|
|
3219
|
+
exportBoost
|
|
3220
|
+
}
|
|
3221
|
+
});
|
|
3222
|
+
}
|
|
3223
|
+
}
|
|
3224
|
+
results.sort((a, b) => b.score - a.score);
|
|
3225
|
+
return results.slice(0, topK);
|
|
2735
3226
|
}
|
|
2736
|
-
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
|
|
3227
|
+
extractReferences(content, filepath) {
|
|
3228
|
+
const references = [];
|
|
3229
|
+
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
3230
|
+
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
3231
|
+
let match;
|
|
3232
|
+
while ((match = importRegex.exec(content)) !== null) {
|
|
3233
|
+
const importPath = match[1];
|
|
3234
|
+
if (importPath.startsWith(".")) {
|
|
3235
|
+
const dir = path9.dirname(filepath);
|
|
3236
|
+
const resolved = path9.normalize(path9.join(dir, importPath));
|
|
3237
|
+
references.push(resolved);
|
|
3238
|
+
}
|
|
2745
3239
|
}
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
3240
|
+
while ((match = requireRegex.exec(content)) !== null) {
|
|
3241
|
+
const importPath = match[1];
|
|
3242
|
+
if (importPath.startsWith(".")) {
|
|
3243
|
+
const dir = path9.dirname(filepath);
|
|
3244
|
+
const resolved = path9.normalize(path9.join(dir, importPath));
|
|
3245
|
+
references.push(resolved);
|
|
3246
|
+
}
|
|
2749
3247
|
}
|
|
3248
|
+
return references;
|
|
2750
3249
|
}
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
3250
|
+
}
|
|
3251
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS, supportsFile;
|
|
3252
|
+
var init_typescript = __esm(() => {
|
|
3253
|
+
init_embeddings();
|
|
3254
|
+
init_services();
|
|
3255
|
+
init_config2();
|
|
3256
|
+
init_parseCode();
|
|
3257
|
+
init_storage();
|
|
3258
|
+
TYPESCRIPT_EXTENSIONS = [
|
|
3259
|
+
".ts",
|
|
3260
|
+
".tsx",
|
|
3261
|
+
".js",
|
|
3262
|
+
".jsx",
|
|
3263
|
+
".mjs",
|
|
3264
|
+
".cjs",
|
|
3265
|
+
".mts",
|
|
3266
|
+
".cts"
|
|
3267
|
+
];
|
|
3268
|
+
supportsFile = isTypeScriptFile;
|
|
3269
|
+
});
|
|
3270
|
+
|
|
3271
|
+
// src/modules/data/json/index.ts
|
|
3272
|
+
var exports_json = {};
|
|
3273
|
+
__export(exports_json, {
|
|
3274
|
+
supportsFile: () => supportsFile2,
|
|
3275
|
+
isJsonFile: () => isJsonFile,
|
|
3276
|
+
JsonModule: () => JsonModule,
|
|
3277
|
+
JSON_EXTENSIONS: () => JSON_EXTENSIONS,
|
|
3278
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
|
|
3279
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
|
|
3280
|
+
});
|
|
3281
|
+
import * as path10 from "path";
|
|
3282
|
+
function isJsonFile(filepath) {
|
|
3283
|
+
const ext = path10.extname(filepath).toLowerCase();
|
|
3284
|
+
return JSON_EXTENSIONS.includes(ext);
|
|
3285
|
+
}
|
|
3286
|
+
function extractJsonKeys(obj, prefix = "") {
|
|
3287
|
+
const keys = [];
|
|
3288
|
+
if (obj === null || obj === undefined) {
|
|
3289
|
+
return keys;
|
|
3290
|
+
}
|
|
3291
|
+
if (Array.isArray(obj)) {
|
|
3292
|
+
obj.forEach((item, index) => {
|
|
3293
|
+
keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
|
|
3294
|
+
});
|
|
3295
|
+
} else if (typeof obj === "object") {
|
|
3296
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
3297
|
+
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
3298
|
+
keys.push(key);
|
|
3299
|
+
keys.push(...extractJsonKeys(value, fullKey));
|
|
2754
3300
|
}
|
|
2755
|
-
const results = this.bm25Index.search(query, maxCandidates);
|
|
2756
|
-
return results.map((r) => r.id);
|
|
2757
3301
|
}
|
|
2758
|
-
|
|
2759
|
-
|
|
3302
|
+
return keys;
|
|
3303
|
+
}
|
|
3304
|
+
function extractJsonKeywords(content) {
|
|
3305
|
+
try {
|
|
3306
|
+
const parsed = JSON.parse(content);
|
|
3307
|
+
const keys = extractJsonKeys(parsed);
|
|
3308
|
+
const stringValues = [];
|
|
3309
|
+
const extractStrings = (obj) => {
|
|
3310
|
+
if (typeof obj === "string") {
|
|
3311
|
+
const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
|
|
3312
|
+
stringValues.push(...words);
|
|
3313
|
+
} else if (Array.isArray(obj)) {
|
|
3314
|
+
obj.forEach(extractStrings);
|
|
3315
|
+
} else if (obj && typeof obj === "object") {
|
|
3316
|
+
Object.values(obj).forEach(extractStrings);
|
|
3317
|
+
}
|
|
3318
|
+
};
|
|
3319
|
+
extractStrings(parsed);
|
|
3320
|
+
return [...new Set([...keys, ...stringValues])];
|
|
3321
|
+
} catch {
|
|
3322
|
+
return [];
|
|
2760
3323
|
}
|
|
2761
|
-
|
|
2762
|
-
|
|
3324
|
+
}
|
|
3325
|
+
|
|
3326
|
+
class JsonModule {
|
|
3327
|
+
id = "data/json";
|
|
3328
|
+
name = "JSON Search";
|
|
3329
|
+
description = "JSON file search with structure-aware indexing";
|
|
3330
|
+
version = "1.0.0";
|
|
3331
|
+
supportsFile(filepath) {
|
|
3332
|
+
return isJsonFile(filepath);
|
|
2763
3333
|
}
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
3334
|
+
embeddingConfig = null;
|
|
3335
|
+
symbolicIndex = null;
|
|
3336
|
+
pendingSummaries = new Map;
|
|
3337
|
+
rootDir = "";
|
|
3338
|
+
logger = undefined;
|
|
3339
|
+
async initialize(config) {
|
|
3340
|
+
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3341
|
+
this.logger = config.options?.logger;
|
|
3342
|
+
if (this.logger) {
|
|
3343
|
+
this.embeddingConfig = {
|
|
3344
|
+
...this.embeddingConfig,
|
|
3345
|
+
logger: this.logger
|
|
3346
|
+
};
|
|
2776
3347
|
}
|
|
3348
|
+
configureEmbeddings(this.embeddingConfig);
|
|
3349
|
+
this.pendingSummaries.clear();
|
|
2777
3350
|
}
|
|
2778
|
-
async
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
this.
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
3351
|
+
async indexFile(filepath, content, ctx) {
|
|
3352
|
+
if (!isJsonFile(filepath)) {
|
|
3353
|
+
return null;
|
|
3354
|
+
}
|
|
3355
|
+
this.rootDir = ctx.rootDir;
|
|
3356
|
+
const textChunks = createLineBasedChunks(content, {
|
|
3357
|
+
chunkSize: 50,
|
|
3358
|
+
overlap: 10
|
|
3359
|
+
});
|
|
3360
|
+
if (textChunks.length === 0) {
|
|
3361
|
+
return null;
|
|
3362
|
+
}
|
|
3363
|
+
const chunkContents = textChunks.map((c) => {
|
|
3364
|
+
const filename = path10.basename(filepath);
|
|
3365
|
+
return `${filename}: ${c.content}`;
|
|
3366
|
+
});
|
|
3367
|
+
const embeddings = await getEmbeddings(chunkContents);
|
|
3368
|
+
const chunks = textChunks.map((tc, i) => ({
|
|
3369
|
+
id: generateChunkId(filepath, tc.startLine, tc.endLine),
|
|
3370
|
+
content: tc.content,
|
|
3371
|
+
startLine: tc.startLine,
|
|
3372
|
+
endLine: tc.endLine,
|
|
3373
|
+
type: tc.type
|
|
3374
|
+
}));
|
|
3375
|
+
const jsonKeys = extractJsonKeys((() => {
|
|
3376
|
+
try {
|
|
3377
|
+
return JSON.parse(content);
|
|
3378
|
+
} catch {
|
|
3379
|
+
return {};
|
|
2802
3380
|
}
|
|
2803
|
-
}
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
const
|
|
2807
|
-
|
|
3381
|
+
})());
|
|
3382
|
+
const stats = await ctx.getFileStats(filepath);
|
|
3383
|
+
const currentConfig = getEmbeddingConfig();
|
|
3384
|
+
const moduleData = {
|
|
3385
|
+
embeddings,
|
|
3386
|
+
embeddingModel: currentConfig.model,
|
|
3387
|
+
jsonKeys
|
|
3388
|
+
};
|
|
3389
|
+
const keywords = extractJsonKeywords(content);
|
|
3390
|
+
const fileSummary = {
|
|
3391
|
+
filepath,
|
|
3392
|
+
chunkCount: chunks.length,
|
|
3393
|
+
chunkTypes: ["file"],
|
|
3394
|
+
keywords,
|
|
3395
|
+
exports: [],
|
|
3396
|
+
lastModified: stats.lastModified
|
|
3397
|
+
};
|
|
3398
|
+
this.pendingSummaries.set(filepath, fileSummary);
|
|
3399
|
+
return {
|
|
3400
|
+
filepath,
|
|
3401
|
+
lastModified: stats.lastModified,
|
|
3402
|
+
chunks,
|
|
3403
|
+
moduleData
|
|
3404
|
+
};
|
|
2808
3405
|
}
|
|
2809
|
-
async
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
this.
|
|
3406
|
+
async finalize(ctx) {
|
|
3407
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3408
|
+
this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3409
|
+
await this.symbolicIndex.initialize();
|
|
3410
|
+
for (const [filepath, summary] of this.pendingSummaries) {
|
|
3411
|
+
this.symbolicIndex.addFile(summary);
|
|
3412
|
+
}
|
|
3413
|
+
this.symbolicIndex.buildBM25Index();
|
|
3414
|
+
await this.symbolicIndex.save();
|
|
3415
|
+
this.pendingSummaries.clear();
|
|
2814
3416
|
}
|
|
2815
|
-
async
|
|
3417
|
+
async search(query, ctx, options = {}) {
|
|
3418
|
+
const {
|
|
3419
|
+
topK = DEFAULT_TOP_K3,
|
|
3420
|
+
minScore = DEFAULT_MIN_SCORE3,
|
|
3421
|
+
filePatterns
|
|
3422
|
+
} = options;
|
|
3423
|
+
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3424
|
+
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
3425
|
+
let allFiles;
|
|
2816
3426
|
try {
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
return true;
|
|
3427
|
+
await symbolicIndex.initialize();
|
|
3428
|
+
allFiles = symbolicIndex.getAllFiles();
|
|
2820
3429
|
} catch {
|
|
2821
|
-
|
|
3430
|
+
allFiles = await ctx.listIndexedFiles();
|
|
2822
3431
|
}
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
totalDocs: 0
|
|
2835
|
-
};
|
|
3432
|
+
let filesToSearch = allFiles.filter((f) => isJsonFile(f));
|
|
3433
|
+
if (filePatterns && filePatterns.length > 0) {
|
|
3434
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
3435
|
+
return filePatterns.some((pattern) => {
|
|
3436
|
+
if (pattern.startsWith("*.")) {
|
|
3437
|
+
const ext = pattern.slice(1);
|
|
3438
|
+
return filepath.endsWith(ext);
|
|
3439
|
+
}
|
|
3440
|
+
return filepath.includes(pattern);
|
|
3441
|
+
});
|
|
3442
|
+
});
|
|
2836
3443
|
}
|
|
2837
|
-
|
|
3444
|
+
const queryEmbedding = await getEmbedding(query);
|
|
3445
|
+
const bm25Index = new BM25Index;
|
|
3446
|
+
const allChunksData = [];
|
|
3447
|
+
for (const filepath of filesToSearch) {
|
|
3448
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3449
|
+
if (!fileIndex)
|
|
3450
|
+
continue;
|
|
3451
|
+
const moduleData = fileIndex.moduleData;
|
|
3452
|
+
if (!moduleData?.embeddings)
|
|
3453
|
+
continue;
|
|
3454
|
+
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3455
|
+
const chunk = fileIndex.chunks[i];
|
|
3456
|
+
const embedding = moduleData.embeddings[i];
|
|
3457
|
+
if (!embedding)
|
|
3458
|
+
continue;
|
|
3459
|
+
allChunksData.push({
|
|
3460
|
+
filepath: fileIndex.filepath,
|
|
3461
|
+
chunk,
|
|
3462
|
+
embedding
|
|
3463
|
+
});
|
|
3464
|
+
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3465
|
+
}
|
|
3466
|
+
}
|
|
3467
|
+
const bm25Results = bm25Index.search(query, topK * 3);
|
|
3468
|
+
const bm25Scores = new Map;
|
|
3469
|
+
for (const result of bm25Results) {
|
|
3470
|
+
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3471
|
+
}
|
|
3472
|
+
const queryTerms = extractQueryTerms(query);
|
|
3473
|
+
const results = [];
|
|
3474
|
+
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3475
|
+
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3476
|
+
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3477
|
+
const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
|
|
3478
|
+
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3479
|
+
results.push({
|
|
3480
|
+
filepath,
|
|
3481
|
+
chunk,
|
|
3482
|
+
score: hybridScore,
|
|
3483
|
+
moduleId: this.id,
|
|
3484
|
+
context: {
|
|
3485
|
+
semanticScore,
|
|
3486
|
+
bm25Score
|
|
3487
|
+
}
|
|
3488
|
+
});
|
|
3489
|
+
}
|
|
3490
|
+
}
|
|
3491
|
+
results.sort((a, b) => b.score - a.score);
|
|
3492
|
+
return results.slice(0, topK);
|
|
2838
3493
|
}
|
|
2839
3494
|
}
|
|
2840
|
-
var
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
3495
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS, supportsFile2;
|
|
3496
|
+
var init_json = __esm(() => {
|
|
3497
|
+
init_embeddings();
|
|
3498
|
+
init_services();
|
|
3499
|
+
init_config2();
|
|
3500
|
+
init_storage();
|
|
3501
|
+
JSON_EXTENSIONS = [".json"];
|
|
3502
|
+
supportsFile2 = isJsonFile;
|
|
2848
3503
|
});
|
|
2849
3504
|
|
|
2850
|
-
// src/modules/
|
|
2851
|
-
var
|
|
2852
|
-
__export(
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
3505
|
+
// src/modules/docs/markdown/index.ts
|
|
3506
|
+
var exports_markdown = {};
|
|
3507
|
+
__export(exports_markdown, {
|
|
3508
|
+
supportsFile: () => supportsFile3,
|
|
3509
|
+
isMarkdownFile: () => isMarkdownFile,
|
|
3510
|
+
MarkdownModule: () => MarkdownModule,
|
|
3511
|
+
MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
|
|
3512
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
|
|
3513
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
|
|
2856
3514
|
});
|
|
2857
|
-
import * as
|
|
2858
|
-
function
|
|
2859
|
-
const
|
|
2860
|
-
|
|
2861
|
-
if (hasDocumentationTerm) {
|
|
2862
|
-
return "documentation";
|
|
2863
|
-
}
|
|
2864
|
-
if (hasImplementationTerm) {
|
|
2865
|
-
return "implementation";
|
|
2866
|
-
}
|
|
2867
|
-
return "neutral";
|
|
3515
|
+
import * as path11 from "path";
|
|
3516
|
+
function isMarkdownFile(filepath) {
|
|
3517
|
+
const ext = path11.extname(filepath).toLowerCase();
|
|
3518
|
+
return MARKDOWN_EXTENSIONS.includes(ext);
|
|
2868
3519
|
}
|
|
2869
|
-
function
|
|
2870
|
-
const
|
|
2871
|
-
|
|
2872
|
-
const
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
3520
|
+
function parseMarkdownSections(content) {
|
|
3521
|
+
const lines = content.split(`
|
|
3522
|
+
`);
|
|
3523
|
+
const sections = [];
|
|
3524
|
+
let currentSection = null;
|
|
3525
|
+
let currentContent = [];
|
|
3526
|
+
let startLine = 1;
|
|
3527
|
+
for (let i = 0;i < lines.length; i++) {
|
|
3528
|
+
const line = lines[i];
|
|
3529
|
+
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
|
|
3530
|
+
if (headingMatch) {
|
|
3531
|
+
if (currentSection) {
|
|
3532
|
+
currentSection.content = currentContent.join(`
|
|
3533
|
+
`).trim();
|
|
3534
|
+
currentSection.endLine = i;
|
|
3535
|
+
if (currentSection.content || currentSection.heading) {
|
|
3536
|
+
sections.push(currentSection);
|
|
3537
|
+
}
|
|
3538
|
+
} else if (currentContent.length > 0) {
|
|
3539
|
+
sections.push({
|
|
3540
|
+
heading: "",
|
|
3541
|
+
level: 0,
|
|
3542
|
+
content: currentContent.join(`
|
|
3543
|
+
`).trim(),
|
|
3544
|
+
startLine: 1,
|
|
3545
|
+
endLine: i
|
|
3546
|
+
});
|
|
3547
|
+
}
|
|
3548
|
+
currentSection = {
|
|
3549
|
+
heading: headingMatch[2],
|
|
3550
|
+
level: headingMatch[1].length,
|
|
3551
|
+
content: "",
|
|
3552
|
+
startLine: i + 1,
|
|
3553
|
+
endLine: lines.length
|
|
3554
|
+
};
|
|
3555
|
+
currentContent = [];
|
|
3556
|
+
} else {
|
|
3557
|
+
currentContent.push(line);
|
|
2877
3558
|
}
|
|
2878
|
-
return 0;
|
|
2879
3559
|
}
|
|
2880
|
-
if (
|
|
2881
|
-
|
|
2882
|
-
|
|
3560
|
+
if (currentSection) {
|
|
3561
|
+
currentSection.content = currentContent.join(`
|
|
3562
|
+
`).trim();
|
|
3563
|
+
currentSection.endLine = lines.length;
|
|
3564
|
+
if (currentSection.content || currentSection.heading) {
|
|
3565
|
+
sections.push(currentSection);
|
|
2883
3566
|
}
|
|
2884
|
-
|
|
3567
|
+
} else if (currentContent.length > 0) {
|
|
3568
|
+
sections.push({
|
|
3569
|
+
heading: "",
|
|
3570
|
+
level: 0,
|
|
3571
|
+
content: currentContent.join(`
|
|
3572
|
+
`).trim(),
|
|
3573
|
+
startLine: 1,
|
|
3574
|
+
endLine: lines.length
|
|
3575
|
+
});
|
|
2885
3576
|
}
|
|
2886
|
-
return
|
|
3577
|
+
return sections;
|
|
2887
3578
|
}
|
|
2888
|
-
function
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
2892
|
-
|
|
2893
|
-
|
|
2894
|
-
|
|
2895
|
-
|
|
2896
|
-
|
|
2897
|
-
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
|
|
3579
|
+
function extractMarkdownKeywords(content) {
|
|
3580
|
+
const keywords = [];
|
|
3581
|
+
const headingMatches = content.matchAll(/^#{1,6}\s+(.+)$/gm);
|
|
3582
|
+
for (const match of headingMatches) {
|
|
3583
|
+
const heading = match[1].toLowerCase();
|
|
3584
|
+
const words = heading.split(/\s+/).filter((w) => w.length > 2);
|
|
3585
|
+
keywords.push(...words);
|
|
3586
|
+
}
|
|
3587
|
+
const emphasisMatches = content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g);
|
|
3588
|
+
for (const match of emphasisMatches) {
|
|
3589
|
+
const text = (match[1] || match[2] || "").toLowerCase();
|
|
3590
|
+
const words = text.split(/\s+/).filter((w) => w.length > 2);
|
|
3591
|
+
keywords.push(...words);
|
|
3592
|
+
}
|
|
3593
|
+
const codeMatches = content.matchAll(/`([^`]+)`/g);
|
|
3594
|
+
for (const match of codeMatches) {
|
|
3595
|
+
const code = match[1].toLowerCase();
|
|
3596
|
+
if (code.length > 2 && code.length < 50) {
|
|
3597
|
+
keywords.push(code);
|
|
3598
|
+
}
|
|
2904
3599
|
}
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
3600
|
+
const linkMatches = content.matchAll(/\[([^\]]+)\]/g);
|
|
3601
|
+
for (const match of linkMatches) {
|
|
3602
|
+
const text = match[1].toLowerCase();
|
|
3603
|
+
const words = text.split(/\s+/).filter((w) => w.length > 2);
|
|
3604
|
+
keywords.push(...words);
|
|
3605
|
+
}
|
|
3606
|
+
return [...new Set(keywords)];
|
|
2908
3607
|
}
|
|
2909
3608
|
|
|
2910
|
-
class
|
|
2911
|
-
id = "
|
|
2912
|
-
name = "
|
|
2913
|
-
description = "
|
|
3609
|
+
class MarkdownModule {
|
|
3610
|
+
id = "docs/markdown";
|
|
3611
|
+
name = "Markdown Search";
|
|
3612
|
+
description = "Markdown documentation search with section-aware indexing";
|
|
2914
3613
|
version = "1.0.0";
|
|
3614
|
+
supportsFile(filepath) {
|
|
3615
|
+
return isMarkdownFile(filepath);
|
|
3616
|
+
}
|
|
2915
3617
|
embeddingConfig = null;
|
|
2916
3618
|
symbolicIndex = null;
|
|
2917
3619
|
pendingSummaries = new Map;
|
|
@@ -2930,66 +3632,53 @@ class TypeScriptModule {
|
|
|
2930
3632
|
this.pendingSummaries.clear();
|
|
2931
3633
|
}
|
|
2932
3634
|
async indexFile(filepath, content, ctx) {
|
|
3635
|
+
if (!isMarkdownFile(filepath)) {
|
|
3636
|
+
return null;
|
|
3637
|
+
}
|
|
2933
3638
|
this.rootDir = ctx.rootDir;
|
|
2934
|
-
const
|
|
2935
|
-
if (
|
|
3639
|
+
const sections = parseMarkdownSections(content);
|
|
3640
|
+
if (sections.length === 0) {
|
|
2936
3641
|
return null;
|
|
2937
3642
|
}
|
|
2938
|
-
const
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3643
|
+
const chunkContents = sections.map((s) => {
|
|
3644
|
+
const filename = path11.basename(filepath);
|
|
3645
|
+
const headingContext = s.heading ? `${s.heading}: ` : "";
|
|
3646
|
+
return `${filename} ${headingContext}${s.content}`;
|
|
2943
3647
|
});
|
|
2944
3648
|
const embeddings = await getEmbeddings(chunkContents);
|
|
2945
|
-
const chunks =
|
|
2946
|
-
id: generateChunkId(filepath,
|
|
2947
|
-
content:
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
|
|
3649
|
+
const chunks = sections.map((section, i) => ({
|
|
3650
|
+
id: generateChunkId(filepath, section.startLine, section.endLine),
|
|
3651
|
+
content: section.heading ? `## ${section.heading}
|
|
3652
|
+
|
|
3653
|
+
${section.content}` : section.content,
|
|
3654
|
+
startLine: section.startLine,
|
|
3655
|
+
endLine: section.endLine,
|
|
3656
|
+
type: "block",
|
|
3657
|
+
name: section.heading || undefined
|
|
2954
3658
|
}));
|
|
2955
|
-
const
|
|
3659
|
+
const headings = sections.filter((s) => s.heading).map((s) => s.heading);
|
|
2956
3660
|
const stats = await ctx.getFileStats(filepath);
|
|
2957
3661
|
const currentConfig = getEmbeddingConfig();
|
|
2958
3662
|
const moduleData = {
|
|
2959
3663
|
embeddings,
|
|
2960
|
-
embeddingModel: currentConfig.model
|
|
3664
|
+
embeddingModel: currentConfig.model,
|
|
3665
|
+
headings
|
|
2961
3666
|
};
|
|
2962
|
-
const
|
|
2963
|
-
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2964
|
-
];
|
|
2965
|
-
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2966
|
-
const allKeywords = new Set;
|
|
2967
|
-
for (const pc of parsedChunks) {
|
|
2968
|
-
const keywords = extractKeywords(pc.content, pc.name);
|
|
2969
|
-
keywords.forEach((k) => allKeywords.add(k));
|
|
2970
|
-
}
|
|
2971
|
-
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3667
|
+
const keywords = extractMarkdownKeywords(content);
|
|
2972
3668
|
const fileSummary = {
|
|
2973
3669
|
filepath,
|
|
2974
3670
|
chunkCount: chunks.length,
|
|
2975
|
-
chunkTypes,
|
|
2976
|
-
keywords
|
|
2977
|
-
exports,
|
|
2978
|
-
lastModified: stats.lastModified
|
|
2979
|
-
pathContext: {
|
|
2980
|
-
segments: pathContext.segments,
|
|
2981
|
-
layer: pathContext.layer,
|
|
2982
|
-
domain: pathContext.domain,
|
|
2983
|
-
depth: pathContext.depth
|
|
2984
|
-
}
|
|
3671
|
+
chunkTypes: ["block"],
|
|
3672
|
+
keywords,
|
|
3673
|
+
exports: headings,
|
|
3674
|
+
lastModified: stats.lastModified
|
|
2985
3675
|
};
|
|
2986
3676
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
2987
3677
|
return {
|
|
2988
3678
|
filepath,
|
|
2989
3679
|
lastModified: stats.lastModified,
|
|
2990
3680
|
chunks,
|
|
2991
|
-
moduleData
|
|
2992
|
-
references
|
|
3681
|
+
moduleData
|
|
2993
3682
|
};
|
|
2994
3683
|
}
|
|
2995
3684
|
async finalize(ctx) {
|
|
@@ -3005,8 +3694,8 @@ class TypeScriptModule {
|
|
|
3005
3694
|
}
|
|
3006
3695
|
async search(query, ctx, options = {}) {
|
|
3007
3696
|
const {
|
|
3008
|
-
topK =
|
|
3009
|
-
minScore =
|
|
3697
|
+
topK = DEFAULT_TOP_K4,
|
|
3698
|
+
minScore = DEFAULT_MIN_SCORE4,
|
|
3010
3699
|
filePatterns
|
|
3011
3700
|
} = options;
|
|
3012
3701
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
@@ -3018,9 +3707,9 @@ class TypeScriptModule {
|
|
|
3018
3707
|
} catch {
|
|
3019
3708
|
allFiles = await ctx.listIndexedFiles();
|
|
3020
3709
|
}
|
|
3021
|
-
let filesToSearch = allFiles;
|
|
3710
|
+
let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
|
|
3022
3711
|
if (filePatterns && filePatterns.length > 0) {
|
|
3023
|
-
filesToSearch =
|
|
3712
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
3024
3713
|
return filePatterns.some((pattern) => {
|
|
3025
3714
|
if (pattern.startsWith("*.")) {
|
|
3026
3715
|
const ext = pattern.slice(1);
|
|
@@ -3058,36 +3747,24 @@ class TypeScriptModule {
|
|
|
3058
3747
|
for (const result of bm25Results) {
|
|
3059
3748
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3060
3749
|
}
|
|
3061
|
-
const queryTerms = query
|
|
3062
|
-
const pathBoosts = new Map;
|
|
3063
|
-
for (const filepath of filesToSearch) {
|
|
3064
|
-
const summary = symbolicIndex.getFileSummary(filepath);
|
|
3065
|
-
if (summary?.pathContext) {
|
|
3066
|
-
let boost = 0;
|
|
3067
|
-
const ctx2 = summary.pathContext;
|
|
3068
|
-
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
3069
|
-
boost += 0.1;
|
|
3070
|
-
}
|
|
3071
|
-
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
3072
|
-
boost += 0.05;
|
|
3073
|
-
}
|
|
3074
|
-
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
3075
|
-
if (segmentMatch) {
|
|
3076
|
-
boost += 0.05;
|
|
3077
|
-
}
|
|
3078
|
-
pathBoosts.set(filepath, boost);
|
|
3079
|
-
}
|
|
3080
|
-
}
|
|
3750
|
+
const queryTerms = extractQueryTerms(query);
|
|
3081
3751
|
const results = [];
|
|
3082
3752
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
3083
3753
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
3084
3754
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3085
|
-
|
|
3086
|
-
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
|
|
3090
|
-
|
|
3755
|
+
let docBoost = 0;
|
|
3756
|
+
if (queryTerms.some((t) => [
|
|
3757
|
+
"docs",
|
|
3758
|
+
"documentation",
|
|
3759
|
+
"readme",
|
|
3760
|
+
"guide",
|
|
3761
|
+
"how",
|
|
3762
|
+
"what",
|
|
3763
|
+
"explain"
|
|
3764
|
+
].includes(t))) {
|
|
3765
|
+
docBoost = 0.05;
|
|
3766
|
+
}
|
|
3767
|
+
const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
3091
3768
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
3092
3769
|
results.push({
|
|
3093
3770
|
filepath,
|
|
@@ -3097,10 +3774,7 @@ class TypeScriptModule {
|
|
|
3097
3774
|
context: {
|
|
3098
3775
|
semanticScore,
|
|
3099
3776
|
bm25Score,
|
|
3100
|
-
|
|
3101
|
-
fileTypeBoost,
|
|
3102
|
-
chunkTypeBoost,
|
|
3103
|
-
exportBoost
|
|
3777
|
+
docBoost
|
|
3104
3778
|
}
|
|
3105
3779
|
});
|
|
3106
3780
|
}
|
|
@@ -3108,84 +3782,15 @@ class TypeScriptModule {
|
|
|
3108
3782
|
results.sort((a, b) => b.score - a.score);
|
|
3109
3783
|
return results.slice(0, topK);
|
|
3110
3784
|
}
|
|
3111
|
-
extractReferences(content, filepath) {
|
|
3112
|
-
const references = [];
|
|
3113
|
-
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
3114
|
-
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
3115
|
-
let match;
|
|
3116
|
-
while ((match = importRegex.exec(content)) !== null) {
|
|
3117
|
-
const importPath = match[1];
|
|
3118
|
-
if (importPath.startsWith(".")) {
|
|
3119
|
-
const dir = path8.dirname(filepath);
|
|
3120
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
3121
|
-
references.push(resolved);
|
|
3122
|
-
}
|
|
3123
|
-
}
|
|
3124
|
-
while ((match = requireRegex.exec(content)) !== null) {
|
|
3125
|
-
const importPath = match[1];
|
|
3126
|
-
if (importPath.startsWith(".")) {
|
|
3127
|
-
const dir = path8.dirname(filepath);
|
|
3128
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
3129
|
-
references.push(resolved);
|
|
3130
|
-
}
|
|
3131
|
-
}
|
|
3132
|
-
return references;
|
|
3133
|
-
}
|
|
3134
3785
|
}
|
|
3135
|
-
var
|
|
3136
|
-
var
|
|
3786
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
|
|
3787
|
+
var init_markdown = __esm(() => {
|
|
3137
3788
|
init_embeddings();
|
|
3789
|
+
init_services();
|
|
3138
3790
|
init_config2();
|
|
3139
|
-
init_parseCode();
|
|
3140
3791
|
init_storage();
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
IMPLEMENTATION_TERMS = [
|
|
3144
|
-
"function",
|
|
3145
|
-
"method",
|
|
3146
|
-
"class",
|
|
3147
|
-
"interface",
|
|
3148
|
-
"implement",
|
|
3149
|
-
"implementation",
|
|
3150
|
-
"endpoint",
|
|
3151
|
-
"route",
|
|
3152
|
-
"handler",
|
|
3153
|
-
"controller",
|
|
3154
|
-
"module",
|
|
3155
|
-
"code"
|
|
3156
|
-
];
|
|
3157
|
-
DOCUMENTATION_TERMS = [
|
|
3158
|
-
"documentation",
|
|
3159
|
-
"docs",
|
|
3160
|
-
"guide",
|
|
3161
|
-
"tutorial",
|
|
3162
|
-
"readme",
|
|
3163
|
-
"how",
|
|
3164
|
-
"what",
|
|
3165
|
-
"why",
|
|
3166
|
-
"explain",
|
|
3167
|
-
"overview",
|
|
3168
|
-
"getting",
|
|
3169
|
-
"started",
|
|
3170
|
-
"requirements",
|
|
3171
|
-
"setup",
|
|
3172
|
-
"install",
|
|
3173
|
-
"configure",
|
|
3174
|
-
"configuration"
|
|
3175
|
-
];
|
|
3176
|
-
SOURCE_CODE_EXTENSIONS = [
|
|
3177
|
-
".ts",
|
|
3178
|
-
".tsx",
|
|
3179
|
-
".js",
|
|
3180
|
-
".jsx",
|
|
3181
|
-
".mjs",
|
|
3182
|
-
".cjs",
|
|
3183
|
-
".py",
|
|
3184
|
-
".go",
|
|
3185
|
-
".rs",
|
|
3186
|
-
".java"
|
|
3187
|
-
];
|
|
3188
|
-
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
3792
|
+
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
3793
|
+
supportsFile3 = isMarkdownFile;
|
|
3189
3794
|
});
|
|
3190
3795
|
|
|
3191
3796
|
// src/modules/registry.ts
|
|
@@ -3210,8 +3815,12 @@ class ModuleRegistryImpl {
|
|
|
3210
3815
|
async function registerBuiltInModules() {
|
|
3211
3816
|
const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
|
|
3212
3817
|
const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
|
|
3818
|
+
const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
|
|
3819
|
+
const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
|
|
3213
3820
|
registry.register(new CoreModule2);
|
|
3214
3821
|
registry.register(new TypeScriptModule2);
|
|
3822
|
+
registry.register(new JsonModule2);
|
|
3823
|
+
registry.register(new MarkdownModule2);
|
|
3215
3824
|
}
|
|
3216
3825
|
var registry;
|
|
3217
3826
|
var init_registry = __esm(() => {
|
|
@@ -3219,13 +3828,13 @@ var init_registry = __esm(() => {
|
|
|
3219
3828
|
});
|
|
3220
3829
|
|
|
3221
3830
|
// src/infrastructure/introspection/projectDetector.ts
|
|
3222
|
-
import * as
|
|
3831
|
+
import * as path12 from "path";
|
|
3223
3832
|
import * as fs4 from "fs/promises";
|
|
3224
3833
|
async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
3225
3834
|
if (depth > MAX_SCAN_DEPTH)
|
|
3226
3835
|
return [];
|
|
3227
3836
|
const results = [];
|
|
3228
|
-
const fullDir = currentDir ?
|
|
3837
|
+
const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
|
|
3229
3838
|
try {
|
|
3230
3839
|
const entries = await fs4.readdir(fullDir, { withFileTypes: true });
|
|
3231
3840
|
const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
|
|
@@ -3248,10 +3857,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3248
3857
|
}
|
|
3249
3858
|
async function parsePackageJson(rootDir, relativePath) {
|
|
3250
3859
|
try {
|
|
3251
|
-
const packageJsonPath =
|
|
3860
|
+
const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
|
|
3252
3861
|
const content = await fs4.readFile(packageJsonPath, "utf-8");
|
|
3253
3862
|
const pkg = JSON.parse(content);
|
|
3254
|
-
const name = pkg.name ||
|
|
3863
|
+
const name = pkg.name || path12.basename(relativePath);
|
|
3255
3864
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
3256
3865
|
let type = "unknown";
|
|
3257
3866
|
if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
|
|
@@ -3296,7 +3905,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3296
3905
|
for (const pattern of monorepoPatterns) {
|
|
3297
3906
|
if (!dirNames.includes(pattern))
|
|
3298
3907
|
continue;
|
|
3299
|
-
const patternDir =
|
|
3908
|
+
const patternDir = path12.join(rootDir, pattern);
|
|
3300
3909
|
try {
|
|
3301
3910
|
const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
|
|
3302
3911
|
for (const subDir of subDirs) {
|
|
@@ -3327,7 +3936,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3327
3936
|
}
|
|
3328
3937
|
let rootType = "unknown";
|
|
3329
3938
|
try {
|
|
3330
|
-
const rootPkgPath =
|
|
3939
|
+
const rootPkgPath = path12.join(rootDir, "package.json");
|
|
3331
3940
|
const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
|
|
3332
3941
|
if (rootPkg.workspaces)
|
|
3333
3942
|
isMonorepo = true;
|
|
@@ -3367,7 +3976,7 @@ var init_projectDetector = __esm(() => {
|
|
|
3367
3976
|
});
|
|
3368
3977
|
|
|
3369
3978
|
// src/infrastructure/introspection/IntrospectionIndex.ts
|
|
3370
|
-
import * as
|
|
3979
|
+
import * as path13 from "path";
|
|
3371
3980
|
import * as fs5 from "fs/promises";
|
|
3372
3981
|
|
|
3373
3982
|
class IntrospectionIndex {
|
|
@@ -3381,7 +3990,7 @@ class IntrospectionIndex {
|
|
|
3381
3990
|
async initialize() {
|
|
3382
3991
|
this.structure = await detectProjectStructure(this.rootDir);
|
|
3383
3992
|
try {
|
|
3384
|
-
const configPath =
|
|
3993
|
+
const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
|
|
3385
3994
|
const configContent = await fs5.readFile(configPath, "utf-8");
|
|
3386
3995
|
const config = JSON.parse(configContent);
|
|
3387
3996
|
this.config = config.introspection || {};
|
|
@@ -3421,28 +4030,28 @@ class IntrospectionIndex {
|
|
|
3421
4030
|
}
|
|
3422
4031
|
}
|
|
3423
4032
|
async save(config) {
|
|
3424
|
-
const introDir =
|
|
4033
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3425
4034
|
await fs5.mkdir(introDir, { recursive: true });
|
|
3426
|
-
const projectPath =
|
|
4035
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3427
4036
|
await fs5.writeFile(projectPath, JSON.stringify({
|
|
3428
4037
|
version: "1.0.0",
|
|
3429
4038
|
lastUpdated: new Date().toISOString(),
|
|
3430
4039
|
structure: this.structure
|
|
3431
4040
|
}, null, 2));
|
|
3432
4041
|
for (const [filepath, intro] of this.files) {
|
|
3433
|
-
const introFilePath =
|
|
3434
|
-
await fs5.mkdir(
|
|
4042
|
+
const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
|
|
4043
|
+
await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
|
|
3435
4044
|
await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
|
|
3436
4045
|
}
|
|
3437
4046
|
}
|
|
3438
4047
|
async load(config) {
|
|
3439
|
-
const introDir =
|
|
4048
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3440
4049
|
try {
|
|
3441
|
-
const projectPath =
|
|
4050
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3442
4051
|
const projectContent = await fs5.readFile(projectPath, "utf-8");
|
|
3443
4052
|
const projectData = JSON.parse(projectContent);
|
|
3444
4053
|
this.structure = projectData.structure;
|
|
3445
|
-
await this.loadFilesRecursive(
|
|
4054
|
+
await this.loadFilesRecursive(path13.join(introDir, "files"), "");
|
|
3446
4055
|
} catch {
|
|
3447
4056
|
this.structure = null;
|
|
3448
4057
|
this.files.clear();
|
|
@@ -3452,7 +4061,7 @@ class IntrospectionIndex {
|
|
|
3452
4061
|
try {
|
|
3453
4062
|
const entries = await fs5.readdir(basePath, { withFileTypes: true });
|
|
3454
4063
|
for (const entry of entries) {
|
|
3455
|
-
const entryPath =
|
|
4064
|
+
const entryPath = path13.join(basePath, entry.name);
|
|
3456
4065
|
const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
3457
4066
|
if (entry.isDirectory()) {
|
|
3458
4067
|
await this.loadFilesRecursive(entryPath, relativePath);
|
|
@@ -3483,7 +4092,7 @@ var init_introspection2 = __esm(() => {
|
|
|
3483
4092
|
|
|
3484
4093
|
// src/app/indexer/watcher.ts
|
|
3485
4094
|
import { watch } from "chokidar";
|
|
3486
|
-
import * as
|
|
4095
|
+
import * as path14 from "path";
|
|
3487
4096
|
async function watchDirectory(rootDir, options = {}) {
|
|
3488
4097
|
const {
|
|
3489
4098
|
debounceMs = DEFAULT_DEBOUNCE_MS,
|
|
@@ -3494,7 +4103,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3494
4103
|
onFileChange,
|
|
3495
4104
|
onError
|
|
3496
4105
|
} = options;
|
|
3497
|
-
rootDir =
|
|
4106
|
+
rootDir = path14.resolve(rootDir);
|
|
3498
4107
|
const config = await loadConfig(rootDir);
|
|
3499
4108
|
const indexLocation = getIndexLocation(rootDir);
|
|
3500
4109
|
const validExtensions = new Set(config.extensions);
|
|
@@ -3504,7 +4113,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3504
4113
|
"**/.git/**"
|
|
3505
4114
|
];
|
|
3506
4115
|
function shouldWatchFile(filepath) {
|
|
3507
|
-
const ext =
|
|
4116
|
+
const ext = path14.extname(filepath);
|
|
3508
4117
|
return validExtensions.has(ext);
|
|
3509
4118
|
}
|
|
3510
4119
|
let isRunning = true;
|
|
@@ -3586,7 +4195,7 @@ async function watchDirectory(rootDir, options = {}) {
|
|
|
3586
4195
|
function handleFileEvent(event, filepath) {
|
|
3587
4196
|
if (!isRunning)
|
|
3588
4197
|
return;
|
|
3589
|
-
const relativePath =
|
|
4198
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
3590
4199
|
if (!shouldWatchFile(filepath)) {
|
|
3591
4200
|
return;
|
|
3592
4201
|
}
|
|
@@ -3665,15 +4274,48 @@ __export(exports_indexer, {
|
|
|
3665
4274
|
});
|
|
3666
4275
|
import { glob } from "glob";
|
|
3667
4276
|
import * as fs6 from "fs/promises";
|
|
3668
|
-
import * as
|
|
4277
|
+
import * as path15 from "path";
|
|
4278
|
+
async function parallelMap(items, processor, concurrency) {
|
|
4279
|
+
const results = new Array(items.length);
|
|
4280
|
+
let nextIndex = 0;
|
|
4281
|
+
async function worker() {
|
|
4282
|
+
while (nextIndex < items.length) {
|
|
4283
|
+
const index = nextIndex++;
|
|
4284
|
+
const item = items[index];
|
|
4285
|
+
try {
|
|
4286
|
+
const value = await processor(item, index);
|
|
4287
|
+
results[index] = { success: true, value };
|
|
4288
|
+
} catch (error) {
|
|
4289
|
+
results[index] = { success: false, error };
|
|
4290
|
+
}
|
|
4291
|
+
}
|
|
4292
|
+
}
|
|
4293
|
+
const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
|
|
4294
|
+
await Promise.all(workers);
|
|
4295
|
+
return results;
|
|
4296
|
+
}
|
|
4297
|
+
function formatDuration(ms) {
|
|
4298
|
+
if (ms < 1000) {
|
|
4299
|
+
return `${ms}ms`;
|
|
4300
|
+
}
|
|
4301
|
+
const seconds = ms / 1000;
|
|
4302
|
+
if (seconds < 60) {
|
|
4303
|
+
return `${seconds.toFixed(1)}s`;
|
|
4304
|
+
}
|
|
4305
|
+
const minutes = Math.floor(seconds / 60);
|
|
4306
|
+
const remainingSeconds = seconds % 60;
|
|
4307
|
+
return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
|
|
4308
|
+
}
|
|
3669
4309
|
async function indexDirectory(rootDir, options = {}) {
|
|
3670
4310
|
const verbose = options.verbose ?? false;
|
|
3671
4311
|
const quiet = options.quiet ?? false;
|
|
4312
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
|
|
3672
4313
|
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
3673
|
-
rootDir =
|
|
4314
|
+
rootDir = path15.resolve(rootDir);
|
|
3674
4315
|
const location = getIndexLocation(rootDir);
|
|
3675
4316
|
logger.info(`Indexing directory: ${rootDir}`);
|
|
3676
4317
|
logger.info(`Index location: ${location.indexDir}`);
|
|
4318
|
+
logger.debug(`Concurrency: ${concurrency}`);
|
|
3677
4319
|
const config = await loadConfig(rootDir);
|
|
3678
4320
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3679
4321
|
await introspection.initialize();
|
|
@@ -3690,8 +4332,10 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3690
4332
|
logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
|
|
3691
4333
|
const files = await findFiles(rootDir, config);
|
|
3692
4334
|
logger.info(`Found ${files.length} files to index`);
|
|
4335
|
+
const overallStart = Date.now();
|
|
3693
4336
|
const results = [];
|
|
3694
4337
|
for (const module of enabledModules) {
|
|
4338
|
+
const moduleStart = Date.now();
|
|
3695
4339
|
logger.info(`
|
|
3696
4340
|
[${module.name}] Starting indexing...`);
|
|
3697
4341
|
const moduleConfig = getModuleConfig(config, module.id);
|
|
@@ -3709,7 +4353,9 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3709
4353
|
};
|
|
3710
4354
|
await module.initialize(configWithOverrides);
|
|
3711
4355
|
}
|
|
3712
|
-
const
|
|
4356
|
+
const moduleFiles = module.supportsFile ? files.filter((f) => module.supportsFile(f)) : files;
|
|
4357
|
+
logger.info(` Processing ${moduleFiles.length} files...`);
|
|
4358
|
+
const result = await indexWithModule(rootDir, moduleFiles, module, config, verbose, introspection, logger, concurrency);
|
|
3713
4359
|
results.push(result);
|
|
3714
4360
|
if (module.finalize) {
|
|
3715
4361
|
logger.info(`[${module.name}] Building secondary indexes...`);
|
|
@@ -3717,20 +4363,29 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3717
4363
|
rootDir,
|
|
3718
4364
|
config,
|
|
3719
4365
|
readFile: async (filepath) => {
|
|
3720
|
-
const fullPath =
|
|
4366
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3721
4367
|
return fs6.readFile(fullPath, "utf-8");
|
|
3722
4368
|
},
|
|
3723
4369
|
getFileStats: async (filepath) => {
|
|
3724
|
-
const fullPath =
|
|
4370
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3725
4371
|
const stats = await fs6.stat(fullPath);
|
|
3726
4372
|
return { lastModified: stats.mtime.toISOString() };
|
|
3727
4373
|
}
|
|
3728
4374
|
};
|
|
3729
4375
|
await module.finalize(ctx);
|
|
3730
4376
|
}
|
|
3731
|
-
|
|
4377
|
+
const moduleDuration = Date.now() - moduleStart;
|
|
4378
|
+
result.durationMs = moduleDuration;
|
|
4379
|
+
logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors (${formatDuration(moduleDuration)})`);
|
|
3732
4380
|
}
|
|
3733
4381
|
await introspection.save(config);
|
|
4382
|
+
const overallDuration = Date.now() - overallStart;
|
|
4383
|
+
logger.info(`
|
|
4384
|
+
Indexing complete in ${formatDuration(overallDuration)}`);
|
|
4385
|
+
const totalIndexed = results.reduce((sum, r) => sum + r.indexed, 0);
|
|
4386
|
+
const totalSkipped = results.reduce((sum, r) => sum + r.skipped, 0);
|
|
4387
|
+
const totalErrors = results.reduce((sum, r) => sum + r.errors, 0);
|
|
4388
|
+
logger.info(`Total: ${totalIndexed} indexed, ${totalSkipped} skipped, ${totalErrors} errors`);
|
|
3734
4389
|
await updateGlobalManifest(rootDir, enabledModules, config);
|
|
3735
4390
|
return results;
|
|
3736
4391
|
}
|
|
@@ -3752,7 +4407,7 @@ async function deleteIndex(rootDir) {
|
|
|
3752
4407
|
} catch {}
|
|
3753
4408
|
}
|
|
3754
4409
|
async function resetIndex(rootDir) {
|
|
3755
|
-
rootDir =
|
|
4410
|
+
rootDir = path15.resolve(rootDir);
|
|
3756
4411
|
const status = await getIndexStatus(rootDir);
|
|
3757
4412
|
if (!status.exists) {
|
|
3758
4413
|
throw new Error(`No index found for ${rootDir}`);
|
|
@@ -3767,7 +4422,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3767
4422
|
const verbose = options.verbose ?? false;
|
|
3768
4423
|
const quiet = options.quiet ?? false;
|
|
3769
4424
|
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
3770
|
-
rootDir =
|
|
4425
|
+
rootDir = path15.resolve(rootDir);
|
|
3771
4426
|
const status = await getIndexStatus(rootDir);
|
|
3772
4427
|
if (!status.exists) {
|
|
3773
4428
|
logger.info(`No index found. Creating index...
|
|
@@ -3794,7 +4449,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3794
4449
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3795
4450
|
await introspection.initialize();
|
|
3796
4451
|
const currentFiles = await findFiles(rootDir, config);
|
|
3797
|
-
const currentFileSet = new Set(currentFiles.map((f) =>
|
|
4452
|
+
const currentFileSet = new Set(currentFiles.map((f) => path15.relative(rootDir, f)));
|
|
3798
4453
|
let totalIndexed = 0;
|
|
3799
4454
|
let totalRemoved = 0;
|
|
3800
4455
|
let totalUnchanged = 0;
|
|
@@ -3824,11 +4479,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3824
4479
|
}
|
|
3825
4480
|
for (const filepath of filesToRemove) {
|
|
3826
4481
|
logger.debug(` Removing stale: ${filepath}`);
|
|
3827
|
-
const indexFilePath =
|
|
4482
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3828
4483
|
try {
|
|
3829
4484
|
await fs6.unlink(indexFilePath);
|
|
3830
4485
|
} catch {}
|
|
3831
|
-
const symbolicFilePath =
|
|
4486
|
+
const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3832
4487
|
try {
|
|
3833
4488
|
await fs6.unlink(symbolicFilePath);
|
|
3834
4489
|
} catch {}
|
|
@@ -3839,11 +4494,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3839
4494
|
rootDir,
|
|
3840
4495
|
config,
|
|
3841
4496
|
readFile: async (filepath) => {
|
|
3842
|
-
const fullPath =
|
|
4497
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3843
4498
|
return fs6.readFile(fullPath, "utf-8");
|
|
3844
4499
|
},
|
|
3845
4500
|
getFileStats: async (filepath) => {
|
|
3846
|
-
const fullPath =
|
|
4501
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3847
4502
|
const stats = await fs6.stat(fullPath);
|
|
3848
4503
|
return { lastModified: stats.mtime.toISOString() };
|
|
3849
4504
|
},
|
|
@@ -3852,7 +4507,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3852
4507
|
const totalFiles = currentFiles.length;
|
|
3853
4508
|
for (let i = 0;i < currentFiles.length; i++) {
|
|
3854
4509
|
const filepath = currentFiles[i];
|
|
3855
|
-
const relativePath =
|
|
4510
|
+
const relativePath = path15.relative(rootDir, filepath);
|
|
3856
4511
|
const progress = `[${i + 1}/${totalFiles}]`;
|
|
3857
4512
|
try {
|
|
3858
4513
|
const stats = await fs6.stat(filepath);
|
|
@@ -3903,7 +4558,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3903
4558
|
unchanged: totalUnchanged
|
|
3904
4559
|
};
|
|
3905
4560
|
}
|
|
3906
|
-
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger) {
|
|
4561
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
|
|
3907
4562
|
const result = {
|
|
3908
4563
|
moduleId: module.id,
|
|
3909
4564
|
indexed: 0,
|
|
@@ -3912,7 +4567,7 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3912
4567
|
};
|
|
3913
4568
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
3914
4569
|
const indexPath = getModuleIndexPath(rootDir, module.id, config);
|
|
3915
|
-
const currentFileSet = new Set(files.map((f) =>
|
|
4570
|
+
const currentFileSet = new Set(files.map((f) => path15.relative(rootDir, f)));
|
|
3916
4571
|
const filesToRemove = [];
|
|
3917
4572
|
for (const filepath of Object.keys(manifest.files)) {
|
|
3918
4573
|
if (!currentFileSet.has(filepath)) {
|
|
@@ -3923,11 +4578,11 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3923
4578
|
logger.info(` Removing ${filesToRemove.length} stale entries...`);
|
|
3924
4579
|
for (const filepath of filesToRemove) {
|
|
3925
4580
|
logger.debug(` Removing: ${filepath}`);
|
|
3926
|
-
const indexFilePath =
|
|
4581
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3927
4582
|
try {
|
|
3928
4583
|
await fs6.unlink(indexFilePath);
|
|
3929
4584
|
} catch {}
|
|
3930
|
-
const symbolicFilePath =
|
|
4585
|
+
const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3931
4586
|
try {
|
|
3932
4587
|
await fs6.unlink(symbolicFilePath);
|
|
3933
4588
|
} catch {}
|
|
@@ -3939,52 +4594,76 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3939
4594
|
rootDir,
|
|
3940
4595
|
config,
|
|
3941
4596
|
readFile: async (filepath) => {
|
|
3942
|
-
const fullPath =
|
|
4597
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3943
4598
|
return fs6.readFile(fullPath, "utf-8");
|
|
3944
4599
|
},
|
|
3945
4600
|
getFileStats: async (filepath) => {
|
|
3946
|
-
const fullPath =
|
|
4601
|
+
const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
|
|
3947
4602
|
const stats = await fs6.stat(fullPath);
|
|
3948
4603
|
return { lastModified: stats.mtime.toISOString() };
|
|
3949
4604
|
},
|
|
3950
4605
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3951
4606
|
};
|
|
3952
4607
|
const totalFiles = files.length;
|
|
3953
|
-
|
|
3954
|
-
|
|
3955
|
-
const relativePath =
|
|
3956
|
-
const progress = `[${i + 1}/${totalFiles}]`;
|
|
4608
|
+
let completedCount = 0;
|
|
4609
|
+
const processFile = async (filepath, _index) => {
|
|
4610
|
+
const relativePath = path15.relative(rootDir, filepath);
|
|
3957
4611
|
try {
|
|
3958
4612
|
const stats = await fs6.stat(filepath);
|
|
3959
4613
|
const lastModified = stats.mtime.toISOString();
|
|
3960
4614
|
const existingEntry = manifest.files[relativePath];
|
|
3961
4615
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
3962
|
-
|
|
3963
|
-
|
|
3964
|
-
|
|
4616
|
+
completedCount++;
|
|
4617
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
|
|
4618
|
+
return { relativePath, status: "skipped" };
|
|
3965
4619
|
}
|
|
3966
4620
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3967
4621
|
introspection.addFile(relativePath, content);
|
|
3968
|
-
|
|
4622
|
+
completedCount++;
|
|
4623
|
+
logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
|
|
3969
4624
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
3970
4625
|
if (!fileIndex) {
|
|
3971
|
-
logger.debug(` ${
|
|
3972
|
-
|
|
3973
|
-
continue;
|
|
4626
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
|
|
4627
|
+
return { relativePath, status: "skipped" };
|
|
3974
4628
|
}
|
|
3975
4629
|
await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
|
|
3976
|
-
|
|
4630
|
+
return {
|
|
4631
|
+
relativePath,
|
|
4632
|
+
status: "indexed",
|
|
3977
4633
|
lastModified,
|
|
3978
4634
|
chunkCount: fileIndex.chunks.length
|
|
3979
4635
|
};
|
|
3980
|
-
result.indexed++;
|
|
3981
4636
|
} catch (error) {
|
|
3982
|
-
|
|
3983
|
-
|
|
4637
|
+
completedCount++;
|
|
4638
|
+
return { relativePath, status: "error", error };
|
|
4639
|
+
}
|
|
4640
|
+
};
|
|
4641
|
+
logger.debug(` Using concurrency: ${concurrency}`);
|
|
4642
|
+
const results = await parallelMap(files, processFile, concurrency);
|
|
4643
|
+
logger.clearProgress();
|
|
4644
|
+
for (const item of results) {
|
|
4645
|
+
if (!item.success) {
|
|
3984
4646
|
result.errors++;
|
|
4647
|
+
continue;
|
|
4648
|
+
}
|
|
4649
|
+
const fileResult = item.value;
|
|
4650
|
+
switch (fileResult.status) {
|
|
4651
|
+
case "indexed":
|
|
4652
|
+
manifest.files[fileResult.relativePath] = {
|
|
4653
|
+
lastModified: fileResult.lastModified,
|
|
4654
|
+
chunkCount: fileResult.chunkCount
|
|
4655
|
+
};
|
|
4656
|
+
result.indexed++;
|
|
4657
|
+
break;
|
|
4658
|
+
case "skipped":
|
|
4659
|
+
result.skipped++;
|
|
4660
|
+
break;
|
|
4661
|
+
case "error":
|
|
4662
|
+
logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
|
|
4663
|
+
result.errors++;
|
|
4664
|
+
break;
|
|
3985
4665
|
}
|
|
3986
4666
|
}
|
|
3987
|
-
logger.clearProgress();
|
|
3988
4667
|
manifest.lastUpdated = new Date().toISOString();
|
|
3989
4668
|
await writeModuleManifest(rootDir, module.id, manifest, config);
|
|
3990
4669
|
return result;
|
|
@@ -4019,13 +4698,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
4019
4698
|
}
|
|
4020
4699
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
4021
4700
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
4022
|
-
await fs6.mkdir(
|
|
4701
|
+
await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
|
|
4023
4702
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
4024
4703
|
}
|
|
4025
4704
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
4026
4705
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
4027
|
-
const indexFilePath =
|
|
4028
|
-
await fs6.mkdir(
|
|
4706
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4707
|
+
await fs6.mkdir(path15.dirname(indexFilePath), { recursive: true });
|
|
4029
4708
|
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
4030
4709
|
}
|
|
4031
4710
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
@@ -4035,13 +4714,13 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
4035
4714
|
lastUpdated: new Date().toISOString(),
|
|
4036
4715
|
modules: modules.map((m) => m.id)
|
|
4037
4716
|
};
|
|
4038
|
-
await fs6.mkdir(
|
|
4717
|
+
await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
|
|
4039
4718
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
4040
4719
|
}
|
|
4041
4720
|
async function cleanupIndex(rootDir, options = {}) {
|
|
4042
4721
|
const verbose = options.verbose ?? false;
|
|
4043
4722
|
const logger = options.logger ?? createLogger({ verbose });
|
|
4044
|
-
rootDir =
|
|
4723
|
+
rootDir = path15.resolve(rootDir);
|
|
4045
4724
|
logger.info(`Cleaning up index in: ${rootDir}`);
|
|
4046
4725
|
const config = await loadConfig(rootDir);
|
|
4047
4726
|
await registerBuiltInModules();
|
|
@@ -4071,7 +4750,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
|
4071
4750
|
const filesToRemove = [];
|
|
4072
4751
|
const updatedFiles = {};
|
|
4073
4752
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
4074
|
-
const fullPath =
|
|
4753
|
+
const fullPath = path15.join(rootDir, filepath);
|
|
4075
4754
|
try {
|
|
4076
4755
|
await fs6.access(fullPath);
|
|
4077
4756
|
updatedFiles[filepath] = entry;
|
|
@@ -4083,7 +4762,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
|
4083
4762
|
}
|
|
4084
4763
|
}
|
|
4085
4764
|
for (const filepath of filesToRemove) {
|
|
4086
|
-
const indexFilePath =
|
|
4765
|
+
const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4087
4766
|
try {
|
|
4088
4767
|
await fs6.unlink(indexFilePath);
|
|
4089
4768
|
} catch {}
|
|
@@ -4099,7 +4778,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
4099
4778
|
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
4100
4779
|
for (const entry of entries) {
|
|
4101
4780
|
if (entry.isDirectory()) {
|
|
4102
|
-
const subDir =
|
|
4781
|
+
const subDir = path15.join(dir, entry.name);
|
|
4103
4782
|
await cleanupEmptyDirectories(subDir);
|
|
4104
4783
|
}
|
|
4105
4784
|
}
|
|
@@ -4114,7 +4793,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
4114
4793
|
}
|
|
4115
4794
|
}
|
|
4116
4795
|
async function getIndexStatus(rootDir) {
|
|
4117
|
-
rootDir =
|
|
4796
|
+
rootDir = path15.resolve(rootDir);
|
|
4118
4797
|
const config = await loadConfig(rootDir);
|
|
4119
4798
|
const location = getIndexLocation(rootDir);
|
|
4120
4799
|
const indexDir = location.indexDir;
|
|
@@ -4150,7 +4829,7 @@ async function getIndexStatus(rootDir) {
|
|
|
4150
4829
|
}
|
|
4151
4830
|
} catch {
|
|
4152
4831
|
try {
|
|
4153
|
-
const entries = await fs6.readdir(
|
|
4832
|
+
const entries = await fs6.readdir(path15.join(indexDir, "index"));
|
|
4154
4833
|
if (entries.length > 0) {
|
|
4155
4834
|
status.exists = true;
|
|
4156
4835
|
for (const entry of entries) {
|
|
@@ -4170,7 +4849,7 @@ async function getIndexStatus(rootDir) {
|
|
|
4170
4849
|
}
|
|
4171
4850
|
return status;
|
|
4172
4851
|
}
|
|
4173
|
-
var INDEX_SCHEMA_VERSION = "1.0.0";
|
|
4852
|
+
var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
|
|
4174
4853
|
var init_indexer = __esm(() => {
|
|
4175
4854
|
init_config2();
|
|
4176
4855
|
init_registry();
|
|
@@ -4191,9 +4870,9 @@ __export(exports_search, {
|
|
|
4191
4870
|
formatSearchResults: () => formatSearchResults
|
|
4192
4871
|
});
|
|
4193
4872
|
import * as fs7 from "fs/promises";
|
|
4194
|
-
import * as
|
|
4873
|
+
import * as path16 from "path";
|
|
4195
4874
|
async function search(rootDir, query, options = {}) {
|
|
4196
|
-
rootDir =
|
|
4875
|
+
rootDir = path16.resolve(rootDir);
|
|
4197
4876
|
const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
|
|
4198
4877
|
if (ensureFresh) {
|
|
4199
4878
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
@@ -4227,9 +4906,17 @@ async function search(rootDir, query, options = {}) {
|
|
|
4227
4906
|
const moduleResults = await module.search(query, ctx, options);
|
|
4228
4907
|
allResults.push(...moduleResults);
|
|
4229
4908
|
}
|
|
4230
|
-
|
|
4909
|
+
let filteredResults = allResults;
|
|
4910
|
+
if (options.pathFilter && options.pathFilter.length > 0) {
|
|
4911
|
+
const normalizedFilters = options.pathFilter.map((p) => p.replace(/\\/g, "/").replace(/^\//, "").replace(/\/$/, ""));
|
|
4912
|
+
filteredResults = allResults.filter((result) => {
|
|
4913
|
+
const normalizedPath = result.filepath.replace(/\\/g, "/");
|
|
4914
|
+
return normalizedFilters.some((filter) => normalizedPath.startsWith(filter + "/") || normalizedPath === filter || normalizedPath.startsWith("./" + filter + "/") || normalizedPath === "./" + filter);
|
|
4915
|
+
});
|
|
4916
|
+
}
|
|
4917
|
+
filteredResults.sort((a, b) => b.score - a.score);
|
|
4231
4918
|
const topK = options.topK ?? 10;
|
|
4232
|
-
return
|
|
4919
|
+
return filteredResults.slice(0, topK);
|
|
4233
4920
|
}
|
|
4234
4921
|
function createSearchContext(rootDir, moduleId, config) {
|
|
4235
4922
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
@@ -4238,7 +4925,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4238
4925
|
config,
|
|
4239
4926
|
loadFileIndex: async (filepath) => {
|
|
4240
4927
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
4241
|
-
const indexFilePath = hasExtension ?
|
|
4928
|
+
const indexFilePath = hasExtension ? path16.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path16.join(indexPath, filepath + ".json");
|
|
4242
4929
|
try {
|
|
4243
4930
|
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
4244
4931
|
return JSON.parse(content);
|
|
@@ -4250,7 +4937,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
4250
4937
|
const files = [];
|
|
4251
4938
|
await traverseDirectory(indexPath, files, indexPath);
|
|
4252
4939
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
4253
|
-
const relative4 =
|
|
4940
|
+
const relative4 = path16.relative(indexPath, f);
|
|
4254
4941
|
return relative4.replace(/\.json$/, "");
|
|
4255
4942
|
});
|
|
4256
4943
|
}
|
|
@@ -4260,7 +4947,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
4260
4947
|
try {
|
|
4261
4948
|
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
4262
4949
|
for (const entry of entries) {
|
|
4263
|
-
const fullPath =
|
|
4950
|
+
const fullPath = path16.join(dir, entry.name);
|
|
4264
4951
|
if (entry.isDirectory()) {
|
|
4265
4952
|
await traverseDirectory(fullPath, files, basePath);
|
|
4266
4953
|
} else if (entry.isFile()) {
|
|
@@ -4338,7 +5025,7 @@ init_logger();
|
|
|
4338
5025
|
// package.json
|
|
4339
5026
|
var package_default = {
|
|
4340
5027
|
name: "raggrep",
|
|
4341
|
-
version: "0.
|
|
5028
|
+
version: "0.5.1",
|
|
4342
5029
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
4343
5030
|
type: "module",
|
|
4344
5031
|
main: "./dist/index.js",
|
|
@@ -4474,6 +5161,25 @@ function parseFlags(args2) {
|
|
|
4474
5161
|
console.error("--type requires a file extension (e.g., ts, tsx, js)");
|
|
4475
5162
|
process.exit(1);
|
|
4476
5163
|
}
|
|
5164
|
+
} else if (arg === "--concurrency" || arg === "-c") {
|
|
5165
|
+
const c = parseInt(args2[++i], 10);
|
|
5166
|
+
if (!isNaN(c) && c > 0) {
|
|
5167
|
+
flags.concurrency = c;
|
|
5168
|
+
} else {
|
|
5169
|
+
console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
|
|
5170
|
+
process.exit(1);
|
|
5171
|
+
}
|
|
5172
|
+
} else if (arg === "--filter" || arg === "-f") {
|
|
5173
|
+
const filterPath = args2[++i];
|
|
5174
|
+
if (filterPath) {
|
|
5175
|
+
if (!flags.pathFilter) {
|
|
5176
|
+
flags.pathFilter = [];
|
|
5177
|
+
}
|
|
5178
|
+
flags.pathFilter.push(filterPath);
|
|
5179
|
+
} else {
|
|
5180
|
+
console.error("--filter requires a path (e.g., src/auth)");
|
|
5181
|
+
process.exit(1);
|
|
5182
|
+
}
|
|
4477
5183
|
} else if (!arg.startsWith("-")) {
|
|
4478
5184
|
flags.remaining.push(arg);
|
|
4479
5185
|
}
|
|
@@ -4493,10 +5199,11 @@ Usage:
|
|
|
4493
5199
|
raggrep index [options]
|
|
4494
5200
|
|
|
4495
5201
|
Options:
|
|
4496
|
-
-w, --watch
|
|
4497
|
-
-m, --model <name>
|
|
4498
|
-
-
|
|
4499
|
-
-
|
|
5202
|
+
-w, --watch Watch for file changes and re-index automatically
|
|
5203
|
+
-m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
|
|
5204
|
+
-c, --concurrency <n> Number of files to process in parallel (default: 4)
|
|
5205
|
+
-v, --verbose Show detailed progress
|
|
5206
|
+
-h, --help Show this help message
|
|
4500
5207
|
|
|
4501
5208
|
Available Models:
|
|
4502
5209
|
${models}
|
|
@@ -4507,6 +5214,7 @@ Examples:
|
|
|
4507
5214
|
raggrep index
|
|
4508
5215
|
raggrep index --watch
|
|
4509
5216
|
raggrep index --model bge-small-en-v1.5
|
|
5217
|
+
raggrep index --concurrency 8
|
|
4510
5218
|
raggrep index --verbose
|
|
4511
5219
|
`);
|
|
4512
5220
|
process.exit(0);
|
|
@@ -4520,6 +5228,7 @@ Examples:
|
|
|
4520
5228
|
const results = await indexDirectory2(process.cwd(), {
|
|
4521
5229
|
model: flags.model,
|
|
4522
5230
|
verbose: flags.verbose,
|
|
5231
|
+
concurrency: flags.concurrency,
|
|
4523
5232
|
logger
|
|
4524
5233
|
});
|
|
4525
5234
|
console.log(`
|
|
@@ -4579,6 +5288,7 @@ Options:
|
|
|
4579
5288
|
-k, --top <n> Number of results to return (default: 10)
|
|
4580
5289
|
-s, --min-score <n> Minimum similarity score 0-1 (default: 0.15)
|
|
4581
5290
|
-t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
|
|
5291
|
+
-f, --filter <path> Filter by path prefix (can be used multiple times)
|
|
4582
5292
|
-h, --help Show this help message
|
|
4583
5293
|
|
|
4584
5294
|
Note:
|
|
@@ -4593,6 +5303,8 @@ Examples:
|
|
|
4593
5303
|
raggrep query "handle errors" --top 5
|
|
4594
5304
|
raggrep query "database" --min-score 0.1
|
|
4595
5305
|
raggrep query "interface" --type ts
|
|
5306
|
+
raggrep query "login" --filter src/auth
|
|
5307
|
+
raggrep query "api" --filter src/api --filter src/routes
|
|
4596
5308
|
`);
|
|
4597
5309
|
process.exit(0);
|
|
4598
5310
|
}
|
|
@@ -4633,6 +5345,7 @@ Examples:
|
|
|
4633
5345
|
topK: flags.topK ?? 10,
|
|
4634
5346
|
minScore: flags.minScore,
|
|
4635
5347
|
filePatterns,
|
|
5348
|
+
pathFilter: flags.pathFilter,
|
|
4636
5349
|
ensureFresh: false
|
|
4637
5350
|
});
|
|
4638
5351
|
console.log(formatSearchResults2(results));
|
|
@@ -4773,4 +5486,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
4773
5486
|
}
|
|
4774
5487
|
main();
|
|
4775
5488
|
|
|
4776
|
-
//# debugId=
|
|
5489
|
+
//# debugId=E73618F0DDE8326264756E2164756E21
|