raggrep 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -348,6 +348,20 @@ function createDefaultConfig() {
348
348
  options: {
349
349
  embeddingModel: "all-MiniLM-L6-v2"
350
350
  }
351
+ },
352
+ {
353
+ id: "data/json",
354
+ enabled: true,
355
+ options: {
356
+ embeddingModel: "all-MiniLM-L6-v2"
357
+ }
358
+ },
359
+ {
360
+ id: "docs/markdown",
361
+ enabled: true,
362
+ options: {
363
+ embeddingModel: "all-MiniLM-L6-v2"
364
+ }
351
365
  }
352
366
  ]
353
367
  };
@@ -391,16 +405,18 @@ var init_config = __esm(() => {
391
405
  ".jsx",
392
406
  ".mjs",
393
407
  ".cjs",
408
+ ".mts",
409
+ ".cts",
410
+ ".json",
411
+ ".md",
394
412
  ".py",
395
413
  ".go",
396
414
  ".rs",
397
415
  ".java",
398
- ".json",
399
416
  ".yaml",
400
417
  ".yml",
401
418
  ".toml",
402
419
  ".sql",
403
- ".md",
404
420
  ".txt"
405
421
  ];
406
422
  });
@@ -2292,221 +2308,6 @@ var init_core = __esm(() => {
2292
2308
  init_symbols();
2293
2309
  });
2294
2310
 
2295
- // src/domain/services/similarity.ts
2296
- function cosineSimilarity(a, b) {
2297
- if (a.length !== b.length) {
2298
- throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
2299
- }
2300
- let dotProduct = 0;
2301
- let normA = 0;
2302
- let normB = 0;
2303
- for (let i = 0;i < a.length; i++) {
2304
- dotProduct += a[i] * b[i];
2305
- normA += a[i] * a[i];
2306
- normB += b[i] * b[i];
2307
- }
2308
- const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
2309
- if (magnitude === 0)
2310
- return 0;
2311
- return dotProduct / magnitude;
2312
- }
2313
-
2314
- // src/modules/language/typescript/parseCode.ts
2315
- import * as ts from "typescript";
2316
- function parseCode(content, filepath) {
2317
- const ext = filepath.split(".").pop()?.toLowerCase();
2318
- if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
2319
- return parseTypeScript(content, filepath);
2320
- }
2321
- return parseGenericCode(content);
2322
- }
2323
- function parseTypeScript(content, filepath) {
2324
- const chunks = [];
2325
- const lines = content.split(`
2326
- `);
2327
- const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
2328
- function getLineNumbers(node) {
2329
- const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
2330
- const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
2331
- return {
2332
- startLine: start.line + 1,
2333
- endLine: end.line + 1
2334
- };
2335
- }
2336
- function getNodeText(node) {
2337
- return node.getText(sourceFile);
2338
- }
2339
- function isExported(node) {
2340
- if (!ts.canHaveModifiers(node))
2341
- return false;
2342
- const modifiers = ts.getModifiers(node);
2343
- return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
2344
- }
2345
- function getJSDoc(node) {
2346
- const jsDocNodes = ts.getJSDocCommentsAndTags(node);
2347
- if (jsDocNodes.length === 0)
2348
- return;
2349
- return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
2350
- `);
2351
- }
2352
- function getFunctionName(node) {
2353
- if (ts.isFunctionDeclaration(node) && node.name) {
2354
- return node.name.text;
2355
- }
2356
- if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
2357
- return node.name.text;
2358
- }
2359
- if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
2360
- return node.name.text;
2361
- }
2362
- return;
2363
- }
2364
- function visit(node) {
2365
- const { startLine, endLine } = getLineNumbers(node);
2366
- if (ts.isFunctionDeclaration(node) && node.name) {
2367
- chunks.push({
2368
- content: getNodeText(node),
2369
- startLine,
2370
- endLine,
2371
- type: "function",
2372
- name: node.name.text,
2373
- isExported: isExported(node),
2374
- jsDoc: getJSDoc(node)
2375
- });
2376
- return;
2377
- }
2378
- if (ts.isVariableStatement(node)) {
2379
- for (const decl of node.declarationList.declarations) {
2380
- if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2381
- const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2382
- chunks.push({
2383
- content: getNodeText(node),
2384
- startLine,
2385
- endLine,
2386
- type: "function",
2387
- name,
2388
- isExported: isExported(node),
2389
- jsDoc: getJSDoc(node)
2390
- });
2391
- return;
2392
- }
2393
- }
2394
- }
2395
- if (ts.isClassDeclaration(node) && node.name) {
2396
- chunks.push({
2397
- content: getNodeText(node),
2398
- startLine,
2399
- endLine,
2400
- type: "class",
2401
- name: node.name.text,
2402
- isExported: isExported(node),
2403
- jsDoc: getJSDoc(node)
2404
- });
2405
- return;
2406
- }
2407
- if (ts.isInterfaceDeclaration(node)) {
2408
- chunks.push({
2409
- content: getNodeText(node),
2410
- startLine,
2411
- endLine,
2412
- type: "interface",
2413
- name: node.name.text,
2414
- isExported: isExported(node),
2415
- jsDoc: getJSDoc(node)
2416
- });
2417
- return;
2418
- }
2419
- if (ts.isTypeAliasDeclaration(node)) {
2420
- chunks.push({
2421
- content: getNodeText(node),
2422
- startLine,
2423
- endLine,
2424
- type: "type",
2425
- name: node.name.text,
2426
- isExported: isExported(node),
2427
- jsDoc: getJSDoc(node)
2428
- });
2429
- return;
2430
- }
2431
- if (ts.isEnumDeclaration(node)) {
2432
- chunks.push({
2433
- content: getNodeText(node),
2434
- startLine,
2435
- endLine,
2436
- type: "enum",
2437
- name: node.name.text,
2438
- isExported: isExported(node),
2439
- jsDoc: getJSDoc(node)
2440
- });
2441
- return;
2442
- }
2443
- if (ts.isVariableStatement(node) && isExported(node)) {
2444
- for (const decl of node.declarationList.declarations) {
2445
- if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2446
- continue;
2447
- }
2448
- const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2449
- chunks.push({
2450
- content: getNodeText(node),
2451
- startLine,
2452
- endLine,
2453
- type: "variable",
2454
- name,
2455
- isExported: true,
2456
- jsDoc: getJSDoc(node)
2457
- });
2458
- }
2459
- return;
2460
- }
2461
- ts.forEachChild(node, visit);
2462
- }
2463
- ts.forEachChild(sourceFile, visit);
2464
- if (chunks.length === 0) {
2465
- return parseGenericCode(content);
2466
- }
2467
- return chunks;
2468
- }
2469
- function parseGenericCode(content) {
2470
- const chunks = [];
2471
- const lines = content.split(`
2472
- `);
2473
- const CHUNK_SIZE = 30;
2474
- const OVERLAP = 5;
2475
- if (lines.length <= CHUNK_SIZE) {
2476
- return [
2477
- {
2478
- content,
2479
- startLine: 1,
2480
- endLine: lines.length,
2481
- type: "file"
2482
- }
2483
- ];
2484
- }
2485
- for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
2486
- const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
2487
- chunks.push({
2488
- content: lines.slice(i, endIdx).join(`
2489
- `),
2490
- startLine: i + 1,
2491
- endLine: endIdx,
2492
- type: "block"
2493
- });
2494
- if (endIdx >= lines.length)
2495
- break;
2496
- }
2497
- return chunks;
2498
- }
2499
- function generateChunkId(filepath, startLine, endLine) {
2500
- const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2501
- return `${safePath}-${startLine}-${endLine}`;
2502
- }
2503
- var init_parseCode = () => {};
2504
-
2505
- // src/infrastructure/storage/fileIndexStorage.ts
2506
- var init_fileIndexStorage = __esm(() => {
2507
- init_entities();
2508
- });
2509
-
2510
2311
  // src/domain/services/keywords.ts
2511
2312
  function extractKeywords(content, name, maxKeywords = 50) {
2512
2313
  const keywords = new Set;
@@ -2695,222 +2496,1105 @@ var init_keywords = __esm(() => {
2695
2496
  };
2696
2497
  });
2697
2498
 
2698
- // src/infrastructure/storage/symbolicIndex.ts
2699
- import * as fs3 from "fs/promises";
2700
- import * as path7 from "path";
2499
+ // src/domain/services/similarity.ts
2500
+ function cosineSimilarity(a, b) {
2501
+ if (a.length !== b.length) {
2502
+ throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
2503
+ }
2504
+ let dotProduct = 0;
2505
+ let normA = 0;
2506
+ let normB = 0;
2507
+ for (let i = 0;i < a.length; i++) {
2508
+ dotProduct += a[i] * b[i];
2509
+ normA += a[i] * a[i];
2510
+ normB += b[i] * b[i];
2511
+ }
2512
+ const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
2513
+ if (magnitude === 0)
2514
+ return 0;
2515
+ return dotProduct / magnitude;
2516
+ }
2701
2517
 
2702
- class SymbolicIndex {
2703
- meta = null;
2704
- fileSummaries = new Map;
2705
- bm25Index = null;
2706
- symbolicPath;
2707
- moduleId;
2708
- constructor(indexDir, moduleId) {
2709
- this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
2710
- this.moduleId = moduleId;
2518
+ // src/domain/services/queryIntent.ts
2519
+ import * as path7 from "path";
2520
+ function detectQueryIntent(queryTerms) {
2521
+ const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2522
+ const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2523
+ if (hasDocumentationTerm) {
2524
+ return "documentation";
2711
2525
  }
2712
- async initialize() {
2713
- try {
2526
+ if (hasImplementationTerm) {
2527
+ return "implementation";
2528
+ }
2529
+ return "neutral";
2530
+ }
2531
+ function extractQueryTerms(query) {
2532
+ return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
2533
+ }
2534
+ function isSourceCodeFile(filepath) {
2535
+ const ext = path7.extname(filepath).toLowerCase();
2536
+ return SOURCE_CODE_EXTENSIONS.includes(ext);
2537
+ }
2538
+ function isDocFile(filepath) {
2539
+ const ext = path7.extname(filepath).toLowerCase();
2540
+ return DOC_EXTENSIONS.includes(ext);
2541
+ }
2542
+ function calculateFileTypeBoost(filepath, queryTerms) {
2543
+ const isSourceCode = isSourceCodeFile(filepath);
2544
+ const isDoc = isDocFile(filepath);
2545
+ const intent = detectQueryIntent(queryTerms);
2546
+ if (intent === "implementation") {
2547
+ if (isSourceCode) {
2548
+ return 0.06;
2549
+ }
2550
+ return 0;
2551
+ }
2552
+ if (intent === "documentation") {
2553
+ if (isDoc) {
2554
+ return 0.08;
2555
+ }
2556
+ return 0;
2557
+ }
2558
+ return 0;
2559
+ }
2560
+ var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2561
+ var init_queryIntent = __esm(() => {
2562
+ IMPLEMENTATION_TERMS = [
2563
+ "function",
2564
+ "method",
2565
+ "class",
2566
+ "interface",
2567
+ "implement",
2568
+ "implementation",
2569
+ "endpoint",
2570
+ "route",
2571
+ "handler",
2572
+ "controller",
2573
+ "module",
2574
+ "code"
2575
+ ];
2576
+ DOCUMENTATION_TERMS = [
2577
+ "documentation",
2578
+ "docs",
2579
+ "guide",
2580
+ "tutorial",
2581
+ "readme",
2582
+ "how",
2583
+ "what",
2584
+ "why",
2585
+ "explain",
2586
+ "overview",
2587
+ "getting",
2588
+ "started",
2589
+ "requirements",
2590
+ "setup",
2591
+ "install",
2592
+ "configure",
2593
+ "configuration"
2594
+ ];
2595
+ SOURCE_CODE_EXTENSIONS = [
2596
+ ".ts",
2597
+ ".tsx",
2598
+ ".js",
2599
+ ".jsx",
2600
+ ".mjs",
2601
+ ".cjs",
2602
+ ".py",
2603
+ ".go",
2604
+ ".rs",
2605
+ ".java"
2606
+ ];
2607
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2608
+ });
2609
+
2610
+ // src/domain/services/chunking.ts
2611
+ function createLineBasedChunks(content, options = {}) {
2612
+ const {
2613
+ chunkSize = DEFAULT_CHUNK_SIZE,
2614
+ overlap = DEFAULT_OVERLAP,
2615
+ minLinesForMultipleChunks = chunkSize
2616
+ } = options;
2617
+ const lines = content.split(`
2618
+ `);
2619
+ const chunks = [];
2620
+ if (lines.length <= minLinesForMultipleChunks) {
2621
+ return [
2622
+ {
2623
+ content,
2624
+ startLine: 1,
2625
+ endLine: lines.length,
2626
+ type: "file"
2627
+ }
2628
+ ];
2629
+ }
2630
+ for (let i = 0;i < lines.length; i += chunkSize - overlap) {
2631
+ const endIdx = Math.min(i + chunkSize, lines.length);
2632
+ chunks.push({
2633
+ content: lines.slice(i, endIdx).join(`
2634
+ `),
2635
+ startLine: i + 1,
2636
+ endLine: endIdx,
2637
+ type: "block"
2638
+ });
2639
+ if (endIdx >= lines.length)
2640
+ break;
2641
+ }
2642
+ return chunks;
2643
+ }
2644
+ function generateChunkId(filepath, startLine, endLine) {
2645
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2646
+ return `${safePath}-${startLine}-${endLine}`;
2647
+ }
2648
+ var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
2649
+
2650
+ // src/domain/services/index.ts
2651
+ var init_services = __esm(() => {
2652
+ init_keywords();
2653
+ init_queryIntent();
2654
+ });
2655
+
2656
+ // src/modules/language/typescript/parseCode.ts
2657
+ import * as ts from "typescript";
2658
+ function parseTypeScriptCode(content, filepath) {
2659
+ return parseTypeScript(content, filepath);
2660
+ }
2661
+ function parseTypeScript(content, filepath) {
2662
+ const chunks = [];
2663
+ const lines = content.split(`
2664
+ `);
2665
+ const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
2666
+ function getLineNumbers(node) {
2667
+ const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
2668
+ const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
2669
+ return {
2670
+ startLine: start.line + 1,
2671
+ endLine: end.line + 1
2672
+ };
2673
+ }
2674
+ function getNodeText(node) {
2675
+ return node.getText(sourceFile);
2676
+ }
2677
+ function isExported(node) {
2678
+ if (!ts.canHaveModifiers(node))
2679
+ return false;
2680
+ const modifiers = ts.getModifiers(node);
2681
+ return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
2682
+ }
2683
+ function getJSDoc(node) {
2684
+ const jsDocNodes = ts.getJSDocCommentsAndTags(node);
2685
+ if (jsDocNodes.length === 0)
2686
+ return;
2687
+ return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
2688
+ `);
2689
+ }
2690
+ function getFunctionName(node) {
2691
+ if (ts.isFunctionDeclaration(node) && node.name) {
2692
+ return node.name.text;
2693
+ }
2694
+ if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
2695
+ return node.name.text;
2696
+ }
2697
+ if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
2698
+ return node.name.text;
2699
+ }
2700
+ return;
2701
+ }
2702
+ function visit(node) {
2703
+ const { startLine, endLine } = getLineNumbers(node);
2704
+ if (ts.isFunctionDeclaration(node) && node.name) {
2705
+ chunks.push({
2706
+ content: getNodeText(node),
2707
+ startLine,
2708
+ endLine,
2709
+ type: "function",
2710
+ name: node.name.text,
2711
+ isExported: isExported(node),
2712
+ jsDoc: getJSDoc(node)
2713
+ });
2714
+ return;
2715
+ }
2716
+ if (ts.isVariableStatement(node)) {
2717
+ for (const decl of node.declarationList.declarations) {
2718
+ if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2719
+ const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2720
+ chunks.push({
2721
+ content: getNodeText(node),
2722
+ startLine,
2723
+ endLine,
2724
+ type: "function",
2725
+ name,
2726
+ isExported: isExported(node),
2727
+ jsDoc: getJSDoc(node)
2728
+ });
2729
+ return;
2730
+ }
2731
+ }
2732
+ }
2733
+ if (ts.isClassDeclaration(node) && node.name) {
2734
+ chunks.push({
2735
+ content: getNodeText(node),
2736
+ startLine,
2737
+ endLine,
2738
+ type: "class",
2739
+ name: node.name.text,
2740
+ isExported: isExported(node),
2741
+ jsDoc: getJSDoc(node)
2742
+ });
2743
+ return;
2744
+ }
2745
+ if (ts.isInterfaceDeclaration(node)) {
2746
+ chunks.push({
2747
+ content: getNodeText(node),
2748
+ startLine,
2749
+ endLine,
2750
+ type: "interface",
2751
+ name: node.name.text,
2752
+ isExported: isExported(node),
2753
+ jsDoc: getJSDoc(node)
2754
+ });
2755
+ return;
2756
+ }
2757
+ if (ts.isTypeAliasDeclaration(node)) {
2758
+ chunks.push({
2759
+ content: getNodeText(node),
2760
+ startLine,
2761
+ endLine,
2762
+ type: "type",
2763
+ name: node.name.text,
2764
+ isExported: isExported(node),
2765
+ jsDoc: getJSDoc(node)
2766
+ });
2767
+ return;
2768
+ }
2769
+ if (ts.isEnumDeclaration(node)) {
2770
+ chunks.push({
2771
+ content: getNodeText(node),
2772
+ startLine,
2773
+ endLine,
2774
+ type: "enum",
2775
+ name: node.name.text,
2776
+ isExported: isExported(node),
2777
+ jsDoc: getJSDoc(node)
2778
+ });
2779
+ return;
2780
+ }
2781
+ if (ts.isVariableStatement(node) && isExported(node)) {
2782
+ for (const decl of node.declarationList.declarations) {
2783
+ if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2784
+ continue;
2785
+ }
2786
+ const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2787
+ chunks.push({
2788
+ content: getNodeText(node),
2789
+ startLine,
2790
+ endLine,
2791
+ type: "variable",
2792
+ name,
2793
+ isExported: true,
2794
+ jsDoc: getJSDoc(node)
2795
+ });
2796
+ }
2797
+ return;
2798
+ }
2799
+ ts.forEachChild(node, visit);
2800
+ }
2801
+ ts.forEachChild(sourceFile, visit);
2802
+ if (chunks.length === 0) {
2803
+ const lines2 = content.split(`
2804
+ `);
2805
+ return [
2806
+ {
2807
+ content,
2808
+ startLine: 1,
2809
+ endLine: lines2.length,
2810
+ type: "file"
2811
+ }
2812
+ ];
2813
+ }
2814
+ return chunks;
2815
+ }
2816
+ function generateChunkId2(filepath, startLine, endLine) {
2817
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2818
+ return `${safePath}-${startLine}-${endLine}`;
2819
+ }
2820
+ var init_parseCode = () => {};
2821
+
2822
+ // src/infrastructure/storage/fileIndexStorage.ts
2823
+ var init_fileIndexStorage = __esm(() => {
2824
+ init_entities();
2825
+ });
2826
+
2827
+ // src/infrastructure/storage/symbolicIndex.ts
2828
+ import * as fs3 from "fs/promises";
2829
+ import * as path8 from "path";
2830
+
2831
+ class SymbolicIndex {
2832
+ meta = null;
2833
+ fileSummaries = new Map;
2834
+ bm25Index = null;
2835
+ symbolicPath;
2836
+ moduleId;
2837
+ constructor(indexDir, moduleId) {
2838
+ this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
2839
+ this.moduleId = moduleId;
2840
+ }
2841
+ async initialize() {
2842
+ try {
2714
2843
  await this.load();
2715
2844
  } catch {
2716
- this.meta = {
2717
- version: "1.0.0",
2718
- lastUpdated: new Date().toISOString(),
2719
- moduleId: this.moduleId,
2720
- fileCount: 0,
2721
- bm25Data: {
2722
- avgDocLength: 0,
2723
- documentFrequencies: {},
2724
- totalDocs: 0
2845
+ this.meta = {
2846
+ version: "1.0.0",
2847
+ lastUpdated: new Date().toISOString(),
2848
+ moduleId: this.moduleId,
2849
+ fileCount: 0,
2850
+ bm25Data: {
2851
+ avgDocLength: 0,
2852
+ documentFrequencies: {},
2853
+ totalDocs: 0
2854
+ }
2855
+ };
2856
+ this.bm25Index = new BM25Index;
2857
+ }
2858
+ }
2859
+ addFile(summary) {
2860
+ this.fileSummaries.set(summary.filepath, summary);
2861
+ }
2862
+ removeFile(filepath) {
2863
+ return this.fileSummaries.delete(filepath);
2864
+ }
2865
+ buildBM25Index() {
2866
+ this.bm25Index = new BM25Index;
2867
+ for (const [filepath, summary] of this.fileSummaries) {
2868
+ const content = [
2869
+ ...summary.keywords,
2870
+ ...summary.exports,
2871
+ ...extractPathKeywords(filepath)
2872
+ ].join(" ");
2873
+ this.bm25Index.addDocuments([{ id: filepath, content }]);
2874
+ }
2875
+ if (this.meta) {
2876
+ this.meta.fileCount = this.fileSummaries.size;
2877
+ this.meta.bm25Data.totalDocs = this.fileSummaries.size;
2878
+ }
2879
+ }
2880
+ findCandidates(query, maxCandidates = 20) {
2881
+ if (!this.bm25Index) {
2882
+ return Array.from(this.fileSummaries.keys());
2883
+ }
2884
+ const results = this.bm25Index.search(query, maxCandidates);
2885
+ return results.map((r) => r.id);
2886
+ }
2887
+ getAllFiles() {
2888
+ return Array.from(this.fileSummaries.keys());
2889
+ }
2890
+ getFileSummary(filepath) {
2891
+ return this.fileSummaries.get(filepath);
2892
+ }
2893
+ async save() {
2894
+ if (!this.meta)
2895
+ throw new Error("Index not initialized");
2896
+ this.meta.lastUpdated = new Date().toISOString();
2897
+ this.meta.fileCount = this.fileSummaries.size;
2898
+ await fs3.mkdir(this.symbolicPath, { recursive: true });
2899
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2900
+ await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2901
+ for (const [filepath, summary] of this.fileSummaries) {
2902
+ const summaryPath = this.getFileSummaryPath(filepath);
2903
+ await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
2904
+ await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
2905
+ }
2906
+ }
2907
+ async load() {
2908
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2909
+ const metaContent = await fs3.readFile(metaPath, "utf-8");
2910
+ this.meta = JSON.parse(metaContent);
2911
+ this.fileSummaries.clear();
2912
+ await this.loadFileSummariesRecursive(this.symbolicPath);
2913
+ this.buildBM25Index();
2914
+ }
2915
+ async loadFileSummariesRecursive(dir) {
2916
+ try {
2917
+ const entries = await fs3.readdir(dir, { withFileTypes: true });
2918
+ for (const entry of entries) {
2919
+ const fullPath = path8.join(dir, entry.name);
2920
+ if (entry.isDirectory()) {
2921
+ await this.loadFileSummariesRecursive(fullPath);
2922
+ } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
2923
+ try {
2924
+ const content = await fs3.readFile(fullPath, "utf-8");
2925
+ const summary = JSON.parse(content);
2926
+ if (summary.filepath) {
2927
+ this.fileSummaries.set(summary.filepath, summary);
2928
+ }
2929
+ } catch {}
2930
+ }
2931
+ }
2932
+ } catch {}
2933
+ }
2934
+ getFileSummaryPath(filepath) {
2935
+ const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2936
+ return path8.join(this.symbolicPath, jsonPath);
2937
+ }
2938
+ async deleteFileSummary(filepath) {
2939
+ try {
2940
+ await fs3.unlink(this.getFileSummaryPath(filepath));
2941
+ } catch {}
2942
+ this.fileSummaries.delete(filepath);
2943
+ }
2944
+ async exists() {
2945
+ try {
2946
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2947
+ await fs3.access(metaPath);
2948
+ return true;
2949
+ } catch {
2950
+ return false;
2951
+ }
2952
+ }
2953
+ get size() {
2954
+ return this.fileSummaries.size;
2955
+ }
2956
+ clear() {
2957
+ this.fileSummaries.clear();
2958
+ if (this.meta) {
2959
+ this.meta.fileCount = 0;
2960
+ this.meta.bm25Data = {
2961
+ avgDocLength: 0,
2962
+ documentFrequencies: {},
2963
+ totalDocs: 0
2964
+ };
2965
+ }
2966
+ this.bm25Index = new BM25Index;
2967
+ }
2968
+ }
2969
+ var init_symbolicIndex = __esm(() => {
2970
+ init_keywords();
2971
+ });
2972
+
2973
+ // src/infrastructure/storage/index.ts
2974
+ var init_storage = __esm(() => {
2975
+ init_fileIndexStorage();
2976
+ init_symbolicIndex();
2977
+ });
2978
+
2979
+ // src/modules/language/typescript/index.ts
2980
+ var exports_typescript = {};
2981
+ __export(exports_typescript, {
2982
+ isTypeScriptFile: () => isTypeScriptFile,
2983
+ TypeScriptModule: () => TypeScriptModule,
2984
+ TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
2985
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
2986
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2987
+ });
2988
+ import * as path9 from "path";
2989
+ function isTypeScriptFile(filepath) {
2990
+ const ext = path9.extname(filepath).toLowerCase();
2991
+ return TYPESCRIPT_EXTENSIONS.includes(ext);
2992
+ }
2993
+ function calculateChunkTypeBoost(chunk) {
2994
+ switch (chunk.type) {
2995
+ case "function":
2996
+ return 0.05;
2997
+ case "class":
2998
+ case "interface":
2999
+ return 0.04;
3000
+ case "type":
3001
+ case "enum":
3002
+ return 0.03;
3003
+ case "variable":
3004
+ return 0.02;
3005
+ case "file":
3006
+ case "block":
3007
+ default:
3008
+ return 0;
3009
+ }
3010
+ }
3011
+ function calculateExportBoost(chunk) {
3012
+ return chunk.isExported ? 0.03 : 0;
3013
+ }
3014
+
3015
+ class TypeScriptModule {
3016
+ id = "language/typescript";
3017
+ name = "TypeScript Search";
3018
+ description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3019
+ version = "1.0.0";
3020
+ embeddingConfig = null;
3021
+ symbolicIndex = null;
3022
+ pendingSummaries = new Map;
3023
+ rootDir = "";
3024
+ logger = undefined;
3025
+ async initialize(config) {
3026
+ this.embeddingConfig = getEmbeddingConfigFromModule(config);
3027
+ this.logger = config.options?.logger;
3028
+ if (this.logger) {
3029
+ this.embeddingConfig = {
3030
+ ...this.embeddingConfig,
3031
+ logger: this.logger
3032
+ };
3033
+ }
3034
+ configureEmbeddings(this.embeddingConfig);
3035
+ this.pendingSummaries.clear();
3036
+ }
3037
+ async indexFile(filepath, content, ctx) {
3038
+ if (!isTypeScriptFile(filepath)) {
3039
+ return null;
3040
+ }
3041
+ this.rootDir = ctx.rootDir;
3042
+ const parsedChunks = parseTypeScriptCode(content, filepath);
3043
+ if (parsedChunks.length === 0) {
3044
+ return null;
3045
+ }
3046
+ const pathContext = parsePathContext(filepath);
3047
+ const pathPrefix = formatPathContextForEmbedding(pathContext);
3048
+ const chunkContents = parsedChunks.map((c) => {
3049
+ const namePrefix = c.name ? `${c.name}: ` : "";
3050
+ return `${pathPrefix} ${namePrefix}${c.content}`;
3051
+ });
3052
+ const embeddings = await getEmbeddings(chunkContents);
3053
+ const chunks = parsedChunks.map((pc) => ({
3054
+ id: generateChunkId2(filepath, pc.startLine, pc.endLine),
3055
+ content: pc.content,
3056
+ startLine: pc.startLine,
3057
+ endLine: pc.endLine,
3058
+ type: pc.type,
3059
+ name: pc.name,
3060
+ isExported: pc.isExported,
3061
+ jsDoc: pc.jsDoc
3062
+ }));
3063
+ const references = this.extractReferences(content, filepath);
3064
+ const stats = await ctx.getFileStats(filepath);
3065
+ const currentConfig = getEmbeddingConfig();
3066
+ const moduleData = {
3067
+ embeddings,
3068
+ embeddingModel: currentConfig.model
3069
+ };
3070
+ const chunkTypes = [
3071
+ ...new Set(parsedChunks.map((pc) => pc.type))
3072
+ ];
3073
+ const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
3074
+ const allKeywords = new Set;
3075
+ for (const pc of parsedChunks) {
3076
+ const keywords = extractKeywords(pc.content, pc.name);
3077
+ keywords.forEach((k) => allKeywords.add(k));
3078
+ }
3079
+ pathContext.keywords.forEach((k) => allKeywords.add(k));
3080
+ const fileSummary = {
3081
+ filepath,
3082
+ chunkCount: chunks.length,
3083
+ chunkTypes,
3084
+ keywords: Array.from(allKeywords),
3085
+ exports,
3086
+ lastModified: stats.lastModified,
3087
+ pathContext: {
3088
+ segments: pathContext.segments,
3089
+ layer: pathContext.layer,
3090
+ domain: pathContext.domain,
3091
+ depth: pathContext.depth
3092
+ }
3093
+ };
3094
+ this.pendingSummaries.set(filepath, fileSummary);
3095
+ return {
3096
+ filepath,
3097
+ lastModified: stats.lastModified,
3098
+ chunks,
3099
+ moduleData,
3100
+ references
3101
+ };
3102
+ }
3103
+ async finalize(ctx) {
3104
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3105
+ this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
3106
+ await this.symbolicIndex.initialize();
3107
+ for (const [filepath, summary] of this.pendingSummaries) {
3108
+ this.symbolicIndex.addFile(summary);
3109
+ }
3110
+ this.symbolicIndex.buildBM25Index();
3111
+ await this.symbolicIndex.save();
3112
+ this.pendingSummaries.clear();
3113
+ }
3114
+ async search(query, ctx, options = {}) {
3115
+ const {
3116
+ topK = DEFAULT_TOP_K2,
3117
+ minScore = DEFAULT_MIN_SCORE2,
3118
+ filePatterns
3119
+ } = options;
3120
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3121
+ const symbolicIndex = new SymbolicIndex(indexDir, this.id);
3122
+ let allFiles;
3123
+ try {
3124
+ await symbolicIndex.initialize();
3125
+ allFiles = symbolicIndex.getAllFiles();
3126
+ } catch {
3127
+ allFiles = await ctx.listIndexedFiles();
3128
+ }
3129
+ let filesToSearch = allFiles;
3130
+ if (filePatterns && filePatterns.length > 0) {
3131
+ filesToSearch = allFiles.filter((filepath) => {
3132
+ return filePatterns.some((pattern) => {
3133
+ if (pattern.startsWith("*.")) {
3134
+ const ext = pattern.slice(1);
3135
+ return filepath.endsWith(ext);
3136
+ }
3137
+ return filepath.includes(pattern);
3138
+ });
3139
+ });
3140
+ }
3141
+ const queryEmbedding = await getEmbedding(query);
3142
+ const bm25Index = new BM25Index;
3143
+ const allChunksData = [];
3144
+ for (const filepath of filesToSearch) {
3145
+ const fileIndex = await ctx.loadFileIndex(filepath);
3146
+ if (!fileIndex)
3147
+ continue;
3148
+ const moduleData = fileIndex.moduleData;
3149
+ if (!moduleData?.embeddings)
3150
+ continue;
3151
+ for (let i = 0;i < fileIndex.chunks.length; i++) {
3152
+ const chunk = fileIndex.chunks[i];
3153
+ const embedding = moduleData.embeddings[i];
3154
+ if (!embedding)
3155
+ continue;
3156
+ allChunksData.push({
3157
+ filepath: fileIndex.filepath,
3158
+ chunk,
3159
+ embedding
3160
+ });
3161
+ bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3162
+ }
3163
+ }
3164
+ const bm25Results = bm25Index.search(query, topK * 3);
3165
+ const bm25Scores = new Map;
3166
+ for (const result of bm25Results) {
3167
+ bm25Scores.set(result.id, normalizeScore(result.score, 3));
3168
+ }
3169
+ const queryTerms = extractQueryTerms(query);
3170
+ const pathBoosts = new Map;
3171
+ for (const filepath of filesToSearch) {
3172
+ const summary = symbolicIndex.getFileSummary(filepath);
3173
+ if (summary?.pathContext) {
3174
+ let boost = 0;
3175
+ const ctx2 = summary.pathContext;
3176
+ if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
3177
+ boost += 0.1;
2725
3178
  }
2726
- };
2727
- this.bm25Index = new BM25Index;
3179
+ if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
3180
+ boost += 0.05;
3181
+ }
3182
+ const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
3183
+ if (segmentMatch) {
3184
+ boost += 0.05;
3185
+ }
3186
+ pathBoosts.set(filepath, boost);
3187
+ }
3188
+ }
3189
+ const results = [];
3190
+ for (const { filepath, chunk, embedding } of allChunksData) {
3191
+ const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3192
+ const bm25Score = bm25Scores.get(chunk.id) || 0;
3193
+ const pathBoost = pathBoosts.get(filepath) || 0;
3194
+ const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
3195
+ const chunkTypeBoost = calculateChunkTypeBoost(chunk);
3196
+ const exportBoost = calculateExportBoost(chunk);
3197
+ const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
3198
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3199
+ if (hybridScore >= minScore || bm25Score > 0.3) {
3200
+ results.push({
3201
+ filepath,
3202
+ chunk,
3203
+ score: hybridScore,
3204
+ moduleId: this.id,
3205
+ context: {
3206
+ semanticScore,
3207
+ bm25Score,
3208
+ pathBoost,
3209
+ fileTypeBoost,
3210
+ chunkTypeBoost,
3211
+ exportBoost
3212
+ }
3213
+ });
3214
+ }
2728
3215
  }
3216
+ results.sort((a, b) => b.score - a.score);
3217
+ return results.slice(0, topK);
2729
3218
  }
2730
- addFile(summary) {
2731
- this.fileSummaries.set(summary.filepath, summary);
2732
- }
2733
- removeFile(filepath) {
2734
- return this.fileSummaries.delete(filepath);
2735
- }
2736
- buildBM25Index() {
2737
- this.bm25Index = new BM25Index;
2738
- for (const [filepath, summary] of this.fileSummaries) {
2739
- const content = [
2740
- ...summary.keywords,
2741
- ...summary.exports,
2742
- ...extractPathKeywords(filepath)
2743
- ].join(" ");
2744
- this.bm25Index.addDocuments([{ id: filepath, content }]);
3219
+ extractReferences(content, filepath) {
3220
+ const references = [];
3221
+ const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
3222
+ const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
3223
+ let match;
3224
+ while ((match = importRegex.exec(content)) !== null) {
3225
+ const importPath = match[1];
3226
+ if (importPath.startsWith(".")) {
3227
+ const dir = path9.dirname(filepath);
3228
+ const resolved = path9.normalize(path9.join(dir, importPath));
3229
+ references.push(resolved);
3230
+ }
2745
3231
  }
2746
- if (this.meta) {
2747
- this.meta.fileCount = this.fileSummaries.size;
2748
- this.meta.bm25Data.totalDocs = this.fileSummaries.size;
3232
+ while ((match = requireRegex.exec(content)) !== null) {
3233
+ const importPath = match[1];
3234
+ if (importPath.startsWith(".")) {
3235
+ const dir = path9.dirname(filepath);
3236
+ const resolved = path9.normalize(path9.join(dir, importPath));
3237
+ references.push(resolved);
3238
+ }
2749
3239
  }
3240
+ return references;
2750
3241
  }
2751
- findCandidates(query, maxCandidates = 20) {
2752
- if (!this.bm25Index) {
2753
- return Array.from(this.fileSummaries.keys());
3242
+ }
3243
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS;
3244
+ var init_typescript = __esm(() => {
3245
+ init_embeddings();
3246
+ init_services();
3247
+ init_config2();
3248
+ init_parseCode();
3249
+ init_storage();
3250
+ TYPESCRIPT_EXTENSIONS = [
3251
+ ".ts",
3252
+ ".tsx",
3253
+ ".js",
3254
+ ".jsx",
3255
+ ".mjs",
3256
+ ".cjs",
3257
+ ".mts",
3258
+ ".cts"
3259
+ ];
3260
+ });
3261
+
3262
+ // src/modules/data/json/index.ts
3263
+ var exports_json = {};
3264
+ __export(exports_json, {
3265
+ isJsonFile: () => isJsonFile,
3266
+ JsonModule: () => JsonModule,
3267
+ JSON_EXTENSIONS: () => JSON_EXTENSIONS,
3268
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
3269
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
3270
+ });
3271
+ import * as path10 from "path";
3272
+ function isJsonFile(filepath) {
3273
+ const ext = path10.extname(filepath).toLowerCase();
3274
+ return JSON_EXTENSIONS.includes(ext);
3275
+ }
3276
+ function extractJsonKeys(obj, prefix = "") {
3277
+ const keys = [];
3278
+ if (obj === null || obj === undefined) {
3279
+ return keys;
3280
+ }
3281
+ if (Array.isArray(obj)) {
3282
+ obj.forEach((item, index) => {
3283
+ keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
3284
+ });
3285
+ } else if (typeof obj === "object") {
3286
+ for (const [key, value] of Object.entries(obj)) {
3287
+ const fullKey = prefix ? `${prefix}.${key}` : key;
3288
+ keys.push(key);
3289
+ keys.push(...extractJsonKeys(value, fullKey));
2754
3290
  }
2755
- const results = this.bm25Index.search(query, maxCandidates);
2756
- return results.map((r) => r.id);
2757
- }
2758
- getAllFiles() {
2759
- return Array.from(this.fileSummaries.keys());
2760
3291
  }
2761
- getFileSummary(filepath) {
2762
- return this.fileSummaries.get(filepath);
3292
+ return keys;
3293
+ }
3294
+ function extractJsonKeywords(content) {
3295
+ try {
3296
+ const parsed = JSON.parse(content);
3297
+ const keys = extractJsonKeys(parsed);
3298
+ const stringValues = [];
3299
+ const extractStrings = (obj) => {
3300
+ if (typeof obj === "string") {
3301
+ const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
3302
+ stringValues.push(...words);
3303
+ } else if (Array.isArray(obj)) {
3304
+ obj.forEach(extractStrings);
3305
+ } else if (obj && typeof obj === "object") {
3306
+ Object.values(obj).forEach(extractStrings);
3307
+ }
3308
+ };
3309
+ extractStrings(parsed);
3310
+ return [...new Set([...keys, ...stringValues])];
3311
+ } catch {
3312
+ return [];
2763
3313
  }
2764
- async save() {
2765
- if (!this.meta)
2766
- throw new Error("Index not initialized");
2767
- this.meta.lastUpdated = new Date().toISOString();
2768
- this.meta.fileCount = this.fileSummaries.size;
2769
- await fs3.mkdir(this.symbolicPath, { recursive: true });
2770
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2771
- await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2772
- for (const [filepath, summary] of this.fileSummaries) {
2773
- const summaryPath = this.getFileSummaryPath(filepath);
2774
- await fs3.mkdir(path7.dirname(summaryPath), { recursive: true });
2775
- await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
3314
+ }
3315
+
3316
+ class JsonModule {
3317
+ id = "data/json";
3318
+ name = "JSON Search";
3319
+ description = "JSON file search with structure-aware indexing";
3320
+ version = "1.0.0";
3321
+ embeddingConfig = null;
3322
+ symbolicIndex = null;
3323
+ pendingSummaries = new Map;
3324
+ rootDir = "";
3325
+ logger = undefined;
3326
+ async initialize(config) {
3327
+ this.embeddingConfig = getEmbeddingConfigFromModule(config);
3328
+ this.logger = config.options?.logger;
3329
+ if (this.logger) {
3330
+ this.embeddingConfig = {
3331
+ ...this.embeddingConfig,
3332
+ logger: this.logger
3333
+ };
2776
3334
  }
3335
+ configureEmbeddings(this.embeddingConfig);
3336
+ this.pendingSummaries.clear();
2777
3337
  }
2778
- async load() {
2779
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2780
- const metaContent = await fs3.readFile(metaPath, "utf-8");
2781
- this.meta = JSON.parse(metaContent);
2782
- this.fileSummaries.clear();
2783
- await this.loadFileSummariesRecursive(this.symbolicPath);
2784
- this.buildBM25Index();
2785
- }
2786
- async loadFileSummariesRecursive(dir) {
2787
- try {
2788
- const entries = await fs3.readdir(dir, { withFileTypes: true });
2789
- for (const entry of entries) {
2790
- const fullPath = path7.join(dir, entry.name);
2791
- if (entry.isDirectory()) {
2792
- await this.loadFileSummariesRecursive(fullPath);
2793
- } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
2794
- try {
2795
- const content = await fs3.readFile(fullPath, "utf-8");
2796
- const summary = JSON.parse(content);
2797
- if (summary.filepath) {
2798
- this.fileSummaries.set(summary.filepath, summary);
2799
- }
2800
- } catch {}
2801
- }
3338
+ async indexFile(filepath, content, ctx) {
3339
+ if (!isJsonFile(filepath)) {
3340
+ return null;
3341
+ }
3342
+ this.rootDir = ctx.rootDir;
3343
+ const textChunks = createLineBasedChunks(content, {
3344
+ chunkSize: 50,
3345
+ overlap: 10
3346
+ });
3347
+ if (textChunks.length === 0) {
3348
+ return null;
3349
+ }
3350
+ const chunkContents = textChunks.map((c) => {
3351
+ const filename = path10.basename(filepath);
3352
+ return `${filename}: ${c.content}`;
3353
+ });
3354
+ const embeddings = await getEmbeddings(chunkContents);
3355
+ const chunks = textChunks.map((tc, i) => ({
3356
+ id: generateChunkId(filepath, tc.startLine, tc.endLine),
3357
+ content: tc.content,
3358
+ startLine: tc.startLine,
3359
+ endLine: tc.endLine,
3360
+ type: tc.type
3361
+ }));
3362
+ const jsonKeys = extractJsonKeys((() => {
3363
+ try {
3364
+ return JSON.parse(content);
3365
+ } catch {
3366
+ return {};
2802
3367
  }
2803
- } catch {}
2804
- }
2805
- getFileSummaryPath(filepath) {
2806
- const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2807
- return path7.join(this.symbolicPath, jsonPath);
3368
+ })());
3369
+ const stats = await ctx.getFileStats(filepath);
3370
+ const currentConfig = getEmbeddingConfig();
3371
+ const moduleData = {
3372
+ embeddings,
3373
+ embeddingModel: currentConfig.model,
3374
+ jsonKeys
3375
+ };
3376
+ const keywords = extractJsonKeywords(content);
3377
+ const fileSummary = {
3378
+ filepath,
3379
+ chunkCount: chunks.length,
3380
+ chunkTypes: ["file"],
3381
+ keywords,
3382
+ exports: [],
3383
+ lastModified: stats.lastModified
3384
+ };
3385
+ this.pendingSummaries.set(filepath, fileSummary);
3386
+ return {
3387
+ filepath,
3388
+ lastModified: stats.lastModified,
3389
+ chunks,
3390
+ moduleData
3391
+ };
2808
3392
  }
2809
- async deleteFileSummary(filepath) {
2810
- try {
2811
- await fs3.unlink(this.getFileSummaryPath(filepath));
2812
- } catch {}
2813
- this.fileSummaries.delete(filepath);
3393
+ async finalize(ctx) {
3394
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3395
+ this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
3396
+ await this.symbolicIndex.initialize();
3397
+ for (const [filepath, summary] of this.pendingSummaries) {
3398
+ this.symbolicIndex.addFile(summary);
3399
+ }
3400
+ this.symbolicIndex.buildBM25Index();
3401
+ await this.symbolicIndex.save();
3402
+ this.pendingSummaries.clear();
2814
3403
  }
2815
- async exists() {
3404
+ async search(query, ctx, options = {}) {
3405
+ const {
3406
+ topK = DEFAULT_TOP_K3,
3407
+ minScore = DEFAULT_MIN_SCORE3,
3408
+ filePatterns
3409
+ } = options;
3410
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3411
+ const symbolicIndex = new SymbolicIndex(indexDir, this.id);
3412
+ let allFiles;
2816
3413
  try {
2817
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2818
- await fs3.access(metaPath);
2819
- return true;
3414
+ await symbolicIndex.initialize();
3415
+ allFiles = symbolicIndex.getAllFiles();
2820
3416
  } catch {
2821
- return false;
3417
+ allFiles = await ctx.listIndexedFiles();
2822
3418
  }
2823
- }
2824
- get size() {
2825
- return this.fileSummaries.size;
2826
- }
2827
- clear() {
2828
- this.fileSummaries.clear();
2829
- if (this.meta) {
2830
- this.meta.fileCount = 0;
2831
- this.meta.bm25Data = {
2832
- avgDocLength: 0,
2833
- documentFrequencies: {},
2834
- totalDocs: 0
2835
- };
3419
+ let filesToSearch = allFiles.filter((f) => isJsonFile(f));
3420
+ if (filePatterns && filePatterns.length > 0) {
3421
+ filesToSearch = filesToSearch.filter((filepath) => {
3422
+ return filePatterns.some((pattern) => {
3423
+ if (pattern.startsWith("*.")) {
3424
+ const ext = pattern.slice(1);
3425
+ return filepath.endsWith(ext);
3426
+ }
3427
+ return filepath.includes(pattern);
3428
+ });
3429
+ });
2836
3430
  }
2837
- this.bm25Index = new BM25Index;
3431
+ const queryEmbedding = await getEmbedding(query);
3432
+ const bm25Index = new BM25Index;
3433
+ const allChunksData = [];
3434
+ for (const filepath of filesToSearch) {
3435
+ const fileIndex = await ctx.loadFileIndex(filepath);
3436
+ if (!fileIndex)
3437
+ continue;
3438
+ const moduleData = fileIndex.moduleData;
3439
+ if (!moduleData?.embeddings)
3440
+ continue;
3441
+ for (let i = 0;i < fileIndex.chunks.length; i++) {
3442
+ const chunk = fileIndex.chunks[i];
3443
+ const embedding = moduleData.embeddings[i];
3444
+ if (!embedding)
3445
+ continue;
3446
+ allChunksData.push({
3447
+ filepath: fileIndex.filepath,
3448
+ chunk,
3449
+ embedding
3450
+ });
3451
+ bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3452
+ }
3453
+ }
3454
+ const bm25Results = bm25Index.search(query, topK * 3);
3455
+ const bm25Scores = new Map;
3456
+ for (const result of bm25Results) {
3457
+ bm25Scores.set(result.id, normalizeScore(result.score, 3));
3458
+ }
3459
+ const queryTerms = extractQueryTerms(query);
3460
+ const results = [];
3461
+ for (const { filepath, chunk, embedding } of allChunksData) {
3462
+ const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3463
+ const bm25Score = bm25Scores.get(chunk.id) || 0;
3464
+ const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
3465
+ if (hybridScore >= minScore || bm25Score > 0.3) {
3466
+ results.push({
3467
+ filepath,
3468
+ chunk,
3469
+ score: hybridScore,
3470
+ moduleId: this.id,
3471
+ context: {
3472
+ semanticScore,
3473
+ bm25Score
3474
+ }
3475
+ });
3476
+ }
3477
+ }
3478
+ results.sort((a, b) => b.score - a.score);
3479
+ return results.slice(0, topK);
2838
3480
  }
2839
3481
  }
2840
- var init_symbolicIndex = __esm(() => {
2841
- init_keywords();
2842
- });
2843
-
2844
- // src/infrastructure/storage/index.ts
2845
- var init_storage = __esm(() => {
2846
- init_fileIndexStorage();
2847
- init_symbolicIndex();
3482
+ var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS;
3483
+ var init_json = __esm(() => {
3484
+ init_embeddings();
3485
+ init_services();
3486
+ init_config2();
3487
+ init_storage();
3488
+ JSON_EXTENSIONS = [".json"];
2848
3489
  });
2849
3490
 
2850
- // src/modules/language/typescript/index.ts
2851
- var exports_typescript = {};
2852
- __export(exports_typescript, {
2853
- TypeScriptModule: () => TypeScriptModule,
2854
- DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
2855
- DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
3491
+ // src/modules/docs/markdown/index.ts
3492
+ var exports_markdown = {};
3493
+ __export(exports_markdown, {
3494
+ isMarkdownFile: () => isMarkdownFile,
3495
+ MarkdownModule: () => MarkdownModule,
3496
+ MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
3497
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
3498
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
2856
3499
  });
2857
- import * as path8 from "path";
2858
- function detectQueryIntent(queryTerms) {
2859
- const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2860
- const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2861
- if (hasDocumentationTerm) {
2862
- return "documentation";
2863
- }
2864
- if (hasImplementationTerm) {
2865
- return "implementation";
2866
- }
2867
- return "neutral";
3500
+ import * as path11 from "path";
3501
+ function isMarkdownFile(filepath) {
3502
+ const ext = path11.extname(filepath).toLowerCase();
3503
+ return MARKDOWN_EXTENSIONS.includes(ext);
2868
3504
  }
2869
- function calculateFileTypeBoost(filepath, queryTerms) {
2870
- const ext = path8.extname(filepath).toLowerCase();
2871
- const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
2872
- const isDoc = DOC_EXTENSIONS.includes(ext);
2873
- const intent = detectQueryIntent(queryTerms);
2874
- if (intent === "implementation") {
2875
- if (isSourceCode) {
2876
- return 0.06;
3505
+ function parseMarkdownSections(content) {
3506
+ const lines = content.split(`
3507
+ `);
3508
+ const sections = [];
3509
+ let currentSection = null;
3510
+ let currentContent = [];
3511
+ let startLine = 1;
3512
+ for (let i = 0;i < lines.length; i++) {
3513
+ const line = lines[i];
3514
+ const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
3515
+ if (headingMatch) {
3516
+ if (currentSection) {
3517
+ currentSection.content = currentContent.join(`
3518
+ `).trim();
3519
+ currentSection.endLine = i;
3520
+ if (currentSection.content || currentSection.heading) {
3521
+ sections.push(currentSection);
3522
+ }
3523
+ } else if (currentContent.length > 0) {
3524
+ sections.push({
3525
+ heading: "",
3526
+ level: 0,
3527
+ content: currentContent.join(`
3528
+ `).trim(),
3529
+ startLine: 1,
3530
+ endLine: i
3531
+ });
3532
+ }
3533
+ currentSection = {
3534
+ heading: headingMatch[2],
3535
+ level: headingMatch[1].length,
3536
+ content: "",
3537
+ startLine: i + 1,
3538
+ endLine: lines.length
3539
+ };
3540
+ currentContent = [];
3541
+ } else {
3542
+ currentContent.push(line);
2877
3543
  }
2878
- return 0;
2879
3544
  }
2880
- if (intent === "documentation") {
2881
- if (isDoc) {
2882
- return 0.08;
3545
+ if (currentSection) {
3546
+ currentSection.content = currentContent.join(`
3547
+ `).trim();
3548
+ currentSection.endLine = lines.length;
3549
+ if (currentSection.content || currentSection.heading) {
3550
+ sections.push(currentSection);
2883
3551
  }
2884
- return 0;
3552
+ } else if (currentContent.length > 0) {
3553
+ sections.push({
3554
+ heading: "",
3555
+ level: 0,
3556
+ content: currentContent.join(`
3557
+ `).trim(),
3558
+ startLine: 1,
3559
+ endLine: lines.length
3560
+ });
2885
3561
  }
2886
- return 0;
3562
+ return sections;
2887
3563
  }
2888
- function calculateChunkTypeBoost(chunk) {
2889
- switch (chunk.type) {
2890
- case "function":
2891
- return 0.05;
2892
- case "class":
2893
- case "interface":
2894
- return 0.04;
2895
- case "type":
2896
- case "enum":
2897
- return 0.03;
2898
- case "variable":
2899
- return 0.02;
2900
- case "file":
2901
- case "block":
2902
- default:
2903
- return 0;
3564
+ function extractMarkdownKeywords(content) {
3565
+ const keywords = [];
3566
+ const headingMatches = content.matchAll(/^#{1,6}\s+(.+)$/gm);
3567
+ for (const match of headingMatches) {
3568
+ const heading = match[1].toLowerCase();
3569
+ const words = heading.split(/\s+/).filter((w) => w.length > 2);
3570
+ keywords.push(...words);
3571
+ }
3572
+ const emphasisMatches = content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g);
3573
+ for (const match of emphasisMatches) {
3574
+ const text = (match[1] || match[2] || "").toLowerCase();
3575
+ const words = text.split(/\s+/).filter((w) => w.length > 2);
3576
+ keywords.push(...words);
3577
+ }
3578
+ const codeMatches = content.matchAll(/`([^`]+)`/g);
3579
+ for (const match of codeMatches) {
3580
+ const code = match[1].toLowerCase();
3581
+ if (code.length > 2 && code.length < 50) {
3582
+ keywords.push(code);
3583
+ }
2904
3584
  }
2905
- }
2906
- function calculateExportBoost(chunk) {
2907
- return chunk.isExported ? 0.03 : 0;
3585
+ const linkMatches = content.matchAll(/\[([^\]]+)\]/g);
3586
+ for (const match of linkMatches) {
3587
+ const text = match[1].toLowerCase();
3588
+ const words = text.split(/\s+/).filter((w) => w.length > 2);
3589
+ keywords.push(...words);
3590
+ }
3591
+ return [...new Set(keywords)];
2908
3592
  }
2909
3593
 
2910
- class TypeScriptModule {
2911
- id = "language/typescript";
2912
- name = "TypeScript Search";
2913
- description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3594
+ class MarkdownModule {
3595
+ id = "docs/markdown";
3596
+ name = "Markdown Search";
3597
+ description = "Markdown documentation search with section-aware indexing";
2914
3598
  version = "1.0.0";
2915
3599
  embeddingConfig = null;
2916
3600
  symbolicIndex = null;
@@ -2930,66 +3614,53 @@ class TypeScriptModule {
2930
3614
  this.pendingSummaries.clear();
2931
3615
  }
2932
3616
  async indexFile(filepath, content, ctx) {
3617
+ if (!isMarkdownFile(filepath)) {
3618
+ return null;
3619
+ }
2933
3620
  this.rootDir = ctx.rootDir;
2934
- const parsedChunks = parseCode(content, filepath);
2935
- if (parsedChunks.length === 0) {
3621
+ const sections = parseMarkdownSections(content);
3622
+ if (sections.length === 0) {
2936
3623
  return null;
2937
3624
  }
2938
- const pathContext = parsePathContext(filepath);
2939
- const pathPrefix = formatPathContextForEmbedding(pathContext);
2940
- const chunkContents = parsedChunks.map((c) => {
2941
- const namePrefix = c.name ? `${c.name}: ` : "";
2942
- return `${pathPrefix} ${namePrefix}${c.content}`;
3625
+ const chunkContents = sections.map((s) => {
3626
+ const filename = path11.basename(filepath);
3627
+ const headingContext = s.heading ? `${s.heading}: ` : "";
3628
+ return `${filename} ${headingContext}${s.content}`;
2943
3629
  });
2944
3630
  const embeddings = await getEmbeddings(chunkContents);
2945
- const chunks = parsedChunks.map((pc) => ({
2946
- id: generateChunkId(filepath, pc.startLine, pc.endLine),
2947
- content: pc.content,
2948
- startLine: pc.startLine,
2949
- endLine: pc.endLine,
2950
- type: pc.type,
2951
- name: pc.name,
2952
- isExported: pc.isExported,
2953
- jsDoc: pc.jsDoc
3631
+ const chunks = sections.map((section, i) => ({
3632
+ id: generateChunkId(filepath, section.startLine, section.endLine),
3633
+ content: section.heading ? `## ${section.heading}
3634
+
3635
+ ${section.content}` : section.content,
3636
+ startLine: section.startLine,
3637
+ endLine: section.endLine,
3638
+ type: "block",
3639
+ name: section.heading || undefined
2954
3640
  }));
2955
- const references = this.extractReferences(content, filepath);
3641
+ const headings = sections.filter((s) => s.heading).map((s) => s.heading);
2956
3642
  const stats = await ctx.getFileStats(filepath);
2957
3643
  const currentConfig = getEmbeddingConfig();
2958
3644
  const moduleData = {
2959
3645
  embeddings,
2960
- embeddingModel: currentConfig.model
3646
+ embeddingModel: currentConfig.model,
3647
+ headings
2961
3648
  };
2962
- const chunkTypes = [
2963
- ...new Set(parsedChunks.map((pc) => pc.type))
2964
- ];
2965
- const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
2966
- const allKeywords = new Set;
2967
- for (const pc of parsedChunks) {
2968
- const keywords = extractKeywords(pc.content, pc.name);
2969
- keywords.forEach((k) => allKeywords.add(k));
2970
- }
2971
- pathContext.keywords.forEach((k) => allKeywords.add(k));
3649
+ const keywords = extractMarkdownKeywords(content);
2972
3650
  const fileSummary = {
2973
3651
  filepath,
2974
3652
  chunkCount: chunks.length,
2975
- chunkTypes,
2976
- keywords: Array.from(allKeywords),
2977
- exports,
2978
- lastModified: stats.lastModified,
2979
- pathContext: {
2980
- segments: pathContext.segments,
2981
- layer: pathContext.layer,
2982
- domain: pathContext.domain,
2983
- depth: pathContext.depth
2984
- }
3653
+ chunkTypes: ["block"],
3654
+ keywords,
3655
+ exports: headings,
3656
+ lastModified: stats.lastModified
2985
3657
  };
2986
3658
  this.pendingSummaries.set(filepath, fileSummary);
2987
3659
  return {
2988
3660
  filepath,
2989
3661
  lastModified: stats.lastModified,
2990
3662
  chunks,
2991
- moduleData,
2992
- references
3663
+ moduleData
2993
3664
  };
2994
3665
  }
2995
3666
  async finalize(ctx) {
@@ -3005,8 +3676,8 @@ class TypeScriptModule {
3005
3676
  }
3006
3677
  async search(query, ctx, options = {}) {
3007
3678
  const {
3008
- topK = DEFAULT_TOP_K2,
3009
- minScore = DEFAULT_MIN_SCORE2,
3679
+ topK = DEFAULT_TOP_K4,
3680
+ minScore = DEFAULT_MIN_SCORE4,
3010
3681
  filePatterns
3011
3682
  } = options;
3012
3683
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
@@ -3018,9 +3689,9 @@ class TypeScriptModule {
3018
3689
  } catch {
3019
3690
  allFiles = await ctx.listIndexedFiles();
3020
3691
  }
3021
- let filesToSearch = allFiles;
3692
+ let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
3022
3693
  if (filePatterns && filePatterns.length > 0) {
3023
- filesToSearch = allFiles.filter((filepath) => {
3694
+ filesToSearch = filesToSearch.filter((filepath) => {
3024
3695
  return filePatterns.some((pattern) => {
3025
3696
  if (pattern.startsWith("*.")) {
3026
3697
  const ext = pattern.slice(1);
@@ -3058,36 +3729,24 @@ class TypeScriptModule {
3058
3729
  for (const result of bm25Results) {
3059
3730
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
3060
3731
  }
3061
- const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
3062
- const pathBoosts = new Map;
3063
- for (const filepath of filesToSearch) {
3064
- const summary = symbolicIndex.getFileSummary(filepath);
3065
- if (summary?.pathContext) {
3066
- let boost = 0;
3067
- const ctx2 = summary.pathContext;
3068
- if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
3069
- boost += 0.1;
3070
- }
3071
- if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
3072
- boost += 0.05;
3073
- }
3074
- const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
3075
- if (segmentMatch) {
3076
- boost += 0.05;
3077
- }
3078
- pathBoosts.set(filepath, boost);
3079
- }
3080
- }
3732
+ const queryTerms = extractQueryTerms(query);
3081
3733
  const results = [];
3082
3734
  for (const { filepath, chunk, embedding } of allChunksData) {
3083
3735
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3084
3736
  const bm25Score = bm25Scores.get(chunk.id) || 0;
3085
- const pathBoost = pathBoosts.get(filepath) || 0;
3086
- const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
3087
- const chunkTypeBoost = calculateChunkTypeBoost(chunk);
3088
- const exportBoost = calculateExportBoost(chunk);
3089
- const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
3090
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3737
+ let docBoost = 0;
3738
+ if (queryTerms.some((t) => [
3739
+ "docs",
3740
+ "documentation",
3741
+ "readme",
3742
+ "guide",
3743
+ "how",
3744
+ "what",
3745
+ "explain"
3746
+ ].includes(t))) {
3747
+ docBoost = 0.05;
3748
+ }
3749
+ const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
3091
3750
  if (hybridScore >= minScore || bm25Score > 0.3) {
3092
3751
  results.push({
3093
3752
  filepath,
@@ -3097,10 +3756,7 @@ class TypeScriptModule {
3097
3756
  context: {
3098
3757
  semanticScore,
3099
3758
  bm25Score,
3100
- pathBoost,
3101
- fileTypeBoost,
3102
- chunkTypeBoost,
3103
- exportBoost
3759
+ docBoost
3104
3760
  }
3105
3761
  });
3106
3762
  }
@@ -3108,84 +3764,14 @@ class TypeScriptModule {
3108
3764
  results.sort((a, b) => b.score - a.score);
3109
3765
  return results.slice(0, topK);
3110
3766
  }
3111
- extractReferences(content, filepath) {
3112
- const references = [];
3113
- const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
3114
- const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
3115
- let match;
3116
- while ((match = importRegex.exec(content)) !== null) {
3117
- const importPath = match[1];
3118
- if (importPath.startsWith(".")) {
3119
- const dir = path8.dirname(filepath);
3120
- const resolved = path8.normalize(path8.join(dir, importPath));
3121
- references.push(resolved);
3122
- }
3123
- }
3124
- while ((match = requireRegex.exec(content)) !== null) {
3125
- const importPath = match[1];
3126
- if (importPath.startsWith(".")) {
3127
- const dir = path8.dirname(filepath);
3128
- const resolved = path8.normalize(path8.join(dir, importPath));
3129
- references.push(resolved);
3130
- }
3131
- }
3132
- return references;
3133
- }
3134
3767
  }
3135
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
3136
- var init_typescript = __esm(() => {
3768
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
3769
+ var init_markdown = __esm(() => {
3137
3770
  init_embeddings();
3771
+ init_services();
3138
3772
  init_config2();
3139
- init_parseCode();
3140
3773
  init_storage();
3141
- init_keywords();
3142
- init_keywords();
3143
- IMPLEMENTATION_TERMS = [
3144
- "function",
3145
- "method",
3146
- "class",
3147
- "interface",
3148
- "implement",
3149
- "implementation",
3150
- "endpoint",
3151
- "route",
3152
- "handler",
3153
- "controller",
3154
- "module",
3155
- "code"
3156
- ];
3157
- DOCUMENTATION_TERMS = [
3158
- "documentation",
3159
- "docs",
3160
- "guide",
3161
- "tutorial",
3162
- "readme",
3163
- "how",
3164
- "what",
3165
- "why",
3166
- "explain",
3167
- "overview",
3168
- "getting",
3169
- "started",
3170
- "requirements",
3171
- "setup",
3172
- "install",
3173
- "configure",
3174
- "configuration"
3175
- ];
3176
- SOURCE_CODE_EXTENSIONS = [
3177
- ".ts",
3178
- ".tsx",
3179
- ".js",
3180
- ".jsx",
3181
- ".mjs",
3182
- ".cjs",
3183
- ".py",
3184
- ".go",
3185
- ".rs",
3186
- ".java"
3187
- ];
3188
- DOC_EXTENSIONS = [".md", ".txt", ".rst"];
3774
+ MARKDOWN_EXTENSIONS = [".md", ".txt"];
3189
3775
  });
3190
3776
 
3191
3777
  // src/modules/registry.ts
@@ -3210,8 +3796,12 @@ class ModuleRegistryImpl {
3210
3796
  async function registerBuiltInModules() {
3211
3797
  const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
3212
3798
  const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
3799
+ const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
3800
+ const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
3213
3801
  registry.register(new CoreModule2);
3214
3802
  registry.register(new TypeScriptModule2);
3803
+ registry.register(new JsonModule2);
3804
+ registry.register(new MarkdownModule2);
3215
3805
  }
3216
3806
  var registry;
3217
3807
  var init_registry = __esm(() => {
@@ -3219,13 +3809,13 @@ var init_registry = __esm(() => {
3219
3809
  });
3220
3810
 
3221
3811
  // src/infrastructure/introspection/projectDetector.ts
3222
- import * as path9 from "path";
3812
+ import * as path12 from "path";
3223
3813
  import * as fs4 from "fs/promises";
3224
3814
  async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3225
3815
  if (depth > MAX_SCAN_DEPTH)
3226
3816
  return [];
3227
3817
  const results = [];
3228
- const fullDir = currentDir ? path9.join(rootDir, currentDir) : rootDir;
3818
+ const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
3229
3819
  try {
3230
3820
  const entries = await fs4.readdir(fullDir, { withFileTypes: true });
3231
3821
  const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
@@ -3248,10 +3838,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3248
3838
  }
3249
3839
  async function parsePackageJson(rootDir, relativePath) {
3250
3840
  try {
3251
- const packageJsonPath = path9.join(rootDir, relativePath, "package.json");
3841
+ const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
3252
3842
  const content = await fs4.readFile(packageJsonPath, "utf-8");
3253
3843
  const pkg = JSON.parse(content);
3254
- const name = pkg.name || path9.basename(relativePath);
3844
+ const name = pkg.name || path12.basename(relativePath);
3255
3845
  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
3256
3846
  let type = "unknown";
3257
3847
  if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
@@ -3296,7 +3886,7 @@ async function detectProjectStructure(rootDir) {
3296
3886
  for (const pattern of monorepoPatterns) {
3297
3887
  if (!dirNames.includes(pattern))
3298
3888
  continue;
3299
- const patternDir = path9.join(rootDir, pattern);
3889
+ const patternDir = path12.join(rootDir, pattern);
3300
3890
  try {
3301
3891
  const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
3302
3892
  for (const subDir of subDirs) {
@@ -3327,7 +3917,7 @@ async function detectProjectStructure(rootDir) {
3327
3917
  }
3328
3918
  let rootType = "unknown";
3329
3919
  try {
3330
- const rootPkgPath = path9.join(rootDir, "package.json");
3920
+ const rootPkgPath = path12.join(rootDir, "package.json");
3331
3921
  const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
3332
3922
  if (rootPkg.workspaces)
3333
3923
  isMonorepo = true;
@@ -3367,7 +3957,7 @@ var init_projectDetector = __esm(() => {
3367
3957
  });
3368
3958
 
3369
3959
  // src/infrastructure/introspection/IntrospectionIndex.ts
3370
- import * as path10 from "path";
3960
+ import * as path13 from "path";
3371
3961
  import * as fs5 from "fs/promises";
3372
3962
 
3373
3963
  class IntrospectionIndex {
@@ -3381,7 +3971,7 @@ class IntrospectionIndex {
3381
3971
  async initialize() {
3382
3972
  this.structure = await detectProjectStructure(this.rootDir);
3383
3973
  try {
3384
- const configPath = path10.join(this.rootDir, ".raggrep", "config.json");
3974
+ const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
3385
3975
  const configContent = await fs5.readFile(configPath, "utf-8");
3386
3976
  const config = JSON.parse(configContent);
3387
3977
  this.config = config.introspection || {};
@@ -3421,28 +4011,28 @@ class IntrospectionIndex {
3421
4011
  }
3422
4012
  }
3423
4013
  async save(config) {
3424
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
4014
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3425
4015
  await fs5.mkdir(introDir, { recursive: true });
3426
- const projectPath = path10.join(introDir, "_project.json");
4016
+ const projectPath = path13.join(introDir, "_project.json");
3427
4017
  await fs5.writeFile(projectPath, JSON.stringify({
3428
4018
  version: "1.0.0",
3429
4019
  lastUpdated: new Date().toISOString(),
3430
4020
  structure: this.structure
3431
4021
  }, null, 2));
3432
4022
  for (const [filepath, intro] of this.files) {
3433
- const introFilePath = path10.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
3434
- await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
4023
+ const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
4024
+ await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
3435
4025
  await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
3436
4026
  }
3437
4027
  }
3438
4028
  async load(config) {
3439
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
4029
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3440
4030
  try {
3441
- const projectPath = path10.join(introDir, "_project.json");
4031
+ const projectPath = path13.join(introDir, "_project.json");
3442
4032
  const projectContent = await fs5.readFile(projectPath, "utf-8");
3443
4033
  const projectData = JSON.parse(projectContent);
3444
4034
  this.structure = projectData.structure;
3445
- await this.loadFilesRecursive(path10.join(introDir, "files"), "");
4035
+ await this.loadFilesRecursive(path13.join(introDir, "files"), "");
3446
4036
  } catch {
3447
4037
  this.structure = null;
3448
4038
  this.files.clear();
@@ -3452,7 +4042,7 @@ class IntrospectionIndex {
3452
4042
  try {
3453
4043
  const entries = await fs5.readdir(basePath, { withFileTypes: true });
3454
4044
  for (const entry of entries) {
3455
- const entryPath = path10.join(basePath, entry.name);
4045
+ const entryPath = path13.join(basePath, entry.name);
3456
4046
  const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
3457
4047
  if (entry.isDirectory()) {
3458
4048
  await this.loadFilesRecursive(entryPath, relativePath);
@@ -3483,7 +4073,7 @@ var init_introspection2 = __esm(() => {
3483
4073
 
3484
4074
  // src/app/indexer/watcher.ts
3485
4075
  import { watch } from "chokidar";
3486
- import * as path11 from "path";
4076
+ import * as path14 from "path";
3487
4077
  async function watchDirectory(rootDir, options = {}) {
3488
4078
  const {
3489
4079
  debounceMs = DEFAULT_DEBOUNCE_MS,
@@ -3494,7 +4084,7 @@ async function watchDirectory(rootDir, options = {}) {
3494
4084
  onFileChange,
3495
4085
  onError
3496
4086
  } = options;
3497
- rootDir = path11.resolve(rootDir);
4087
+ rootDir = path14.resolve(rootDir);
3498
4088
  const config = await loadConfig(rootDir);
3499
4089
  const indexLocation = getIndexLocation(rootDir);
3500
4090
  const validExtensions = new Set(config.extensions);
@@ -3504,7 +4094,7 @@ async function watchDirectory(rootDir, options = {}) {
3504
4094
  "**/.git/**"
3505
4095
  ];
3506
4096
  function shouldWatchFile(filepath) {
3507
- const ext = path11.extname(filepath);
4097
+ const ext = path14.extname(filepath);
3508
4098
  return validExtensions.has(ext);
3509
4099
  }
3510
4100
  let isRunning = true;
@@ -3586,7 +4176,7 @@ async function watchDirectory(rootDir, options = {}) {
3586
4176
  function handleFileEvent(event, filepath) {
3587
4177
  if (!isRunning)
3588
4178
  return;
3589
- const relativePath = path11.relative(rootDir, filepath);
4179
+ const relativePath = path14.relative(rootDir, filepath);
3590
4180
  if (!shouldWatchFile(filepath)) {
3591
4181
  return;
3592
4182
  }
@@ -3665,15 +4255,36 @@ __export(exports_indexer, {
3665
4255
  });
3666
4256
  import { glob } from "glob";
3667
4257
  import * as fs6 from "fs/promises";
3668
- import * as path12 from "path";
4258
+ import * as path15 from "path";
4259
+ async function parallelMap(items, processor, concurrency) {
4260
+ const results = new Array(items.length);
4261
+ let nextIndex = 0;
4262
+ async function worker() {
4263
+ while (nextIndex < items.length) {
4264
+ const index = nextIndex++;
4265
+ const item = items[index];
4266
+ try {
4267
+ const value = await processor(item, index);
4268
+ results[index] = { success: true, value };
4269
+ } catch (error) {
4270
+ results[index] = { success: false, error };
4271
+ }
4272
+ }
4273
+ }
4274
+ const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
4275
+ await Promise.all(workers);
4276
+ return results;
4277
+ }
3669
4278
  async function indexDirectory(rootDir, options = {}) {
3670
4279
  const verbose = options.verbose ?? false;
3671
4280
  const quiet = options.quiet ?? false;
4281
+ const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
3672
4282
  const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
3673
- rootDir = path12.resolve(rootDir);
4283
+ rootDir = path15.resolve(rootDir);
3674
4284
  const location = getIndexLocation(rootDir);
3675
4285
  logger.info(`Indexing directory: ${rootDir}`);
3676
4286
  logger.info(`Index location: ${location.indexDir}`);
4287
+ logger.debug(`Concurrency: ${concurrency}`);
3677
4288
  const config = await loadConfig(rootDir);
3678
4289
  const introspection = new IntrospectionIndex(rootDir);
3679
4290
  await introspection.initialize();
@@ -3709,7 +4320,7 @@ async function indexDirectory(rootDir, options = {}) {
3709
4320
  };
3710
4321
  await module.initialize(configWithOverrides);
3711
4322
  }
3712
- const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger);
4323
+ const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
3713
4324
  results.push(result);
3714
4325
  if (module.finalize) {
3715
4326
  logger.info(`[${module.name}] Building secondary indexes...`);
@@ -3717,11 +4328,11 @@ async function indexDirectory(rootDir, options = {}) {
3717
4328
  rootDir,
3718
4329
  config,
3719
4330
  readFile: async (filepath) => {
3720
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4331
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3721
4332
  return fs6.readFile(fullPath, "utf-8");
3722
4333
  },
3723
4334
  getFileStats: async (filepath) => {
3724
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4335
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3725
4336
  const stats = await fs6.stat(fullPath);
3726
4337
  return { lastModified: stats.mtime.toISOString() };
3727
4338
  }
@@ -3752,7 +4363,7 @@ async function deleteIndex(rootDir) {
3752
4363
  } catch {}
3753
4364
  }
3754
4365
  async function resetIndex(rootDir) {
3755
- rootDir = path12.resolve(rootDir);
4366
+ rootDir = path15.resolve(rootDir);
3756
4367
  const status = await getIndexStatus(rootDir);
3757
4368
  if (!status.exists) {
3758
4369
  throw new Error(`No index found for ${rootDir}`);
@@ -3767,7 +4378,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3767
4378
  const verbose = options.verbose ?? false;
3768
4379
  const quiet = options.quiet ?? false;
3769
4380
  const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
3770
- rootDir = path12.resolve(rootDir);
4381
+ rootDir = path15.resolve(rootDir);
3771
4382
  const status = await getIndexStatus(rootDir);
3772
4383
  if (!status.exists) {
3773
4384
  logger.info(`No index found. Creating index...
@@ -3794,7 +4405,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3794
4405
  const introspection = new IntrospectionIndex(rootDir);
3795
4406
  await introspection.initialize();
3796
4407
  const currentFiles = await findFiles(rootDir, config);
3797
- const currentFileSet = new Set(currentFiles.map((f) => path12.relative(rootDir, f)));
4408
+ const currentFileSet = new Set(currentFiles.map((f) => path15.relative(rootDir, f)));
3798
4409
  let totalIndexed = 0;
3799
4410
  let totalRemoved = 0;
3800
4411
  let totalUnchanged = 0;
@@ -3824,11 +4435,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
3824
4435
  }
3825
4436
  for (const filepath of filesToRemove) {
3826
4437
  logger.debug(` Removing stale: ${filepath}`);
3827
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4438
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3828
4439
  try {
3829
4440
  await fs6.unlink(indexFilePath);
3830
4441
  } catch {}
3831
- const symbolicFilePath = path12.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4442
+ const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3832
4443
  try {
3833
4444
  await fs6.unlink(symbolicFilePath);
3834
4445
  } catch {}
@@ -3839,11 +4450,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
3839
4450
  rootDir,
3840
4451
  config,
3841
4452
  readFile: async (filepath) => {
3842
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4453
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3843
4454
  return fs6.readFile(fullPath, "utf-8");
3844
4455
  },
3845
4456
  getFileStats: async (filepath) => {
3846
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4457
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3847
4458
  const stats = await fs6.stat(fullPath);
3848
4459
  return { lastModified: stats.mtime.toISOString() };
3849
4460
  },
@@ -3852,7 +4463,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3852
4463
  const totalFiles = currentFiles.length;
3853
4464
  for (let i = 0;i < currentFiles.length; i++) {
3854
4465
  const filepath = currentFiles[i];
3855
- const relativePath = path12.relative(rootDir, filepath);
4466
+ const relativePath = path15.relative(rootDir, filepath);
3856
4467
  const progress = `[${i + 1}/${totalFiles}]`;
3857
4468
  try {
3858
4469
  const stats = await fs6.stat(filepath);
@@ -3903,7 +4514,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3903
4514
  unchanged: totalUnchanged
3904
4515
  };
3905
4516
  }
3906
- async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger) {
4517
+ async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
3907
4518
  const result = {
3908
4519
  moduleId: module.id,
3909
4520
  indexed: 0,
@@ -3912,7 +4523,7 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3912
4523
  };
3913
4524
  const manifest = await loadModuleManifest(rootDir, module.id, config);
3914
4525
  const indexPath = getModuleIndexPath(rootDir, module.id, config);
3915
- const currentFileSet = new Set(files.map((f) => path12.relative(rootDir, f)));
4526
+ const currentFileSet = new Set(files.map((f) => path15.relative(rootDir, f)));
3916
4527
  const filesToRemove = [];
3917
4528
  for (const filepath of Object.keys(manifest.files)) {
3918
4529
  if (!currentFileSet.has(filepath)) {
@@ -3923,11 +4534,11 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3923
4534
  logger.info(` Removing ${filesToRemove.length} stale entries...`);
3924
4535
  for (const filepath of filesToRemove) {
3925
4536
  logger.debug(` Removing: ${filepath}`);
3926
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4537
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3927
4538
  try {
3928
4539
  await fs6.unlink(indexFilePath);
3929
4540
  } catch {}
3930
- const symbolicFilePath = path12.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4541
+ const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3931
4542
  try {
3932
4543
  await fs6.unlink(symbolicFilePath);
3933
4544
  } catch {}
@@ -3939,52 +4550,76 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3939
4550
  rootDir,
3940
4551
  config,
3941
4552
  readFile: async (filepath) => {
3942
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4553
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3943
4554
  return fs6.readFile(fullPath, "utf-8");
3944
4555
  },
3945
4556
  getFileStats: async (filepath) => {
3946
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4557
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3947
4558
  const stats = await fs6.stat(fullPath);
3948
4559
  return { lastModified: stats.mtime.toISOString() };
3949
4560
  },
3950
4561
  getIntrospection: (filepath) => introspection.getFile(filepath)
3951
4562
  };
3952
4563
  const totalFiles = files.length;
3953
- for (let i = 0;i < files.length; i++) {
3954
- const filepath = files[i];
3955
- const relativePath = path12.relative(rootDir, filepath);
3956
- const progress = `[${i + 1}/${totalFiles}]`;
4564
+ let completedCount = 0;
4565
+ const processFile = async (filepath, _index) => {
4566
+ const relativePath = path15.relative(rootDir, filepath);
3957
4567
  try {
3958
4568
  const stats = await fs6.stat(filepath);
3959
4569
  const lastModified = stats.mtime.toISOString();
3960
4570
  const existingEntry = manifest.files[relativePath];
3961
4571
  if (existingEntry && existingEntry.lastModified === lastModified) {
3962
- logger.debug(` ${progress} Skipped ${relativePath} (unchanged)`);
3963
- result.skipped++;
3964
- continue;
4572
+ completedCount++;
4573
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
4574
+ return { relativePath, status: "skipped" };
3965
4575
  }
3966
4576
  const content = await fs6.readFile(filepath, "utf-8");
3967
4577
  introspection.addFile(relativePath, content);
3968
- logger.progress(` ${progress} Processing: ${relativePath}`);
4578
+ completedCount++;
4579
+ logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
3969
4580
  const fileIndex = await module.indexFile(relativePath, content, ctx);
3970
4581
  if (!fileIndex) {
3971
- logger.debug(` ${progress} Skipped ${relativePath} (no chunks)`);
3972
- result.skipped++;
3973
- continue;
4582
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
4583
+ return { relativePath, status: "skipped" };
3974
4584
  }
3975
4585
  await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
3976
- manifest.files[relativePath] = {
4586
+ return {
4587
+ relativePath,
4588
+ status: "indexed",
3977
4589
  lastModified,
3978
4590
  chunkCount: fileIndex.chunks.length
3979
4591
  };
3980
- result.indexed++;
3981
4592
  } catch (error) {
3982
- logger.clearProgress();
3983
- logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
4593
+ completedCount++;
4594
+ return { relativePath, status: "error", error };
4595
+ }
4596
+ };
4597
+ logger.debug(` Using concurrency: ${concurrency}`);
4598
+ const results = await parallelMap(files, processFile, concurrency);
4599
+ logger.clearProgress();
4600
+ for (const item of results) {
4601
+ if (!item.success) {
3984
4602
  result.errors++;
4603
+ continue;
4604
+ }
4605
+ const fileResult = item.value;
4606
+ switch (fileResult.status) {
4607
+ case "indexed":
4608
+ manifest.files[fileResult.relativePath] = {
4609
+ lastModified: fileResult.lastModified,
4610
+ chunkCount: fileResult.chunkCount
4611
+ };
4612
+ result.indexed++;
4613
+ break;
4614
+ case "skipped":
4615
+ result.skipped++;
4616
+ break;
4617
+ case "error":
4618
+ logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
4619
+ result.errors++;
4620
+ break;
3985
4621
  }
3986
4622
  }
3987
- logger.clearProgress();
3988
4623
  manifest.lastUpdated = new Date().toISOString();
3989
4624
  await writeModuleManifest(rootDir, module.id, manifest, config);
3990
4625
  return result;
@@ -4019,13 +4654,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
4019
4654
  }
4020
4655
  async function writeModuleManifest(rootDir, moduleId, manifest, config) {
4021
4656
  const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
4022
- await fs6.mkdir(path12.dirname(manifestPath), { recursive: true });
4657
+ await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
4023
4658
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
4024
4659
  }
4025
4660
  async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
4026
4661
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
4027
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4028
- await fs6.mkdir(path12.dirname(indexFilePath), { recursive: true });
4662
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4663
+ await fs6.mkdir(path15.dirname(indexFilePath), { recursive: true });
4029
4664
  await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
4030
4665
  }
4031
4666
  async function updateGlobalManifest(rootDir, modules, config) {
@@ -4035,13 +4670,13 @@ async function updateGlobalManifest(rootDir, modules, config) {
4035
4670
  lastUpdated: new Date().toISOString(),
4036
4671
  modules: modules.map((m) => m.id)
4037
4672
  };
4038
- await fs6.mkdir(path12.dirname(manifestPath), { recursive: true });
4673
+ await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
4039
4674
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
4040
4675
  }
4041
4676
  async function cleanupIndex(rootDir, options = {}) {
4042
4677
  const verbose = options.verbose ?? false;
4043
4678
  const logger = options.logger ?? createLogger({ verbose });
4044
- rootDir = path12.resolve(rootDir);
4679
+ rootDir = path15.resolve(rootDir);
4045
4680
  logger.info(`Cleaning up index in: ${rootDir}`);
4046
4681
  const config = await loadConfig(rootDir);
4047
4682
  await registerBuiltInModules();
@@ -4071,7 +4706,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
4071
4706
  const filesToRemove = [];
4072
4707
  const updatedFiles = {};
4073
4708
  for (const [filepath, entry] of Object.entries(manifest.files)) {
4074
- const fullPath = path12.join(rootDir, filepath);
4709
+ const fullPath = path15.join(rootDir, filepath);
4075
4710
  try {
4076
4711
  await fs6.access(fullPath);
4077
4712
  updatedFiles[filepath] = entry;
@@ -4083,7 +4718,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
4083
4718
  }
4084
4719
  }
4085
4720
  for (const filepath of filesToRemove) {
4086
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4721
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4087
4722
  try {
4088
4723
  await fs6.unlink(indexFilePath);
4089
4724
  } catch {}
@@ -4099,7 +4734,7 @@ async function cleanupEmptyDirectories(dir) {
4099
4734
  const entries = await fs6.readdir(dir, { withFileTypes: true });
4100
4735
  for (const entry of entries) {
4101
4736
  if (entry.isDirectory()) {
4102
- const subDir = path12.join(dir, entry.name);
4737
+ const subDir = path15.join(dir, entry.name);
4103
4738
  await cleanupEmptyDirectories(subDir);
4104
4739
  }
4105
4740
  }
@@ -4114,7 +4749,7 @@ async function cleanupEmptyDirectories(dir) {
4114
4749
  }
4115
4750
  }
4116
4751
  async function getIndexStatus(rootDir) {
4117
- rootDir = path12.resolve(rootDir);
4752
+ rootDir = path15.resolve(rootDir);
4118
4753
  const config = await loadConfig(rootDir);
4119
4754
  const location = getIndexLocation(rootDir);
4120
4755
  const indexDir = location.indexDir;
@@ -4150,7 +4785,7 @@ async function getIndexStatus(rootDir) {
4150
4785
  }
4151
4786
  } catch {
4152
4787
  try {
4153
- const entries = await fs6.readdir(path12.join(indexDir, "index"));
4788
+ const entries = await fs6.readdir(path15.join(indexDir, "index"));
4154
4789
  if (entries.length > 0) {
4155
4790
  status.exists = true;
4156
4791
  for (const entry of entries) {
@@ -4170,7 +4805,7 @@ async function getIndexStatus(rootDir) {
4170
4805
  }
4171
4806
  return status;
4172
4807
  }
4173
- var INDEX_SCHEMA_VERSION = "1.0.0";
4808
+ var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
4174
4809
  var init_indexer = __esm(() => {
4175
4810
  init_config2();
4176
4811
  init_registry();
@@ -4191,9 +4826,9 @@ __export(exports_search, {
4191
4826
  formatSearchResults: () => formatSearchResults
4192
4827
  });
4193
4828
  import * as fs7 from "fs/promises";
4194
- import * as path13 from "path";
4829
+ import * as path16 from "path";
4195
4830
  async function search(rootDir, query, options = {}) {
4196
- rootDir = path13.resolve(rootDir);
4831
+ rootDir = path16.resolve(rootDir);
4197
4832
  const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
4198
4833
  if (ensureFresh) {
4199
4834
  await ensureIndexFresh(rootDir, { quiet: true });
@@ -4238,7 +4873,7 @@ function createSearchContext(rootDir, moduleId, config) {
4238
4873
  config,
4239
4874
  loadFileIndex: async (filepath) => {
4240
4875
  const hasExtension = /\.[^./]+$/.test(filepath);
4241
- const indexFilePath = hasExtension ? path13.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path13.join(indexPath, filepath + ".json");
4876
+ const indexFilePath = hasExtension ? path16.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path16.join(indexPath, filepath + ".json");
4242
4877
  try {
4243
4878
  const content = await fs7.readFile(indexFilePath, "utf-8");
4244
4879
  return JSON.parse(content);
@@ -4250,7 +4885,7 @@ function createSearchContext(rootDir, moduleId, config) {
4250
4885
  const files = [];
4251
4886
  await traverseDirectory(indexPath, files, indexPath);
4252
4887
  return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
4253
- const relative4 = path13.relative(indexPath, f);
4888
+ const relative4 = path16.relative(indexPath, f);
4254
4889
  return relative4.replace(/\.json$/, "");
4255
4890
  });
4256
4891
  }
@@ -4260,7 +4895,7 @@ async function traverseDirectory(dir, files, basePath) {
4260
4895
  try {
4261
4896
  const entries = await fs7.readdir(dir, { withFileTypes: true });
4262
4897
  for (const entry of entries) {
4263
- const fullPath = path13.join(dir, entry.name);
4898
+ const fullPath = path16.join(dir, entry.name);
4264
4899
  if (entry.isDirectory()) {
4265
4900
  await traverseDirectory(fullPath, files, basePath);
4266
4901
  } else if (entry.isFile()) {
@@ -4338,7 +4973,7 @@ init_logger();
4338
4973
  // package.json
4339
4974
  var package_default = {
4340
4975
  name: "raggrep",
4341
- version: "0.4.0",
4976
+ version: "0.5.0",
4342
4977
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
4343
4978
  type: "module",
4344
4979
  main: "./dist/index.js",
@@ -4474,6 +5109,14 @@ function parseFlags(args2) {
4474
5109
  console.error("--type requires a file extension (e.g., ts, tsx, js)");
4475
5110
  process.exit(1);
4476
5111
  }
5112
+ } else if (arg === "--concurrency" || arg === "-c") {
5113
+ const c = parseInt(args2[++i], 10);
5114
+ if (!isNaN(c) && c > 0) {
5115
+ flags.concurrency = c;
5116
+ } else {
5117
+ console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
5118
+ process.exit(1);
5119
+ }
4477
5120
  } else if (!arg.startsWith("-")) {
4478
5121
  flags.remaining.push(arg);
4479
5122
  }
@@ -4493,10 +5136,11 @@ Usage:
4493
5136
  raggrep index [options]
4494
5137
 
4495
5138
  Options:
4496
- -w, --watch Watch for file changes and re-index automatically
4497
- -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
4498
- -v, --verbose Show detailed progress
4499
- -h, --help Show this help message
5139
+ -w, --watch Watch for file changes and re-index automatically
5140
+ -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
5141
+ -c, --concurrency <n> Number of files to process in parallel (default: 4)
5142
+ -v, --verbose Show detailed progress
5143
+ -h, --help Show this help message
4500
5144
 
4501
5145
  Available Models:
4502
5146
  ${models}
@@ -4507,6 +5151,7 @@ Examples:
4507
5151
  raggrep index
4508
5152
  raggrep index --watch
4509
5153
  raggrep index --model bge-small-en-v1.5
5154
+ raggrep index --concurrency 8
4510
5155
  raggrep index --verbose
4511
5156
  `);
4512
5157
  process.exit(0);
@@ -4520,6 +5165,7 @@ Examples:
4520
5165
  const results = await indexDirectory2(process.cwd(), {
4521
5166
  model: flags.model,
4522
5167
  verbose: flags.verbose,
5168
+ concurrency: flags.concurrency,
4523
5169
  logger
4524
5170
  });
4525
5171
  console.log(`
@@ -4773,4 +5419,4 @@ Run 'raggrep <command> --help' for more information.
4773
5419
  }
4774
5420
  main();
4775
5421
 
4776
- //# debugId=B729BEE1B814E8D564756E2164756E21
5422
+ //# debugId=5CA623D9974ACF4364756E2164756E21