raggrep 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -325,6 +325,7 @@ var init_searchResult = __esm(() => {
325
325
  topK: 10,
326
326
  minScore: 0.15,
327
327
  filePatterns: [],
328
+ pathFilter: [],
328
329
  ensureFresh: true
329
330
  };
330
331
  });
@@ -348,6 +349,20 @@ function createDefaultConfig() {
348
349
  options: {
349
350
  embeddingModel: "all-MiniLM-L6-v2"
350
351
  }
352
+ },
353
+ {
354
+ id: "data/json",
355
+ enabled: true,
356
+ options: {
357
+ embeddingModel: "all-MiniLM-L6-v2"
358
+ }
359
+ },
360
+ {
361
+ id: "docs/markdown",
362
+ enabled: true,
363
+ options: {
364
+ embeddingModel: "all-MiniLM-L6-v2"
365
+ }
351
366
  }
352
367
  ]
353
368
  };
@@ -391,16 +406,18 @@ var init_config = __esm(() => {
391
406
  ".jsx",
392
407
  ".mjs",
393
408
  ".cjs",
409
+ ".mts",
410
+ ".cts",
411
+ ".json",
412
+ ".md",
394
413
  ".py",
395
414
  ".go",
396
415
  ".rs",
397
416
  ".java",
398
- ".json",
399
417
  ".yaml",
400
418
  ".yml",
401
419
  ".toml",
402
420
  ".sql",
403
- ".md",
404
421
  ".txt"
405
422
  ];
406
423
  });
@@ -2071,6 +2088,9 @@ class CoreModule {
2071
2088
  name = "Core Search";
2072
2089
  description = "Language-agnostic text search with symbol extraction";
2073
2090
  version = "1.0.0";
2091
+ supportsFile(_filepath) {
2092
+ return true;
2093
+ }
2074
2094
  symbolIndex = new Map;
2075
2095
  bm25Index = null;
2076
2096
  rootDir = "";
@@ -2292,221 +2312,6 @@ var init_core = __esm(() => {
2292
2312
  init_symbols();
2293
2313
  });
2294
2314
 
2295
- // src/domain/services/similarity.ts
2296
- function cosineSimilarity(a, b) {
2297
- if (a.length !== b.length) {
2298
- throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
2299
- }
2300
- let dotProduct = 0;
2301
- let normA = 0;
2302
- let normB = 0;
2303
- for (let i = 0;i < a.length; i++) {
2304
- dotProduct += a[i] * b[i];
2305
- normA += a[i] * a[i];
2306
- normB += b[i] * b[i];
2307
- }
2308
- const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
2309
- if (magnitude === 0)
2310
- return 0;
2311
- return dotProduct / magnitude;
2312
- }
2313
-
2314
- // src/modules/language/typescript/parseCode.ts
2315
- import * as ts from "typescript";
2316
- function parseCode(content, filepath) {
2317
- const ext = filepath.split(".").pop()?.toLowerCase();
2318
- if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
2319
- return parseTypeScript(content, filepath);
2320
- }
2321
- return parseGenericCode(content);
2322
- }
2323
- function parseTypeScript(content, filepath) {
2324
- const chunks = [];
2325
- const lines = content.split(`
2326
- `);
2327
- const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
2328
- function getLineNumbers(node) {
2329
- const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
2330
- const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
2331
- return {
2332
- startLine: start.line + 1,
2333
- endLine: end.line + 1
2334
- };
2335
- }
2336
- function getNodeText(node) {
2337
- return node.getText(sourceFile);
2338
- }
2339
- function isExported(node) {
2340
- if (!ts.canHaveModifiers(node))
2341
- return false;
2342
- const modifiers = ts.getModifiers(node);
2343
- return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
2344
- }
2345
- function getJSDoc(node) {
2346
- const jsDocNodes = ts.getJSDocCommentsAndTags(node);
2347
- if (jsDocNodes.length === 0)
2348
- return;
2349
- return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
2350
- `);
2351
- }
2352
- function getFunctionName(node) {
2353
- if (ts.isFunctionDeclaration(node) && node.name) {
2354
- return node.name.text;
2355
- }
2356
- if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
2357
- return node.name.text;
2358
- }
2359
- if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
2360
- return node.name.text;
2361
- }
2362
- return;
2363
- }
2364
- function visit(node) {
2365
- const { startLine, endLine } = getLineNumbers(node);
2366
- if (ts.isFunctionDeclaration(node) && node.name) {
2367
- chunks.push({
2368
- content: getNodeText(node),
2369
- startLine,
2370
- endLine,
2371
- type: "function",
2372
- name: node.name.text,
2373
- isExported: isExported(node),
2374
- jsDoc: getJSDoc(node)
2375
- });
2376
- return;
2377
- }
2378
- if (ts.isVariableStatement(node)) {
2379
- for (const decl of node.declarationList.declarations) {
2380
- if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2381
- const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2382
- chunks.push({
2383
- content: getNodeText(node),
2384
- startLine,
2385
- endLine,
2386
- type: "function",
2387
- name,
2388
- isExported: isExported(node),
2389
- jsDoc: getJSDoc(node)
2390
- });
2391
- return;
2392
- }
2393
- }
2394
- }
2395
- if (ts.isClassDeclaration(node) && node.name) {
2396
- chunks.push({
2397
- content: getNodeText(node),
2398
- startLine,
2399
- endLine,
2400
- type: "class",
2401
- name: node.name.text,
2402
- isExported: isExported(node),
2403
- jsDoc: getJSDoc(node)
2404
- });
2405
- return;
2406
- }
2407
- if (ts.isInterfaceDeclaration(node)) {
2408
- chunks.push({
2409
- content: getNodeText(node),
2410
- startLine,
2411
- endLine,
2412
- type: "interface",
2413
- name: node.name.text,
2414
- isExported: isExported(node),
2415
- jsDoc: getJSDoc(node)
2416
- });
2417
- return;
2418
- }
2419
- if (ts.isTypeAliasDeclaration(node)) {
2420
- chunks.push({
2421
- content: getNodeText(node),
2422
- startLine,
2423
- endLine,
2424
- type: "type",
2425
- name: node.name.text,
2426
- isExported: isExported(node),
2427
- jsDoc: getJSDoc(node)
2428
- });
2429
- return;
2430
- }
2431
- if (ts.isEnumDeclaration(node)) {
2432
- chunks.push({
2433
- content: getNodeText(node),
2434
- startLine,
2435
- endLine,
2436
- type: "enum",
2437
- name: node.name.text,
2438
- isExported: isExported(node),
2439
- jsDoc: getJSDoc(node)
2440
- });
2441
- return;
2442
- }
2443
- if (ts.isVariableStatement(node) && isExported(node)) {
2444
- for (const decl of node.declarationList.declarations) {
2445
- if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2446
- continue;
2447
- }
2448
- const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2449
- chunks.push({
2450
- content: getNodeText(node),
2451
- startLine,
2452
- endLine,
2453
- type: "variable",
2454
- name,
2455
- isExported: true,
2456
- jsDoc: getJSDoc(node)
2457
- });
2458
- }
2459
- return;
2460
- }
2461
- ts.forEachChild(node, visit);
2462
- }
2463
- ts.forEachChild(sourceFile, visit);
2464
- if (chunks.length === 0) {
2465
- return parseGenericCode(content);
2466
- }
2467
- return chunks;
2468
- }
2469
- function parseGenericCode(content) {
2470
- const chunks = [];
2471
- const lines = content.split(`
2472
- `);
2473
- const CHUNK_SIZE = 30;
2474
- const OVERLAP = 5;
2475
- if (lines.length <= CHUNK_SIZE) {
2476
- return [
2477
- {
2478
- content,
2479
- startLine: 1,
2480
- endLine: lines.length,
2481
- type: "file"
2482
- }
2483
- ];
2484
- }
2485
- for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
2486
- const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
2487
- chunks.push({
2488
- content: lines.slice(i, endIdx).join(`
2489
- `),
2490
- startLine: i + 1,
2491
- endLine: endIdx,
2492
- type: "block"
2493
- });
2494
- if (endIdx >= lines.length)
2495
- break;
2496
- }
2497
- return chunks;
2498
- }
2499
- function generateChunkId(filepath, startLine, endLine) {
2500
- const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2501
- return `${safePath}-${startLine}-${endLine}`;
2502
- }
2503
- var init_parseCode = () => {};
2504
-
2505
- // src/infrastructure/storage/fileIndexStorage.ts
2506
- var init_fileIndexStorage = __esm(() => {
2507
- init_entities();
2508
- });
2509
-
2510
2315
  // src/domain/services/keywords.ts
2511
2316
  function extractKeywords(content, name, maxKeywords = 50) {
2512
2317
  const keywords = new Set;
@@ -2695,19 +2500,347 @@ var init_keywords = __esm(() => {
2695
2500
  };
2696
2501
  });
2697
2502
 
2698
- // src/infrastructure/storage/symbolicIndex.ts
2699
- import * as fs3 from "fs/promises";
2700
- import * as path7 from "path";
2503
+ // src/domain/services/similarity.ts
2504
+ function cosineSimilarity(a, b) {
2505
+ if (a.length !== b.length) {
2506
+ throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
2507
+ }
2508
+ let dotProduct = 0;
2509
+ let normA = 0;
2510
+ let normB = 0;
2511
+ for (let i = 0;i < a.length; i++) {
2512
+ dotProduct += a[i] * b[i];
2513
+ normA += a[i] * a[i];
2514
+ normB += b[i] * b[i];
2515
+ }
2516
+ const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
2517
+ if (magnitude === 0)
2518
+ return 0;
2519
+ return dotProduct / magnitude;
2520
+ }
2701
2521
 
2702
- class SymbolicIndex {
2703
- meta = null;
2704
- fileSummaries = new Map;
2705
- bm25Index = null;
2706
- symbolicPath;
2707
- moduleId;
2708
- constructor(indexDir, moduleId) {
2709
- this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
2710
- this.moduleId = moduleId;
2522
+ // src/domain/services/queryIntent.ts
2523
+ import * as path7 from "path";
2524
+ function detectQueryIntent(queryTerms) {
2525
+ const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2526
+ const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2527
+ if (hasDocumentationTerm) {
2528
+ return "documentation";
2529
+ }
2530
+ if (hasImplementationTerm) {
2531
+ return "implementation";
2532
+ }
2533
+ return "neutral";
2534
+ }
2535
+ function extractQueryTerms(query) {
2536
+ return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
2537
+ }
2538
+ function isSourceCodeFile(filepath) {
2539
+ const ext = path7.extname(filepath).toLowerCase();
2540
+ return SOURCE_CODE_EXTENSIONS.includes(ext);
2541
+ }
2542
+ function isDocFile(filepath) {
2543
+ const ext = path7.extname(filepath).toLowerCase();
2544
+ return DOC_EXTENSIONS.includes(ext);
2545
+ }
2546
+ function calculateFileTypeBoost(filepath, queryTerms) {
2547
+ const isSourceCode = isSourceCodeFile(filepath);
2548
+ const isDoc = isDocFile(filepath);
2549
+ const intent = detectQueryIntent(queryTerms);
2550
+ if (intent === "implementation") {
2551
+ if (isSourceCode) {
2552
+ return 0.06;
2553
+ }
2554
+ return 0;
2555
+ }
2556
+ if (intent === "documentation") {
2557
+ if (isDoc) {
2558
+ return 0.08;
2559
+ }
2560
+ return 0;
2561
+ }
2562
+ return 0;
2563
+ }
2564
+ var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2565
+ var init_queryIntent = __esm(() => {
2566
+ IMPLEMENTATION_TERMS = [
2567
+ "function",
2568
+ "method",
2569
+ "class",
2570
+ "interface",
2571
+ "implement",
2572
+ "implementation",
2573
+ "endpoint",
2574
+ "route",
2575
+ "handler",
2576
+ "controller",
2577
+ "module",
2578
+ "code"
2579
+ ];
2580
+ DOCUMENTATION_TERMS = [
2581
+ "documentation",
2582
+ "docs",
2583
+ "guide",
2584
+ "tutorial",
2585
+ "readme",
2586
+ "how",
2587
+ "what",
2588
+ "why",
2589
+ "explain",
2590
+ "overview",
2591
+ "getting",
2592
+ "started",
2593
+ "requirements",
2594
+ "setup",
2595
+ "install",
2596
+ "configure",
2597
+ "configuration"
2598
+ ];
2599
+ SOURCE_CODE_EXTENSIONS = [
2600
+ ".ts",
2601
+ ".tsx",
2602
+ ".js",
2603
+ ".jsx",
2604
+ ".mjs",
2605
+ ".cjs",
2606
+ ".py",
2607
+ ".go",
2608
+ ".rs",
2609
+ ".java"
2610
+ ];
2611
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2612
+ });
2613
+
2614
+ // src/domain/services/chunking.ts
2615
+ function createLineBasedChunks(content, options = {}) {
2616
+ const {
2617
+ chunkSize = DEFAULT_CHUNK_SIZE,
2618
+ overlap = DEFAULT_OVERLAP,
2619
+ minLinesForMultipleChunks = chunkSize
2620
+ } = options;
2621
+ const lines = content.split(`
2622
+ `);
2623
+ const chunks = [];
2624
+ if (lines.length <= minLinesForMultipleChunks) {
2625
+ return [
2626
+ {
2627
+ content,
2628
+ startLine: 1,
2629
+ endLine: lines.length,
2630
+ type: "file"
2631
+ }
2632
+ ];
2633
+ }
2634
+ for (let i = 0;i < lines.length; i += chunkSize - overlap) {
2635
+ const endIdx = Math.min(i + chunkSize, lines.length);
2636
+ chunks.push({
2637
+ content: lines.slice(i, endIdx).join(`
2638
+ `),
2639
+ startLine: i + 1,
2640
+ endLine: endIdx,
2641
+ type: "block"
2642
+ });
2643
+ if (endIdx >= lines.length)
2644
+ break;
2645
+ }
2646
+ return chunks;
2647
+ }
2648
+ function generateChunkId(filepath, startLine, endLine) {
2649
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2650
+ return `${safePath}-${startLine}-${endLine}`;
2651
+ }
2652
+ var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
2653
+
2654
+ // src/domain/services/index.ts
2655
+ var init_services = __esm(() => {
2656
+ init_keywords();
2657
+ init_queryIntent();
2658
+ });
2659
+
2660
+ // src/modules/language/typescript/parseCode.ts
2661
+ import * as ts from "typescript";
2662
+ function parseTypeScriptCode(content, filepath) {
2663
+ return parseTypeScript(content, filepath);
2664
+ }
2665
+ function parseTypeScript(content, filepath) {
2666
+ const chunks = [];
2667
+ const lines = content.split(`
2668
+ `);
2669
+ const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
2670
+ function getLineNumbers(node) {
2671
+ const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
2672
+ const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
2673
+ return {
2674
+ startLine: start.line + 1,
2675
+ endLine: end.line + 1
2676
+ };
2677
+ }
2678
+ function getNodeText(node) {
2679
+ return node.getText(sourceFile);
2680
+ }
2681
+ function isExported(node) {
2682
+ if (!ts.canHaveModifiers(node))
2683
+ return false;
2684
+ const modifiers = ts.getModifiers(node);
2685
+ return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
2686
+ }
2687
+ function getJSDoc(node) {
2688
+ const jsDocNodes = ts.getJSDocCommentsAndTags(node);
2689
+ if (jsDocNodes.length === 0)
2690
+ return;
2691
+ return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
2692
+ `);
2693
+ }
2694
+ function getFunctionName(node) {
2695
+ if (ts.isFunctionDeclaration(node) && node.name) {
2696
+ return node.name.text;
2697
+ }
2698
+ if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
2699
+ return node.name.text;
2700
+ }
2701
+ if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
2702
+ return node.name.text;
2703
+ }
2704
+ return;
2705
+ }
2706
+ function visit(node) {
2707
+ const { startLine, endLine } = getLineNumbers(node);
2708
+ if (ts.isFunctionDeclaration(node) && node.name) {
2709
+ chunks.push({
2710
+ content: getNodeText(node),
2711
+ startLine,
2712
+ endLine,
2713
+ type: "function",
2714
+ name: node.name.text,
2715
+ isExported: isExported(node),
2716
+ jsDoc: getJSDoc(node)
2717
+ });
2718
+ return;
2719
+ }
2720
+ if (ts.isVariableStatement(node)) {
2721
+ for (const decl of node.declarationList.declarations) {
2722
+ if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2723
+ const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2724
+ chunks.push({
2725
+ content: getNodeText(node),
2726
+ startLine,
2727
+ endLine,
2728
+ type: "function",
2729
+ name,
2730
+ isExported: isExported(node),
2731
+ jsDoc: getJSDoc(node)
2732
+ });
2733
+ return;
2734
+ }
2735
+ }
2736
+ }
2737
+ if (ts.isClassDeclaration(node) && node.name) {
2738
+ chunks.push({
2739
+ content: getNodeText(node),
2740
+ startLine,
2741
+ endLine,
2742
+ type: "class",
2743
+ name: node.name.text,
2744
+ isExported: isExported(node),
2745
+ jsDoc: getJSDoc(node)
2746
+ });
2747
+ return;
2748
+ }
2749
+ if (ts.isInterfaceDeclaration(node)) {
2750
+ chunks.push({
2751
+ content: getNodeText(node),
2752
+ startLine,
2753
+ endLine,
2754
+ type: "interface",
2755
+ name: node.name.text,
2756
+ isExported: isExported(node),
2757
+ jsDoc: getJSDoc(node)
2758
+ });
2759
+ return;
2760
+ }
2761
+ if (ts.isTypeAliasDeclaration(node)) {
2762
+ chunks.push({
2763
+ content: getNodeText(node),
2764
+ startLine,
2765
+ endLine,
2766
+ type: "type",
2767
+ name: node.name.text,
2768
+ isExported: isExported(node),
2769
+ jsDoc: getJSDoc(node)
2770
+ });
2771
+ return;
2772
+ }
2773
+ if (ts.isEnumDeclaration(node)) {
2774
+ chunks.push({
2775
+ content: getNodeText(node),
2776
+ startLine,
2777
+ endLine,
2778
+ type: "enum",
2779
+ name: node.name.text,
2780
+ isExported: isExported(node),
2781
+ jsDoc: getJSDoc(node)
2782
+ });
2783
+ return;
2784
+ }
2785
+ if (ts.isVariableStatement(node) && isExported(node)) {
2786
+ for (const decl of node.declarationList.declarations) {
2787
+ if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2788
+ continue;
2789
+ }
2790
+ const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2791
+ chunks.push({
2792
+ content: getNodeText(node),
2793
+ startLine,
2794
+ endLine,
2795
+ type: "variable",
2796
+ name,
2797
+ isExported: true,
2798
+ jsDoc: getJSDoc(node)
2799
+ });
2800
+ }
2801
+ return;
2802
+ }
2803
+ ts.forEachChild(node, visit);
2804
+ }
2805
+ ts.forEachChild(sourceFile, visit);
2806
+ if (chunks.length === 0) {
2807
+ const lines2 = content.split(`
2808
+ `);
2809
+ return [
2810
+ {
2811
+ content,
2812
+ startLine: 1,
2813
+ endLine: lines2.length,
2814
+ type: "file"
2815
+ }
2816
+ ];
2817
+ }
2818
+ return chunks;
2819
+ }
2820
+ function generateChunkId2(filepath, startLine, endLine) {
2821
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2822
+ return `${safePath}-${startLine}-${endLine}`;
2823
+ }
2824
+ var init_parseCode = () => {};
2825
+
2826
+ // src/infrastructure/storage/fileIndexStorage.ts
2827
+ var init_fileIndexStorage = __esm(() => {
2828
+ init_entities();
2829
+ });
2830
+
2831
+ // src/infrastructure/storage/symbolicIndex.ts
2832
+ import * as fs3 from "fs/promises";
2833
+ import * as path8 from "path";
2834
+
2835
+ class SymbolicIndex {
2836
+ meta = null;
2837
+ fileSummaries = new Map;
2838
+ bm25Index = null;
2839
+ symbolicPath;
2840
+ moduleId;
2841
+ constructor(indexDir, moduleId) {
2842
+ this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
2843
+ this.moduleId = moduleId;
2711
2844
  }
2712
2845
  async initialize() {
2713
2846
  try {
@@ -2724,194 +2857,763 @@ class SymbolicIndex {
2724
2857
  totalDocs: 0
2725
2858
  }
2726
2859
  };
2727
- this.bm25Index = new BM25Index;
2860
+ this.bm25Index = new BM25Index;
2861
+ }
2862
+ }
2863
+ addFile(summary) {
2864
+ this.fileSummaries.set(summary.filepath, summary);
2865
+ }
2866
+ removeFile(filepath) {
2867
+ return this.fileSummaries.delete(filepath);
2868
+ }
2869
+ buildBM25Index() {
2870
+ this.bm25Index = new BM25Index;
2871
+ for (const [filepath, summary] of this.fileSummaries) {
2872
+ const content = [
2873
+ ...summary.keywords,
2874
+ ...summary.exports,
2875
+ ...extractPathKeywords(filepath)
2876
+ ].join(" ");
2877
+ this.bm25Index.addDocuments([{ id: filepath, content }]);
2878
+ }
2879
+ if (this.meta) {
2880
+ this.meta.fileCount = this.fileSummaries.size;
2881
+ this.meta.bm25Data.totalDocs = this.fileSummaries.size;
2882
+ }
2883
+ }
2884
+ findCandidates(query, maxCandidates = 20) {
2885
+ if (!this.bm25Index) {
2886
+ return Array.from(this.fileSummaries.keys());
2887
+ }
2888
+ const results = this.bm25Index.search(query, maxCandidates);
2889
+ return results.map((r) => r.id);
2890
+ }
2891
+ getAllFiles() {
2892
+ return Array.from(this.fileSummaries.keys());
2893
+ }
2894
+ getFileSummary(filepath) {
2895
+ return this.fileSummaries.get(filepath);
2896
+ }
2897
+ async save() {
2898
+ if (!this.meta)
2899
+ throw new Error("Index not initialized");
2900
+ this.meta.lastUpdated = new Date().toISOString();
2901
+ this.meta.fileCount = this.fileSummaries.size;
2902
+ await fs3.mkdir(this.symbolicPath, { recursive: true });
2903
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2904
+ await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2905
+ for (const [filepath, summary] of this.fileSummaries) {
2906
+ const summaryPath = this.getFileSummaryPath(filepath);
2907
+ await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
2908
+ await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
2909
+ }
2910
+ }
2911
+ async load() {
2912
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2913
+ const metaContent = await fs3.readFile(metaPath, "utf-8");
2914
+ this.meta = JSON.parse(metaContent);
2915
+ this.fileSummaries.clear();
2916
+ await this.loadFileSummariesRecursive(this.symbolicPath);
2917
+ this.buildBM25Index();
2918
+ }
2919
+ async loadFileSummariesRecursive(dir) {
2920
+ try {
2921
+ const entries = await fs3.readdir(dir, { withFileTypes: true });
2922
+ for (const entry of entries) {
2923
+ const fullPath = path8.join(dir, entry.name);
2924
+ if (entry.isDirectory()) {
2925
+ await this.loadFileSummariesRecursive(fullPath);
2926
+ } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
2927
+ try {
2928
+ const content = await fs3.readFile(fullPath, "utf-8");
2929
+ const summary = JSON.parse(content);
2930
+ if (summary.filepath) {
2931
+ this.fileSummaries.set(summary.filepath, summary);
2932
+ }
2933
+ } catch {}
2934
+ }
2935
+ }
2936
+ } catch {}
2937
+ }
2938
+ getFileSummaryPath(filepath) {
2939
+ const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2940
+ return path8.join(this.symbolicPath, jsonPath);
2941
+ }
2942
+ async deleteFileSummary(filepath) {
2943
+ try {
2944
+ await fs3.unlink(this.getFileSummaryPath(filepath));
2945
+ } catch {}
2946
+ this.fileSummaries.delete(filepath);
2947
+ }
2948
+ async exists() {
2949
+ try {
2950
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2951
+ await fs3.access(metaPath);
2952
+ return true;
2953
+ } catch {
2954
+ return false;
2955
+ }
2956
+ }
2957
+ get size() {
2958
+ return this.fileSummaries.size;
2959
+ }
2960
+ clear() {
2961
+ this.fileSummaries.clear();
2962
+ if (this.meta) {
2963
+ this.meta.fileCount = 0;
2964
+ this.meta.bm25Data = {
2965
+ avgDocLength: 0,
2966
+ documentFrequencies: {},
2967
+ totalDocs: 0
2968
+ };
2728
2969
  }
2970
+ this.bm25Index = new BM25Index;
2729
2971
  }
2730
- addFile(summary) {
2731
- this.fileSummaries.set(summary.filepath, summary);
2972
+ }
2973
+ var init_symbolicIndex = __esm(() => {
2974
+ init_keywords();
2975
+ });
2976
+
2977
+ // src/infrastructure/storage/index.ts
2978
+ var init_storage = __esm(() => {
2979
+ init_fileIndexStorage();
2980
+ init_symbolicIndex();
2981
+ });
2982
+
2983
+ // src/modules/language/typescript/index.ts
2984
+ var exports_typescript = {};
2985
+ __export(exports_typescript, {
2986
+ supportsFile: () => supportsFile,
2987
+ isTypeScriptFile: () => isTypeScriptFile,
2988
+ TypeScriptModule: () => TypeScriptModule,
2989
+ TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
2990
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
2991
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2992
+ });
2993
+ import * as path9 from "path";
2994
+ function isTypeScriptFile(filepath) {
2995
+ const ext = path9.extname(filepath).toLowerCase();
2996
+ return TYPESCRIPT_EXTENSIONS.includes(ext);
2997
+ }
2998
+ function calculateChunkTypeBoost(chunk) {
2999
+ switch (chunk.type) {
3000
+ case "function":
3001
+ return 0.05;
3002
+ case "class":
3003
+ case "interface":
3004
+ return 0.04;
3005
+ case "type":
3006
+ case "enum":
3007
+ return 0.03;
3008
+ case "variable":
3009
+ return 0.02;
3010
+ case "file":
3011
+ case "block":
3012
+ default:
3013
+ return 0;
2732
3014
  }
2733
- removeFile(filepath) {
2734
- return this.fileSummaries.delete(filepath);
3015
+ }
3016
+ function calculateExportBoost(chunk) {
3017
+ return chunk.isExported ? 0.03 : 0;
3018
+ }
3019
+
3020
+ class TypeScriptModule {
3021
+ id = "language/typescript";
3022
+ name = "TypeScript Search";
3023
+ description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3024
+ version = "1.0.0";
3025
+ supportsFile(filepath) {
3026
+ return isTypeScriptFile(filepath);
3027
+ }
3028
+ embeddingConfig = null;
3029
+ symbolicIndex = null;
3030
+ pendingSummaries = new Map;
3031
+ rootDir = "";
3032
+ logger = undefined;
3033
+ async initialize(config) {
3034
+ this.embeddingConfig = getEmbeddingConfigFromModule(config);
3035
+ this.logger = config.options?.logger;
3036
+ if (this.logger) {
3037
+ this.embeddingConfig = {
3038
+ ...this.embeddingConfig,
3039
+ logger: this.logger
3040
+ };
3041
+ }
3042
+ configureEmbeddings(this.embeddingConfig);
3043
+ this.pendingSummaries.clear();
3044
+ }
3045
+ async indexFile(filepath, content, ctx) {
3046
+ if (!isTypeScriptFile(filepath)) {
3047
+ return null;
3048
+ }
3049
+ this.rootDir = ctx.rootDir;
3050
+ const parsedChunks = parseTypeScriptCode(content, filepath);
3051
+ if (parsedChunks.length === 0) {
3052
+ return null;
3053
+ }
3054
+ const pathContext = parsePathContext(filepath);
3055
+ const pathPrefix = formatPathContextForEmbedding(pathContext);
3056
+ const chunkContents = parsedChunks.map((c) => {
3057
+ const namePrefix = c.name ? `${c.name}: ` : "";
3058
+ return `${pathPrefix} ${namePrefix}${c.content}`;
3059
+ });
3060
+ const embeddings = await getEmbeddings(chunkContents);
3061
+ const chunks = parsedChunks.map((pc) => ({
3062
+ id: generateChunkId2(filepath, pc.startLine, pc.endLine),
3063
+ content: pc.content,
3064
+ startLine: pc.startLine,
3065
+ endLine: pc.endLine,
3066
+ type: pc.type,
3067
+ name: pc.name,
3068
+ isExported: pc.isExported,
3069
+ jsDoc: pc.jsDoc
3070
+ }));
3071
+ const references = this.extractReferences(content, filepath);
3072
+ const stats = await ctx.getFileStats(filepath);
3073
+ const currentConfig = getEmbeddingConfig();
3074
+ const moduleData = {
3075
+ embeddings,
3076
+ embeddingModel: currentConfig.model
3077
+ };
3078
+ const chunkTypes = [
3079
+ ...new Set(parsedChunks.map((pc) => pc.type))
3080
+ ];
3081
+ const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
3082
+ const allKeywords = new Set;
3083
+ for (const pc of parsedChunks) {
3084
+ const keywords = extractKeywords(pc.content, pc.name);
3085
+ keywords.forEach((k) => allKeywords.add(k));
3086
+ }
3087
+ pathContext.keywords.forEach((k) => allKeywords.add(k));
3088
+ const fileSummary = {
3089
+ filepath,
3090
+ chunkCount: chunks.length,
3091
+ chunkTypes,
3092
+ keywords: Array.from(allKeywords),
3093
+ exports,
3094
+ lastModified: stats.lastModified,
3095
+ pathContext: {
3096
+ segments: pathContext.segments,
3097
+ layer: pathContext.layer,
3098
+ domain: pathContext.domain,
3099
+ depth: pathContext.depth
3100
+ }
3101
+ };
3102
+ this.pendingSummaries.set(filepath, fileSummary);
3103
+ return {
3104
+ filepath,
3105
+ lastModified: stats.lastModified,
3106
+ chunks,
3107
+ moduleData,
3108
+ references
3109
+ };
3110
+ }
3111
+ async finalize(ctx) {
3112
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3113
+ this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
3114
+ await this.symbolicIndex.initialize();
3115
+ for (const [filepath, summary] of this.pendingSummaries) {
3116
+ this.symbolicIndex.addFile(summary);
3117
+ }
3118
+ this.symbolicIndex.buildBM25Index();
3119
+ await this.symbolicIndex.save();
3120
+ this.pendingSummaries.clear();
3121
+ }
3122
+ async search(query, ctx, options = {}) {
3123
+ const {
3124
+ topK = DEFAULT_TOP_K2,
3125
+ minScore = DEFAULT_MIN_SCORE2,
3126
+ filePatterns
3127
+ } = options;
3128
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3129
+ const symbolicIndex = new SymbolicIndex(indexDir, this.id);
3130
+ let allFiles;
3131
+ try {
3132
+ await symbolicIndex.initialize();
3133
+ allFiles = symbolicIndex.getAllFiles();
3134
+ } catch {
3135
+ allFiles = await ctx.listIndexedFiles();
3136
+ }
3137
+ let filesToSearch = allFiles;
3138
+ if (filePatterns && filePatterns.length > 0) {
3139
+ filesToSearch = allFiles.filter((filepath) => {
3140
+ return filePatterns.some((pattern) => {
3141
+ if (pattern.startsWith("*.")) {
3142
+ const ext = pattern.slice(1);
3143
+ return filepath.endsWith(ext);
3144
+ }
3145
+ return filepath.includes(pattern);
3146
+ });
3147
+ });
3148
+ }
3149
+ const queryEmbedding = await getEmbedding(query);
3150
+ const bm25Index = new BM25Index;
3151
+ const allChunksData = [];
3152
+ for (const filepath of filesToSearch) {
3153
+ const fileIndex = await ctx.loadFileIndex(filepath);
3154
+ if (!fileIndex)
3155
+ continue;
3156
+ const moduleData = fileIndex.moduleData;
3157
+ if (!moduleData?.embeddings)
3158
+ continue;
3159
+ for (let i = 0;i < fileIndex.chunks.length; i++) {
3160
+ const chunk = fileIndex.chunks[i];
3161
+ const embedding = moduleData.embeddings[i];
3162
+ if (!embedding)
3163
+ continue;
3164
+ allChunksData.push({
3165
+ filepath: fileIndex.filepath,
3166
+ chunk,
3167
+ embedding
3168
+ });
3169
+ bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3170
+ }
3171
+ }
3172
+ const bm25Results = bm25Index.search(query, topK * 3);
3173
+ const bm25Scores = new Map;
3174
+ for (const result of bm25Results) {
3175
+ bm25Scores.set(result.id, normalizeScore(result.score, 3));
3176
+ }
3177
+ const queryTerms = extractQueryTerms(query);
3178
+ const pathBoosts = new Map;
3179
+ for (const filepath of filesToSearch) {
3180
+ const summary = symbolicIndex.getFileSummary(filepath);
3181
+ if (summary?.pathContext) {
3182
+ let boost = 0;
3183
+ const ctx2 = summary.pathContext;
3184
+ if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
3185
+ boost += 0.1;
3186
+ }
3187
+ if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
3188
+ boost += 0.05;
3189
+ }
3190
+ const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
3191
+ if (segmentMatch) {
3192
+ boost += 0.05;
3193
+ }
3194
+ pathBoosts.set(filepath, boost);
3195
+ }
3196
+ }
3197
+ const results = [];
3198
+ for (const { filepath, chunk, embedding } of allChunksData) {
3199
+ const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3200
+ const bm25Score = bm25Scores.get(chunk.id) || 0;
3201
+ const pathBoost = pathBoosts.get(filepath) || 0;
3202
+ const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
3203
+ const chunkTypeBoost = calculateChunkTypeBoost(chunk);
3204
+ const exportBoost = calculateExportBoost(chunk);
3205
+ const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
3206
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3207
+ if (hybridScore >= minScore || bm25Score > 0.3) {
3208
+ results.push({
3209
+ filepath,
3210
+ chunk,
3211
+ score: hybridScore,
3212
+ moduleId: this.id,
3213
+ context: {
3214
+ semanticScore,
3215
+ bm25Score,
3216
+ pathBoost,
3217
+ fileTypeBoost,
3218
+ chunkTypeBoost,
3219
+ exportBoost
3220
+ }
3221
+ });
3222
+ }
3223
+ }
3224
+ results.sort((a, b) => b.score - a.score);
3225
+ return results.slice(0, topK);
2735
3226
  }
2736
- buildBM25Index() {
2737
- this.bm25Index = new BM25Index;
2738
- for (const [filepath, summary] of this.fileSummaries) {
2739
- const content = [
2740
- ...summary.keywords,
2741
- ...summary.exports,
2742
- ...extractPathKeywords(filepath)
2743
- ].join(" ");
2744
- this.bm25Index.addDocuments([{ id: filepath, content }]);
3227
+ extractReferences(content, filepath) {
3228
+ const references = [];
3229
+ const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
3230
+ const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
3231
+ let match;
3232
+ while ((match = importRegex.exec(content)) !== null) {
3233
+ const importPath = match[1];
3234
+ if (importPath.startsWith(".")) {
3235
+ const dir = path9.dirname(filepath);
3236
+ const resolved = path9.normalize(path9.join(dir, importPath));
3237
+ references.push(resolved);
3238
+ }
2745
3239
  }
2746
- if (this.meta) {
2747
- this.meta.fileCount = this.fileSummaries.size;
2748
- this.meta.bm25Data.totalDocs = this.fileSummaries.size;
3240
+ while ((match = requireRegex.exec(content)) !== null) {
3241
+ const importPath = match[1];
3242
+ if (importPath.startsWith(".")) {
3243
+ const dir = path9.dirname(filepath);
3244
+ const resolved = path9.normalize(path9.join(dir, importPath));
3245
+ references.push(resolved);
3246
+ }
2749
3247
  }
3248
+ return references;
2750
3249
  }
2751
- findCandidates(query, maxCandidates = 20) {
2752
- if (!this.bm25Index) {
2753
- return Array.from(this.fileSummaries.keys());
3250
+ }
3251
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS, supportsFile;
3252
+ var init_typescript = __esm(() => {
3253
+ init_embeddings();
3254
+ init_services();
3255
+ init_config2();
3256
+ init_parseCode();
3257
+ init_storage();
3258
+ TYPESCRIPT_EXTENSIONS = [
3259
+ ".ts",
3260
+ ".tsx",
3261
+ ".js",
3262
+ ".jsx",
3263
+ ".mjs",
3264
+ ".cjs",
3265
+ ".mts",
3266
+ ".cts"
3267
+ ];
3268
+ supportsFile = isTypeScriptFile;
3269
+ });
3270
+
3271
+ // src/modules/data/json/index.ts
3272
+ var exports_json = {};
3273
+ __export(exports_json, {
3274
+ supportsFile: () => supportsFile2,
3275
+ isJsonFile: () => isJsonFile,
3276
+ JsonModule: () => JsonModule,
3277
+ JSON_EXTENSIONS: () => JSON_EXTENSIONS,
3278
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
3279
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
3280
+ });
3281
+ import * as path10 from "path";
3282
+ function isJsonFile(filepath) {
3283
+ const ext = path10.extname(filepath).toLowerCase();
3284
+ return JSON_EXTENSIONS.includes(ext);
3285
+ }
3286
+ function extractJsonKeys(obj, prefix = "") {
3287
+ const keys = [];
3288
+ if (obj === null || obj === undefined) {
3289
+ return keys;
3290
+ }
3291
+ if (Array.isArray(obj)) {
3292
+ obj.forEach((item, index) => {
3293
+ keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
3294
+ });
3295
+ } else if (typeof obj === "object") {
3296
+ for (const [key, value] of Object.entries(obj)) {
3297
+ const fullKey = prefix ? `${prefix}.${key}` : key;
3298
+ keys.push(key);
3299
+ keys.push(...extractJsonKeys(value, fullKey));
2754
3300
  }
2755
- const results = this.bm25Index.search(query, maxCandidates);
2756
- return results.map((r) => r.id);
2757
3301
  }
2758
- getAllFiles() {
2759
- return Array.from(this.fileSummaries.keys());
3302
+ return keys;
3303
+ }
3304
+ function extractJsonKeywords(content) {
3305
+ try {
3306
+ const parsed = JSON.parse(content);
3307
+ const keys = extractJsonKeys(parsed);
3308
+ const stringValues = [];
3309
+ const extractStrings = (obj) => {
3310
+ if (typeof obj === "string") {
3311
+ const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
3312
+ stringValues.push(...words);
3313
+ } else if (Array.isArray(obj)) {
3314
+ obj.forEach(extractStrings);
3315
+ } else if (obj && typeof obj === "object") {
3316
+ Object.values(obj).forEach(extractStrings);
3317
+ }
3318
+ };
3319
+ extractStrings(parsed);
3320
+ return [...new Set([...keys, ...stringValues])];
3321
+ } catch {
3322
+ return [];
2760
3323
  }
2761
- getFileSummary(filepath) {
2762
- return this.fileSummaries.get(filepath);
3324
+ }
3325
+
3326
+ class JsonModule {
3327
+ id = "data/json";
3328
+ name = "JSON Search";
3329
+ description = "JSON file search with structure-aware indexing";
3330
+ version = "1.0.0";
3331
+ supportsFile(filepath) {
3332
+ return isJsonFile(filepath);
2763
3333
  }
2764
- async save() {
2765
- if (!this.meta)
2766
- throw new Error("Index not initialized");
2767
- this.meta.lastUpdated = new Date().toISOString();
2768
- this.meta.fileCount = this.fileSummaries.size;
2769
- await fs3.mkdir(this.symbolicPath, { recursive: true });
2770
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2771
- await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2772
- for (const [filepath, summary] of this.fileSummaries) {
2773
- const summaryPath = this.getFileSummaryPath(filepath);
2774
- await fs3.mkdir(path7.dirname(summaryPath), { recursive: true });
2775
- await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
3334
+ embeddingConfig = null;
3335
+ symbolicIndex = null;
3336
+ pendingSummaries = new Map;
3337
+ rootDir = "";
3338
+ logger = undefined;
3339
+ async initialize(config) {
3340
+ this.embeddingConfig = getEmbeddingConfigFromModule(config);
3341
+ this.logger = config.options?.logger;
3342
+ if (this.logger) {
3343
+ this.embeddingConfig = {
3344
+ ...this.embeddingConfig,
3345
+ logger: this.logger
3346
+ };
2776
3347
  }
3348
+ configureEmbeddings(this.embeddingConfig);
3349
+ this.pendingSummaries.clear();
2777
3350
  }
2778
- async load() {
2779
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2780
- const metaContent = await fs3.readFile(metaPath, "utf-8");
2781
- this.meta = JSON.parse(metaContent);
2782
- this.fileSummaries.clear();
2783
- await this.loadFileSummariesRecursive(this.symbolicPath);
2784
- this.buildBM25Index();
2785
- }
2786
- async loadFileSummariesRecursive(dir) {
2787
- try {
2788
- const entries = await fs3.readdir(dir, { withFileTypes: true });
2789
- for (const entry of entries) {
2790
- const fullPath = path7.join(dir, entry.name);
2791
- if (entry.isDirectory()) {
2792
- await this.loadFileSummariesRecursive(fullPath);
2793
- } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
2794
- try {
2795
- const content = await fs3.readFile(fullPath, "utf-8");
2796
- const summary = JSON.parse(content);
2797
- if (summary.filepath) {
2798
- this.fileSummaries.set(summary.filepath, summary);
2799
- }
2800
- } catch {}
2801
- }
3351
+ async indexFile(filepath, content, ctx) {
3352
+ if (!isJsonFile(filepath)) {
3353
+ return null;
3354
+ }
3355
+ this.rootDir = ctx.rootDir;
3356
+ const textChunks = createLineBasedChunks(content, {
3357
+ chunkSize: 50,
3358
+ overlap: 10
3359
+ });
3360
+ if (textChunks.length === 0) {
3361
+ return null;
3362
+ }
3363
+ const chunkContents = textChunks.map((c) => {
3364
+ const filename = path10.basename(filepath);
3365
+ return `${filename}: ${c.content}`;
3366
+ });
3367
+ const embeddings = await getEmbeddings(chunkContents);
3368
+ const chunks = textChunks.map((tc, i) => ({
3369
+ id: generateChunkId(filepath, tc.startLine, tc.endLine),
3370
+ content: tc.content,
3371
+ startLine: tc.startLine,
3372
+ endLine: tc.endLine,
3373
+ type: tc.type
3374
+ }));
3375
+ const jsonKeys = extractJsonKeys((() => {
3376
+ try {
3377
+ return JSON.parse(content);
3378
+ } catch {
3379
+ return {};
2802
3380
  }
2803
- } catch {}
2804
- }
2805
- getFileSummaryPath(filepath) {
2806
- const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2807
- return path7.join(this.symbolicPath, jsonPath);
3381
+ })());
3382
+ const stats = await ctx.getFileStats(filepath);
3383
+ const currentConfig = getEmbeddingConfig();
3384
+ const moduleData = {
3385
+ embeddings,
3386
+ embeddingModel: currentConfig.model,
3387
+ jsonKeys
3388
+ };
3389
+ const keywords = extractJsonKeywords(content);
3390
+ const fileSummary = {
3391
+ filepath,
3392
+ chunkCount: chunks.length,
3393
+ chunkTypes: ["file"],
3394
+ keywords,
3395
+ exports: [],
3396
+ lastModified: stats.lastModified
3397
+ };
3398
+ this.pendingSummaries.set(filepath, fileSummary);
3399
+ return {
3400
+ filepath,
3401
+ lastModified: stats.lastModified,
3402
+ chunks,
3403
+ moduleData
3404
+ };
2808
3405
  }
2809
- async deleteFileSummary(filepath) {
2810
- try {
2811
- await fs3.unlink(this.getFileSummaryPath(filepath));
2812
- } catch {}
2813
- this.fileSummaries.delete(filepath);
3406
+ async finalize(ctx) {
3407
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3408
+ this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
3409
+ await this.symbolicIndex.initialize();
3410
+ for (const [filepath, summary] of this.pendingSummaries) {
3411
+ this.symbolicIndex.addFile(summary);
3412
+ }
3413
+ this.symbolicIndex.buildBM25Index();
3414
+ await this.symbolicIndex.save();
3415
+ this.pendingSummaries.clear();
2814
3416
  }
2815
- async exists() {
3417
+ async search(query, ctx, options = {}) {
3418
+ const {
3419
+ topK = DEFAULT_TOP_K3,
3420
+ minScore = DEFAULT_MIN_SCORE3,
3421
+ filePatterns
3422
+ } = options;
3423
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3424
+ const symbolicIndex = new SymbolicIndex(indexDir, this.id);
3425
+ let allFiles;
2816
3426
  try {
2817
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2818
- await fs3.access(metaPath);
2819
- return true;
3427
+ await symbolicIndex.initialize();
3428
+ allFiles = symbolicIndex.getAllFiles();
2820
3429
  } catch {
2821
- return false;
3430
+ allFiles = await ctx.listIndexedFiles();
2822
3431
  }
2823
- }
2824
- get size() {
2825
- return this.fileSummaries.size;
2826
- }
2827
- clear() {
2828
- this.fileSummaries.clear();
2829
- if (this.meta) {
2830
- this.meta.fileCount = 0;
2831
- this.meta.bm25Data = {
2832
- avgDocLength: 0,
2833
- documentFrequencies: {},
2834
- totalDocs: 0
2835
- };
3432
+ let filesToSearch = allFiles.filter((f) => isJsonFile(f));
3433
+ if (filePatterns && filePatterns.length > 0) {
3434
+ filesToSearch = filesToSearch.filter((filepath) => {
3435
+ return filePatterns.some((pattern) => {
3436
+ if (pattern.startsWith("*.")) {
3437
+ const ext = pattern.slice(1);
3438
+ return filepath.endsWith(ext);
3439
+ }
3440
+ return filepath.includes(pattern);
3441
+ });
3442
+ });
2836
3443
  }
2837
- this.bm25Index = new BM25Index;
3444
+ const queryEmbedding = await getEmbedding(query);
3445
+ const bm25Index = new BM25Index;
3446
+ const allChunksData = [];
3447
+ for (const filepath of filesToSearch) {
3448
+ const fileIndex = await ctx.loadFileIndex(filepath);
3449
+ if (!fileIndex)
3450
+ continue;
3451
+ const moduleData = fileIndex.moduleData;
3452
+ if (!moduleData?.embeddings)
3453
+ continue;
3454
+ for (let i = 0;i < fileIndex.chunks.length; i++) {
3455
+ const chunk = fileIndex.chunks[i];
3456
+ const embedding = moduleData.embeddings[i];
3457
+ if (!embedding)
3458
+ continue;
3459
+ allChunksData.push({
3460
+ filepath: fileIndex.filepath,
3461
+ chunk,
3462
+ embedding
3463
+ });
3464
+ bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3465
+ }
3466
+ }
3467
+ const bm25Results = bm25Index.search(query, topK * 3);
3468
+ const bm25Scores = new Map;
3469
+ for (const result of bm25Results) {
3470
+ bm25Scores.set(result.id, normalizeScore(result.score, 3));
3471
+ }
3472
+ const queryTerms = extractQueryTerms(query);
3473
+ const results = [];
3474
+ for (const { filepath, chunk, embedding } of allChunksData) {
3475
+ const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3476
+ const bm25Score = bm25Scores.get(chunk.id) || 0;
3477
+ const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
3478
+ if (hybridScore >= minScore || bm25Score > 0.3) {
3479
+ results.push({
3480
+ filepath,
3481
+ chunk,
3482
+ score: hybridScore,
3483
+ moduleId: this.id,
3484
+ context: {
3485
+ semanticScore,
3486
+ bm25Score
3487
+ }
3488
+ });
3489
+ }
3490
+ }
3491
+ results.sort((a, b) => b.score - a.score);
3492
+ return results.slice(0, topK);
2838
3493
  }
2839
3494
  }
2840
- var init_symbolicIndex = __esm(() => {
2841
- init_keywords();
2842
- });
2843
-
2844
- // src/infrastructure/storage/index.ts
2845
- var init_storage = __esm(() => {
2846
- init_fileIndexStorage();
2847
- init_symbolicIndex();
3495
+ var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS, supportsFile2;
3496
+ var init_json = __esm(() => {
3497
+ init_embeddings();
3498
+ init_services();
3499
+ init_config2();
3500
+ init_storage();
3501
+ JSON_EXTENSIONS = [".json"];
3502
+ supportsFile2 = isJsonFile;
2848
3503
  });
2849
3504
 
2850
- // src/modules/language/typescript/index.ts
2851
- var exports_typescript = {};
2852
- __export(exports_typescript, {
2853
- TypeScriptModule: () => TypeScriptModule,
2854
- DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
2855
- DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
3505
+ // src/modules/docs/markdown/index.ts
3506
+ var exports_markdown = {};
3507
+ __export(exports_markdown, {
3508
+ supportsFile: () => supportsFile3,
3509
+ isMarkdownFile: () => isMarkdownFile,
3510
+ MarkdownModule: () => MarkdownModule,
3511
+ MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
3512
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
3513
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
2856
3514
  });
2857
- import * as path8 from "path";
2858
- function detectQueryIntent(queryTerms) {
2859
- const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2860
- const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2861
- if (hasDocumentationTerm) {
2862
- return "documentation";
2863
- }
2864
- if (hasImplementationTerm) {
2865
- return "implementation";
2866
- }
2867
- return "neutral";
3515
+ import * as path11 from "path";
3516
+ function isMarkdownFile(filepath) {
3517
+ const ext = path11.extname(filepath).toLowerCase();
3518
+ return MARKDOWN_EXTENSIONS.includes(ext);
2868
3519
  }
2869
- function calculateFileTypeBoost(filepath, queryTerms) {
2870
- const ext = path8.extname(filepath).toLowerCase();
2871
- const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
2872
- const isDoc = DOC_EXTENSIONS.includes(ext);
2873
- const intent = detectQueryIntent(queryTerms);
2874
- if (intent === "implementation") {
2875
- if (isSourceCode) {
2876
- return 0.06;
3520
+ function parseMarkdownSections(content) {
3521
+ const lines = content.split(`
3522
+ `);
3523
+ const sections = [];
3524
+ let currentSection = null;
3525
+ let currentContent = [];
3526
+ let startLine = 1;
3527
+ for (let i = 0;i < lines.length; i++) {
3528
+ const line = lines[i];
3529
+ const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
3530
+ if (headingMatch) {
3531
+ if (currentSection) {
3532
+ currentSection.content = currentContent.join(`
3533
+ `).trim();
3534
+ currentSection.endLine = i;
3535
+ if (currentSection.content || currentSection.heading) {
3536
+ sections.push(currentSection);
3537
+ }
3538
+ } else if (currentContent.length > 0) {
3539
+ sections.push({
3540
+ heading: "",
3541
+ level: 0,
3542
+ content: currentContent.join(`
3543
+ `).trim(),
3544
+ startLine: 1,
3545
+ endLine: i
3546
+ });
3547
+ }
3548
+ currentSection = {
3549
+ heading: headingMatch[2],
3550
+ level: headingMatch[1].length,
3551
+ content: "",
3552
+ startLine: i + 1,
3553
+ endLine: lines.length
3554
+ };
3555
+ currentContent = [];
3556
+ } else {
3557
+ currentContent.push(line);
2877
3558
  }
2878
- return 0;
2879
3559
  }
2880
- if (intent === "documentation") {
2881
- if (isDoc) {
2882
- return 0.08;
3560
+ if (currentSection) {
3561
+ currentSection.content = currentContent.join(`
3562
+ `).trim();
3563
+ currentSection.endLine = lines.length;
3564
+ if (currentSection.content || currentSection.heading) {
3565
+ sections.push(currentSection);
2883
3566
  }
2884
- return 0;
3567
+ } else if (currentContent.length > 0) {
3568
+ sections.push({
3569
+ heading: "",
3570
+ level: 0,
3571
+ content: currentContent.join(`
3572
+ `).trim(),
3573
+ startLine: 1,
3574
+ endLine: lines.length
3575
+ });
2885
3576
  }
2886
- return 0;
3577
+ return sections;
2887
3578
  }
2888
- function calculateChunkTypeBoost(chunk) {
2889
- switch (chunk.type) {
2890
- case "function":
2891
- return 0.05;
2892
- case "class":
2893
- case "interface":
2894
- return 0.04;
2895
- case "type":
2896
- case "enum":
2897
- return 0.03;
2898
- case "variable":
2899
- return 0.02;
2900
- case "file":
2901
- case "block":
2902
- default:
2903
- return 0;
3579
+ function extractMarkdownKeywords(content) {
3580
+ const keywords = [];
3581
+ const headingMatches = content.matchAll(/^#{1,6}\s+(.+)$/gm);
3582
+ for (const match of headingMatches) {
3583
+ const heading = match[1].toLowerCase();
3584
+ const words = heading.split(/\s+/).filter((w) => w.length > 2);
3585
+ keywords.push(...words);
3586
+ }
3587
+ const emphasisMatches = content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g);
3588
+ for (const match of emphasisMatches) {
3589
+ const text = (match[1] || match[2] || "").toLowerCase();
3590
+ const words = text.split(/\s+/).filter((w) => w.length > 2);
3591
+ keywords.push(...words);
3592
+ }
3593
+ const codeMatches = content.matchAll(/`([^`]+)`/g);
3594
+ for (const match of codeMatches) {
3595
+ const code = match[1].toLowerCase();
3596
+ if (code.length > 2 && code.length < 50) {
3597
+ keywords.push(code);
3598
+ }
2904
3599
  }
2905
- }
2906
- function calculateExportBoost(chunk) {
2907
- return chunk.isExported ? 0.03 : 0;
3600
+ const linkMatches = content.matchAll(/\[([^\]]+)\]/g);
3601
+ for (const match of linkMatches) {
3602
+ const text = match[1].toLowerCase();
3603
+ const words = text.split(/\s+/).filter((w) => w.length > 2);
3604
+ keywords.push(...words);
3605
+ }
3606
+ return [...new Set(keywords)];
2908
3607
  }
2909
3608
 
2910
- class TypeScriptModule {
2911
- id = "language/typescript";
2912
- name = "TypeScript Search";
2913
- description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3609
+ class MarkdownModule {
3610
+ id = "docs/markdown";
3611
+ name = "Markdown Search";
3612
+ description = "Markdown documentation search with section-aware indexing";
2914
3613
  version = "1.0.0";
3614
+ supportsFile(filepath) {
3615
+ return isMarkdownFile(filepath);
3616
+ }
2915
3617
  embeddingConfig = null;
2916
3618
  symbolicIndex = null;
2917
3619
  pendingSummaries = new Map;
@@ -2930,66 +3632,53 @@ class TypeScriptModule {
2930
3632
  this.pendingSummaries.clear();
2931
3633
  }
2932
3634
  async indexFile(filepath, content, ctx) {
3635
+ if (!isMarkdownFile(filepath)) {
3636
+ return null;
3637
+ }
2933
3638
  this.rootDir = ctx.rootDir;
2934
- const parsedChunks = parseCode(content, filepath);
2935
- if (parsedChunks.length === 0) {
3639
+ const sections = parseMarkdownSections(content);
3640
+ if (sections.length === 0) {
2936
3641
  return null;
2937
3642
  }
2938
- const pathContext = parsePathContext(filepath);
2939
- const pathPrefix = formatPathContextForEmbedding(pathContext);
2940
- const chunkContents = parsedChunks.map((c) => {
2941
- const namePrefix = c.name ? `${c.name}: ` : "";
2942
- return `${pathPrefix} ${namePrefix}${c.content}`;
3643
+ const chunkContents = sections.map((s) => {
3644
+ const filename = path11.basename(filepath);
3645
+ const headingContext = s.heading ? `${s.heading}: ` : "";
3646
+ return `${filename} ${headingContext}${s.content}`;
2943
3647
  });
2944
3648
  const embeddings = await getEmbeddings(chunkContents);
2945
- const chunks = parsedChunks.map((pc) => ({
2946
- id: generateChunkId(filepath, pc.startLine, pc.endLine),
2947
- content: pc.content,
2948
- startLine: pc.startLine,
2949
- endLine: pc.endLine,
2950
- type: pc.type,
2951
- name: pc.name,
2952
- isExported: pc.isExported,
2953
- jsDoc: pc.jsDoc
3649
+ const chunks = sections.map((section, i) => ({
3650
+ id: generateChunkId(filepath, section.startLine, section.endLine),
3651
+ content: section.heading ? `## ${section.heading}
3652
+
3653
+ ${section.content}` : section.content,
3654
+ startLine: section.startLine,
3655
+ endLine: section.endLine,
3656
+ type: "block",
3657
+ name: section.heading || undefined
2954
3658
  }));
2955
- const references = this.extractReferences(content, filepath);
3659
+ const headings = sections.filter((s) => s.heading).map((s) => s.heading);
2956
3660
  const stats = await ctx.getFileStats(filepath);
2957
3661
  const currentConfig = getEmbeddingConfig();
2958
3662
  const moduleData = {
2959
3663
  embeddings,
2960
- embeddingModel: currentConfig.model
3664
+ embeddingModel: currentConfig.model,
3665
+ headings
2961
3666
  };
2962
- const chunkTypes = [
2963
- ...new Set(parsedChunks.map((pc) => pc.type))
2964
- ];
2965
- const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
2966
- const allKeywords = new Set;
2967
- for (const pc of parsedChunks) {
2968
- const keywords = extractKeywords(pc.content, pc.name);
2969
- keywords.forEach((k) => allKeywords.add(k));
2970
- }
2971
- pathContext.keywords.forEach((k) => allKeywords.add(k));
3667
+ const keywords = extractMarkdownKeywords(content);
2972
3668
  const fileSummary = {
2973
3669
  filepath,
2974
3670
  chunkCount: chunks.length,
2975
- chunkTypes,
2976
- keywords: Array.from(allKeywords),
2977
- exports,
2978
- lastModified: stats.lastModified,
2979
- pathContext: {
2980
- segments: pathContext.segments,
2981
- layer: pathContext.layer,
2982
- domain: pathContext.domain,
2983
- depth: pathContext.depth
2984
- }
3671
+ chunkTypes: ["block"],
3672
+ keywords,
3673
+ exports: headings,
3674
+ lastModified: stats.lastModified
2985
3675
  };
2986
3676
  this.pendingSummaries.set(filepath, fileSummary);
2987
3677
  return {
2988
3678
  filepath,
2989
3679
  lastModified: stats.lastModified,
2990
3680
  chunks,
2991
- moduleData,
2992
- references
3681
+ moduleData
2993
3682
  };
2994
3683
  }
2995
3684
  async finalize(ctx) {
@@ -3005,8 +3694,8 @@ class TypeScriptModule {
3005
3694
  }
3006
3695
  async search(query, ctx, options = {}) {
3007
3696
  const {
3008
- topK = DEFAULT_TOP_K2,
3009
- minScore = DEFAULT_MIN_SCORE2,
3697
+ topK = DEFAULT_TOP_K4,
3698
+ minScore = DEFAULT_MIN_SCORE4,
3010
3699
  filePatterns
3011
3700
  } = options;
3012
3701
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
@@ -3018,9 +3707,9 @@ class TypeScriptModule {
3018
3707
  } catch {
3019
3708
  allFiles = await ctx.listIndexedFiles();
3020
3709
  }
3021
- let filesToSearch = allFiles;
3710
+ let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
3022
3711
  if (filePatterns && filePatterns.length > 0) {
3023
- filesToSearch = allFiles.filter((filepath) => {
3712
+ filesToSearch = filesToSearch.filter((filepath) => {
3024
3713
  return filePatterns.some((pattern) => {
3025
3714
  if (pattern.startsWith("*.")) {
3026
3715
  const ext = pattern.slice(1);
@@ -3058,36 +3747,24 @@ class TypeScriptModule {
3058
3747
  for (const result of bm25Results) {
3059
3748
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
3060
3749
  }
3061
- const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
3062
- const pathBoosts = new Map;
3063
- for (const filepath of filesToSearch) {
3064
- const summary = symbolicIndex.getFileSummary(filepath);
3065
- if (summary?.pathContext) {
3066
- let boost = 0;
3067
- const ctx2 = summary.pathContext;
3068
- if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
3069
- boost += 0.1;
3070
- }
3071
- if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
3072
- boost += 0.05;
3073
- }
3074
- const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
3075
- if (segmentMatch) {
3076
- boost += 0.05;
3077
- }
3078
- pathBoosts.set(filepath, boost);
3079
- }
3080
- }
3750
+ const queryTerms = extractQueryTerms(query);
3081
3751
  const results = [];
3082
3752
  for (const { filepath, chunk, embedding } of allChunksData) {
3083
3753
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3084
3754
  const bm25Score = bm25Scores.get(chunk.id) || 0;
3085
- const pathBoost = pathBoosts.get(filepath) || 0;
3086
- const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
3087
- const chunkTypeBoost = calculateChunkTypeBoost(chunk);
3088
- const exportBoost = calculateExportBoost(chunk);
3089
- const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
3090
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3755
+ let docBoost = 0;
3756
+ if (queryTerms.some((t) => [
3757
+ "docs",
3758
+ "documentation",
3759
+ "readme",
3760
+ "guide",
3761
+ "how",
3762
+ "what",
3763
+ "explain"
3764
+ ].includes(t))) {
3765
+ docBoost = 0.05;
3766
+ }
3767
+ const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
3091
3768
  if (hybridScore >= minScore || bm25Score > 0.3) {
3092
3769
  results.push({
3093
3770
  filepath,
@@ -3097,10 +3774,7 @@ class TypeScriptModule {
3097
3774
  context: {
3098
3775
  semanticScore,
3099
3776
  bm25Score,
3100
- pathBoost,
3101
- fileTypeBoost,
3102
- chunkTypeBoost,
3103
- exportBoost
3777
+ docBoost
3104
3778
  }
3105
3779
  });
3106
3780
  }
@@ -3108,84 +3782,15 @@ class TypeScriptModule {
3108
3782
  results.sort((a, b) => b.score - a.score);
3109
3783
  return results.slice(0, topK);
3110
3784
  }
3111
- extractReferences(content, filepath) {
3112
- const references = [];
3113
- const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
3114
- const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
3115
- let match;
3116
- while ((match = importRegex.exec(content)) !== null) {
3117
- const importPath = match[1];
3118
- if (importPath.startsWith(".")) {
3119
- const dir = path8.dirname(filepath);
3120
- const resolved = path8.normalize(path8.join(dir, importPath));
3121
- references.push(resolved);
3122
- }
3123
- }
3124
- while ((match = requireRegex.exec(content)) !== null) {
3125
- const importPath = match[1];
3126
- if (importPath.startsWith(".")) {
3127
- const dir = path8.dirname(filepath);
3128
- const resolved = path8.normalize(path8.join(dir, importPath));
3129
- references.push(resolved);
3130
- }
3131
- }
3132
- return references;
3133
- }
3134
3785
  }
3135
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
3136
- var init_typescript = __esm(() => {
3786
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
3787
+ var init_markdown = __esm(() => {
3137
3788
  init_embeddings();
3789
+ init_services();
3138
3790
  init_config2();
3139
- init_parseCode();
3140
3791
  init_storage();
3141
- init_keywords();
3142
- init_keywords();
3143
- IMPLEMENTATION_TERMS = [
3144
- "function",
3145
- "method",
3146
- "class",
3147
- "interface",
3148
- "implement",
3149
- "implementation",
3150
- "endpoint",
3151
- "route",
3152
- "handler",
3153
- "controller",
3154
- "module",
3155
- "code"
3156
- ];
3157
- DOCUMENTATION_TERMS = [
3158
- "documentation",
3159
- "docs",
3160
- "guide",
3161
- "tutorial",
3162
- "readme",
3163
- "how",
3164
- "what",
3165
- "why",
3166
- "explain",
3167
- "overview",
3168
- "getting",
3169
- "started",
3170
- "requirements",
3171
- "setup",
3172
- "install",
3173
- "configure",
3174
- "configuration"
3175
- ];
3176
- SOURCE_CODE_EXTENSIONS = [
3177
- ".ts",
3178
- ".tsx",
3179
- ".js",
3180
- ".jsx",
3181
- ".mjs",
3182
- ".cjs",
3183
- ".py",
3184
- ".go",
3185
- ".rs",
3186
- ".java"
3187
- ];
3188
- DOC_EXTENSIONS = [".md", ".txt", ".rst"];
3792
+ MARKDOWN_EXTENSIONS = [".md", ".txt"];
3793
+ supportsFile3 = isMarkdownFile;
3189
3794
  });
3190
3795
 
3191
3796
  // src/modules/registry.ts
@@ -3210,8 +3815,12 @@ class ModuleRegistryImpl {
3210
3815
  async function registerBuiltInModules() {
3211
3816
  const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
3212
3817
  const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
3818
+ const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
3819
+ const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
3213
3820
  registry.register(new CoreModule2);
3214
3821
  registry.register(new TypeScriptModule2);
3822
+ registry.register(new JsonModule2);
3823
+ registry.register(new MarkdownModule2);
3215
3824
  }
3216
3825
  var registry;
3217
3826
  var init_registry = __esm(() => {
@@ -3219,13 +3828,13 @@ var init_registry = __esm(() => {
3219
3828
  });
3220
3829
 
3221
3830
  // src/infrastructure/introspection/projectDetector.ts
3222
- import * as path9 from "path";
3831
+ import * as path12 from "path";
3223
3832
  import * as fs4 from "fs/promises";
3224
3833
  async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3225
3834
  if (depth > MAX_SCAN_DEPTH)
3226
3835
  return [];
3227
3836
  const results = [];
3228
- const fullDir = currentDir ? path9.join(rootDir, currentDir) : rootDir;
3837
+ const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
3229
3838
  try {
3230
3839
  const entries = await fs4.readdir(fullDir, { withFileTypes: true });
3231
3840
  const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
@@ -3248,10 +3857,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3248
3857
  }
3249
3858
  async function parsePackageJson(rootDir, relativePath) {
3250
3859
  try {
3251
- const packageJsonPath = path9.join(rootDir, relativePath, "package.json");
3860
+ const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
3252
3861
  const content = await fs4.readFile(packageJsonPath, "utf-8");
3253
3862
  const pkg = JSON.parse(content);
3254
- const name = pkg.name || path9.basename(relativePath);
3863
+ const name = pkg.name || path12.basename(relativePath);
3255
3864
  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
3256
3865
  let type = "unknown";
3257
3866
  if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
@@ -3296,7 +3905,7 @@ async function detectProjectStructure(rootDir) {
3296
3905
  for (const pattern of monorepoPatterns) {
3297
3906
  if (!dirNames.includes(pattern))
3298
3907
  continue;
3299
- const patternDir = path9.join(rootDir, pattern);
3908
+ const patternDir = path12.join(rootDir, pattern);
3300
3909
  try {
3301
3910
  const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
3302
3911
  for (const subDir of subDirs) {
@@ -3327,7 +3936,7 @@ async function detectProjectStructure(rootDir) {
3327
3936
  }
3328
3937
  let rootType = "unknown";
3329
3938
  try {
3330
- const rootPkgPath = path9.join(rootDir, "package.json");
3939
+ const rootPkgPath = path12.join(rootDir, "package.json");
3331
3940
  const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
3332
3941
  if (rootPkg.workspaces)
3333
3942
  isMonorepo = true;
@@ -3367,7 +3976,7 @@ var init_projectDetector = __esm(() => {
3367
3976
  });
3368
3977
 
3369
3978
  // src/infrastructure/introspection/IntrospectionIndex.ts
3370
- import * as path10 from "path";
3979
+ import * as path13 from "path";
3371
3980
  import * as fs5 from "fs/promises";
3372
3981
 
3373
3982
  class IntrospectionIndex {
@@ -3381,7 +3990,7 @@ class IntrospectionIndex {
3381
3990
  async initialize() {
3382
3991
  this.structure = await detectProjectStructure(this.rootDir);
3383
3992
  try {
3384
- const configPath = path10.join(this.rootDir, ".raggrep", "config.json");
3993
+ const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
3385
3994
  const configContent = await fs5.readFile(configPath, "utf-8");
3386
3995
  const config = JSON.parse(configContent);
3387
3996
  this.config = config.introspection || {};
@@ -3421,28 +4030,28 @@ class IntrospectionIndex {
3421
4030
  }
3422
4031
  }
3423
4032
  async save(config) {
3424
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
4033
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3425
4034
  await fs5.mkdir(introDir, { recursive: true });
3426
- const projectPath = path10.join(introDir, "_project.json");
4035
+ const projectPath = path13.join(introDir, "_project.json");
3427
4036
  await fs5.writeFile(projectPath, JSON.stringify({
3428
4037
  version: "1.0.0",
3429
4038
  lastUpdated: new Date().toISOString(),
3430
4039
  structure: this.structure
3431
4040
  }, null, 2));
3432
4041
  for (const [filepath, intro] of this.files) {
3433
- const introFilePath = path10.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
3434
- await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
4042
+ const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
4043
+ await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
3435
4044
  await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
3436
4045
  }
3437
4046
  }
3438
4047
  async load(config) {
3439
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
4048
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3440
4049
  try {
3441
- const projectPath = path10.join(introDir, "_project.json");
4050
+ const projectPath = path13.join(introDir, "_project.json");
3442
4051
  const projectContent = await fs5.readFile(projectPath, "utf-8");
3443
4052
  const projectData = JSON.parse(projectContent);
3444
4053
  this.structure = projectData.structure;
3445
- await this.loadFilesRecursive(path10.join(introDir, "files"), "");
4054
+ await this.loadFilesRecursive(path13.join(introDir, "files"), "");
3446
4055
  } catch {
3447
4056
  this.structure = null;
3448
4057
  this.files.clear();
@@ -3452,7 +4061,7 @@ class IntrospectionIndex {
3452
4061
  try {
3453
4062
  const entries = await fs5.readdir(basePath, { withFileTypes: true });
3454
4063
  for (const entry of entries) {
3455
- const entryPath = path10.join(basePath, entry.name);
4064
+ const entryPath = path13.join(basePath, entry.name);
3456
4065
  const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
3457
4066
  if (entry.isDirectory()) {
3458
4067
  await this.loadFilesRecursive(entryPath, relativePath);
@@ -3483,7 +4092,7 @@ var init_introspection2 = __esm(() => {
3483
4092
 
3484
4093
  // src/app/indexer/watcher.ts
3485
4094
  import { watch } from "chokidar";
3486
- import * as path11 from "path";
4095
+ import * as path14 from "path";
3487
4096
  async function watchDirectory(rootDir, options = {}) {
3488
4097
  const {
3489
4098
  debounceMs = DEFAULT_DEBOUNCE_MS,
@@ -3494,7 +4103,7 @@ async function watchDirectory(rootDir, options = {}) {
3494
4103
  onFileChange,
3495
4104
  onError
3496
4105
  } = options;
3497
- rootDir = path11.resolve(rootDir);
4106
+ rootDir = path14.resolve(rootDir);
3498
4107
  const config = await loadConfig(rootDir);
3499
4108
  const indexLocation = getIndexLocation(rootDir);
3500
4109
  const validExtensions = new Set(config.extensions);
@@ -3504,7 +4113,7 @@ async function watchDirectory(rootDir, options = {}) {
3504
4113
  "**/.git/**"
3505
4114
  ];
3506
4115
  function shouldWatchFile(filepath) {
3507
- const ext = path11.extname(filepath);
4116
+ const ext = path14.extname(filepath);
3508
4117
  return validExtensions.has(ext);
3509
4118
  }
3510
4119
  let isRunning = true;
@@ -3586,7 +4195,7 @@ async function watchDirectory(rootDir, options = {}) {
3586
4195
  function handleFileEvent(event, filepath) {
3587
4196
  if (!isRunning)
3588
4197
  return;
3589
- const relativePath = path11.relative(rootDir, filepath);
4198
+ const relativePath = path14.relative(rootDir, filepath);
3590
4199
  if (!shouldWatchFile(filepath)) {
3591
4200
  return;
3592
4201
  }
@@ -3665,15 +4274,48 @@ __export(exports_indexer, {
3665
4274
  });
3666
4275
  import { glob } from "glob";
3667
4276
  import * as fs6 from "fs/promises";
3668
- import * as path12 from "path";
4277
+ import * as path15 from "path";
4278
+ async function parallelMap(items, processor, concurrency) {
4279
+ const results = new Array(items.length);
4280
+ let nextIndex = 0;
4281
+ async function worker() {
4282
+ while (nextIndex < items.length) {
4283
+ const index = nextIndex++;
4284
+ const item = items[index];
4285
+ try {
4286
+ const value = await processor(item, index);
4287
+ results[index] = { success: true, value };
4288
+ } catch (error) {
4289
+ results[index] = { success: false, error };
4290
+ }
4291
+ }
4292
+ }
4293
+ const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
4294
+ await Promise.all(workers);
4295
+ return results;
4296
+ }
4297
+ function formatDuration(ms) {
4298
+ if (ms < 1000) {
4299
+ return `${ms}ms`;
4300
+ }
4301
+ const seconds = ms / 1000;
4302
+ if (seconds < 60) {
4303
+ return `${seconds.toFixed(1)}s`;
4304
+ }
4305
+ const minutes = Math.floor(seconds / 60);
4306
+ const remainingSeconds = seconds % 60;
4307
+ return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
4308
+ }
3669
4309
  async function indexDirectory(rootDir, options = {}) {
3670
4310
  const verbose = options.verbose ?? false;
3671
4311
  const quiet = options.quiet ?? false;
4312
+ const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
3672
4313
  const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
3673
- rootDir = path12.resolve(rootDir);
4314
+ rootDir = path15.resolve(rootDir);
3674
4315
  const location = getIndexLocation(rootDir);
3675
4316
  logger.info(`Indexing directory: ${rootDir}`);
3676
4317
  logger.info(`Index location: ${location.indexDir}`);
4318
+ logger.debug(`Concurrency: ${concurrency}`);
3677
4319
  const config = await loadConfig(rootDir);
3678
4320
  const introspection = new IntrospectionIndex(rootDir);
3679
4321
  await introspection.initialize();
@@ -3690,8 +4332,10 @@ async function indexDirectory(rootDir, options = {}) {
3690
4332
  logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
3691
4333
  const files = await findFiles(rootDir, config);
3692
4334
  logger.info(`Found ${files.length} files to index`);
4335
+ const overallStart = Date.now();
3693
4336
  const results = [];
3694
4337
  for (const module of enabledModules) {
4338
+ const moduleStart = Date.now();
3695
4339
  logger.info(`
3696
4340
  [${module.name}] Starting indexing...`);
3697
4341
  const moduleConfig = getModuleConfig(config, module.id);
@@ -3709,7 +4353,9 @@ async function indexDirectory(rootDir, options = {}) {
3709
4353
  };
3710
4354
  await module.initialize(configWithOverrides);
3711
4355
  }
3712
- const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger);
4356
+ const moduleFiles = module.supportsFile ? files.filter((f) => module.supportsFile(f)) : files;
4357
+ logger.info(` Processing ${moduleFiles.length} files...`);
4358
+ const result = await indexWithModule(rootDir, moduleFiles, module, config, verbose, introspection, logger, concurrency);
3713
4359
  results.push(result);
3714
4360
  if (module.finalize) {
3715
4361
  logger.info(`[${module.name}] Building secondary indexes...`);
@@ -3717,20 +4363,29 @@ async function indexDirectory(rootDir, options = {}) {
3717
4363
  rootDir,
3718
4364
  config,
3719
4365
  readFile: async (filepath) => {
3720
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4366
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3721
4367
  return fs6.readFile(fullPath, "utf-8");
3722
4368
  },
3723
4369
  getFileStats: async (filepath) => {
3724
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4370
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3725
4371
  const stats = await fs6.stat(fullPath);
3726
4372
  return { lastModified: stats.mtime.toISOString() };
3727
4373
  }
3728
4374
  };
3729
4375
  await module.finalize(ctx);
3730
4376
  }
3731
- logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
4377
+ const moduleDuration = Date.now() - moduleStart;
4378
+ result.durationMs = moduleDuration;
4379
+ logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors (${formatDuration(moduleDuration)})`);
3732
4380
  }
3733
4381
  await introspection.save(config);
4382
+ const overallDuration = Date.now() - overallStart;
4383
+ logger.info(`
4384
+ Indexing complete in ${formatDuration(overallDuration)}`);
4385
+ const totalIndexed = results.reduce((sum, r) => sum + r.indexed, 0);
4386
+ const totalSkipped = results.reduce((sum, r) => sum + r.skipped, 0);
4387
+ const totalErrors = results.reduce((sum, r) => sum + r.errors, 0);
4388
+ logger.info(`Total: ${totalIndexed} indexed, ${totalSkipped} skipped, ${totalErrors} errors`);
3734
4389
  await updateGlobalManifest(rootDir, enabledModules, config);
3735
4390
  return results;
3736
4391
  }
@@ -3752,7 +4407,7 @@ async function deleteIndex(rootDir) {
3752
4407
  } catch {}
3753
4408
  }
3754
4409
  async function resetIndex(rootDir) {
3755
- rootDir = path12.resolve(rootDir);
4410
+ rootDir = path15.resolve(rootDir);
3756
4411
  const status = await getIndexStatus(rootDir);
3757
4412
  if (!status.exists) {
3758
4413
  throw new Error(`No index found for ${rootDir}`);
@@ -3767,7 +4422,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3767
4422
  const verbose = options.verbose ?? false;
3768
4423
  const quiet = options.quiet ?? false;
3769
4424
  const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
3770
- rootDir = path12.resolve(rootDir);
4425
+ rootDir = path15.resolve(rootDir);
3771
4426
  const status = await getIndexStatus(rootDir);
3772
4427
  if (!status.exists) {
3773
4428
  logger.info(`No index found. Creating index...
@@ -3794,7 +4449,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3794
4449
  const introspection = new IntrospectionIndex(rootDir);
3795
4450
  await introspection.initialize();
3796
4451
  const currentFiles = await findFiles(rootDir, config);
3797
- const currentFileSet = new Set(currentFiles.map((f) => path12.relative(rootDir, f)));
4452
+ const currentFileSet = new Set(currentFiles.map((f) => path15.relative(rootDir, f)));
3798
4453
  let totalIndexed = 0;
3799
4454
  let totalRemoved = 0;
3800
4455
  let totalUnchanged = 0;
@@ -3824,11 +4479,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
3824
4479
  }
3825
4480
  for (const filepath of filesToRemove) {
3826
4481
  logger.debug(` Removing stale: ${filepath}`);
3827
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4482
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3828
4483
  try {
3829
4484
  await fs6.unlink(indexFilePath);
3830
4485
  } catch {}
3831
- const symbolicFilePath = path12.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4486
+ const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3832
4487
  try {
3833
4488
  await fs6.unlink(symbolicFilePath);
3834
4489
  } catch {}
@@ -3839,11 +4494,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
3839
4494
  rootDir,
3840
4495
  config,
3841
4496
  readFile: async (filepath) => {
3842
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4497
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3843
4498
  return fs6.readFile(fullPath, "utf-8");
3844
4499
  },
3845
4500
  getFileStats: async (filepath) => {
3846
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4501
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3847
4502
  const stats = await fs6.stat(fullPath);
3848
4503
  return { lastModified: stats.mtime.toISOString() };
3849
4504
  },
@@ -3852,7 +4507,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3852
4507
  const totalFiles = currentFiles.length;
3853
4508
  for (let i = 0;i < currentFiles.length; i++) {
3854
4509
  const filepath = currentFiles[i];
3855
- const relativePath = path12.relative(rootDir, filepath);
4510
+ const relativePath = path15.relative(rootDir, filepath);
3856
4511
  const progress = `[${i + 1}/${totalFiles}]`;
3857
4512
  try {
3858
4513
  const stats = await fs6.stat(filepath);
@@ -3903,7 +4558,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3903
4558
  unchanged: totalUnchanged
3904
4559
  };
3905
4560
  }
3906
- async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger) {
4561
+ async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
3907
4562
  const result = {
3908
4563
  moduleId: module.id,
3909
4564
  indexed: 0,
@@ -3912,7 +4567,7 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3912
4567
  };
3913
4568
  const manifest = await loadModuleManifest(rootDir, module.id, config);
3914
4569
  const indexPath = getModuleIndexPath(rootDir, module.id, config);
3915
- const currentFileSet = new Set(files.map((f) => path12.relative(rootDir, f)));
4570
+ const currentFileSet = new Set(files.map((f) => path15.relative(rootDir, f)));
3916
4571
  const filesToRemove = [];
3917
4572
  for (const filepath of Object.keys(manifest.files)) {
3918
4573
  if (!currentFileSet.has(filepath)) {
@@ -3923,11 +4578,11 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3923
4578
  logger.info(` Removing ${filesToRemove.length} stale entries...`);
3924
4579
  for (const filepath of filesToRemove) {
3925
4580
  logger.debug(` Removing: ${filepath}`);
3926
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4581
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3927
4582
  try {
3928
4583
  await fs6.unlink(indexFilePath);
3929
4584
  } catch {}
3930
- const symbolicFilePath = path12.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4585
+ const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3931
4586
  try {
3932
4587
  await fs6.unlink(symbolicFilePath);
3933
4588
  } catch {}
@@ -3939,52 +4594,76 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3939
4594
  rootDir,
3940
4595
  config,
3941
4596
  readFile: async (filepath) => {
3942
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4597
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3943
4598
  return fs6.readFile(fullPath, "utf-8");
3944
4599
  },
3945
4600
  getFileStats: async (filepath) => {
3946
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4601
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3947
4602
  const stats = await fs6.stat(fullPath);
3948
4603
  return { lastModified: stats.mtime.toISOString() };
3949
4604
  },
3950
4605
  getIntrospection: (filepath) => introspection.getFile(filepath)
3951
4606
  };
3952
4607
  const totalFiles = files.length;
3953
- for (let i = 0;i < files.length; i++) {
3954
- const filepath = files[i];
3955
- const relativePath = path12.relative(rootDir, filepath);
3956
- const progress = `[${i + 1}/${totalFiles}]`;
4608
+ let completedCount = 0;
4609
+ const processFile = async (filepath, _index) => {
4610
+ const relativePath = path15.relative(rootDir, filepath);
3957
4611
  try {
3958
4612
  const stats = await fs6.stat(filepath);
3959
4613
  const lastModified = stats.mtime.toISOString();
3960
4614
  const existingEntry = manifest.files[relativePath];
3961
4615
  if (existingEntry && existingEntry.lastModified === lastModified) {
3962
- logger.debug(` ${progress} Skipped ${relativePath} (unchanged)`);
3963
- result.skipped++;
3964
- continue;
4616
+ completedCount++;
4617
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
4618
+ return { relativePath, status: "skipped" };
3965
4619
  }
3966
4620
  const content = await fs6.readFile(filepath, "utf-8");
3967
4621
  introspection.addFile(relativePath, content);
3968
- logger.progress(` ${progress} Processing: ${relativePath}`);
4622
+ completedCount++;
4623
+ logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
3969
4624
  const fileIndex = await module.indexFile(relativePath, content, ctx);
3970
4625
  if (!fileIndex) {
3971
- logger.debug(` ${progress} Skipped ${relativePath} (no chunks)`);
3972
- result.skipped++;
3973
- continue;
4626
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
4627
+ return { relativePath, status: "skipped" };
3974
4628
  }
3975
4629
  await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
3976
- manifest.files[relativePath] = {
4630
+ return {
4631
+ relativePath,
4632
+ status: "indexed",
3977
4633
  lastModified,
3978
4634
  chunkCount: fileIndex.chunks.length
3979
4635
  };
3980
- result.indexed++;
3981
4636
  } catch (error) {
3982
- logger.clearProgress();
3983
- logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
4637
+ completedCount++;
4638
+ return { relativePath, status: "error", error };
4639
+ }
4640
+ };
4641
+ logger.debug(` Using concurrency: ${concurrency}`);
4642
+ const results = await parallelMap(files, processFile, concurrency);
4643
+ logger.clearProgress();
4644
+ for (const item of results) {
4645
+ if (!item.success) {
3984
4646
  result.errors++;
4647
+ continue;
4648
+ }
4649
+ const fileResult = item.value;
4650
+ switch (fileResult.status) {
4651
+ case "indexed":
4652
+ manifest.files[fileResult.relativePath] = {
4653
+ lastModified: fileResult.lastModified,
4654
+ chunkCount: fileResult.chunkCount
4655
+ };
4656
+ result.indexed++;
4657
+ break;
4658
+ case "skipped":
4659
+ result.skipped++;
4660
+ break;
4661
+ case "error":
4662
+ logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
4663
+ result.errors++;
4664
+ break;
3985
4665
  }
3986
4666
  }
3987
- logger.clearProgress();
3988
4667
  manifest.lastUpdated = new Date().toISOString();
3989
4668
  await writeModuleManifest(rootDir, module.id, manifest, config);
3990
4669
  return result;
@@ -4019,13 +4698,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
4019
4698
  }
4020
4699
  async function writeModuleManifest(rootDir, moduleId, manifest, config) {
4021
4700
  const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
4022
- await fs6.mkdir(path12.dirname(manifestPath), { recursive: true });
4701
+ await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
4023
4702
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
4024
4703
  }
4025
4704
  async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
4026
4705
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
4027
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4028
- await fs6.mkdir(path12.dirname(indexFilePath), { recursive: true });
4706
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4707
+ await fs6.mkdir(path15.dirname(indexFilePath), { recursive: true });
4029
4708
  await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
4030
4709
  }
4031
4710
  async function updateGlobalManifest(rootDir, modules, config) {
@@ -4035,13 +4714,13 @@ async function updateGlobalManifest(rootDir, modules, config) {
4035
4714
  lastUpdated: new Date().toISOString(),
4036
4715
  modules: modules.map((m) => m.id)
4037
4716
  };
4038
- await fs6.mkdir(path12.dirname(manifestPath), { recursive: true });
4717
+ await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
4039
4718
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
4040
4719
  }
4041
4720
  async function cleanupIndex(rootDir, options = {}) {
4042
4721
  const verbose = options.verbose ?? false;
4043
4722
  const logger = options.logger ?? createLogger({ verbose });
4044
- rootDir = path12.resolve(rootDir);
4723
+ rootDir = path15.resolve(rootDir);
4045
4724
  logger.info(`Cleaning up index in: ${rootDir}`);
4046
4725
  const config = await loadConfig(rootDir);
4047
4726
  await registerBuiltInModules();
@@ -4071,7 +4750,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
4071
4750
  const filesToRemove = [];
4072
4751
  const updatedFiles = {};
4073
4752
  for (const [filepath, entry] of Object.entries(manifest.files)) {
4074
- const fullPath = path12.join(rootDir, filepath);
4753
+ const fullPath = path15.join(rootDir, filepath);
4075
4754
  try {
4076
4755
  await fs6.access(fullPath);
4077
4756
  updatedFiles[filepath] = entry;
@@ -4083,7 +4762,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
4083
4762
  }
4084
4763
  }
4085
4764
  for (const filepath of filesToRemove) {
4086
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4765
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4087
4766
  try {
4088
4767
  await fs6.unlink(indexFilePath);
4089
4768
  } catch {}
@@ -4099,7 +4778,7 @@ async function cleanupEmptyDirectories(dir) {
4099
4778
  const entries = await fs6.readdir(dir, { withFileTypes: true });
4100
4779
  for (const entry of entries) {
4101
4780
  if (entry.isDirectory()) {
4102
- const subDir = path12.join(dir, entry.name);
4781
+ const subDir = path15.join(dir, entry.name);
4103
4782
  await cleanupEmptyDirectories(subDir);
4104
4783
  }
4105
4784
  }
@@ -4114,7 +4793,7 @@ async function cleanupEmptyDirectories(dir) {
4114
4793
  }
4115
4794
  }
4116
4795
  async function getIndexStatus(rootDir) {
4117
- rootDir = path12.resolve(rootDir);
4796
+ rootDir = path15.resolve(rootDir);
4118
4797
  const config = await loadConfig(rootDir);
4119
4798
  const location = getIndexLocation(rootDir);
4120
4799
  const indexDir = location.indexDir;
@@ -4150,7 +4829,7 @@ async function getIndexStatus(rootDir) {
4150
4829
  }
4151
4830
  } catch {
4152
4831
  try {
4153
- const entries = await fs6.readdir(path12.join(indexDir, "index"));
4832
+ const entries = await fs6.readdir(path15.join(indexDir, "index"));
4154
4833
  if (entries.length > 0) {
4155
4834
  status.exists = true;
4156
4835
  for (const entry of entries) {
@@ -4170,7 +4849,7 @@ async function getIndexStatus(rootDir) {
4170
4849
  }
4171
4850
  return status;
4172
4851
  }
4173
- var INDEX_SCHEMA_VERSION = "1.0.0";
4852
+ var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
4174
4853
  var init_indexer = __esm(() => {
4175
4854
  init_config2();
4176
4855
  init_registry();
@@ -4191,9 +4870,9 @@ __export(exports_search, {
4191
4870
  formatSearchResults: () => formatSearchResults
4192
4871
  });
4193
4872
  import * as fs7 from "fs/promises";
4194
- import * as path13 from "path";
4873
+ import * as path16 from "path";
4195
4874
  async function search(rootDir, query, options = {}) {
4196
- rootDir = path13.resolve(rootDir);
4875
+ rootDir = path16.resolve(rootDir);
4197
4876
  const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
4198
4877
  if (ensureFresh) {
4199
4878
  await ensureIndexFresh(rootDir, { quiet: true });
@@ -4227,9 +4906,17 @@ async function search(rootDir, query, options = {}) {
4227
4906
  const moduleResults = await module.search(query, ctx, options);
4228
4907
  allResults.push(...moduleResults);
4229
4908
  }
4230
- allResults.sort((a, b) => b.score - a.score);
4909
+ let filteredResults = allResults;
4910
+ if (options.pathFilter && options.pathFilter.length > 0) {
4911
+ const normalizedFilters = options.pathFilter.map((p) => p.replace(/\\/g, "/").replace(/^\//, "").replace(/\/$/, ""));
4912
+ filteredResults = allResults.filter((result) => {
4913
+ const normalizedPath = result.filepath.replace(/\\/g, "/");
4914
+ return normalizedFilters.some((filter) => normalizedPath.startsWith(filter + "/") || normalizedPath === filter || normalizedPath.startsWith("./" + filter + "/") || normalizedPath === "./" + filter);
4915
+ });
4916
+ }
4917
+ filteredResults.sort((a, b) => b.score - a.score);
4231
4918
  const topK = options.topK ?? 10;
4232
- return allResults.slice(0, topK);
4919
+ return filteredResults.slice(0, topK);
4233
4920
  }
4234
4921
  function createSearchContext(rootDir, moduleId, config) {
4235
4922
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
@@ -4238,7 +4925,7 @@ function createSearchContext(rootDir, moduleId, config) {
4238
4925
  config,
4239
4926
  loadFileIndex: async (filepath) => {
4240
4927
  const hasExtension = /\.[^./]+$/.test(filepath);
4241
- const indexFilePath = hasExtension ? path13.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path13.join(indexPath, filepath + ".json");
4928
+ const indexFilePath = hasExtension ? path16.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path16.join(indexPath, filepath + ".json");
4242
4929
  try {
4243
4930
  const content = await fs7.readFile(indexFilePath, "utf-8");
4244
4931
  return JSON.parse(content);
@@ -4250,7 +4937,7 @@ function createSearchContext(rootDir, moduleId, config) {
4250
4937
  const files = [];
4251
4938
  await traverseDirectory(indexPath, files, indexPath);
4252
4939
  return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
4253
- const relative4 = path13.relative(indexPath, f);
4940
+ const relative4 = path16.relative(indexPath, f);
4254
4941
  return relative4.replace(/\.json$/, "");
4255
4942
  });
4256
4943
  }
@@ -4260,7 +4947,7 @@ async function traverseDirectory(dir, files, basePath) {
4260
4947
  try {
4261
4948
  const entries = await fs7.readdir(dir, { withFileTypes: true });
4262
4949
  for (const entry of entries) {
4263
- const fullPath = path13.join(dir, entry.name);
4950
+ const fullPath = path16.join(dir, entry.name);
4264
4951
  if (entry.isDirectory()) {
4265
4952
  await traverseDirectory(fullPath, files, basePath);
4266
4953
  } else if (entry.isFile()) {
@@ -4338,7 +5025,7 @@ init_logger();
4338
5025
  // package.json
4339
5026
  var package_default = {
4340
5027
  name: "raggrep",
4341
- version: "0.4.0",
5028
+ version: "0.5.1",
4342
5029
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
4343
5030
  type: "module",
4344
5031
  main: "./dist/index.js",
@@ -4474,6 +5161,25 @@ function parseFlags(args2) {
4474
5161
  console.error("--type requires a file extension (e.g., ts, tsx, js)");
4475
5162
  process.exit(1);
4476
5163
  }
5164
+ } else if (arg === "--concurrency" || arg === "-c") {
5165
+ const c = parseInt(args2[++i], 10);
5166
+ if (!isNaN(c) && c > 0) {
5167
+ flags.concurrency = c;
5168
+ } else {
5169
+ console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
5170
+ process.exit(1);
5171
+ }
5172
+ } else if (arg === "--filter" || arg === "-f") {
5173
+ const filterPath = args2[++i];
5174
+ if (filterPath) {
5175
+ if (!flags.pathFilter) {
5176
+ flags.pathFilter = [];
5177
+ }
5178
+ flags.pathFilter.push(filterPath);
5179
+ } else {
5180
+ console.error("--filter requires a path (e.g., src/auth)");
5181
+ process.exit(1);
5182
+ }
4477
5183
  } else if (!arg.startsWith("-")) {
4478
5184
  flags.remaining.push(arg);
4479
5185
  }
@@ -4493,10 +5199,11 @@ Usage:
4493
5199
  raggrep index [options]
4494
5200
 
4495
5201
  Options:
4496
- -w, --watch Watch for file changes and re-index automatically
4497
- -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
4498
- -v, --verbose Show detailed progress
4499
- -h, --help Show this help message
5202
+ -w, --watch Watch for file changes and re-index automatically
5203
+ -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
5204
+ -c, --concurrency <n> Number of files to process in parallel (default: 4)
5205
+ -v, --verbose Show detailed progress
5206
+ -h, --help Show this help message
4500
5207
 
4501
5208
  Available Models:
4502
5209
  ${models}
@@ -4507,6 +5214,7 @@ Examples:
4507
5214
  raggrep index
4508
5215
  raggrep index --watch
4509
5216
  raggrep index --model bge-small-en-v1.5
5217
+ raggrep index --concurrency 8
4510
5218
  raggrep index --verbose
4511
5219
  `);
4512
5220
  process.exit(0);
@@ -4520,6 +5228,7 @@ Examples:
4520
5228
  const results = await indexDirectory2(process.cwd(), {
4521
5229
  model: flags.model,
4522
5230
  verbose: flags.verbose,
5231
+ concurrency: flags.concurrency,
4523
5232
  logger
4524
5233
  });
4525
5234
  console.log(`
@@ -4579,6 +5288,7 @@ Options:
4579
5288
  -k, --top <n> Number of results to return (default: 10)
4580
5289
  -s, --min-score <n> Minimum similarity score 0-1 (default: 0.15)
4581
5290
  -t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
5291
+ -f, --filter <path> Filter by path prefix (can be used multiple times)
4582
5292
  -h, --help Show this help message
4583
5293
 
4584
5294
  Note:
@@ -4593,6 +5303,8 @@ Examples:
4593
5303
  raggrep query "handle errors" --top 5
4594
5304
  raggrep query "database" --min-score 0.1
4595
5305
  raggrep query "interface" --type ts
5306
+ raggrep query "login" --filter src/auth
5307
+ raggrep query "api" --filter src/api --filter src/routes
4596
5308
  `);
4597
5309
  process.exit(0);
4598
5310
  }
@@ -4633,6 +5345,7 @@ Examples:
4633
5345
  topK: flags.topK ?? 10,
4634
5346
  minScore: flags.minScore,
4635
5347
  filePatterns,
5348
+ pathFilter: flags.pathFilter,
4636
5349
  ensureFresh: false
4637
5350
  });
4638
5351
  console.log(formatSearchResults2(results));
@@ -4773,4 +5486,4 @@ Run 'raggrep <command> --help' for more information.
4773
5486
  }
4774
5487
  main();
4775
5488
 
4776
- //# debugId=B729BEE1B814E8D564756E2164756E21
5489
+ //# debugId=E73618F0DDE8326264756E2164756E21