raggrep 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -1,5 +1,21 @@
1
1
  #!/usr/bin/env node
2
+ import { createRequire } from "node:module";
3
+ var __create = Object.create;
4
+ var __getProtoOf = Object.getPrototypeOf;
2
5
  var __defProp = Object.defineProperty;
6
+ var __getOwnPropNames = Object.getOwnPropertyNames;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __toESM = (mod, isNodeMode, target) => {
9
+ target = mod != null ? __create(__getProtoOf(mod)) : {};
10
+ const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
11
+ for (let key of __getOwnPropNames(mod))
12
+ if (!__hasOwnProp.call(to, key))
13
+ __defProp(to, key, {
14
+ get: () => mod[key],
15
+ enumerable: true
16
+ });
17
+ return to;
18
+ };
3
19
  var __export = (target, all) => {
4
20
  for (var name in all)
5
21
  __defProp(target, name, {
@@ -10,6 +26,7 @@ var __export = (target, all) => {
10
26
  });
11
27
  };
12
28
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
29
+ var __require = /* @__PURE__ */ createRequire(import.meta.url);
13
30
 
14
31
  // src/infrastructure/embeddings/transformersEmbedding.ts
15
32
  import {
@@ -27,7 +44,8 @@ class TransformersEmbeddingProvider {
27
44
  constructor(config) {
28
45
  this.config = {
29
46
  model: config?.model ?? "all-MiniLM-L6-v2",
30
- showProgress: config?.showProgress ?? false
47
+ showProgress: config?.showProgress ?? false,
48
+ logger: config?.logger
31
49
  };
32
50
  }
33
51
  async initialize(config) {
@@ -49,29 +67,55 @@ class TransformersEmbeddingProvider {
49
67
  this.isInitializing = true;
50
68
  this.initPromise = (async () => {
51
69
  const modelId = EMBEDDING_MODELS[this.config.model];
52
- if (this.config.showProgress) {
53
- console.log(`
54
- Loading embedding model: ${this.config.model}`);
55
- console.log(` Cache: ${CACHE_DIR}`);
56
- }
70
+ const logger = this.config.logger;
71
+ const showProgress = this.config.showProgress || !!logger;
72
+ const isCached = await isModelCached(this.config.model);
73
+ let hasDownloads = false;
57
74
  try {
58
75
  this.pipeline = await pipeline("feature-extraction", modelId, {
59
- progress_callback: this.config.showProgress ? (progress) => {
76
+ progress_callback: showProgress && !isCached ? (progress) => {
60
77
  if (progress.status === "progress" && progress.file) {
78
+ if (!hasDownloads) {
79
+ hasDownloads = true;
80
+ if (logger) {
81
+ logger.info(`Downloading embedding model: ${this.config.model}`);
82
+ } else {
83
+ console.log(`
84
+ Loading embedding model: ${this.config.model}`);
85
+ console.log(` Cache: ${CACHE_DIR}`);
86
+ }
87
+ }
61
88
  const pct = progress.progress ? Math.round(progress.progress) : 0;
62
- process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
89
+ if (logger) {
90
+ logger.progress(` Downloading ${progress.file}: ${pct}%`);
91
+ } else {
92
+ process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
93
+ }
63
94
  } else if (progress.status === "done" && progress.file) {
64
- process.stdout.write(`\r Downloaded ${progress.file}
95
+ if (logger) {
96
+ logger.clearProgress();
97
+ logger.info(` Downloaded ${progress.file}`);
98
+ } else if (hasDownloads) {
99
+ process.stdout.write(`\r Downloaded ${progress.file}
65
100
  `);
101
+ }
66
102
  }
67
103
  } : undefined
68
104
  });
69
- if (this.config.showProgress) {
70
- console.log(` Model ready.
105
+ if (hasDownloads) {
106
+ if (logger) {
107
+ logger.clearProgress();
108
+ logger.info(`Model ready: ${this.config.model}`);
109
+ } else {
110
+ console.log(` Model ready.
71
111
  `);
112
+ }
72
113
  }
73
114
  } catch (error) {
74
115
  this.pipeline = null;
116
+ if (logger) {
117
+ logger.clearProgress();
118
+ }
75
119
  throw new Error(`Failed to load embedding model: ${error}`);
76
120
  } finally {
77
121
  this.isInitializing = false;
@@ -125,9 +169,21 @@ class TransformersEmbeddingProvider {
125
169
  function getCacheDir() {
126
170
  return CACHE_DIR;
127
171
  }
172
+ async function isModelCached(model) {
173
+ const modelId = EMBEDDING_MODELS[model];
174
+ const modelPath = path.join(CACHE_DIR, modelId);
175
+ try {
176
+ const fs = await import("fs/promises");
177
+ const onnxPath = path.join(modelPath, "onnx", "model_quantized.onnx");
178
+ await fs.access(onnxPath);
179
+ return true;
180
+ } catch {
181
+ return false;
182
+ }
183
+ }
128
184
  function configureEmbeddings(config) {
129
185
  const newConfig = { ...globalConfig, ...config };
130
- if (newConfig.model !== globalConfig.model) {
186
+ if (newConfig.model !== globalConfig.model || newConfig.logger !== globalConfig.logger) {
131
187
  globalProvider = null;
132
188
  }
133
189
  globalConfig = newConfig;
@@ -163,7 +219,8 @@ var init_transformersEmbedding = __esm(() => {
163
219
  };
164
220
  globalConfig = {
165
221
  model: "all-MiniLM-L6-v2",
166
- showProgress: false
222
+ showProgress: false,
223
+ logger: undefined
167
224
  };
168
225
  });
169
226
 
@@ -171,6 +228,96 @@ var init_transformersEmbedding = __esm(() => {
171
228
  var init_embeddings = __esm(() => {
172
229
  init_transformersEmbedding();
173
230
  });
231
+
232
+ // src/infrastructure/logger/loggers.ts
233
+ class ConsoleLogger {
234
+ verbose;
235
+ constructor(options) {
236
+ this.verbose = options?.verbose ?? false;
237
+ }
238
+ info(message) {
239
+ console.log(message);
240
+ }
241
+ warn(message) {
242
+ console.warn(message);
243
+ }
244
+ error(message) {
245
+ console.error(message);
246
+ }
247
+ debug(message) {
248
+ if (this.verbose) {
249
+ console.log(message);
250
+ }
251
+ }
252
+ progress(message) {
253
+ console.log(message);
254
+ }
255
+ clearProgress() {}
256
+ }
257
+
258
+ class InlineProgressLogger {
259
+ verbose;
260
+ lastProgressLength = 0;
261
+ hasProgress = false;
262
+ constructor(options) {
263
+ this.verbose = options?.verbose ?? false;
264
+ }
265
+ info(message) {
266
+ this.clearProgress();
267
+ console.log(message);
268
+ }
269
+ warn(message) {
270
+ this.clearProgress();
271
+ console.warn(message);
272
+ }
273
+ error(message) {
274
+ this.clearProgress();
275
+ console.error(message);
276
+ }
277
+ debug(message) {
278
+ if (this.verbose) {
279
+ this.clearProgress();
280
+ console.log(message);
281
+ }
282
+ }
283
+ progress(message) {
284
+ process.stdout.write(`\r${message}`);
285
+ const padding = Math.max(0, this.lastProgressLength - message.length);
286
+ if (padding > 0) {
287
+ process.stdout.write(" ".repeat(padding));
288
+ }
289
+ this.lastProgressLength = message.length;
290
+ this.hasProgress = true;
291
+ }
292
+ clearProgress() {
293
+ if (this.hasProgress && this.lastProgressLength > 0) {
294
+ process.stdout.write("\r" + " ".repeat(this.lastProgressLength) + "\r");
295
+ this.lastProgressLength = 0;
296
+ this.hasProgress = false;
297
+ }
298
+ }
299
+ }
300
+
301
+ class SilentLogger {
302
+ info() {}
303
+ warn() {}
304
+ error() {}
305
+ debug() {}
306
+ progress() {}
307
+ clearProgress() {}
308
+ }
309
+ function createLogger(options) {
310
+ return new ConsoleLogger(options);
311
+ }
312
+ function createInlineLogger(options) {
313
+ return new InlineProgressLogger(options);
314
+ }
315
+ function createSilentLogger() {
316
+ return new SilentLogger;
317
+ }
318
+
319
+ // src/infrastructure/logger/index.ts
320
+ var init_logger = () => {};
174
321
  // src/domain/entities/searchResult.ts
175
322
  var DEFAULT_SEARCH_OPTIONS;
176
323
  var init_searchResult = __esm(() => {
@@ -201,6 +348,20 @@ function createDefaultConfig() {
201
348
  options: {
202
349
  embeddingModel: "all-MiniLM-L6-v2"
203
350
  }
351
+ },
352
+ {
353
+ id: "data/json",
354
+ enabled: true,
355
+ options: {
356
+ embeddingModel: "all-MiniLM-L6-v2"
357
+ }
358
+ },
359
+ {
360
+ id: "docs/markdown",
361
+ enabled: true,
362
+ options: {
363
+ embeddingModel: "all-MiniLM-L6-v2"
364
+ }
204
365
  }
205
366
  ]
206
367
  };
@@ -244,16 +405,18 @@ var init_config = __esm(() => {
244
405
  ".jsx",
245
406
  ".mjs",
246
407
  ".cjs",
408
+ ".mts",
409
+ ".cts",
410
+ ".json",
411
+ ".md",
247
412
  ".py",
248
413
  ".go",
249
414
  ".rs",
250
415
  ".java",
251
- ".json",
252
416
  ".yaml",
253
417
  ".yml",
254
418
  ".toml",
255
419
  ".sql",
256
- ".md",
257
420
  ".txt"
258
421
  ];
259
422
  });
@@ -2145,6 +2308,194 @@ var init_core = __esm(() => {
2145
2308
  init_symbols();
2146
2309
  });
2147
2310
 
2311
+ // src/domain/services/keywords.ts
2312
+ function extractKeywords(content, name, maxKeywords = 50) {
2313
+ const keywords = new Set;
2314
+ if (name) {
2315
+ keywords.add(name.toLowerCase());
2316
+ const parts = name.split(/(?=[A-Z])/).map((p) => p.toLowerCase());
2317
+ parts.forEach((p) => p.length > 2 && keywords.add(p));
2318
+ }
2319
+ const identifierRegex = /\b([a-zA-Z_][a-zA-Z0-9_]{2,})\b/g;
2320
+ let match;
2321
+ while ((match = identifierRegex.exec(content)) !== null) {
2322
+ const word = match[1].toLowerCase();
2323
+ if (!COMMON_KEYWORDS.has(word) && word.length > 2) {
2324
+ keywords.add(word);
2325
+ }
2326
+ }
2327
+ return Array.from(keywords).slice(0, maxKeywords);
2328
+ }
2329
+ function splitIdentifier(str) {
2330
+ return str.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[_-]/g, " ").split(/\s+/).map((s) => s.toLowerCase()).filter((s) => s.length > 1);
2331
+ }
2332
+ function extractPathKeywords(filepath) {
2333
+ const keywords = new Set;
2334
+ const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
2335
+ const segments = pathWithoutExt.split(/[/\\]/);
2336
+ for (const segment of segments) {
2337
+ if (segment.length < 2)
2338
+ continue;
2339
+ const lower = segment.toLowerCase();
2340
+ if (!COMMON_KEYWORDS.has(lower) && lower.length > 2) {
2341
+ keywords.add(lower);
2342
+ }
2343
+ const parts = splitIdentifier(segment);
2344
+ for (const part of parts) {
2345
+ if (!COMMON_KEYWORDS.has(part) && part.length > 2) {
2346
+ keywords.add(part);
2347
+ }
2348
+ }
2349
+ }
2350
+ return Array.from(keywords);
2351
+ }
2352
+ function parsePathContext(filepath) {
2353
+ const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
2354
+ const allSegments = pathWithoutExt.split(/[/\\]/);
2355
+ const filename = allSegments[allSegments.length - 1];
2356
+ const dirSegments = allSegments.slice(0, -1);
2357
+ const keywords = extractPathKeywords(filepath);
2358
+ let layer;
2359
+ const allLower = [...dirSegments, filename].map((s) => s.toLowerCase()).join(" ");
2360
+ const filenameLower = filename.toLowerCase();
2361
+ for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS2)) {
2362
+ for (const pattern of patterns) {
2363
+ if (filenameLower.includes(pattern)) {
2364
+ layer = layerName;
2365
+ break;
2366
+ }
2367
+ if (dirSegments.some((s) => s.toLowerCase() === pattern)) {
2368
+ layer = layerName;
2369
+ break;
2370
+ }
2371
+ }
2372
+ if (layer)
2373
+ break;
2374
+ }
2375
+ let domain;
2376
+ const layerPatternSet = new Set(Object.values(LAYER_PATTERNS2).flat());
2377
+ const reversedSegments = [...dirSegments].reverse();
2378
+ for (const segment of reversedSegments) {
2379
+ const lower = segment.toLowerCase();
2380
+ if (["src", "lib", "app", "packages", "modules"].includes(lower))
2381
+ continue;
2382
+ if (layerPatternSet.has(lower))
2383
+ continue;
2384
+ if (lower.length > 2) {
2385
+ domain = lower;
2386
+ break;
2387
+ }
2388
+ }
2389
+ return {
2390
+ segments: dirSegments,
2391
+ layer,
2392
+ domain,
2393
+ depth: dirSegments.length,
2394
+ keywords
2395
+ };
2396
+ }
2397
+ function formatPathContextForEmbedding(pathContext) {
2398
+ const parts = [];
2399
+ if (pathContext.domain) {
2400
+ parts.push(pathContext.domain);
2401
+ }
2402
+ if (pathContext.layer) {
2403
+ parts.push(pathContext.layer);
2404
+ }
2405
+ const significantSegments = pathContext.segments.slice(-3).filter((s) => s.length > 2 && !["src", "lib", "app"].includes(s.toLowerCase()));
2406
+ if (significantSegments.length > 0) {
2407
+ parts.push(...significantSegments.map((s) => s.toLowerCase()));
2408
+ }
2409
+ if (parts.length === 0)
2410
+ return "";
2411
+ const unique = [...new Set(parts)];
2412
+ return `[${unique.join(" ")}]`;
2413
+ }
2414
+ var COMMON_KEYWORDS, LAYER_PATTERNS2;
2415
+ var init_keywords = __esm(() => {
2416
+ COMMON_KEYWORDS = new Set([
2417
+ "const",
2418
+ "let",
2419
+ "var",
2420
+ "function",
2421
+ "class",
2422
+ "interface",
2423
+ "type",
2424
+ "enum",
2425
+ "export",
2426
+ "import",
2427
+ "from",
2428
+ "return",
2429
+ "async",
2430
+ "await",
2431
+ "new",
2432
+ "this",
2433
+ "true",
2434
+ "false",
2435
+ "null",
2436
+ "undefined",
2437
+ "if",
2438
+ "else",
2439
+ "for",
2440
+ "while",
2441
+ "switch",
2442
+ "case",
2443
+ "break",
2444
+ "continue",
2445
+ "try",
2446
+ "catch",
2447
+ "finally",
2448
+ "throw",
2449
+ "typeof",
2450
+ "instanceof",
2451
+ "void",
2452
+ "delete",
2453
+ "in",
2454
+ "of",
2455
+ "string",
2456
+ "number",
2457
+ "boolean",
2458
+ "any",
2459
+ "unknown",
2460
+ "never",
2461
+ "object",
2462
+ "public",
2463
+ "private",
2464
+ "protected",
2465
+ "static",
2466
+ "readonly",
2467
+ "abstract",
2468
+ "implements",
2469
+ "extends",
2470
+ "super",
2471
+ "get",
2472
+ "set",
2473
+ "constructor",
2474
+ "the",
2475
+ "and",
2476
+ "for",
2477
+ "not",
2478
+ "with",
2479
+ "are",
2480
+ "was",
2481
+ "has",
2482
+ "have"
2483
+ ]);
2484
+ LAYER_PATTERNS2 = {
2485
+ controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
2486
+ service: ["service", "services", "usecase", "usecases", "application"],
2487
+ repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
2488
+ model: ["model", "models", "entity", "entities", "schema", "schemas"],
2489
+ util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
2490
+ config: ["config", "configs", "configuration", "settings"],
2491
+ middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
2492
+ domain: ["domain", "core", "business"],
2493
+ infrastructure: ["infrastructure", "infra", "external", "adapters"],
2494
+ presentation: ["presentation", "view", "views", "component", "components", "ui"],
2495
+ test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
2496
+ };
2497
+ });
2498
+
2148
2499
  // src/domain/services/similarity.ts
2149
2500
  function cosineSimilarity(a, b) {
2150
2501
  if (a.length !== b.length) {
@@ -2164,17 +2515,151 @@ function cosineSimilarity(a, b) {
2164
2515
  return dotProduct / magnitude;
2165
2516
  }
2166
2517
 
2167
- // src/modules/language/typescript/parseCode.ts
2168
- import * as ts from "typescript";
2169
- function parseCode(content, filepath) {
2170
- const ext = filepath.split(".").pop()?.toLowerCase();
2171
- if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
2172
- return parseTypeScript(content, filepath);
2518
+ // src/domain/services/queryIntent.ts
2519
+ import * as path7 from "path";
2520
+ function detectQueryIntent(queryTerms) {
2521
+ const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2522
+ const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2523
+ if (hasDocumentationTerm) {
2524
+ return "documentation";
2525
+ }
2526
+ if (hasImplementationTerm) {
2527
+ return "implementation";
2173
2528
  }
2174
- return parseGenericCode(content);
2529
+ return "neutral";
2175
2530
  }
2176
- function parseTypeScript(content, filepath) {
2177
- const chunks = [];
2531
+ function extractQueryTerms(query) {
2532
+ return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
2533
+ }
2534
+ function isSourceCodeFile(filepath) {
2535
+ const ext = path7.extname(filepath).toLowerCase();
2536
+ return SOURCE_CODE_EXTENSIONS.includes(ext);
2537
+ }
2538
+ function isDocFile(filepath) {
2539
+ const ext = path7.extname(filepath).toLowerCase();
2540
+ return DOC_EXTENSIONS.includes(ext);
2541
+ }
2542
+ function calculateFileTypeBoost(filepath, queryTerms) {
2543
+ const isSourceCode = isSourceCodeFile(filepath);
2544
+ const isDoc = isDocFile(filepath);
2545
+ const intent = detectQueryIntent(queryTerms);
2546
+ if (intent === "implementation") {
2547
+ if (isSourceCode) {
2548
+ return 0.06;
2549
+ }
2550
+ return 0;
2551
+ }
2552
+ if (intent === "documentation") {
2553
+ if (isDoc) {
2554
+ return 0.08;
2555
+ }
2556
+ return 0;
2557
+ }
2558
+ return 0;
2559
+ }
2560
+ var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2561
+ var init_queryIntent = __esm(() => {
2562
+ IMPLEMENTATION_TERMS = [
2563
+ "function",
2564
+ "method",
2565
+ "class",
2566
+ "interface",
2567
+ "implement",
2568
+ "implementation",
2569
+ "endpoint",
2570
+ "route",
2571
+ "handler",
2572
+ "controller",
2573
+ "module",
2574
+ "code"
2575
+ ];
2576
+ DOCUMENTATION_TERMS = [
2577
+ "documentation",
2578
+ "docs",
2579
+ "guide",
2580
+ "tutorial",
2581
+ "readme",
2582
+ "how",
2583
+ "what",
2584
+ "why",
2585
+ "explain",
2586
+ "overview",
2587
+ "getting",
2588
+ "started",
2589
+ "requirements",
2590
+ "setup",
2591
+ "install",
2592
+ "configure",
2593
+ "configuration"
2594
+ ];
2595
+ SOURCE_CODE_EXTENSIONS = [
2596
+ ".ts",
2597
+ ".tsx",
2598
+ ".js",
2599
+ ".jsx",
2600
+ ".mjs",
2601
+ ".cjs",
2602
+ ".py",
2603
+ ".go",
2604
+ ".rs",
2605
+ ".java"
2606
+ ];
2607
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2608
+ });
2609
+
2610
+ // src/domain/services/chunking.ts
2611
+ function createLineBasedChunks(content, options = {}) {
2612
+ const {
2613
+ chunkSize = DEFAULT_CHUNK_SIZE,
2614
+ overlap = DEFAULT_OVERLAP,
2615
+ minLinesForMultipleChunks = chunkSize
2616
+ } = options;
2617
+ const lines = content.split(`
2618
+ `);
2619
+ const chunks = [];
2620
+ if (lines.length <= minLinesForMultipleChunks) {
2621
+ return [
2622
+ {
2623
+ content,
2624
+ startLine: 1,
2625
+ endLine: lines.length,
2626
+ type: "file"
2627
+ }
2628
+ ];
2629
+ }
2630
+ for (let i = 0;i < lines.length; i += chunkSize - overlap) {
2631
+ const endIdx = Math.min(i + chunkSize, lines.length);
2632
+ chunks.push({
2633
+ content: lines.slice(i, endIdx).join(`
2634
+ `),
2635
+ startLine: i + 1,
2636
+ endLine: endIdx,
2637
+ type: "block"
2638
+ });
2639
+ if (endIdx >= lines.length)
2640
+ break;
2641
+ }
2642
+ return chunks;
2643
+ }
2644
+ function generateChunkId(filepath, startLine, endLine) {
2645
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2646
+ return `${safePath}-${startLine}-${endLine}`;
2647
+ }
2648
+ var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
2649
+
2650
+ // src/domain/services/index.ts
2651
+ var init_services = __esm(() => {
2652
+ init_keywords();
2653
+ init_queryIntent();
2654
+ });
2655
+
2656
+ // src/modules/language/typescript/parseCode.ts
2657
+ import * as ts from "typescript";
2658
+ function parseTypeScriptCode(content, filepath) {
2659
+ return parseTypeScript(content, filepath);
2660
+ }
2661
+ function parseTypeScript(content, filepath) {
2662
+ const chunks = [];
2178
2663
  const lines = content.split(`
2179
2664
  `);
2180
2665
  const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
@@ -2315,41 +2800,20 @@ function parseTypeScript(content, filepath) {
2315
2800
  }
2316
2801
  ts.forEachChild(sourceFile, visit);
2317
2802
  if (chunks.length === 0) {
2318
- return parseGenericCode(content);
2319
- }
2320
- return chunks;
2321
- }
2322
- function parseGenericCode(content) {
2323
- const chunks = [];
2324
- const lines = content.split(`
2803
+ const lines2 = content.split(`
2325
2804
  `);
2326
- const CHUNK_SIZE = 30;
2327
- const OVERLAP = 5;
2328
- if (lines.length <= CHUNK_SIZE) {
2329
2805
  return [
2330
2806
  {
2331
2807
  content,
2332
2808
  startLine: 1,
2333
- endLine: lines.length,
2809
+ endLine: lines2.length,
2334
2810
  type: "file"
2335
2811
  }
2336
2812
  ];
2337
2813
  }
2338
- for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
2339
- const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
2340
- chunks.push({
2341
- content: lines.slice(i, endIdx).join(`
2342
- `),
2343
- startLine: i + 1,
2344
- endLine: endIdx,
2345
- type: "block"
2346
- });
2347
- if (endIdx >= lines.length)
2348
- break;
2349
- }
2350
2814
  return chunks;
2351
2815
  }
2352
- function generateChunkId(filepath, startLine, endLine) {
2816
+ function generateChunkId2(filepath, startLine, endLine) {
2353
2817
  const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2354
2818
  return `${safePath}-${startLine}-${endLine}`;
2355
2819
  }
@@ -2360,256 +2824,68 @@ var init_fileIndexStorage = __esm(() => {
2360
2824
  init_entities();
2361
2825
  });
2362
2826
 
2363
- // src/domain/services/keywords.ts
2364
- function extractKeywords(content, name, maxKeywords = 50) {
2365
- const keywords = new Set;
2366
- if (name) {
2367
- keywords.add(name.toLowerCase());
2368
- const parts = name.split(/(?=[A-Z])/).map((p) => p.toLowerCase());
2369
- parts.forEach((p) => p.length > 2 && keywords.add(p));
2827
+ // src/infrastructure/storage/symbolicIndex.ts
2828
+ import * as fs3 from "fs/promises";
2829
+ import * as path8 from "path";
2830
+
2831
+ class SymbolicIndex {
2832
+ meta = null;
2833
+ fileSummaries = new Map;
2834
+ bm25Index = null;
2835
+ symbolicPath;
2836
+ moduleId;
2837
+ constructor(indexDir, moduleId) {
2838
+ this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
2839
+ this.moduleId = moduleId;
2370
2840
  }
2371
- const identifierRegex = /\b([a-zA-Z_][a-zA-Z0-9_]{2,})\b/g;
2372
- let match;
2373
- while ((match = identifierRegex.exec(content)) !== null) {
2374
- const word = match[1].toLowerCase();
2375
- if (!COMMON_KEYWORDS.has(word) && word.length > 2) {
2376
- keywords.add(word);
2841
+ async initialize() {
2842
+ try {
2843
+ await this.load();
2844
+ } catch {
2845
+ this.meta = {
2846
+ version: "1.0.0",
2847
+ lastUpdated: new Date().toISOString(),
2848
+ moduleId: this.moduleId,
2849
+ fileCount: 0,
2850
+ bm25Data: {
2851
+ avgDocLength: 0,
2852
+ documentFrequencies: {},
2853
+ totalDocs: 0
2854
+ }
2855
+ };
2856
+ this.bm25Index = new BM25Index;
2377
2857
  }
2378
2858
  }
2379
- return Array.from(keywords).slice(0, maxKeywords);
2380
- }
2381
- function splitIdentifier(str) {
2382
- return str.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[_-]/g, " ").split(/\s+/).map((s) => s.toLowerCase()).filter((s) => s.length > 1);
2383
- }
2384
- function extractPathKeywords(filepath) {
2385
- const keywords = new Set;
2386
- const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
2387
- const segments = pathWithoutExt.split(/[/\\]/);
2388
- for (const segment of segments) {
2389
- if (segment.length < 2)
2390
- continue;
2391
- const lower = segment.toLowerCase();
2392
- if (!COMMON_KEYWORDS.has(lower) && lower.length > 2) {
2393
- keywords.add(lower);
2859
+ addFile(summary) {
2860
+ this.fileSummaries.set(summary.filepath, summary);
2861
+ }
2862
+ removeFile(filepath) {
2863
+ return this.fileSummaries.delete(filepath);
2864
+ }
2865
+ buildBM25Index() {
2866
+ this.bm25Index = new BM25Index;
2867
+ for (const [filepath, summary] of this.fileSummaries) {
2868
+ const content = [
2869
+ ...summary.keywords,
2870
+ ...summary.exports,
2871
+ ...extractPathKeywords(filepath)
2872
+ ].join(" ");
2873
+ this.bm25Index.addDocuments([{ id: filepath, content }]);
2394
2874
  }
2395
- const parts = splitIdentifier(segment);
2396
- for (const part of parts) {
2397
- if (!COMMON_KEYWORDS.has(part) && part.length > 2) {
2398
- keywords.add(part);
2399
- }
2875
+ if (this.meta) {
2876
+ this.meta.fileCount = this.fileSummaries.size;
2877
+ this.meta.bm25Data.totalDocs = this.fileSummaries.size;
2400
2878
  }
2401
2879
  }
2402
- return Array.from(keywords);
2403
- }
2404
- function parsePathContext(filepath) {
2405
- const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
2406
- const allSegments = pathWithoutExt.split(/[/\\]/);
2407
- const filename = allSegments[allSegments.length - 1];
2408
- const dirSegments = allSegments.slice(0, -1);
2409
- const keywords = extractPathKeywords(filepath);
2410
- let layer;
2411
- const allLower = [...dirSegments, filename].map((s) => s.toLowerCase()).join(" ");
2412
- const filenameLower = filename.toLowerCase();
2413
- for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS2)) {
2414
- for (const pattern of patterns) {
2415
- if (filenameLower.includes(pattern)) {
2416
- layer = layerName;
2417
- break;
2418
- }
2419
- if (dirSegments.some((s) => s.toLowerCase() === pattern)) {
2420
- layer = layerName;
2421
- break;
2422
- }
2880
+ findCandidates(query, maxCandidates = 20) {
2881
+ if (!this.bm25Index) {
2882
+ return Array.from(this.fileSummaries.keys());
2423
2883
  }
2424
- if (layer)
2425
- break;
2884
+ const results = this.bm25Index.search(query, maxCandidates);
2885
+ return results.map((r) => r.id);
2426
2886
  }
2427
- let domain;
2428
- const layerPatternSet = new Set(Object.values(LAYER_PATTERNS2).flat());
2429
- const reversedSegments = [...dirSegments].reverse();
2430
- for (const segment of reversedSegments) {
2431
- const lower = segment.toLowerCase();
2432
- if (["src", "lib", "app", "packages", "modules"].includes(lower))
2433
- continue;
2434
- if (layerPatternSet.has(lower))
2435
- continue;
2436
- if (lower.length > 2) {
2437
- domain = lower;
2438
- break;
2439
- }
2440
- }
2441
- return {
2442
- segments: dirSegments,
2443
- layer,
2444
- domain,
2445
- depth: dirSegments.length,
2446
- keywords
2447
- };
2448
- }
2449
- function formatPathContextForEmbedding(pathContext) {
2450
- const parts = [];
2451
- if (pathContext.domain) {
2452
- parts.push(pathContext.domain);
2453
- }
2454
- if (pathContext.layer) {
2455
- parts.push(pathContext.layer);
2456
- }
2457
- const significantSegments = pathContext.segments.slice(-3).filter((s) => s.length > 2 && !["src", "lib", "app"].includes(s.toLowerCase()));
2458
- if (significantSegments.length > 0) {
2459
- parts.push(...significantSegments.map((s) => s.toLowerCase()));
2460
- }
2461
- if (parts.length === 0)
2462
- return "";
2463
- const unique = [...new Set(parts)];
2464
- return `[${unique.join(" ")}]`;
2465
- }
2466
- var COMMON_KEYWORDS, LAYER_PATTERNS2;
2467
- var init_keywords = __esm(() => {
2468
- COMMON_KEYWORDS = new Set([
2469
- "const",
2470
- "let",
2471
- "var",
2472
- "function",
2473
- "class",
2474
- "interface",
2475
- "type",
2476
- "enum",
2477
- "export",
2478
- "import",
2479
- "from",
2480
- "return",
2481
- "async",
2482
- "await",
2483
- "new",
2484
- "this",
2485
- "true",
2486
- "false",
2487
- "null",
2488
- "undefined",
2489
- "if",
2490
- "else",
2491
- "for",
2492
- "while",
2493
- "switch",
2494
- "case",
2495
- "break",
2496
- "continue",
2497
- "try",
2498
- "catch",
2499
- "finally",
2500
- "throw",
2501
- "typeof",
2502
- "instanceof",
2503
- "void",
2504
- "delete",
2505
- "in",
2506
- "of",
2507
- "string",
2508
- "number",
2509
- "boolean",
2510
- "any",
2511
- "unknown",
2512
- "never",
2513
- "object",
2514
- "public",
2515
- "private",
2516
- "protected",
2517
- "static",
2518
- "readonly",
2519
- "abstract",
2520
- "implements",
2521
- "extends",
2522
- "super",
2523
- "get",
2524
- "set",
2525
- "constructor",
2526
- "the",
2527
- "and",
2528
- "for",
2529
- "not",
2530
- "with",
2531
- "are",
2532
- "was",
2533
- "has",
2534
- "have"
2535
- ]);
2536
- LAYER_PATTERNS2 = {
2537
- controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
2538
- service: ["service", "services", "usecase", "usecases", "application"],
2539
- repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
2540
- model: ["model", "models", "entity", "entities", "schema", "schemas"],
2541
- util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
2542
- config: ["config", "configs", "configuration", "settings"],
2543
- middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
2544
- domain: ["domain", "core", "business"],
2545
- infrastructure: ["infrastructure", "infra", "external", "adapters"],
2546
- presentation: ["presentation", "view", "views", "component", "components", "ui"],
2547
- test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
2548
- };
2549
- });
2550
-
2551
- // src/infrastructure/storage/symbolicIndex.ts
2552
- import * as fs3 from "fs/promises";
2553
- import * as path7 from "path";
2554
-
2555
- class SymbolicIndex {
2556
- meta = null;
2557
- fileSummaries = new Map;
2558
- bm25Index = null;
2559
- symbolicPath;
2560
- moduleId;
2561
- constructor(indexDir, moduleId) {
2562
- this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
2563
- this.moduleId = moduleId;
2564
- }
2565
- async initialize() {
2566
- try {
2567
- await this.load();
2568
- } catch {
2569
- this.meta = {
2570
- version: "1.0.0",
2571
- lastUpdated: new Date().toISOString(),
2572
- moduleId: this.moduleId,
2573
- fileCount: 0,
2574
- bm25Data: {
2575
- avgDocLength: 0,
2576
- documentFrequencies: {},
2577
- totalDocs: 0
2578
- }
2579
- };
2580
- this.bm25Index = new BM25Index;
2581
- }
2582
- }
2583
- addFile(summary) {
2584
- this.fileSummaries.set(summary.filepath, summary);
2585
- }
2586
- removeFile(filepath) {
2587
- return this.fileSummaries.delete(filepath);
2588
- }
2589
- buildBM25Index() {
2590
- this.bm25Index = new BM25Index;
2591
- for (const [filepath, summary] of this.fileSummaries) {
2592
- const content = [
2593
- ...summary.keywords,
2594
- ...summary.exports,
2595
- ...extractPathKeywords(filepath)
2596
- ].join(" ");
2597
- this.bm25Index.addDocuments([{ id: filepath, content }]);
2598
- }
2599
- if (this.meta) {
2600
- this.meta.fileCount = this.fileSummaries.size;
2601
- this.meta.bm25Data.totalDocs = this.fileSummaries.size;
2602
- }
2603
- }
2604
- findCandidates(query, maxCandidates = 20) {
2605
- if (!this.bm25Index) {
2606
- return Array.from(this.fileSummaries.keys());
2607
- }
2608
- const results = this.bm25Index.search(query, maxCandidates);
2609
- return results.map((r) => r.id);
2610
- }
2611
- getAllFiles() {
2612
- return Array.from(this.fileSummaries.keys());
2887
+ getAllFiles() {
2888
+ return Array.from(this.fileSummaries.keys());
2613
2889
  }
2614
2890
  getFileSummary(filepath) {
2615
2891
  return this.fileSummaries.get(filepath);
@@ -2620,16 +2896,16 @@ class SymbolicIndex {
2620
2896
  this.meta.lastUpdated = new Date().toISOString();
2621
2897
  this.meta.fileCount = this.fileSummaries.size;
2622
2898
  await fs3.mkdir(this.symbolicPath, { recursive: true });
2623
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2899
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2624
2900
  await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2625
2901
  for (const [filepath, summary] of this.fileSummaries) {
2626
2902
  const summaryPath = this.getFileSummaryPath(filepath);
2627
- await fs3.mkdir(path7.dirname(summaryPath), { recursive: true });
2903
+ await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
2628
2904
  await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
2629
2905
  }
2630
2906
  }
2631
2907
  async load() {
2632
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2908
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2633
2909
  const metaContent = await fs3.readFile(metaPath, "utf-8");
2634
2910
  this.meta = JSON.parse(metaContent);
2635
2911
  this.fileSummaries.clear();
@@ -2640,7 +2916,7 @@ class SymbolicIndex {
2640
2916
  try {
2641
2917
  const entries = await fs3.readdir(dir, { withFileTypes: true });
2642
2918
  for (const entry of entries) {
2643
- const fullPath = path7.join(dir, entry.name);
2919
+ const fullPath = path8.join(dir, entry.name);
2644
2920
  if (entry.isDirectory()) {
2645
2921
  await this.loadFileSummariesRecursive(fullPath);
2646
2922
  } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
@@ -2657,7 +2933,7 @@ class SymbolicIndex {
2657
2933
  }
2658
2934
  getFileSummaryPath(filepath) {
2659
2935
  const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2660
- return path7.join(this.symbolicPath, jsonPath);
2936
+ return path8.join(this.symbolicPath, jsonPath);
2661
2937
  }
2662
2938
  async deleteFileSummary(filepath) {
2663
2939
  try {
@@ -2667,7 +2943,7 @@ class SymbolicIndex {
2667
2943
  }
2668
2944
  async exists() {
2669
2945
  try {
2670
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2946
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2671
2947
  await fs3.access(metaPath);
2672
2948
  return true;
2673
2949
  } catch {
@@ -2703,40 +2979,16 @@ var init_storage = __esm(() => {
2703
2979
  // src/modules/language/typescript/index.ts
2704
2980
  var exports_typescript = {};
2705
2981
  __export(exports_typescript, {
2982
+ isTypeScriptFile: () => isTypeScriptFile,
2706
2983
  TypeScriptModule: () => TypeScriptModule,
2984
+ TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
2707
2985
  DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
2708
2986
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2709
2987
  });
2710
- import * as path8 from "path";
2711
- function detectQueryIntent(queryTerms) {
2712
- const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2713
- const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2714
- if (hasDocumentationTerm) {
2715
- return "documentation";
2716
- }
2717
- if (hasImplementationTerm) {
2718
- return "implementation";
2719
- }
2720
- return "neutral";
2721
- }
2722
- function calculateFileTypeBoost(filepath, queryTerms) {
2723
- const ext = path8.extname(filepath).toLowerCase();
2724
- const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
2725
- const isDoc = DOC_EXTENSIONS.includes(ext);
2726
- const intent = detectQueryIntent(queryTerms);
2727
- if (intent === "implementation") {
2728
- if (isSourceCode) {
2729
- return 0.06;
2730
- }
2731
- return 0;
2732
- }
2733
- if (intent === "documentation") {
2734
- if (isDoc) {
2735
- return 0.08;
2736
- }
2737
- return 0;
2738
- }
2739
- return 0;
2988
+ import * as path9 from "path";
2989
+ function isTypeScriptFile(filepath) {
2990
+ const ext = path9.extname(filepath).toLowerCase();
2991
+ return TYPESCRIPT_EXTENSIONS.includes(ext);
2740
2992
  }
2741
2993
  function calculateChunkTypeBoost(chunk) {
2742
2994
  switch (chunk.type) {
@@ -2756,85 +3008,659 @@ function calculateChunkTypeBoost(chunk) {
2756
3008
  return 0;
2757
3009
  }
2758
3010
  }
2759
- function calculateExportBoost(chunk) {
2760
- return chunk.isExported ? 0.03 : 0;
3011
+ function calculateExportBoost(chunk) {
3012
+ return chunk.isExported ? 0.03 : 0;
3013
+ }
3014
+
3015
+ class TypeScriptModule {
3016
+ id = "language/typescript";
3017
+ name = "TypeScript Search";
3018
+ description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3019
+ version = "1.0.0";
3020
+ embeddingConfig = null;
3021
+ symbolicIndex = null;
3022
+ pendingSummaries = new Map;
3023
+ rootDir = "";
3024
+ logger = undefined;
3025
+ async initialize(config) {
3026
+ this.embeddingConfig = getEmbeddingConfigFromModule(config);
3027
+ this.logger = config.options?.logger;
3028
+ if (this.logger) {
3029
+ this.embeddingConfig = {
3030
+ ...this.embeddingConfig,
3031
+ logger: this.logger
3032
+ };
3033
+ }
3034
+ configureEmbeddings(this.embeddingConfig);
3035
+ this.pendingSummaries.clear();
3036
+ }
3037
+ async indexFile(filepath, content, ctx) {
3038
+ if (!isTypeScriptFile(filepath)) {
3039
+ return null;
3040
+ }
3041
+ this.rootDir = ctx.rootDir;
3042
+ const parsedChunks = parseTypeScriptCode(content, filepath);
3043
+ if (parsedChunks.length === 0) {
3044
+ return null;
3045
+ }
3046
+ const pathContext = parsePathContext(filepath);
3047
+ const pathPrefix = formatPathContextForEmbedding(pathContext);
3048
+ const chunkContents = parsedChunks.map((c) => {
3049
+ const namePrefix = c.name ? `${c.name}: ` : "";
3050
+ return `${pathPrefix} ${namePrefix}${c.content}`;
3051
+ });
3052
+ const embeddings = await getEmbeddings(chunkContents);
3053
+ const chunks = parsedChunks.map((pc) => ({
3054
+ id: generateChunkId2(filepath, pc.startLine, pc.endLine),
3055
+ content: pc.content,
3056
+ startLine: pc.startLine,
3057
+ endLine: pc.endLine,
3058
+ type: pc.type,
3059
+ name: pc.name,
3060
+ isExported: pc.isExported,
3061
+ jsDoc: pc.jsDoc
3062
+ }));
3063
+ const references = this.extractReferences(content, filepath);
3064
+ const stats = await ctx.getFileStats(filepath);
3065
+ const currentConfig = getEmbeddingConfig();
3066
+ const moduleData = {
3067
+ embeddings,
3068
+ embeddingModel: currentConfig.model
3069
+ };
3070
+ const chunkTypes = [
3071
+ ...new Set(parsedChunks.map((pc) => pc.type))
3072
+ ];
3073
+ const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
3074
+ const allKeywords = new Set;
3075
+ for (const pc of parsedChunks) {
3076
+ const keywords = extractKeywords(pc.content, pc.name);
3077
+ keywords.forEach((k) => allKeywords.add(k));
3078
+ }
3079
+ pathContext.keywords.forEach((k) => allKeywords.add(k));
3080
+ const fileSummary = {
3081
+ filepath,
3082
+ chunkCount: chunks.length,
3083
+ chunkTypes,
3084
+ keywords: Array.from(allKeywords),
3085
+ exports,
3086
+ lastModified: stats.lastModified,
3087
+ pathContext: {
3088
+ segments: pathContext.segments,
3089
+ layer: pathContext.layer,
3090
+ domain: pathContext.domain,
3091
+ depth: pathContext.depth
3092
+ }
3093
+ };
3094
+ this.pendingSummaries.set(filepath, fileSummary);
3095
+ return {
3096
+ filepath,
3097
+ lastModified: stats.lastModified,
3098
+ chunks,
3099
+ moduleData,
3100
+ references
3101
+ };
3102
+ }
3103
+ async finalize(ctx) {
3104
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3105
+ this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
3106
+ await this.symbolicIndex.initialize();
3107
+ for (const [filepath, summary] of this.pendingSummaries) {
3108
+ this.symbolicIndex.addFile(summary);
3109
+ }
3110
+ this.symbolicIndex.buildBM25Index();
3111
+ await this.symbolicIndex.save();
3112
+ this.pendingSummaries.clear();
3113
+ }
3114
+ async search(query, ctx, options = {}) {
3115
+ const {
3116
+ topK = DEFAULT_TOP_K2,
3117
+ minScore = DEFAULT_MIN_SCORE2,
3118
+ filePatterns
3119
+ } = options;
3120
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3121
+ const symbolicIndex = new SymbolicIndex(indexDir, this.id);
3122
+ let allFiles;
3123
+ try {
3124
+ await symbolicIndex.initialize();
3125
+ allFiles = symbolicIndex.getAllFiles();
3126
+ } catch {
3127
+ allFiles = await ctx.listIndexedFiles();
3128
+ }
3129
+ let filesToSearch = allFiles;
3130
+ if (filePatterns && filePatterns.length > 0) {
3131
+ filesToSearch = allFiles.filter((filepath) => {
3132
+ return filePatterns.some((pattern) => {
3133
+ if (pattern.startsWith("*.")) {
3134
+ const ext = pattern.slice(1);
3135
+ return filepath.endsWith(ext);
3136
+ }
3137
+ return filepath.includes(pattern);
3138
+ });
3139
+ });
3140
+ }
3141
+ const queryEmbedding = await getEmbedding(query);
3142
+ const bm25Index = new BM25Index;
3143
+ const allChunksData = [];
3144
+ for (const filepath of filesToSearch) {
3145
+ const fileIndex = await ctx.loadFileIndex(filepath);
3146
+ if (!fileIndex)
3147
+ continue;
3148
+ const moduleData = fileIndex.moduleData;
3149
+ if (!moduleData?.embeddings)
3150
+ continue;
3151
+ for (let i = 0;i < fileIndex.chunks.length; i++) {
3152
+ const chunk = fileIndex.chunks[i];
3153
+ const embedding = moduleData.embeddings[i];
3154
+ if (!embedding)
3155
+ continue;
3156
+ allChunksData.push({
3157
+ filepath: fileIndex.filepath,
3158
+ chunk,
3159
+ embedding
3160
+ });
3161
+ bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3162
+ }
3163
+ }
3164
+ const bm25Results = bm25Index.search(query, topK * 3);
3165
+ const bm25Scores = new Map;
3166
+ for (const result of bm25Results) {
3167
+ bm25Scores.set(result.id, normalizeScore(result.score, 3));
3168
+ }
3169
+ const queryTerms = extractQueryTerms(query);
3170
+ const pathBoosts = new Map;
3171
+ for (const filepath of filesToSearch) {
3172
+ const summary = symbolicIndex.getFileSummary(filepath);
3173
+ if (summary?.pathContext) {
3174
+ let boost = 0;
3175
+ const ctx2 = summary.pathContext;
3176
+ if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
3177
+ boost += 0.1;
3178
+ }
3179
+ if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
3180
+ boost += 0.05;
3181
+ }
3182
+ const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
3183
+ if (segmentMatch) {
3184
+ boost += 0.05;
3185
+ }
3186
+ pathBoosts.set(filepath, boost);
3187
+ }
3188
+ }
3189
+ const results = [];
3190
+ for (const { filepath, chunk, embedding } of allChunksData) {
3191
+ const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3192
+ const bm25Score = bm25Scores.get(chunk.id) || 0;
3193
+ const pathBoost = pathBoosts.get(filepath) || 0;
3194
+ const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
3195
+ const chunkTypeBoost = calculateChunkTypeBoost(chunk);
3196
+ const exportBoost = calculateExportBoost(chunk);
3197
+ const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
3198
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3199
+ if (hybridScore >= minScore || bm25Score > 0.3) {
3200
+ results.push({
3201
+ filepath,
3202
+ chunk,
3203
+ score: hybridScore,
3204
+ moduleId: this.id,
3205
+ context: {
3206
+ semanticScore,
3207
+ bm25Score,
3208
+ pathBoost,
3209
+ fileTypeBoost,
3210
+ chunkTypeBoost,
3211
+ exportBoost
3212
+ }
3213
+ });
3214
+ }
3215
+ }
3216
+ results.sort((a, b) => b.score - a.score);
3217
+ return results.slice(0, topK);
3218
+ }
3219
+ extractReferences(content, filepath) {
3220
+ const references = [];
3221
+ const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
3222
+ const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
3223
+ let match;
3224
+ while ((match = importRegex.exec(content)) !== null) {
3225
+ const importPath = match[1];
3226
+ if (importPath.startsWith(".")) {
3227
+ const dir = path9.dirname(filepath);
3228
+ const resolved = path9.normalize(path9.join(dir, importPath));
3229
+ references.push(resolved);
3230
+ }
3231
+ }
3232
+ while ((match = requireRegex.exec(content)) !== null) {
3233
+ const importPath = match[1];
3234
+ if (importPath.startsWith(".")) {
3235
+ const dir = path9.dirname(filepath);
3236
+ const resolved = path9.normalize(path9.join(dir, importPath));
3237
+ references.push(resolved);
3238
+ }
3239
+ }
3240
+ return references;
3241
+ }
3242
+ }
3243
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS;
3244
+ var init_typescript = __esm(() => {
3245
+ init_embeddings();
3246
+ init_services();
3247
+ init_config2();
3248
+ init_parseCode();
3249
+ init_storage();
3250
+ TYPESCRIPT_EXTENSIONS = [
3251
+ ".ts",
3252
+ ".tsx",
3253
+ ".js",
3254
+ ".jsx",
3255
+ ".mjs",
3256
+ ".cjs",
3257
+ ".mts",
3258
+ ".cts"
3259
+ ];
3260
+ });
3261
+
3262
+ // src/modules/data/json/index.ts
3263
+ var exports_json = {};
3264
+ __export(exports_json, {
3265
+ isJsonFile: () => isJsonFile,
3266
+ JsonModule: () => JsonModule,
3267
+ JSON_EXTENSIONS: () => JSON_EXTENSIONS,
3268
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
3269
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
3270
+ });
3271
+ import * as path10 from "path";
3272
+ function isJsonFile(filepath) {
3273
+ const ext = path10.extname(filepath).toLowerCase();
3274
+ return JSON_EXTENSIONS.includes(ext);
3275
+ }
3276
+ function extractJsonKeys(obj, prefix = "") {
3277
+ const keys = [];
3278
+ if (obj === null || obj === undefined) {
3279
+ return keys;
3280
+ }
3281
+ if (Array.isArray(obj)) {
3282
+ obj.forEach((item, index) => {
3283
+ keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
3284
+ });
3285
+ } else if (typeof obj === "object") {
3286
+ for (const [key, value] of Object.entries(obj)) {
3287
+ const fullKey = prefix ? `${prefix}.${key}` : key;
3288
+ keys.push(key);
3289
+ keys.push(...extractJsonKeys(value, fullKey));
3290
+ }
3291
+ }
3292
+ return keys;
3293
+ }
3294
+ function extractJsonKeywords(content) {
3295
+ try {
3296
+ const parsed = JSON.parse(content);
3297
+ const keys = extractJsonKeys(parsed);
3298
+ const stringValues = [];
3299
+ const extractStrings = (obj) => {
3300
+ if (typeof obj === "string") {
3301
+ const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
3302
+ stringValues.push(...words);
3303
+ } else if (Array.isArray(obj)) {
3304
+ obj.forEach(extractStrings);
3305
+ } else if (obj && typeof obj === "object") {
3306
+ Object.values(obj).forEach(extractStrings);
3307
+ }
3308
+ };
3309
+ extractStrings(parsed);
3310
+ return [...new Set([...keys, ...stringValues])];
3311
+ } catch {
3312
+ return [];
3313
+ }
3314
+ }
3315
+
3316
+ class JsonModule {
3317
+ id = "data/json";
3318
+ name = "JSON Search";
3319
+ description = "JSON file search with structure-aware indexing";
3320
+ version = "1.0.0";
3321
+ embeddingConfig = null;
3322
+ symbolicIndex = null;
3323
+ pendingSummaries = new Map;
3324
+ rootDir = "";
3325
+ logger = undefined;
3326
+ async initialize(config) {
3327
+ this.embeddingConfig = getEmbeddingConfigFromModule(config);
3328
+ this.logger = config.options?.logger;
3329
+ if (this.logger) {
3330
+ this.embeddingConfig = {
3331
+ ...this.embeddingConfig,
3332
+ logger: this.logger
3333
+ };
3334
+ }
3335
+ configureEmbeddings(this.embeddingConfig);
3336
+ this.pendingSummaries.clear();
3337
+ }
3338
+ async indexFile(filepath, content, ctx) {
3339
+ if (!isJsonFile(filepath)) {
3340
+ return null;
3341
+ }
3342
+ this.rootDir = ctx.rootDir;
3343
+ const textChunks = createLineBasedChunks(content, {
3344
+ chunkSize: 50,
3345
+ overlap: 10
3346
+ });
3347
+ if (textChunks.length === 0) {
3348
+ return null;
3349
+ }
3350
+ const chunkContents = textChunks.map((c) => {
3351
+ const filename = path10.basename(filepath);
3352
+ return `${filename}: ${c.content}`;
3353
+ });
3354
+ const embeddings = await getEmbeddings(chunkContents);
3355
+ const chunks = textChunks.map((tc, i) => ({
3356
+ id: generateChunkId(filepath, tc.startLine, tc.endLine),
3357
+ content: tc.content,
3358
+ startLine: tc.startLine,
3359
+ endLine: tc.endLine,
3360
+ type: tc.type
3361
+ }));
3362
+ const jsonKeys = extractJsonKeys((() => {
3363
+ try {
3364
+ return JSON.parse(content);
3365
+ } catch {
3366
+ return {};
3367
+ }
3368
+ })());
3369
+ const stats = await ctx.getFileStats(filepath);
3370
+ const currentConfig = getEmbeddingConfig();
3371
+ const moduleData = {
3372
+ embeddings,
3373
+ embeddingModel: currentConfig.model,
3374
+ jsonKeys
3375
+ };
3376
+ const keywords = extractJsonKeywords(content);
3377
+ const fileSummary = {
3378
+ filepath,
3379
+ chunkCount: chunks.length,
3380
+ chunkTypes: ["file"],
3381
+ keywords,
3382
+ exports: [],
3383
+ lastModified: stats.lastModified
3384
+ };
3385
+ this.pendingSummaries.set(filepath, fileSummary);
3386
+ return {
3387
+ filepath,
3388
+ lastModified: stats.lastModified,
3389
+ chunks,
3390
+ moduleData
3391
+ };
3392
+ }
3393
+ async finalize(ctx) {
3394
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3395
+ this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
3396
+ await this.symbolicIndex.initialize();
3397
+ for (const [filepath, summary] of this.pendingSummaries) {
3398
+ this.symbolicIndex.addFile(summary);
3399
+ }
3400
+ this.symbolicIndex.buildBM25Index();
3401
+ await this.symbolicIndex.save();
3402
+ this.pendingSummaries.clear();
3403
+ }
3404
+ async search(query, ctx, options = {}) {
3405
+ const {
3406
+ topK = DEFAULT_TOP_K3,
3407
+ minScore = DEFAULT_MIN_SCORE3,
3408
+ filePatterns
3409
+ } = options;
3410
+ const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3411
+ const symbolicIndex = new SymbolicIndex(indexDir, this.id);
3412
+ let allFiles;
3413
+ try {
3414
+ await symbolicIndex.initialize();
3415
+ allFiles = symbolicIndex.getAllFiles();
3416
+ } catch {
3417
+ allFiles = await ctx.listIndexedFiles();
3418
+ }
3419
+ let filesToSearch = allFiles.filter((f) => isJsonFile(f));
3420
+ if (filePatterns && filePatterns.length > 0) {
3421
+ filesToSearch = filesToSearch.filter((filepath) => {
3422
+ return filePatterns.some((pattern) => {
3423
+ if (pattern.startsWith("*.")) {
3424
+ const ext = pattern.slice(1);
3425
+ return filepath.endsWith(ext);
3426
+ }
3427
+ return filepath.includes(pattern);
3428
+ });
3429
+ });
3430
+ }
3431
+ const queryEmbedding = await getEmbedding(query);
3432
+ const bm25Index = new BM25Index;
3433
+ const allChunksData = [];
3434
+ for (const filepath of filesToSearch) {
3435
+ const fileIndex = await ctx.loadFileIndex(filepath);
3436
+ if (!fileIndex)
3437
+ continue;
3438
+ const moduleData = fileIndex.moduleData;
3439
+ if (!moduleData?.embeddings)
3440
+ continue;
3441
+ for (let i = 0;i < fileIndex.chunks.length; i++) {
3442
+ const chunk = fileIndex.chunks[i];
3443
+ const embedding = moduleData.embeddings[i];
3444
+ if (!embedding)
3445
+ continue;
3446
+ allChunksData.push({
3447
+ filepath: fileIndex.filepath,
3448
+ chunk,
3449
+ embedding
3450
+ });
3451
+ bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3452
+ }
3453
+ }
3454
+ const bm25Results = bm25Index.search(query, topK * 3);
3455
+ const bm25Scores = new Map;
3456
+ for (const result of bm25Results) {
3457
+ bm25Scores.set(result.id, normalizeScore(result.score, 3));
3458
+ }
3459
+ const queryTerms = extractQueryTerms(query);
3460
+ const results = [];
3461
+ for (const { filepath, chunk, embedding } of allChunksData) {
3462
+ const semanticScore = cosineSimilarity(queryEmbedding, embedding);
3463
+ const bm25Score = bm25Scores.get(chunk.id) || 0;
3464
+ const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
3465
+ if (hybridScore >= minScore || bm25Score > 0.3) {
3466
+ results.push({
3467
+ filepath,
3468
+ chunk,
3469
+ score: hybridScore,
3470
+ moduleId: this.id,
3471
+ context: {
3472
+ semanticScore,
3473
+ bm25Score
3474
+ }
3475
+ });
3476
+ }
3477
+ }
3478
+ results.sort((a, b) => b.score - a.score);
3479
+ return results.slice(0, topK);
3480
+ }
3481
+ }
3482
+ var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS;
3483
+ var init_json = __esm(() => {
3484
+ init_embeddings();
3485
+ init_services();
3486
+ init_config2();
3487
+ init_storage();
3488
+ JSON_EXTENSIONS = [".json"];
3489
+ });
3490
+
3491
+ // src/modules/docs/markdown/index.ts
3492
+ var exports_markdown = {};
3493
+ __export(exports_markdown, {
3494
+ isMarkdownFile: () => isMarkdownFile,
3495
+ MarkdownModule: () => MarkdownModule,
3496
+ MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
3497
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
3498
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
3499
+ });
3500
+ import * as path11 from "path";
3501
+ function isMarkdownFile(filepath) {
3502
+ const ext = path11.extname(filepath).toLowerCase();
3503
+ return MARKDOWN_EXTENSIONS.includes(ext);
3504
+ }
3505
+ function parseMarkdownSections(content) {
3506
+ const lines = content.split(`
3507
+ `);
3508
+ const sections = [];
3509
+ let currentSection = null;
3510
+ let currentContent = [];
3511
+ let startLine = 1;
3512
+ for (let i = 0;i < lines.length; i++) {
3513
+ const line = lines[i];
3514
+ const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
3515
+ if (headingMatch) {
3516
+ if (currentSection) {
3517
+ currentSection.content = currentContent.join(`
3518
+ `).trim();
3519
+ currentSection.endLine = i;
3520
+ if (currentSection.content || currentSection.heading) {
3521
+ sections.push(currentSection);
3522
+ }
3523
+ } else if (currentContent.length > 0) {
3524
+ sections.push({
3525
+ heading: "",
3526
+ level: 0,
3527
+ content: currentContent.join(`
3528
+ `).trim(),
3529
+ startLine: 1,
3530
+ endLine: i
3531
+ });
3532
+ }
3533
+ currentSection = {
3534
+ heading: headingMatch[2],
3535
+ level: headingMatch[1].length,
3536
+ content: "",
3537
+ startLine: i + 1,
3538
+ endLine: lines.length
3539
+ };
3540
+ currentContent = [];
3541
+ } else {
3542
+ currentContent.push(line);
3543
+ }
3544
+ }
3545
+ if (currentSection) {
3546
+ currentSection.content = currentContent.join(`
3547
+ `).trim();
3548
+ currentSection.endLine = lines.length;
3549
+ if (currentSection.content || currentSection.heading) {
3550
+ sections.push(currentSection);
3551
+ }
3552
+ } else if (currentContent.length > 0) {
3553
+ sections.push({
3554
+ heading: "",
3555
+ level: 0,
3556
+ content: currentContent.join(`
3557
+ `).trim(),
3558
+ startLine: 1,
3559
+ endLine: lines.length
3560
+ });
3561
+ }
3562
+ return sections;
3563
+ }
3564
+ function extractMarkdownKeywords(content) {
3565
+ const keywords = [];
3566
+ const headingMatches = content.matchAll(/^#{1,6}\s+(.+)$/gm);
3567
+ for (const match of headingMatches) {
3568
+ const heading = match[1].toLowerCase();
3569
+ const words = heading.split(/\s+/).filter((w) => w.length > 2);
3570
+ keywords.push(...words);
3571
+ }
3572
+ const emphasisMatches = content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g);
3573
+ for (const match of emphasisMatches) {
3574
+ const text = (match[1] || match[2] || "").toLowerCase();
3575
+ const words = text.split(/\s+/).filter((w) => w.length > 2);
3576
+ keywords.push(...words);
3577
+ }
3578
+ const codeMatches = content.matchAll(/`([^`]+)`/g);
3579
+ for (const match of codeMatches) {
3580
+ const code = match[1].toLowerCase();
3581
+ if (code.length > 2 && code.length < 50) {
3582
+ keywords.push(code);
3583
+ }
3584
+ }
3585
+ const linkMatches = content.matchAll(/\[([^\]]+)\]/g);
3586
+ for (const match of linkMatches) {
3587
+ const text = match[1].toLowerCase();
3588
+ const words = text.split(/\s+/).filter((w) => w.length > 2);
3589
+ keywords.push(...words);
3590
+ }
3591
+ return [...new Set(keywords)];
2761
3592
  }
2762
3593
 
2763
- class TypeScriptModule {
2764
- id = "language/typescript";
2765
- name = "TypeScript Search";
2766
- description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3594
+ class MarkdownModule {
3595
+ id = "docs/markdown";
3596
+ name = "Markdown Search";
3597
+ description = "Markdown documentation search with section-aware indexing";
2767
3598
  version = "1.0.0";
2768
3599
  embeddingConfig = null;
2769
3600
  symbolicIndex = null;
2770
3601
  pendingSummaries = new Map;
2771
3602
  rootDir = "";
3603
+ logger = undefined;
2772
3604
  async initialize(config) {
2773
3605
  this.embeddingConfig = getEmbeddingConfigFromModule(config);
3606
+ this.logger = config.options?.logger;
3607
+ if (this.logger) {
3608
+ this.embeddingConfig = {
3609
+ ...this.embeddingConfig,
3610
+ logger: this.logger
3611
+ };
3612
+ }
2774
3613
  configureEmbeddings(this.embeddingConfig);
2775
3614
  this.pendingSummaries.clear();
2776
3615
  }
2777
3616
  async indexFile(filepath, content, ctx) {
3617
+ if (!isMarkdownFile(filepath)) {
3618
+ return null;
3619
+ }
2778
3620
  this.rootDir = ctx.rootDir;
2779
- const parsedChunks = parseCode(content, filepath);
2780
- if (parsedChunks.length === 0) {
3621
+ const sections = parseMarkdownSections(content);
3622
+ if (sections.length === 0) {
2781
3623
  return null;
2782
3624
  }
2783
- const pathContext = parsePathContext(filepath);
2784
- const pathPrefix = formatPathContextForEmbedding(pathContext);
2785
- const chunkContents = parsedChunks.map((c) => {
2786
- const namePrefix = c.name ? `${c.name}: ` : "";
2787
- return `${pathPrefix} ${namePrefix}${c.content}`;
3625
+ const chunkContents = sections.map((s) => {
3626
+ const filename = path11.basename(filepath);
3627
+ const headingContext = s.heading ? `${s.heading}: ` : "";
3628
+ return `${filename} ${headingContext}${s.content}`;
2788
3629
  });
2789
3630
  const embeddings = await getEmbeddings(chunkContents);
2790
- const chunks = parsedChunks.map((pc) => ({
2791
- id: generateChunkId(filepath, pc.startLine, pc.endLine),
2792
- content: pc.content,
2793
- startLine: pc.startLine,
2794
- endLine: pc.endLine,
2795
- type: pc.type,
2796
- name: pc.name,
2797
- isExported: pc.isExported,
2798
- jsDoc: pc.jsDoc
3631
+ const chunks = sections.map((section, i) => ({
3632
+ id: generateChunkId(filepath, section.startLine, section.endLine),
3633
+ content: section.heading ? `## ${section.heading}
3634
+
3635
+ ${section.content}` : section.content,
3636
+ startLine: section.startLine,
3637
+ endLine: section.endLine,
3638
+ type: "block",
3639
+ name: section.heading || undefined
2799
3640
  }));
2800
- const references = this.extractReferences(content, filepath);
3641
+ const headings = sections.filter((s) => s.heading).map((s) => s.heading);
2801
3642
  const stats = await ctx.getFileStats(filepath);
2802
3643
  const currentConfig = getEmbeddingConfig();
2803
3644
  const moduleData = {
2804
3645
  embeddings,
2805
- embeddingModel: currentConfig.model
3646
+ embeddingModel: currentConfig.model,
3647
+ headings
2806
3648
  };
2807
- const chunkTypes = [
2808
- ...new Set(parsedChunks.map((pc) => pc.type))
2809
- ];
2810
- const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
2811
- const allKeywords = new Set;
2812
- for (const pc of parsedChunks) {
2813
- const keywords = extractKeywords(pc.content, pc.name);
2814
- keywords.forEach((k) => allKeywords.add(k));
2815
- }
2816
- pathContext.keywords.forEach((k) => allKeywords.add(k));
3649
+ const keywords = extractMarkdownKeywords(content);
2817
3650
  const fileSummary = {
2818
3651
  filepath,
2819
3652
  chunkCount: chunks.length,
2820
- chunkTypes,
2821
- keywords: Array.from(allKeywords),
2822
- exports,
2823
- lastModified: stats.lastModified,
2824
- pathContext: {
2825
- segments: pathContext.segments,
2826
- layer: pathContext.layer,
2827
- domain: pathContext.domain,
2828
- depth: pathContext.depth
2829
- }
3653
+ chunkTypes: ["block"],
3654
+ keywords,
3655
+ exports: headings,
3656
+ lastModified: stats.lastModified
2830
3657
  };
2831
3658
  this.pendingSummaries.set(filepath, fileSummary);
2832
3659
  return {
2833
3660
  filepath,
2834
3661
  lastModified: stats.lastModified,
2835
3662
  chunks,
2836
- moduleData,
2837
- references
3663
+ moduleData
2838
3664
  };
2839
3665
  }
2840
3666
  async finalize(ctx) {
@@ -2850,8 +3676,8 @@ class TypeScriptModule {
2850
3676
  }
2851
3677
  async search(query, ctx, options = {}) {
2852
3678
  const {
2853
- topK = DEFAULT_TOP_K2,
2854
- minScore = DEFAULT_MIN_SCORE2,
3679
+ topK = DEFAULT_TOP_K4,
3680
+ minScore = DEFAULT_MIN_SCORE4,
2855
3681
  filePatterns
2856
3682
  } = options;
2857
3683
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
@@ -2863,9 +3689,9 @@ class TypeScriptModule {
2863
3689
  } catch {
2864
3690
  allFiles = await ctx.listIndexedFiles();
2865
3691
  }
2866
- let filesToSearch = allFiles;
3692
+ let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
2867
3693
  if (filePatterns && filePatterns.length > 0) {
2868
- filesToSearch = allFiles.filter((filepath) => {
3694
+ filesToSearch = filesToSearch.filter((filepath) => {
2869
3695
  return filePatterns.some((pattern) => {
2870
3696
  if (pattern.startsWith("*.")) {
2871
3697
  const ext = pattern.slice(1);
@@ -2903,36 +3729,24 @@ class TypeScriptModule {
2903
3729
  for (const result of bm25Results) {
2904
3730
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
2905
3731
  }
2906
- const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
2907
- const pathBoosts = new Map;
2908
- for (const filepath of filesToSearch) {
2909
- const summary = symbolicIndex.getFileSummary(filepath);
2910
- if (summary?.pathContext) {
2911
- let boost = 0;
2912
- const ctx2 = summary.pathContext;
2913
- if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
2914
- boost += 0.1;
2915
- }
2916
- if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
2917
- boost += 0.05;
2918
- }
2919
- const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
2920
- if (segmentMatch) {
2921
- boost += 0.05;
2922
- }
2923
- pathBoosts.set(filepath, boost);
2924
- }
2925
- }
3732
+ const queryTerms = extractQueryTerms(query);
2926
3733
  const results = [];
2927
3734
  for (const { filepath, chunk, embedding } of allChunksData) {
2928
3735
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
2929
3736
  const bm25Score = bm25Scores.get(chunk.id) || 0;
2930
- const pathBoost = pathBoosts.get(filepath) || 0;
2931
- const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
2932
- const chunkTypeBoost = calculateChunkTypeBoost(chunk);
2933
- const exportBoost = calculateExportBoost(chunk);
2934
- const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
2935
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3737
+ let docBoost = 0;
3738
+ if (queryTerms.some((t) => [
3739
+ "docs",
3740
+ "documentation",
3741
+ "readme",
3742
+ "guide",
3743
+ "how",
3744
+ "what",
3745
+ "explain"
3746
+ ].includes(t))) {
3747
+ docBoost = 0.05;
3748
+ }
3749
+ const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
2936
3750
  if (hybridScore >= minScore || bm25Score > 0.3) {
2937
3751
  results.push({
2938
3752
  filepath,
@@ -2942,10 +3756,7 @@ class TypeScriptModule {
2942
3756
  context: {
2943
3757
  semanticScore,
2944
3758
  bm25Score,
2945
- pathBoost,
2946
- fileTypeBoost,
2947
- chunkTypeBoost,
2948
- exportBoost
3759
+ docBoost
2949
3760
  }
2950
3761
  });
2951
3762
  }
@@ -2953,84 +3764,14 @@ class TypeScriptModule {
2953
3764
  results.sort((a, b) => b.score - a.score);
2954
3765
  return results.slice(0, topK);
2955
3766
  }
2956
- extractReferences(content, filepath) {
2957
- const references = [];
2958
- const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
2959
- const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
2960
- let match;
2961
- while ((match = importRegex.exec(content)) !== null) {
2962
- const importPath = match[1];
2963
- if (importPath.startsWith(".")) {
2964
- const dir = path8.dirname(filepath);
2965
- const resolved = path8.normalize(path8.join(dir, importPath));
2966
- references.push(resolved);
2967
- }
2968
- }
2969
- while ((match = requireRegex.exec(content)) !== null) {
2970
- const importPath = match[1];
2971
- if (importPath.startsWith(".")) {
2972
- const dir = path8.dirname(filepath);
2973
- const resolved = path8.normalize(path8.join(dir, importPath));
2974
- references.push(resolved);
2975
- }
2976
- }
2977
- return references;
2978
- }
2979
3767
  }
2980
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2981
- var init_typescript = __esm(() => {
3768
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
3769
+ var init_markdown = __esm(() => {
2982
3770
  init_embeddings();
3771
+ init_services();
2983
3772
  init_config2();
2984
- init_parseCode();
2985
3773
  init_storage();
2986
- init_keywords();
2987
- init_keywords();
2988
- IMPLEMENTATION_TERMS = [
2989
- "function",
2990
- "method",
2991
- "class",
2992
- "interface",
2993
- "implement",
2994
- "implementation",
2995
- "endpoint",
2996
- "route",
2997
- "handler",
2998
- "controller",
2999
- "module",
3000
- "code"
3001
- ];
3002
- DOCUMENTATION_TERMS = [
3003
- "documentation",
3004
- "docs",
3005
- "guide",
3006
- "tutorial",
3007
- "readme",
3008
- "how",
3009
- "what",
3010
- "why",
3011
- "explain",
3012
- "overview",
3013
- "getting",
3014
- "started",
3015
- "requirements",
3016
- "setup",
3017
- "install",
3018
- "configure",
3019
- "configuration"
3020
- ];
3021
- SOURCE_CODE_EXTENSIONS = [
3022
- ".ts",
3023
- ".tsx",
3024
- ".js",
3025
- ".jsx",
3026
- ".mjs",
3027
- ".cjs",
3028
- ".py",
3029
- ".go",
3030
- ".rs",
3031
- ".java"
3032
- ];
3033
- DOC_EXTENSIONS = [".md", ".txt", ".rst"];
3774
+ MARKDOWN_EXTENSIONS = [".md", ".txt"];
3034
3775
  });
3035
3776
 
3036
3777
  // src/modules/registry.ts
@@ -3055,8 +3796,12 @@ class ModuleRegistryImpl {
3055
3796
  async function registerBuiltInModules() {
3056
3797
  const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
3057
3798
  const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
3799
+ const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
3800
+ const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
3058
3801
  registry.register(new CoreModule2);
3059
3802
  registry.register(new TypeScriptModule2);
3803
+ registry.register(new JsonModule2);
3804
+ registry.register(new MarkdownModule2);
3060
3805
  }
3061
3806
  var registry;
3062
3807
  var init_registry = __esm(() => {
@@ -3064,13 +3809,13 @@ var init_registry = __esm(() => {
3064
3809
  });
3065
3810
 
3066
3811
  // src/infrastructure/introspection/projectDetector.ts
3067
- import * as path9 from "path";
3812
+ import * as path12 from "path";
3068
3813
  import * as fs4 from "fs/promises";
3069
3814
  async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3070
3815
  if (depth > MAX_SCAN_DEPTH)
3071
3816
  return [];
3072
3817
  const results = [];
3073
- const fullDir = currentDir ? path9.join(rootDir, currentDir) : rootDir;
3818
+ const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
3074
3819
  try {
3075
3820
  const entries = await fs4.readdir(fullDir, { withFileTypes: true });
3076
3821
  const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
@@ -3093,10 +3838,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3093
3838
  }
3094
3839
  async function parsePackageJson(rootDir, relativePath) {
3095
3840
  try {
3096
- const packageJsonPath = path9.join(rootDir, relativePath, "package.json");
3841
+ const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
3097
3842
  const content = await fs4.readFile(packageJsonPath, "utf-8");
3098
3843
  const pkg = JSON.parse(content);
3099
- const name = pkg.name || path9.basename(relativePath);
3844
+ const name = pkg.name || path12.basename(relativePath);
3100
3845
  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
3101
3846
  let type = "unknown";
3102
3847
  if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
@@ -3141,7 +3886,7 @@ async function detectProjectStructure(rootDir) {
3141
3886
  for (const pattern of monorepoPatterns) {
3142
3887
  if (!dirNames.includes(pattern))
3143
3888
  continue;
3144
- const patternDir = path9.join(rootDir, pattern);
3889
+ const patternDir = path12.join(rootDir, pattern);
3145
3890
  try {
3146
3891
  const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
3147
3892
  for (const subDir of subDirs) {
@@ -3172,7 +3917,7 @@ async function detectProjectStructure(rootDir) {
3172
3917
  }
3173
3918
  let rootType = "unknown";
3174
3919
  try {
3175
- const rootPkgPath = path9.join(rootDir, "package.json");
3920
+ const rootPkgPath = path12.join(rootDir, "package.json");
3176
3921
  const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
3177
3922
  if (rootPkg.workspaces)
3178
3923
  isMonorepo = true;
@@ -3212,7 +3957,7 @@ var init_projectDetector = __esm(() => {
3212
3957
  });
3213
3958
 
3214
3959
  // src/infrastructure/introspection/IntrospectionIndex.ts
3215
- import * as path10 from "path";
3960
+ import * as path13 from "path";
3216
3961
  import * as fs5 from "fs/promises";
3217
3962
 
3218
3963
  class IntrospectionIndex {
@@ -3226,7 +3971,7 @@ class IntrospectionIndex {
3226
3971
  async initialize() {
3227
3972
  this.structure = await detectProjectStructure(this.rootDir);
3228
3973
  try {
3229
- const configPath = path10.join(this.rootDir, ".raggrep", "config.json");
3974
+ const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
3230
3975
  const configContent = await fs5.readFile(configPath, "utf-8");
3231
3976
  const config = JSON.parse(configContent);
3232
3977
  this.config = config.introspection || {};
@@ -3266,28 +4011,28 @@ class IntrospectionIndex {
3266
4011
  }
3267
4012
  }
3268
4013
  async save(config) {
3269
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
4014
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3270
4015
  await fs5.mkdir(introDir, { recursive: true });
3271
- const projectPath = path10.join(introDir, "_project.json");
4016
+ const projectPath = path13.join(introDir, "_project.json");
3272
4017
  await fs5.writeFile(projectPath, JSON.stringify({
3273
4018
  version: "1.0.0",
3274
4019
  lastUpdated: new Date().toISOString(),
3275
4020
  structure: this.structure
3276
4021
  }, null, 2));
3277
4022
  for (const [filepath, intro] of this.files) {
3278
- const introFilePath = path10.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
3279
- await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
4023
+ const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
4024
+ await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
3280
4025
  await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
3281
4026
  }
3282
4027
  }
3283
4028
  async load(config) {
3284
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
4029
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3285
4030
  try {
3286
- const projectPath = path10.join(introDir, "_project.json");
4031
+ const projectPath = path13.join(introDir, "_project.json");
3287
4032
  const projectContent = await fs5.readFile(projectPath, "utf-8");
3288
4033
  const projectData = JSON.parse(projectContent);
3289
4034
  this.structure = projectData.structure;
3290
- await this.loadFilesRecursive(path10.join(introDir, "files"), "");
4035
+ await this.loadFilesRecursive(path13.join(introDir, "files"), "");
3291
4036
  } catch {
3292
4037
  this.structure = null;
3293
4038
  this.files.clear();
@@ -3297,7 +4042,7 @@ class IntrospectionIndex {
3297
4042
  try {
3298
4043
  const entries = await fs5.readdir(basePath, { withFileTypes: true });
3299
4044
  for (const entry of entries) {
3300
- const entryPath = path10.join(basePath, entry.name);
4045
+ const entryPath = path13.join(basePath, entry.name);
3301
4046
  const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
3302
4047
  if (entry.isDirectory()) {
3303
4048
  await this.loadFilesRecursive(entryPath, relativePath);
@@ -3328,7 +4073,7 @@ var init_introspection2 = __esm(() => {
3328
4073
 
3329
4074
  // src/app/indexer/watcher.ts
3330
4075
  import { watch } from "chokidar";
3331
- import * as path11 from "path";
4076
+ import * as path14 from "path";
3332
4077
  async function watchDirectory(rootDir, options = {}) {
3333
4078
  const {
3334
4079
  debounceMs = DEFAULT_DEBOUNCE_MS,
@@ -3339,7 +4084,7 @@ async function watchDirectory(rootDir, options = {}) {
3339
4084
  onFileChange,
3340
4085
  onError
3341
4086
  } = options;
3342
- rootDir = path11.resolve(rootDir);
4087
+ rootDir = path14.resolve(rootDir);
3343
4088
  const config = await loadConfig(rootDir);
3344
4089
  const indexLocation = getIndexLocation(rootDir);
3345
4090
  const validExtensions = new Set(config.extensions);
@@ -3349,7 +4094,7 @@ async function watchDirectory(rootDir, options = {}) {
3349
4094
  "**/.git/**"
3350
4095
  ];
3351
4096
  function shouldWatchFile(filepath) {
3352
- const ext = path11.extname(filepath);
4097
+ const ext = path14.extname(filepath);
3353
4098
  return validExtensions.has(ext);
3354
4099
  }
3355
4100
  let isRunning = true;
@@ -3431,7 +4176,7 @@ async function watchDirectory(rootDir, options = {}) {
3431
4176
  function handleFileEvent(event, filepath) {
3432
4177
  if (!isRunning)
3433
4178
  return;
3434
- const relativePath = path11.relative(rootDir, filepath);
4179
+ const relativePath = path14.relative(rootDir, filepath);
3435
4180
  if (!shouldWatchFile(filepath)) {
3436
4181
  return;
3437
4182
  }
@@ -3502,6 +4247,7 @@ var init_watcher = __esm(() => {
3502
4247
  var exports_indexer = {};
3503
4248
  __export(exports_indexer, {
3504
4249
  watchDirectory: () => watchDirectory,
4250
+ resetIndex: () => resetIndex,
3505
4251
  indexDirectory: () => indexDirectory,
3506
4252
  getIndexStatus: () => getIndexStatus,
3507
4253
  ensureIndexFresh: () => ensureIndexFresh,
@@ -3509,46 +4255,56 @@ __export(exports_indexer, {
3509
4255
  });
3510
4256
  import { glob } from "glob";
3511
4257
  import * as fs6 from "fs/promises";
3512
- import * as path12 from "path";
4258
+ import * as path15 from "path";
4259
+ async function parallelMap(items, processor, concurrency) {
4260
+ const results = new Array(items.length);
4261
+ let nextIndex = 0;
4262
+ async function worker() {
4263
+ while (nextIndex < items.length) {
4264
+ const index = nextIndex++;
4265
+ const item = items[index];
4266
+ try {
4267
+ const value = await processor(item, index);
4268
+ results[index] = { success: true, value };
4269
+ } catch (error) {
4270
+ results[index] = { success: false, error };
4271
+ }
4272
+ }
4273
+ }
4274
+ const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
4275
+ await Promise.all(workers);
4276
+ return results;
4277
+ }
3513
4278
  async function indexDirectory(rootDir, options = {}) {
3514
4279
  const verbose = options.verbose ?? false;
3515
4280
  const quiet = options.quiet ?? false;
3516
- rootDir = path12.resolve(rootDir);
4281
+ const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
4282
+ const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
4283
+ rootDir = path15.resolve(rootDir);
3517
4284
  const location = getIndexLocation(rootDir);
3518
- if (!quiet) {
3519
- console.log(`Indexing directory: ${rootDir}`);
3520
- console.log(`Index location: ${location.indexDir}`);
3521
- }
4285
+ logger.info(`Indexing directory: ${rootDir}`);
4286
+ logger.info(`Index location: ${location.indexDir}`);
4287
+ logger.debug(`Concurrency: ${concurrency}`);
3522
4288
  const config = await loadConfig(rootDir);
3523
4289
  const introspection = new IntrospectionIndex(rootDir);
3524
4290
  await introspection.initialize();
3525
- if (verbose) {
3526
- const structure = introspection.getStructure();
3527
- if (structure?.isMonorepo) {
3528
- console.log(`Detected monorepo with ${structure.projects.length} projects`);
3529
- }
4291
+ const structure = introspection.getStructure();
4292
+ if (structure?.isMonorepo) {
4293
+ logger.debug(`Detected monorepo with ${structure.projects.length} projects`);
3530
4294
  }
3531
4295
  await registerBuiltInModules();
3532
4296
  const enabledModules = registry.getEnabled(config);
3533
4297
  if (enabledModules.length === 0) {
3534
- if (!quiet) {
3535
- console.log("No modules enabled. Check your configuration.");
3536
- }
4298
+ logger.info("No modules enabled. Check your configuration.");
3537
4299
  return [];
3538
4300
  }
3539
- if (!quiet) {
3540
- console.log(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
3541
- }
4301
+ logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
3542
4302
  const files = await findFiles(rootDir, config);
3543
- if (!quiet) {
3544
- console.log(`Found ${files.length} files to index`);
3545
- }
4303
+ logger.info(`Found ${files.length} files to index`);
3546
4304
  const results = [];
3547
4305
  for (const module of enabledModules) {
3548
- if (!quiet) {
3549
- console.log(`
4306
+ logger.info(`
3550
4307
  [${module.name}] Starting indexing...`);
3551
- }
3552
4308
  const moduleConfig = getModuleConfig(config, module.id);
3553
4309
  if (module.initialize && moduleConfig) {
3554
4310
  const configWithOverrides = { ...moduleConfig };
@@ -3558,32 +4314,32 @@ async function indexDirectory(rootDir, options = {}) {
3558
4314
  embeddingModel: options.model
3559
4315
  };
3560
4316
  }
4317
+ configWithOverrides.options = {
4318
+ ...configWithOverrides.options,
4319
+ logger
4320
+ };
3561
4321
  await module.initialize(configWithOverrides);
3562
4322
  }
3563
- const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
4323
+ const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
3564
4324
  results.push(result);
3565
4325
  if (module.finalize) {
3566
- if (!quiet) {
3567
- console.log(`[${module.name}] Building secondary indexes...`);
3568
- }
4326
+ logger.info(`[${module.name}] Building secondary indexes...`);
3569
4327
  const ctx = {
3570
4328
  rootDir,
3571
4329
  config,
3572
4330
  readFile: async (filepath) => {
3573
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4331
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3574
4332
  return fs6.readFile(fullPath, "utf-8");
3575
4333
  },
3576
4334
  getFileStats: async (filepath) => {
3577
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4335
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3578
4336
  const stats = await fs6.stat(fullPath);
3579
4337
  return { lastModified: stats.mtime.toISOString() };
3580
4338
  }
3581
4339
  };
3582
4340
  await module.finalize(ctx);
3583
4341
  }
3584
- if (!quiet) {
3585
- console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
3586
- }
4342
+ logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
3587
4343
  }
3588
4344
  await introspection.save(config);
3589
4345
  await updateGlobalManifest(rootDir, enabledModules, config);
@@ -3606,28 +4362,37 @@ async function deleteIndex(rootDir) {
3606
4362
  await fs6.rm(indexDir, { recursive: true, force: true });
3607
4363
  } catch {}
3608
4364
  }
4365
+ async function resetIndex(rootDir) {
4366
+ rootDir = path15.resolve(rootDir);
4367
+ const status = await getIndexStatus(rootDir);
4368
+ if (!status.exists) {
4369
+ throw new Error(`No index found for ${rootDir}`);
4370
+ }
4371
+ await deleteIndex(rootDir);
4372
+ return {
4373
+ success: true,
4374
+ indexDir: status.indexDir
4375
+ };
4376
+ }
3609
4377
  async function ensureIndexFresh(rootDir, options = {}) {
3610
4378
  const verbose = options.verbose ?? false;
3611
4379
  const quiet = options.quiet ?? false;
3612
- rootDir = path12.resolve(rootDir);
4380
+ const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
4381
+ rootDir = path15.resolve(rootDir);
3613
4382
  const status = await getIndexStatus(rootDir);
3614
4383
  if (!status.exists) {
3615
- if (!quiet) {
3616
- console.log(`No index found. Creating index...
4384
+ logger.info(`No index found. Creating index...
3617
4385
  `);
3618
- }
3619
- const results = await indexDirectory(rootDir, { ...options, quiet });
4386
+ const results = await indexDirectory(rootDir, { ...options, logger });
3620
4387
  const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
3621
4388
  return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
3622
4389
  }
3623
4390
  const versionCompatible = await isIndexVersionCompatible(rootDir);
3624
4391
  if (!versionCompatible) {
3625
- if (!quiet) {
3626
- console.log(`Index version incompatible. Rebuilding...
4392
+ logger.info(`Index version incompatible. Rebuilding...
3627
4393
  `);
3628
- }
3629
4394
  await deleteIndex(rootDir);
3630
- const results = await indexDirectory(rootDir, { ...options, quiet });
4395
+ const results = await indexDirectory(rootDir, { ...options, logger });
3631
4396
  const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
3632
4397
  return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
3633
4398
  }
@@ -3640,7 +4405,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3640
4405
  const introspection = new IntrospectionIndex(rootDir);
3641
4406
  await introspection.initialize();
3642
4407
  const currentFiles = await findFiles(rootDir, config);
3643
- const currentFileSet = new Set(currentFiles.map((f) => path12.relative(rootDir, f)));
4408
+ const currentFileSet = new Set(currentFiles.map((f) => path15.relative(rootDir, f)));
3644
4409
  let totalIndexed = 0;
3645
4410
  let totalRemoved = 0;
3646
4411
  let totalUnchanged = 0;
@@ -3654,6 +4419,10 @@ async function ensureIndexFresh(rootDir, options = {}) {
3654
4419
  embeddingModel: options.model
3655
4420
  };
3656
4421
  }
4422
+ configWithOverrides.options = {
4423
+ ...configWithOverrides.options,
4424
+ logger
4425
+ };
3657
4426
  await module.initialize(configWithOverrides);
3658
4427
  }
3659
4428
  const manifest = await loadModuleManifest(rootDir, module.id, config);
@@ -3665,14 +4434,12 @@ async function ensureIndexFresh(rootDir, options = {}) {
3665
4434
  }
3666
4435
  }
3667
4436
  for (const filepath of filesToRemove) {
3668
- if (verbose) {
3669
- console.log(` Removing stale: ${filepath}`);
3670
- }
3671
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4437
+ logger.debug(` Removing stale: ${filepath}`);
4438
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3672
4439
  try {
3673
4440
  await fs6.unlink(indexFilePath);
3674
4441
  } catch {}
3675
- const symbolicFilePath = path12.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4442
+ const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3676
4443
  try {
3677
4444
  await fs6.unlink(symbolicFilePath);
3678
4445
  } catch {}
@@ -3683,18 +4450,21 @@ async function ensureIndexFresh(rootDir, options = {}) {
3683
4450
  rootDir,
3684
4451
  config,
3685
4452
  readFile: async (filepath) => {
3686
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4453
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3687
4454
  return fs6.readFile(fullPath, "utf-8");
3688
4455
  },
3689
4456
  getFileStats: async (filepath) => {
3690
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4457
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3691
4458
  const stats = await fs6.stat(fullPath);
3692
4459
  return { lastModified: stats.mtime.toISOString() };
3693
4460
  },
3694
4461
  getIntrospection: (filepath) => introspection.getFile(filepath)
3695
4462
  };
3696
- for (const filepath of currentFiles) {
3697
- const relativePath = path12.relative(rootDir, filepath);
4463
+ const totalFiles = currentFiles.length;
4464
+ for (let i = 0;i < currentFiles.length; i++) {
4465
+ const filepath = currentFiles[i];
4466
+ const relativePath = path15.relative(rootDir, filepath);
4467
+ const progress = `[${i + 1}/${totalFiles}]`;
3698
4468
  try {
3699
4469
  const stats = await fs6.stat(filepath);
3700
4470
  const lastModified = stats.mtime.toISOString();
@@ -3703,9 +4473,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3703
4473
  totalUnchanged++;
3704
4474
  continue;
3705
4475
  }
3706
- if (verbose) {
3707
- console.log(` Indexing: ${relativePath}`);
3708
- }
4476
+ logger.progress(` ${progress} Indexing: ${relativePath}`);
3709
4477
  const content = await fs6.readFile(filepath, "utf-8");
3710
4478
  introspection.addFile(relativePath, content);
3711
4479
  const fileIndex = await module.indexFile(relativePath, content, ctx);
@@ -3718,11 +4486,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
3718
4486
  totalIndexed++;
3719
4487
  }
3720
4488
  } catch (error) {
3721
- if (verbose) {
3722
- console.error(` Error indexing ${relativePath}:`, error);
3723
- }
4489
+ logger.clearProgress();
4490
+ logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
3724
4491
  }
3725
4492
  }
4493
+ logger.clearProgress();
3726
4494
  if (totalIndexed > 0 || totalRemoved > 0) {
3727
4495
  manifest.lastUpdated = new Date().toISOString();
3728
4496
  await writeModuleManifest(rootDir, module.id, manifest, config);
@@ -3746,7 +4514,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3746
4514
  unchanged: totalUnchanged
3747
4515
  };
3748
4516
  }
3749
- async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
4517
+ async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
3750
4518
  const result = {
3751
4519
  moduleId: module.id,
3752
4520
  indexed: 0,
@@ -3754,55 +4522,102 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3754
4522
  errors: 0
3755
4523
  };
3756
4524
  const manifest = await loadModuleManifest(rootDir, module.id, config);
4525
+ const indexPath = getModuleIndexPath(rootDir, module.id, config);
4526
+ const currentFileSet = new Set(files.map((f) => path15.relative(rootDir, f)));
4527
+ const filesToRemove = [];
4528
+ for (const filepath of Object.keys(manifest.files)) {
4529
+ if (!currentFileSet.has(filepath)) {
4530
+ filesToRemove.push(filepath);
4531
+ }
4532
+ }
4533
+ if (filesToRemove.length > 0) {
4534
+ logger.info(` Removing ${filesToRemove.length} stale entries...`);
4535
+ for (const filepath of filesToRemove) {
4536
+ logger.debug(` Removing: ${filepath}`);
4537
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4538
+ try {
4539
+ await fs6.unlink(indexFilePath);
4540
+ } catch {}
4541
+ const symbolicFilePath = path15.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4542
+ try {
4543
+ await fs6.unlink(symbolicFilePath);
4544
+ } catch {}
4545
+ delete manifest.files[filepath];
4546
+ }
4547
+ await cleanupEmptyDirectories(indexPath);
4548
+ }
3757
4549
  const ctx = {
3758
4550
  rootDir,
3759
4551
  config,
3760
4552
  readFile: async (filepath) => {
3761
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4553
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3762
4554
  return fs6.readFile(fullPath, "utf-8");
3763
4555
  },
3764
4556
  getFileStats: async (filepath) => {
3765
- const fullPath = path12.isAbsolute(filepath) ? filepath : path12.join(rootDir, filepath);
4557
+ const fullPath = path15.isAbsolute(filepath) ? filepath : path15.join(rootDir, filepath);
3766
4558
  const stats = await fs6.stat(fullPath);
3767
4559
  return { lastModified: stats.mtime.toISOString() };
3768
4560
  },
3769
4561
  getIntrospection: (filepath) => introspection.getFile(filepath)
3770
4562
  };
3771
- for (const filepath of files) {
3772
- const relativePath = path12.relative(rootDir, filepath);
4563
+ const totalFiles = files.length;
4564
+ let completedCount = 0;
4565
+ const processFile = async (filepath, _index) => {
4566
+ const relativePath = path15.relative(rootDir, filepath);
3773
4567
  try {
3774
4568
  const stats = await fs6.stat(filepath);
3775
4569
  const lastModified = stats.mtime.toISOString();
3776
4570
  const existingEntry = manifest.files[relativePath];
3777
4571
  if (existingEntry && existingEntry.lastModified === lastModified) {
3778
- if (verbose) {
3779
- console.log(` Skipped ${relativePath} (unchanged)`);
3780
- }
3781
- result.skipped++;
3782
- continue;
4572
+ completedCount++;
4573
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
4574
+ return { relativePath, status: "skipped" };
3783
4575
  }
3784
4576
  const content = await fs6.readFile(filepath, "utf-8");
3785
4577
  introspection.addFile(relativePath, content);
3786
- if (verbose) {
3787
- console.log(` Processing ${relativePath}...`);
3788
- }
4578
+ completedCount++;
4579
+ logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
3789
4580
  const fileIndex = await module.indexFile(relativePath, content, ctx);
3790
4581
  if (!fileIndex) {
3791
- if (verbose) {
3792
- console.log(` Skipped ${relativePath} (no chunks)`);
3793
- }
3794
- result.skipped++;
3795
- continue;
4582
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
4583
+ return { relativePath, status: "skipped" };
3796
4584
  }
3797
4585
  await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
3798
- manifest.files[relativePath] = {
4586
+ return {
4587
+ relativePath,
4588
+ status: "indexed",
3799
4589
  lastModified,
3800
4590
  chunkCount: fileIndex.chunks.length
3801
4591
  };
3802
- result.indexed++;
3803
4592
  } catch (error) {
3804
- console.error(` Error indexing ${relativePath}:`, error);
4593
+ completedCount++;
4594
+ return { relativePath, status: "error", error };
4595
+ }
4596
+ };
4597
+ logger.debug(` Using concurrency: ${concurrency}`);
4598
+ const results = await parallelMap(files, processFile, concurrency);
4599
+ logger.clearProgress();
4600
+ for (const item of results) {
4601
+ if (!item.success) {
3805
4602
  result.errors++;
4603
+ continue;
4604
+ }
4605
+ const fileResult = item.value;
4606
+ switch (fileResult.status) {
4607
+ case "indexed":
4608
+ manifest.files[fileResult.relativePath] = {
4609
+ lastModified: fileResult.lastModified,
4610
+ chunkCount: fileResult.chunkCount
4611
+ };
4612
+ result.indexed++;
4613
+ break;
4614
+ case "skipped":
4615
+ result.skipped++;
4616
+ break;
4617
+ case "error":
4618
+ logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
4619
+ result.errors++;
4620
+ break;
3806
4621
  }
3807
4622
  }
3808
4623
  manifest.lastUpdated = new Date().toISOString();
@@ -3839,13 +4654,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
3839
4654
  }
3840
4655
  async function writeModuleManifest(rootDir, moduleId, manifest, config) {
3841
4656
  const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
3842
- await fs6.mkdir(path12.dirname(manifestPath), { recursive: true });
4657
+ await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
3843
4658
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
3844
4659
  }
3845
4660
  async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
3846
4661
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
3847
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3848
- await fs6.mkdir(path12.dirname(indexFilePath), { recursive: true });
4662
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4663
+ await fs6.mkdir(path15.dirname(indexFilePath), { recursive: true });
3849
4664
  await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
3850
4665
  }
3851
4666
  async function updateGlobalManifest(rootDir, modules, config) {
@@ -3855,31 +4670,32 @@ async function updateGlobalManifest(rootDir, modules, config) {
3855
4670
  lastUpdated: new Date().toISOString(),
3856
4671
  modules: modules.map((m) => m.id)
3857
4672
  };
3858
- await fs6.mkdir(path12.dirname(manifestPath), { recursive: true });
4673
+ await fs6.mkdir(path15.dirname(manifestPath), { recursive: true });
3859
4674
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
3860
4675
  }
3861
4676
  async function cleanupIndex(rootDir, options = {}) {
3862
4677
  const verbose = options.verbose ?? false;
3863
- rootDir = path12.resolve(rootDir);
3864
- console.log(`Cleaning up index in: ${rootDir}`);
4678
+ const logger = options.logger ?? createLogger({ verbose });
4679
+ rootDir = path15.resolve(rootDir);
4680
+ logger.info(`Cleaning up index in: ${rootDir}`);
3865
4681
  const config = await loadConfig(rootDir);
3866
4682
  await registerBuiltInModules();
3867
4683
  const enabledModules = registry.getEnabled(config);
3868
4684
  if (enabledModules.length === 0) {
3869
- console.log("No modules enabled.");
4685
+ logger.info("No modules enabled.");
3870
4686
  return [];
3871
4687
  }
3872
4688
  const results = [];
3873
4689
  for (const module of enabledModules) {
3874
- console.log(`
4690
+ logger.info(`
3875
4691
  [${module.name}] Checking for stale entries...`);
3876
- const result = await cleanupModuleIndex(rootDir, module.id, config, verbose);
4692
+ const result = await cleanupModuleIndex(rootDir, module.id, config, logger);
3877
4693
  results.push(result);
3878
- console.log(`[${module.name}] Removed ${result.removed} stale entries, kept ${result.kept} valid entries`);
4694
+ logger.info(`[${module.name}] Removed ${result.removed} stale entries, kept ${result.kept} valid entries`);
3879
4695
  }
3880
4696
  return results;
3881
4697
  }
3882
- async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
4698
+ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
3883
4699
  const result = {
3884
4700
  moduleId,
3885
4701
  removed: 0,
@@ -3890,7 +4706,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
3890
4706
  const filesToRemove = [];
3891
4707
  const updatedFiles = {};
3892
4708
  for (const [filepath, entry] of Object.entries(manifest.files)) {
3893
- const fullPath = path12.join(rootDir, filepath);
4709
+ const fullPath = path15.join(rootDir, filepath);
3894
4710
  try {
3895
4711
  await fs6.access(fullPath);
3896
4712
  updatedFiles[filepath] = entry;
@@ -3898,13 +4714,11 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
3898
4714
  } catch {
3899
4715
  filesToRemove.push(filepath);
3900
4716
  result.removed++;
3901
- if (verbose) {
3902
- console.log(` Removing stale entry: ${filepath}`);
3903
- }
4717
+ logger.debug(` Removing stale entry: ${filepath}`);
3904
4718
  }
3905
4719
  }
3906
4720
  for (const filepath of filesToRemove) {
3907
- const indexFilePath = path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4721
+ const indexFilePath = path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3908
4722
  try {
3909
4723
  await fs6.unlink(indexFilePath);
3910
4724
  } catch {}
@@ -3920,7 +4734,7 @@ async function cleanupEmptyDirectories(dir) {
3920
4734
  const entries = await fs6.readdir(dir, { withFileTypes: true });
3921
4735
  for (const entry of entries) {
3922
4736
  if (entry.isDirectory()) {
3923
- const subDir = path12.join(dir, entry.name);
4737
+ const subDir = path15.join(dir, entry.name);
3924
4738
  await cleanupEmptyDirectories(subDir);
3925
4739
  }
3926
4740
  }
@@ -3935,7 +4749,7 @@ async function cleanupEmptyDirectories(dir) {
3935
4749
  }
3936
4750
  }
3937
4751
  async function getIndexStatus(rootDir) {
3938
- rootDir = path12.resolve(rootDir);
4752
+ rootDir = path15.resolve(rootDir);
3939
4753
  const config = await loadConfig(rootDir);
3940
4754
  const location = getIndexLocation(rootDir);
3941
4755
  const indexDir = location.indexDir;
@@ -3971,7 +4785,7 @@ async function getIndexStatus(rootDir) {
3971
4785
  }
3972
4786
  } catch {
3973
4787
  try {
3974
- const entries = await fs6.readdir(path12.join(indexDir, "index"));
4788
+ const entries = await fs6.readdir(path15.join(indexDir, "index"));
3975
4789
  if (entries.length > 0) {
3976
4790
  status.exists = true;
3977
4791
  for (const entry of entries) {
@@ -3991,11 +4805,12 @@ async function getIndexStatus(rootDir) {
3991
4805
  }
3992
4806
  return status;
3993
4807
  }
3994
- var INDEX_SCHEMA_VERSION = "1.0.0";
4808
+ var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
3995
4809
  var init_indexer = __esm(() => {
3996
4810
  init_config2();
3997
4811
  init_registry();
3998
4812
  init_introspection2();
4813
+ init_logger();
3999
4814
  init_watcher();
4000
4815
  });
4001
4816
 
@@ -4011,9 +4826,9 @@ __export(exports_search, {
4011
4826
  formatSearchResults: () => formatSearchResults
4012
4827
  });
4013
4828
  import * as fs7 from "fs/promises";
4014
- import * as path13 from "path";
4829
+ import * as path16 from "path";
4015
4830
  async function search(rootDir, query, options = {}) {
4016
- rootDir = path13.resolve(rootDir);
4831
+ rootDir = path16.resolve(rootDir);
4017
4832
  const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
4018
4833
  if (ensureFresh) {
4019
4834
  await ensureIndexFresh(rootDir, { quiet: true });
@@ -4058,7 +4873,7 @@ function createSearchContext(rootDir, moduleId, config) {
4058
4873
  config,
4059
4874
  loadFileIndex: async (filepath) => {
4060
4875
  const hasExtension = /\.[^./]+$/.test(filepath);
4061
- const indexFilePath = hasExtension ? path13.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path13.join(indexPath, filepath + ".json");
4876
+ const indexFilePath = hasExtension ? path16.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path16.join(indexPath, filepath + ".json");
4062
4877
  try {
4063
4878
  const content = await fs7.readFile(indexFilePath, "utf-8");
4064
4879
  return JSON.parse(content);
@@ -4070,7 +4885,7 @@ function createSearchContext(rootDir, moduleId, config) {
4070
4885
  const files = [];
4071
4886
  await traverseDirectory(indexPath, files, indexPath);
4072
4887
  return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
4073
- const relative4 = path13.relative(indexPath, f);
4888
+ const relative4 = path16.relative(indexPath, f);
4074
4889
  return relative4.replace(/\.json$/, "");
4075
4890
  });
4076
4891
  }
@@ -4080,7 +4895,7 @@ async function traverseDirectory(dir, files, basePath) {
4080
4895
  try {
4081
4896
  const entries = await fs7.readdir(dir, { withFileTypes: true });
4082
4897
  for (const entry of entries) {
4083
- const fullPath = path13.join(dir, entry.name);
4898
+ const fullPath = path16.join(dir, entry.name);
4084
4899
  if (entry.isDirectory()) {
4085
4900
  await traverseDirectory(fullPath, files, basePath);
4086
4901
  } else if (entry.isFile()) {
@@ -4154,10 +4969,11 @@ var init_search = __esm(() => {
4154
4969
 
4155
4970
  // src/app/cli/main.ts
4156
4971
  init_embeddings();
4972
+ init_logger();
4157
4973
  // package.json
4158
4974
  var package_default = {
4159
4975
  name: "raggrep",
4160
- version: "0.3.0",
4976
+ version: "0.5.0",
4161
4977
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
4162
4978
  type: "module",
4163
4979
  main: "./dist/index.js",
@@ -4293,6 +5109,14 @@ function parseFlags(args2) {
4293
5109
  console.error("--type requires a file extension (e.g., ts, tsx, js)");
4294
5110
  process.exit(1);
4295
5111
  }
5112
+ } else if (arg === "--concurrency" || arg === "-c") {
5113
+ const c = parseInt(args2[++i], 10);
5114
+ if (!isNaN(c) && c > 0) {
5115
+ flags.concurrency = c;
5116
+ } else {
5117
+ console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
5118
+ process.exit(1);
5119
+ }
4296
5120
  } else if (!arg.startsWith("-")) {
4297
5121
  flags.remaining.push(arg);
4298
5122
  }
@@ -4312,10 +5136,11 @@ Usage:
4312
5136
  raggrep index [options]
4313
5137
 
4314
5138
  Options:
4315
- -w, --watch Watch for file changes and re-index automatically
4316
- -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
4317
- -v, --verbose Show detailed progress
4318
- -h, --help Show this help message
5139
+ -w, --watch Watch for file changes and re-index automatically
5140
+ -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
5141
+ -c, --concurrency <n> Number of files to process in parallel (default: 4)
5142
+ -v, --verbose Show detailed progress
5143
+ -h, --help Show this help message
4319
5144
 
4320
5145
  Available Models:
4321
5146
  ${models}
@@ -4326,18 +5151,22 @@ Examples:
4326
5151
  raggrep index
4327
5152
  raggrep index --watch
4328
5153
  raggrep index --model bge-small-en-v1.5
5154
+ raggrep index --concurrency 8
4329
5155
  raggrep index --verbose
4330
5156
  `);
4331
5157
  process.exit(0);
4332
5158
  }
4333
5159
  const { indexDirectory: indexDirectory2, watchDirectory: watchDirectory2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
5160
+ const logger = createInlineLogger({ verbose: flags.verbose });
4334
5161
  console.log("RAGgrep Indexer");
4335
5162
  console.log(`================
4336
5163
  `);
4337
5164
  try {
4338
5165
  const results = await indexDirectory2(process.cwd(), {
4339
5166
  model: flags.model,
4340
- verbose: flags.verbose
5167
+ verbose: flags.verbose,
5168
+ concurrency: flags.concurrency,
5169
+ logger
4341
5170
  });
4342
5171
  console.log(`
4343
5172
  ================`);
@@ -4422,9 +5251,11 @@ Examples:
4422
5251
  process.exit(1);
4423
5252
  }
4424
5253
  try {
5254
+ const silentLogger = createSilentLogger();
4425
5255
  const freshStats = await ensureIndexFresh2(process.cwd(), {
4426
5256
  model: flags.model,
4427
- quiet: true
5257
+ quiet: true,
5258
+ logger: silentLogger
4428
5259
  });
4429
5260
  console.log("RAGgrep Search");
4430
5261
  console.log(`==============
@@ -4457,44 +5288,37 @@ Examples:
4457
5288
  }
4458
5289
  break;
4459
5290
  }
4460
- case "cleanup": {
5291
+ case "reset": {
4461
5292
  if (flags.help) {
4462
5293
  console.log(`
4463
- raggrep cleanup - Remove stale index entries for deleted files
5294
+ raggrep reset - Clear the index for the current directory
4464
5295
 
4465
5296
  Usage:
4466
- raggrep cleanup [options]
5297
+ raggrep reset [options]
4467
5298
 
4468
5299
  Options:
4469
- -v, --verbose Show detailed progress
4470
5300
  -h, --help Show this help message
4471
5301
 
4472
5302
  Description:
4473
- Scans the index and removes entries for files that no longer exist.
4474
- Run this command after deleting files to clean up the index.
5303
+ Completely removes the index for the current directory.
5304
+ The next 'raggrep index' or 'raggrep query' will rebuild from scratch.
4475
5305
 
4476
5306
  Examples:
4477
- raggrep cleanup
4478
- raggrep cleanup --verbose
5307
+ raggrep reset
4479
5308
  `);
4480
5309
  process.exit(0);
4481
5310
  }
4482
- const { cleanupIndex: cleanupIndex2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
4483
- console.log("RAGgrep Cleanup");
4484
- console.log(`===============
4485
- `);
5311
+ const { resetIndex: resetIndex2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
4486
5312
  try {
4487
- const results = await cleanupIndex2(process.cwd(), {
4488
- verbose: flags.verbose
4489
- });
4490
- console.log(`
4491
- ===============`);
4492
- console.log("Summary:");
4493
- for (const result of results) {
4494
- console.log(` ${result.moduleId}: ${result.removed} removed, ${result.kept} kept`);
4495
- }
5313
+ const result = await resetIndex2(process.cwd());
5314
+ console.log("Index cleared successfully.");
5315
+ console.log(` Removed: ${result.indexDir}`);
4496
5316
  } catch (error) {
4497
- console.error("Error during cleanup:", error);
5317
+ if (error instanceof Error && error.message.includes("No index found")) {
5318
+ console.error("Error: No index found for this directory.");
5319
+ process.exit(1);
5320
+ }
5321
+ console.error("Error during reset:", error);
4498
5322
  process.exit(1);
4499
5323
  }
4500
5324
  break;
@@ -4573,7 +5397,7 @@ Commands:
4573
5397
  index Index the current directory
4574
5398
  query Search the indexed codebase
4575
5399
  status Show the current state of the index
4576
- cleanup Remove stale index entries for deleted files
5400
+ reset Clear the index for the current directory
4577
5401
 
4578
5402
  Options:
4579
5403
  -h, --help Show help for a command
@@ -4583,7 +5407,7 @@ Examples:
4583
5407
  raggrep index
4584
5408
  raggrep query "user login"
4585
5409
  raggrep status
4586
- raggrep cleanup
5410
+ raggrep reset
4587
5411
 
4588
5412
  Run 'raggrep <command> --help' for more information.
4589
5413
  `);
@@ -4595,4 +5419,4 @@ Run 'raggrep <command> --help' for more information.
4595
5419
  }
4596
5420
  main();
4597
5421
 
4598
- //# debugId=F7638DADE034B49B64756E2164756E21
5422
+ //# debugId=5CA623D9974ACF4364756E2164756E21