raggrep 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,4 +1,20 @@
1
+ import { createRequire } from "node:module";
2
+ var __create = Object.create;
3
+ var __getProtoOf = Object.getPrototypeOf;
1
4
  var __defProp = Object.defineProperty;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ var __toESM = (mod, isNodeMode, target) => {
8
+ target = mod != null ? __create(__getProtoOf(mod)) : {};
9
+ const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
10
+ for (let key of __getOwnPropNames(mod))
11
+ if (!__hasOwnProp.call(to, key))
12
+ __defProp(to, key, {
13
+ get: () => mod[key],
14
+ enumerable: true
15
+ });
16
+ return to;
17
+ };
2
18
  var __export = (target, all) => {
3
19
  for (var name in all)
4
20
  __defProp(target, name, {
@@ -9,6 +25,7 @@ var __export = (target, all) => {
9
25
  });
10
26
  };
11
27
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
28
+ var __require = /* @__PURE__ */ createRequire(import.meta.url);
12
29
  // src/domain/entities/searchResult.ts
13
30
  var DEFAULT_SEARCH_OPTIONS;
14
31
  var init_searchResult = __esm(() => {
@@ -39,6 +56,20 @@ function createDefaultConfig() {
39
56
  options: {
40
57
  embeddingModel: "all-MiniLM-L6-v2"
41
58
  }
59
+ },
60
+ {
61
+ id: "data/json",
62
+ enabled: true,
63
+ options: {
64
+ embeddingModel: "all-MiniLM-L6-v2"
65
+ }
66
+ },
67
+ {
68
+ id: "docs/markdown",
69
+ enabled: true,
70
+ options: {
71
+ embeddingModel: "all-MiniLM-L6-v2"
72
+ }
42
73
  }
43
74
  ]
44
75
  };
@@ -82,16 +113,18 @@ var init_config = __esm(() => {
82
113
  ".jsx",
83
114
  ".mjs",
84
115
  ".cjs",
116
+ ".mts",
117
+ ".cts",
118
+ ".json",
119
+ ".md",
85
120
  ".py",
86
121
  ".go",
87
122
  ".rs",
88
123
  ".java",
89
- ".json",
90
124
  ".yaml",
91
125
  ".yml",
92
126
  ".toml",
93
127
  ".sql",
94
- ".md",
95
128
  ".txt"
96
129
  ];
97
130
  });
@@ -1999,7 +2032,8 @@ class TransformersEmbeddingProvider {
1999
2032
  constructor(config) {
2000
2033
  this.config = {
2001
2034
  model: config?.model ?? "all-MiniLM-L6-v2",
2002
- showProgress: config?.showProgress ?? false
2035
+ showProgress: config?.showProgress ?? false,
2036
+ logger: config?.logger
2003
2037
  };
2004
2038
  }
2005
2039
  async initialize(config) {
@@ -2021,29 +2055,55 @@ class TransformersEmbeddingProvider {
2021
2055
  this.isInitializing = true;
2022
2056
  this.initPromise = (async () => {
2023
2057
  const modelId = EMBEDDING_MODELS2[this.config.model];
2024
- if (this.config.showProgress) {
2025
- console.log(`
2026
- Loading embedding model: ${this.config.model}`);
2027
- console.log(` Cache: ${CACHE_DIR}`);
2028
- }
2058
+ const logger = this.config.logger;
2059
+ const showProgress = this.config.showProgress || !!logger;
2060
+ const isCached = await isModelCached(this.config.model);
2061
+ let hasDownloads = false;
2029
2062
  try {
2030
2063
  this.pipeline = await pipeline("feature-extraction", modelId, {
2031
- progress_callback: this.config.showProgress ? (progress) => {
2064
+ progress_callback: showProgress && !isCached ? (progress) => {
2032
2065
  if (progress.status === "progress" && progress.file) {
2066
+ if (!hasDownloads) {
2067
+ hasDownloads = true;
2068
+ if (logger) {
2069
+ logger.info(`Downloading embedding model: ${this.config.model}`);
2070
+ } else {
2071
+ console.log(`
2072
+ Loading embedding model: ${this.config.model}`);
2073
+ console.log(` Cache: ${CACHE_DIR}`);
2074
+ }
2075
+ }
2033
2076
  const pct = progress.progress ? Math.round(progress.progress) : 0;
2034
- process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
2077
+ if (logger) {
2078
+ logger.progress(` Downloading ${progress.file}: ${pct}%`);
2079
+ } else {
2080
+ process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
2081
+ }
2035
2082
  } else if (progress.status === "done" && progress.file) {
2036
- process.stdout.write(`\r Downloaded ${progress.file}
2083
+ if (logger) {
2084
+ logger.clearProgress();
2085
+ logger.info(` Downloaded ${progress.file}`);
2086
+ } else if (hasDownloads) {
2087
+ process.stdout.write(`\r Downloaded ${progress.file}
2037
2088
  `);
2089
+ }
2038
2090
  }
2039
2091
  } : undefined
2040
2092
  });
2041
- if (this.config.showProgress) {
2042
- console.log(` Model ready.
2093
+ if (hasDownloads) {
2094
+ if (logger) {
2095
+ logger.clearProgress();
2096
+ logger.info(`Model ready: ${this.config.model}`);
2097
+ } else {
2098
+ console.log(` Model ready.
2043
2099
  `);
2100
+ }
2044
2101
  }
2045
2102
  } catch (error) {
2046
2103
  this.pipeline = null;
2104
+ if (logger) {
2105
+ logger.clearProgress();
2106
+ }
2047
2107
  throw new Error(`Failed to load embedding model: ${error}`);
2048
2108
  } finally {
2049
2109
  this.isInitializing = false;
@@ -2094,9 +2154,21 @@ class TransformersEmbeddingProvider {
2094
2154
  this.pipeline = null;
2095
2155
  }
2096
2156
  }
2157
+ async function isModelCached(model) {
2158
+ const modelId = EMBEDDING_MODELS2[model];
2159
+ const modelPath = path6.join(CACHE_DIR, modelId);
2160
+ try {
2161
+ const fs3 = await import("fs/promises");
2162
+ const onnxPath = path6.join(modelPath, "onnx", "model_quantized.onnx");
2163
+ await fs3.access(onnxPath);
2164
+ return true;
2165
+ } catch {
2166
+ return false;
2167
+ }
2168
+ }
2097
2169
  function configureEmbeddings(config) {
2098
2170
  const newConfig = { ...globalConfig, ...config };
2099
- if (newConfig.model !== globalConfig.model) {
2171
+ if (newConfig.model !== globalConfig.model || newConfig.logger !== globalConfig.logger) {
2100
2172
  globalProvider = null;
2101
2173
  }
2102
2174
  globalConfig = newConfig;
@@ -2132,7 +2204,8 @@ var init_transformersEmbedding = __esm(() => {
2132
2204
  };
2133
2205
  globalConfig = {
2134
2206
  model: "all-MiniLM-L6-v2",
2135
- showProgress: false
2207
+ showProgress: false,
2208
+ logger: undefined
2136
2209
  };
2137
2210
  });
2138
2211
 
@@ -2141,221 +2214,6 @@ var init_embeddings = __esm(() => {
2141
2214
  init_transformersEmbedding();
2142
2215
  });
2143
2216
 
2144
- // src/domain/services/similarity.ts
2145
- function cosineSimilarity(a, b) {
2146
- if (a.length !== b.length) {
2147
- throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
2148
- }
2149
- let dotProduct = 0;
2150
- let normA = 0;
2151
- let normB = 0;
2152
- for (let i = 0;i < a.length; i++) {
2153
- dotProduct += a[i] * b[i];
2154
- normA += a[i] * a[i];
2155
- normB += b[i] * b[i];
2156
- }
2157
- const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
2158
- if (magnitude === 0)
2159
- return 0;
2160
- return dotProduct / magnitude;
2161
- }
2162
-
2163
- // src/modules/language/typescript/parseCode.ts
2164
- import * as ts from "typescript";
2165
- function parseCode(content, filepath) {
2166
- const ext = filepath.split(".").pop()?.toLowerCase();
2167
- if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
2168
- return parseTypeScript(content, filepath);
2169
- }
2170
- return parseGenericCode(content);
2171
- }
2172
- function parseTypeScript(content, filepath) {
2173
- const chunks = [];
2174
- const lines = content.split(`
2175
- `);
2176
- const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
2177
- function getLineNumbers(node) {
2178
- const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
2179
- const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
2180
- return {
2181
- startLine: start.line + 1,
2182
- endLine: end.line + 1
2183
- };
2184
- }
2185
- function getNodeText(node) {
2186
- return node.getText(sourceFile);
2187
- }
2188
- function isExported(node) {
2189
- if (!ts.canHaveModifiers(node))
2190
- return false;
2191
- const modifiers = ts.getModifiers(node);
2192
- return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
2193
- }
2194
- function getJSDoc(node) {
2195
- const jsDocNodes = ts.getJSDocCommentsAndTags(node);
2196
- if (jsDocNodes.length === 0)
2197
- return;
2198
- return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
2199
- `);
2200
- }
2201
- function getFunctionName(node) {
2202
- if (ts.isFunctionDeclaration(node) && node.name) {
2203
- return node.name.text;
2204
- }
2205
- if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
2206
- return node.name.text;
2207
- }
2208
- if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
2209
- return node.name.text;
2210
- }
2211
- return;
2212
- }
2213
- function visit(node) {
2214
- const { startLine, endLine } = getLineNumbers(node);
2215
- if (ts.isFunctionDeclaration(node) && node.name) {
2216
- chunks.push({
2217
- content: getNodeText(node),
2218
- startLine,
2219
- endLine,
2220
- type: "function",
2221
- name: node.name.text,
2222
- isExported: isExported(node),
2223
- jsDoc: getJSDoc(node)
2224
- });
2225
- return;
2226
- }
2227
- if (ts.isVariableStatement(node)) {
2228
- for (const decl of node.declarationList.declarations) {
2229
- if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2230
- const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2231
- chunks.push({
2232
- content: getNodeText(node),
2233
- startLine,
2234
- endLine,
2235
- type: "function",
2236
- name,
2237
- isExported: isExported(node),
2238
- jsDoc: getJSDoc(node)
2239
- });
2240
- return;
2241
- }
2242
- }
2243
- }
2244
- if (ts.isClassDeclaration(node) && node.name) {
2245
- chunks.push({
2246
- content: getNodeText(node),
2247
- startLine,
2248
- endLine,
2249
- type: "class",
2250
- name: node.name.text,
2251
- isExported: isExported(node),
2252
- jsDoc: getJSDoc(node)
2253
- });
2254
- return;
2255
- }
2256
- if (ts.isInterfaceDeclaration(node)) {
2257
- chunks.push({
2258
- content: getNodeText(node),
2259
- startLine,
2260
- endLine,
2261
- type: "interface",
2262
- name: node.name.text,
2263
- isExported: isExported(node),
2264
- jsDoc: getJSDoc(node)
2265
- });
2266
- return;
2267
- }
2268
- if (ts.isTypeAliasDeclaration(node)) {
2269
- chunks.push({
2270
- content: getNodeText(node),
2271
- startLine,
2272
- endLine,
2273
- type: "type",
2274
- name: node.name.text,
2275
- isExported: isExported(node),
2276
- jsDoc: getJSDoc(node)
2277
- });
2278
- return;
2279
- }
2280
- if (ts.isEnumDeclaration(node)) {
2281
- chunks.push({
2282
- content: getNodeText(node),
2283
- startLine,
2284
- endLine,
2285
- type: "enum",
2286
- name: node.name.text,
2287
- isExported: isExported(node),
2288
- jsDoc: getJSDoc(node)
2289
- });
2290
- return;
2291
- }
2292
- if (ts.isVariableStatement(node) && isExported(node)) {
2293
- for (const decl of node.declarationList.declarations) {
2294
- if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2295
- continue;
2296
- }
2297
- const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2298
- chunks.push({
2299
- content: getNodeText(node),
2300
- startLine,
2301
- endLine,
2302
- type: "variable",
2303
- name,
2304
- isExported: true,
2305
- jsDoc: getJSDoc(node)
2306
- });
2307
- }
2308
- return;
2309
- }
2310
- ts.forEachChild(node, visit);
2311
- }
2312
- ts.forEachChild(sourceFile, visit);
2313
- if (chunks.length === 0) {
2314
- return parseGenericCode(content);
2315
- }
2316
- return chunks;
2317
- }
2318
- function parseGenericCode(content) {
2319
- const chunks = [];
2320
- const lines = content.split(`
2321
- `);
2322
- const CHUNK_SIZE = 30;
2323
- const OVERLAP = 5;
2324
- if (lines.length <= CHUNK_SIZE) {
2325
- return [
2326
- {
2327
- content,
2328
- startLine: 1,
2329
- endLine: lines.length,
2330
- type: "file"
2331
- }
2332
- ];
2333
- }
2334
- for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
2335
- const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
2336
- chunks.push({
2337
- content: lines.slice(i, endIdx).join(`
2338
- `),
2339
- startLine: i + 1,
2340
- endLine: endIdx,
2341
- type: "block"
2342
- });
2343
- if (endIdx >= lines.length)
2344
- break;
2345
- }
2346
- return chunks;
2347
- }
2348
- function generateChunkId(filepath, startLine, endLine) {
2349
- const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2350
- return `${safePath}-${startLine}-${endLine}`;
2351
- }
2352
- var init_parseCode = () => {};
2353
-
2354
- // src/infrastructure/storage/fileIndexStorage.ts
2355
- var init_fileIndexStorage = __esm(() => {
2356
- init_entities();
2357
- });
2358
-
2359
2217
  // src/domain/services/keywords.ts
2360
2218
  function extractKeywords(content, name, maxKeywords = 50) {
2361
2219
  const keywords = new Set;
@@ -2544,166 +2402,27 @@ var init_keywords = __esm(() => {
2544
2402
  };
2545
2403
  });
2546
2404
 
2547
- // src/infrastructure/storage/symbolicIndex.ts
2548
- import * as fs3 from "fs/promises";
2549
- import * as path7 from "path";
2550
-
2551
- class SymbolicIndex {
2552
- meta = null;
2553
- fileSummaries = new Map;
2554
- bm25Index = null;
2555
- symbolicPath;
2556
- moduleId;
2557
- constructor(indexDir, moduleId) {
2558
- this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
2559
- this.moduleId = moduleId;
2405
+ // src/domain/services/similarity.ts
2406
+ function cosineSimilarity(a, b) {
2407
+ if (a.length !== b.length) {
2408
+ throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
2560
2409
  }
2561
- async initialize() {
2562
- try {
2563
- await this.load();
2564
- } catch {
2565
- this.meta = {
2566
- version: "1.0.0",
2567
- lastUpdated: new Date().toISOString(),
2568
- moduleId: this.moduleId,
2569
- fileCount: 0,
2570
- bm25Data: {
2571
- avgDocLength: 0,
2572
- documentFrequencies: {},
2573
- totalDocs: 0
2574
- }
2575
- };
2576
- this.bm25Index = new BM25Index;
2577
- }
2578
- }
2579
- addFile(summary) {
2580
- this.fileSummaries.set(summary.filepath, summary);
2581
- }
2582
- removeFile(filepath) {
2583
- return this.fileSummaries.delete(filepath);
2584
- }
2585
- buildBM25Index() {
2586
- this.bm25Index = new BM25Index;
2587
- for (const [filepath, summary] of this.fileSummaries) {
2588
- const content = [
2589
- ...summary.keywords,
2590
- ...summary.exports,
2591
- ...extractPathKeywords(filepath)
2592
- ].join(" ");
2593
- this.bm25Index.addDocuments([{ id: filepath, content }]);
2594
- }
2595
- if (this.meta) {
2596
- this.meta.fileCount = this.fileSummaries.size;
2597
- this.meta.bm25Data.totalDocs = this.fileSummaries.size;
2598
- }
2599
- }
2600
- findCandidates(query, maxCandidates = 20) {
2601
- if (!this.bm25Index) {
2602
- return Array.from(this.fileSummaries.keys());
2603
- }
2604
- const results = this.bm25Index.search(query, maxCandidates);
2605
- return results.map((r) => r.id);
2606
- }
2607
- getAllFiles() {
2608
- return Array.from(this.fileSummaries.keys());
2609
- }
2610
- getFileSummary(filepath) {
2611
- return this.fileSummaries.get(filepath);
2612
- }
2613
- async save() {
2614
- if (!this.meta)
2615
- throw new Error("Index not initialized");
2616
- this.meta.lastUpdated = new Date().toISOString();
2617
- this.meta.fileCount = this.fileSummaries.size;
2618
- await fs3.mkdir(this.symbolicPath, { recursive: true });
2619
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2620
- await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2621
- for (const [filepath, summary] of this.fileSummaries) {
2622
- const summaryPath = this.getFileSummaryPath(filepath);
2623
- await fs3.mkdir(path7.dirname(summaryPath), { recursive: true });
2624
- await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
2625
- }
2626
- }
2627
- async load() {
2628
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2629
- const metaContent = await fs3.readFile(metaPath, "utf-8");
2630
- this.meta = JSON.parse(metaContent);
2631
- this.fileSummaries.clear();
2632
- await this.loadFileSummariesRecursive(this.symbolicPath);
2633
- this.buildBM25Index();
2634
- }
2635
- async loadFileSummariesRecursive(dir) {
2636
- try {
2637
- const entries = await fs3.readdir(dir, { withFileTypes: true });
2638
- for (const entry of entries) {
2639
- const fullPath = path7.join(dir, entry.name);
2640
- if (entry.isDirectory()) {
2641
- await this.loadFileSummariesRecursive(fullPath);
2642
- } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
2643
- try {
2644
- const content = await fs3.readFile(fullPath, "utf-8");
2645
- const summary = JSON.parse(content);
2646
- if (summary.filepath) {
2647
- this.fileSummaries.set(summary.filepath, summary);
2648
- }
2649
- } catch {}
2650
- }
2651
- }
2652
- } catch {}
2653
- }
2654
- getFileSummaryPath(filepath) {
2655
- const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2656
- return path7.join(this.symbolicPath, jsonPath);
2657
- }
2658
- async deleteFileSummary(filepath) {
2659
- try {
2660
- await fs3.unlink(this.getFileSummaryPath(filepath));
2661
- } catch {}
2662
- this.fileSummaries.delete(filepath);
2663
- }
2664
- async exists() {
2665
- try {
2666
- const metaPath = path7.join(this.symbolicPath, "_meta.json");
2667
- await fs3.access(metaPath);
2668
- return true;
2669
- } catch {
2670
- return false;
2671
- }
2672
- }
2673
- get size() {
2674
- return this.fileSummaries.size;
2675
- }
2676
- clear() {
2677
- this.fileSummaries.clear();
2678
- if (this.meta) {
2679
- this.meta.fileCount = 0;
2680
- this.meta.bm25Data = {
2681
- avgDocLength: 0,
2682
- documentFrequencies: {},
2683
- totalDocs: 0
2684
- };
2685
- }
2686
- this.bm25Index = new BM25Index;
2410
+ let dotProduct = 0;
2411
+ let normA = 0;
2412
+ let normB = 0;
2413
+ for (let i = 0;i < a.length; i++) {
2414
+ dotProduct += a[i] * b[i];
2415
+ normA += a[i] * a[i];
2416
+ normB += b[i] * b[i];
2687
2417
  }
2418
+ const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
2419
+ if (magnitude === 0)
2420
+ return 0;
2421
+ return dotProduct / magnitude;
2688
2422
  }
2689
- var init_symbolicIndex = __esm(() => {
2690
- init_keywords();
2691
- });
2692
-
2693
- // src/infrastructure/storage/index.ts
2694
- var init_storage = __esm(() => {
2695
- init_fileIndexStorage();
2696
- init_symbolicIndex();
2697
- });
2698
2423
 
2699
- // src/modules/language/typescript/index.ts
2700
- var exports_typescript = {};
2701
- __export(exports_typescript, {
2702
- TypeScriptModule: () => TypeScriptModule,
2703
- DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
2704
- DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2705
- });
2706
- import * as path8 from "path";
2424
+ // src/domain/services/queryIntent.ts
2425
+ import * as path7 from "path";
2707
2426
  function detectQueryIntent(queryTerms) {
2708
2427
  const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2709
2428
  const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
@@ -2715,10 +2434,20 @@ function detectQueryIntent(queryTerms) {
2715
2434
  }
2716
2435
  return "neutral";
2717
2436
  }
2437
+ function extractQueryTerms(query) {
2438
+ return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
2439
+ }
2440
+ function isSourceCodeFile(filepath) {
2441
+ const ext = path7.extname(filepath).toLowerCase();
2442
+ return SOURCE_CODE_EXTENSIONS.includes(ext);
2443
+ }
2444
+ function isDocFile(filepath) {
2445
+ const ext = path7.extname(filepath).toLowerCase();
2446
+ return DOC_EXTENSIONS.includes(ext);
2447
+ }
2718
2448
  function calculateFileTypeBoost(filepath, queryTerms) {
2719
- const ext = path8.extname(filepath).toLowerCase();
2720
- const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
2721
- const isDoc = DOC_EXTENSIONS.includes(ext);
2449
+ const isSourceCode = isSourceCodeFile(filepath);
2450
+ const isDoc = isDocFile(filepath);
2722
2451
  const intent = detectQueryIntent(queryTerms);
2723
2452
  if (intent === "implementation") {
2724
2453
  if (isSourceCode) {
@@ -2734,103 +2463,1110 @@ function calculateFileTypeBoost(filepath, queryTerms) {
2734
2463
  }
2735
2464
  return 0;
2736
2465
  }
2737
- function calculateChunkTypeBoost(chunk) {
2738
- switch (chunk.type) {
2739
- case "function":
2740
- return 0.05;
2741
- case "class":
2742
- case "interface":
2743
- return 0.04;
2744
- case "type":
2745
- case "enum":
2746
- return 0.03;
2747
- case "variable":
2748
- return 0.02;
2749
- case "file":
2750
- case "block":
2751
- default:
2752
- return 0;
2466
+ var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2467
+ var init_queryIntent = __esm(() => {
2468
+ IMPLEMENTATION_TERMS = [
2469
+ "function",
2470
+ "method",
2471
+ "class",
2472
+ "interface",
2473
+ "implement",
2474
+ "implementation",
2475
+ "endpoint",
2476
+ "route",
2477
+ "handler",
2478
+ "controller",
2479
+ "module",
2480
+ "code"
2481
+ ];
2482
+ DOCUMENTATION_TERMS = [
2483
+ "documentation",
2484
+ "docs",
2485
+ "guide",
2486
+ "tutorial",
2487
+ "readme",
2488
+ "how",
2489
+ "what",
2490
+ "why",
2491
+ "explain",
2492
+ "overview",
2493
+ "getting",
2494
+ "started",
2495
+ "requirements",
2496
+ "setup",
2497
+ "install",
2498
+ "configure",
2499
+ "configuration"
2500
+ ];
2501
+ SOURCE_CODE_EXTENSIONS = [
2502
+ ".ts",
2503
+ ".tsx",
2504
+ ".js",
2505
+ ".jsx",
2506
+ ".mjs",
2507
+ ".cjs",
2508
+ ".py",
2509
+ ".go",
2510
+ ".rs",
2511
+ ".java"
2512
+ ];
2513
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2514
+ });
2515
+
2516
+ // src/domain/services/chunking.ts
2517
+ function createLineBasedChunks(content, options = {}) {
2518
+ const {
2519
+ chunkSize = DEFAULT_CHUNK_SIZE,
2520
+ overlap = DEFAULT_OVERLAP,
2521
+ minLinesForMultipleChunks = chunkSize
2522
+ } = options;
2523
+ const lines = content.split(`
2524
+ `);
2525
+ const chunks = [];
2526
+ if (lines.length <= minLinesForMultipleChunks) {
2527
+ return [
2528
+ {
2529
+ content,
2530
+ startLine: 1,
2531
+ endLine: lines.length,
2532
+ type: "file"
2533
+ }
2534
+ ];
2535
+ }
2536
+ for (let i = 0;i < lines.length; i += chunkSize - overlap) {
2537
+ const endIdx = Math.min(i + chunkSize, lines.length);
2538
+ chunks.push({
2539
+ content: lines.slice(i, endIdx).join(`
2540
+ `),
2541
+ startLine: i + 1,
2542
+ endLine: endIdx,
2543
+ type: "block"
2544
+ });
2545
+ if (endIdx >= lines.length)
2546
+ break;
2547
+ }
2548
+ return chunks;
2549
+ }
2550
+ function generateChunkId(filepath, startLine, endLine) {
2551
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2552
+ return `${safePath}-${startLine}-${endLine}`;
2553
+ }
2554
+ var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
2555
+
2556
+ // src/domain/services/index.ts
2557
+ var init_services = __esm(() => {
2558
+ init_keywords();
2559
+ init_queryIntent();
2560
+ });
2561
+
2562
+ // src/modules/language/typescript/parseCode.ts
2563
+ import * as ts from "typescript";
2564
+ function parseTypeScriptCode(content, filepath) {
2565
+ return parseTypeScript(content, filepath);
2566
+ }
2567
+ function parseTypeScript(content, filepath) {
2568
+ const chunks = [];
2569
+ const lines = content.split(`
2570
+ `);
2571
+ const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
2572
+ function getLineNumbers(node) {
2573
+ const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
2574
+ const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
2575
+ return {
2576
+ startLine: start.line + 1,
2577
+ endLine: end.line + 1
2578
+ };
2579
+ }
2580
+ function getNodeText(node) {
2581
+ return node.getText(sourceFile);
2582
+ }
2583
+ function isExported(node) {
2584
+ if (!ts.canHaveModifiers(node))
2585
+ return false;
2586
+ const modifiers = ts.getModifiers(node);
2587
+ return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
2588
+ }
2589
+ function getJSDoc(node) {
2590
+ const jsDocNodes = ts.getJSDocCommentsAndTags(node);
2591
+ if (jsDocNodes.length === 0)
2592
+ return;
2593
+ return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
2594
+ `);
2595
+ }
2596
+ function getFunctionName(node) {
2597
+ if (ts.isFunctionDeclaration(node) && node.name) {
2598
+ return node.name.text;
2599
+ }
2600
+ if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
2601
+ return node.name.text;
2602
+ }
2603
+ if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
2604
+ return node.name.text;
2605
+ }
2606
+ return;
2607
+ }
2608
+ function visit(node) {
2609
+ const { startLine, endLine } = getLineNumbers(node);
2610
+ if (ts.isFunctionDeclaration(node) && node.name) {
2611
+ chunks.push({
2612
+ content: getNodeText(node),
2613
+ startLine,
2614
+ endLine,
2615
+ type: "function",
2616
+ name: node.name.text,
2617
+ isExported: isExported(node),
2618
+ jsDoc: getJSDoc(node)
2619
+ });
2620
+ return;
2621
+ }
2622
+ if (ts.isVariableStatement(node)) {
2623
+ for (const decl of node.declarationList.declarations) {
2624
+ if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2625
+ const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2626
+ chunks.push({
2627
+ content: getNodeText(node),
2628
+ startLine,
2629
+ endLine,
2630
+ type: "function",
2631
+ name,
2632
+ isExported: isExported(node),
2633
+ jsDoc: getJSDoc(node)
2634
+ });
2635
+ return;
2636
+ }
2637
+ }
2638
+ }
2639
+ if (ts.isClassDeclaration(node) && node.name) {
2640
+ chunks.push({
2641
+ content: getNodeText(node),
2642
+ startLine,
2643
+ endLine,
2644
+ type: "class",
2645
+ name: node.name.text,
2646
+ isExported: isExported(node),
2647
+ jsDoc: getJSDoc(node)
2648
+ });
2649
+ return;
2650
+ }
2651
+ if (ts.isInterfaceDeclaration(node)) {
2652
+ chunks.push({
2653
+ content: getNodeText(node),
2654
+ startLine,
2655
+ endLine,
2656
+ type: "interface",
2657
+ name: node.name.text,
2658
+ isExported: isExported(node),
2659
+ jsDoc: getJSDoc(node)
2660
+ });
2661
+ return;
2662
+ }
2663
+ if (ts.isTypeAliasDeclaration(node)) {
2664
+ chunks.push({
2665
+ content: getNodeText(node),
2666
+ startLine,
2667
+ endLine,
2668
+ type: "type",
2669
+ name: node.name.text,
2670
+ isExported: isExported(node),
2671
+ jsDoc: getJSDoc(node)
2672
+ });
2673
+ return;
2674
+ }
2675
+ if (ts.isEnumDeclaration(node)) {
2676
+ chunks.push({
2677
+ content: getNodeText(node),
2678
+ startLine,
2679
+ endLine,
2680
+ type: "enum",
2681
+ name: node.name.text,
2682
+ isExported: isExported(node),
2683
+ jsDoc: getJSDoc(node)
2684
+ });
2685
+ return;
2686
+ }
2687
+ if (ts.isVariableStatement(node) && isExported(node)) {
2688
+ for (const decl of node.declarationList.declarations) {
2689
+ if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
2690
+ continue;
2691
+ }
2692
+ const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
2693
+ chunks.push({
2694
+ content: getNodeText(node),
2695
+ startLine,
2696
+ endLine,
2697
+ type: "variable",
2698
+ name,
2699
+ isExported: true,
2700
+ jsDoc: getJSDoc(node)
2701
+ });
2702
+ }
2703
+ return;
2704
+ }
2705
+ ts.forEachChild(node, visit);
2706
+ }
2707
+ ts.forEachChild(sourceFile, visit);
2708
+ if (chunks.length === 0) {
2709
+ const lines2 = content.split(`
2710
+ `);
2711
+ return [
2712
+ {
2713
+ content,
2714
+ startLine: 1,
2715
+ endLine: lines2.length,
2716
+ type: "file"
2717
+ }
2718
+ ];
2719
+ }
2720
+ return chunks;
2721
+ }
2722
+ function generateChunkId2(filepath, startLine, endLine) {
2723
+ const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2724
+ return `${safePath}-${startLine}-${endLine}`;
2725
+ }
2726
+ var init_parseCode = () => {};
2727
+
2728
+ // src/infrastructure/storage/fileIndexStorage.ts
2729
+ var init_fileIndexStorage = __esm(() => {
2730
+ init_entities();
2731
+ });
2732
+
2733
+ // src/infrastructure/storage/symbolicIndex.ts
2734
+ import * as fs3 from "fs/promises";
2735
+ import * as path8 from "path";
2736
+
2737
+ class SymbolicIndex {
2738
+ meta = null;
2739
+ fileSummaries = new Map;
2740
+ bm25Index = null;
2741
+ symbolicPath;
2742
+ moduleId;
2743
+ constructor(indexDir, moduleId) {
2744
+ this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
2745
+ this.moduleId = moduleId;
2746
+ }
2747
+ async initialize() {
2748
+ try {
2749
+ await this.load();
2750
+ } catch {
2751
+ this.meta = {
2752
+ version: "1.0.0",
2753
+ lastUpdated: new Date().toISOString(),
2754
+ moduleId: this.moduleId,
2755
+ fileCount: 0,
2756
+ bm25Data: {
2757
+ avgDocLength: 0,
2758
+ documentFrequencies: {},
2759
+ totalDocs: 0
2760
+ }
2761
+ };
2762
+ this.bm25Index = new BM25Index;
2763
+ }
2764
+ }
2765
+ addFile(summary) {
2766
+ this.fileSummaries.set(summary.filepath, summary);
2767
+ }
2768
+ removeFile(filepath) {
2769
+ return this.fileSummaries.delete(filepath);
2770
+ }
2771
+ buildBM25Index() {
2772
+ this.bm25Index = new BM25Index;
2773
+ for (const [filepath, summary] of this.fileSummaries) {
2774
+ const content = [
2775
+ ...summary.keywords,
2776
+ ...summary.exports,
2777
+ ...extractPathKeywords(filepath)
2778
+ ].join(" ");
2779
+ this.bm25Index.addDocuments([{ id: filepath, content }]);
2780
+ }
2781
+ if (this.meta) {
2782
+ this.meta.fileCount = this.fileSummaries.size;
2783
+ this.meta.bm25Data.totalDocs = this.fileSummaries.size;
2784
+ }
2785
+ }
2786
+ findCandidates(query, maxCandidates = 20) {
2787
+ if (!this.bm25Index) {
2788
+ return Array.from(this.fileSummaries.keys());
2789
+ }
2790
+ const results = this.bm25Index.search(query, maxCandidates);
2791
+ return results.map((r) => r.id);
2792
+ }
2793
+ getAllFiles() {
2794
+ return Array.from(this.fileSummaries.keys());
2795
+ }
2796
+ getFileSummary(filepath) {
2797
+ return this.fileSummaries.get(filepath);
2798
+ }
2799
+ async save() {
2800
+ if (!this.meta)
2801
+ throw new Error("Index not initialized");
2802
+ this.meta.lastUpdated = new Date().toISOString();
2803
+ this.meta.fileCount = this.fileSummaries.size;
2804
+ await fs3.mkdir(this.symbolicPath, { recursive: true });
2805
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2806
+ await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
2807
+ for (const [filepath, summary] of this.fileSummaries) {
2808
+ const summaryPath = this.getFileSummaryPath(filepath);
2809
+ await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
2810
+ await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
2811
+ }
2812
+ }
2813
+ async load() {
2814
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2815
+ const metaContent = await fs3.readFile(metaPath, "utf-8");
2816
+ this.meta = JSON.parse(metaContent);
2817
+ this.fileSummaries.clear();
2818
+ await this.loadFileSummariesRecursive(this.symbolicPath);
2819
+ this.buildBM25Index();
2820
+ }
2821
+ async loadFileSummariesRecursive(dir) {
2822
+ try {
2823
+ const entries = await fs3.readdir(dir, { withFileTypes: true });
2824
+ for (const entry of entries) {
2825
+ const fullPath = path8.join(dir, entry.name);
2826
+ if (entry.isDirectory()) {
2827
+ await this.loadFileSummariesRecursive(fullPath);
2828
+ } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
2829
+ try {
2830
+ const content = await fs3.readFile(fullPath, "utf-8");
2831
+ const summary = JSON.parse(content);
2832
+ if (summary.filepath) {
2833
+ this.fileSummaries.set(summary.filepath, summary);
2834
+ }
2835
+ } catch {}
2836
+ }
2837
+ }
2838
+ } catch {}
2839
+ }
2840
+ getFileSummaryPath(filepath) {
2841
+ const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
2842
+ return path8.join(this.symbolicPath, jsonPath);
2843
+ }
2844
+ async deleteFileSummary(filepath) {
2845
+ try {
2846
+ await fs3.unlink(this.getFileSummaryPath(filepath));
2847
+ } catch {}
2848
+ this.fileSummaries.delete(filepath);
2849
+ }
2850
+ async exists() {
2851
+ try {
2852
+ const metaPath = path8.join(this.symbolicPath, "_meta.json");
2853
+ await fs3.access(metaPath);
2854
+ return true;
2855
+ } catch {
2856
+ return false;
2857
+ }
2858
+ }
2859
+ get size() {
2860
+ return this.fileSummaries.size;
2861
+ }
2862
+ clear() {
2863
+ this.fileSummaries.clear();
2864
+ if (this.meta) {
2865
+ this.meta.fileCount = 0;
2866
+ this.meta.bm25Data = {
2867
+ avgDocLength: 0,
2868
+ documentFrequencies: {},
2869
+ totalDocs: 0
2870
+ };
2871
+ }
2872
+ this.bm25Index = new BM25Index;
2873
+ }
2874
+ }
2875
// Lazy initializer for the symbolic index module; it only needs the keywords module.
var init_symbolicIndex = __esm(function () {
  init_keywords();
});
2878
+
2879
+ // src/infrastructure/storage/index.ts
2880
// Lazy initializer for the storage barrel: file index storage first, then the symbolic index.
var init_storage = __esm(function () {
  init_fileIndexStorage();
  init_symbolicIndex();
});
2884
+
2885
+ // src/modules/language/typescript/index.ts
2886
// Export table of the TypeScript module; each entry is a getter for the live binding.
var exports_typescript = {};
__export(exports_typescript, {
  isTypeScriptFile() {
    return isTypeScriptFile;
  },
  TypeScriptModule() {
    return TypeScriptModule;
  },
  TYPESCRIPT_EXTENSIONS() {
    return TYPESCRIPT_EXTENSIONS;
  },
  DEFAULT_TOP_K() {
    return DEFAULT_TOP_K2;
  },
  DEFAULT_MIN_SCORE() {
    return DEFAULT_MIN_SCORE2;
  }
});
2894
+ import * as path9 from "path";
2895
/**
 * Tells whether a path ends in one of the extensions listed in
 * TYPESCRIPT_EXTENSIONS (compared case-insensitively).
 * @param {string} filepath - Path to test.
 * @returns {boolean} true when the extension is listed.
 */
function isTypeScriptFile(filepath) {
  const extension = path9.extname(filepath).toLowerCase();
  return TYPESCRIPT_EXTENSIONS.some((known) => known === extension);
}
2899
/**
 * Returns the score bonus associated with a chunk's `type`.
 * Types without an entry (including "file" and "block") get no bonus.
 * @param {{type: string}} chunk - Chunk whose type is inspected.
 * @returns {number} Bonus between 0 and 0.05.
 */
function calculateChunkTypeBoost(chunk) {
  const boostByType = new Map([
    ["function", 0.05],
    ["class", 0.04],
    ["interface", 0.04],
    ["type", 0.03],
    ["enum", 0.03],
    ["variable", 0.02]
  ]);
  return boostByType.get(chunk.type) ?? 0;
}
2917
/**
 * Returns a small score bonus for chunks flagged as exported.
 * @param {{isExported?: boolean}} chunk - Chunk to inspect.
 * @returns {number} 0.03 when `isExported` is truthy, otherwise 0.
 */
function calculateExportBoost(chunk) {
  if (chunk.isExported) {
    return 0.03;
  }
  return 0;
}
2920
+
2921
/**
 * Index module for TypeScript/JavaScript sources. It embeds parsed code
 * chunks at index time and ranks them at query time with a blend of
 * embedding similarity, BM25 and several small additive boosts.
 */
class TypeScriptModule {
  id = "language/typescript";
  name = "TypeScript Search";
  description = "TypeScript-aware code search with AST parsing and semantic embeddings";
  version = "1.0.0";
  embeddingConfig = null;
  symbolicIndex = null;
  // Summaries collected by indexFile() and flushed to disk by finalize().
  pendingSummaries = new Map();
  rootDir = "";
  logger = undefined;

  /**
   * Configures the embedding backend from the module config and resets
   * any summaries left over from a previous run.
   * @param {object} config - Module configuration; `config.options.logger` is optional.
   */
  async initialize(config) {
    this.embeddingConfig = getEmbeddingConfigFromModule(config);
    this.logger = config.options?.logger;
    if (this.logger) {
      this.embeddingConfig = {
        ...this.embeddingConfig,
        logger: this.logger
      };
    }
    configureEmbeddings(this.embeddingConfig);
    this.pendingSummaries.clear();
  }

  /**
   * Parses one source file into chunks, embeds them and records a file
   * summary for the symbolic index.
   * @param {string} filepath - Path of the file being indexed.
   * @param {string} content - File content.
   * @param {object} ctx - Index context providing `rootDir` and `getFileStats`.
   * @returns {Promise<object|null>} The file index entry, or null when the
   *   file is not a TypeScript/JavaScript file or yields no chunks.
   */
  async indexFile(filepath, content, ctx) {
    if (!isTypeScriptFile(filepath)) {
      return null;
    }
    this.rootDir = ctx.rootDir;
    const parsedChunks = parseTypeScriptCode(content, filepath);
    if (parsedChunks.length === 0) {
      return null;
    }
    const pathContext = parsePathContext(filepath);
    const pathPrefix = formatPathContextForEmbedding(pathContext);
    // The embedded text is prefixed with path context and the chunk name.
    const chunkContents = parsedChunks.map((c) => {
      const namePrefix = c.name ? `${c.name}: ` : "";
      return `${pathPrefix} ${namePrefix}${c.content}`;
    });
    const embeddings = await getEmbeddings(chunkContents);
    const chunks = parsedChunks.map((pc) => ({
      id: generateChunkId2(filepath, pc.startLine, pc.endLine),
      content: pc.content,
      startLine: pc.startLine,
      endLine: pc.endLine,
      type: pc.type,
      name: pc.name,
      isExported: pc.isExported,
      jsDoc: pc.jsDoc
    }));
    const references = this.extractReferences(content, filepath);
    const stats = await ctx.getFileStats(filepath);
    const currentConfig = getEmbeddingConfig();
    const moduleData = {
      embeddings,
      embeddingModel: currentConfig.model
    };
    const chunkTypes = [...new Set(parsedChunks.map((pc) => pc.type))];
    const exportedNames = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
    const allKeywords = new Set();
    for (const pc of parsedChunks) {
      const keywords = extractKeywords(pc.content, pc.name);
      keywords.forEach((k) => allKeywords.add(k));
    }
    pathContext.keywords.forEach((k) => allKeywords.add(k));
    const fileSummary = {
      filepath,
      chunkCount: chunks.length,
      chunkTypes,
      keywords: Array.from(allKeywords),
      exports: exportedNames,
      lastModified: stats.lastModified,
      pathContext: {
        segments: pathContext.segments,
        layer: pathContext.layer,
        domain: pathContext.domain,
        depth: pathContext.depth
      }
    };
    this.pendingSummaries.set(filepath, fileSummary);
    return {
      filepath,
      lastModified: stats.lastModified,
      chunks,
      moduleData,
      references
    };
  }

  /**
   * Flushes the summaries collected during indexing into the symbolic
   * index, rebuilds its BM25 data and saves it.
   * @param {object} ctx - Index context providing `rootDir` and `config`.
   */
  async finalize(ctx) {
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
    await this.symbolicIndex.initialize();
    for (const summary of this.pendingSummaries.values()) {
      this.symbolicIndex.addFile(summary);
    }
    this.symbolicIndex.buildBM25Index();
    await this.symbolicIndex.save();
    this.pendingSummaries.clear();
  }

  /**
   * Ranks indexed chunks against a query.
   * @param {string} query - Free-text query.
   * @param {object} ctx - Search context (`rootDir`, `config`,
   *   `listIndexedFiles`, `loadFileIndex`).
   * @param {{topK?: number, minScore?: number, filePatterns?: string[]}} [options]
   * @returns {Promise<object[]>} At most `topK` results, best score first.
   */
  async search(query, ctx, options = {}) {
    const {
      topK = DEFAULT_TOP_K2,
      minScore = DEFAULT_MIN_SCORE2,
      filePatterns
    } = options;
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    const symbolicIndex = new SymbolicIndex(indexDir, this.id);
    let allFiles;
    try {
      await symbolicIndex.initialize();
      allFiles = symbolicIndex.getAllFiles();
    } catch {
      // Without a usable symbolic index, fall back to the context's file list.
      allFiles = await ctx.listIndexedFiles();
    }
    let filesToSearch = allFiles;
    if (filePatterns && filePatterns.length > 0) {
      filesToSearch = allFiles.filter((filepath) => {
        return filePatterns.some((pattern) => {
          // "*.ext" is a suffix match; anything else is a substring match.
          if (pattern.startsWith("*.")) {
            const ext = pattern.slice(1);
            return filepath.endsWith(ext);
          }
          return filepath.includes(pattern);
        });
      });
    }
    const queryEmbedding = await getEmbedding(query);
    const bm25Index = new BM25Index();
    const allChunksData = [];
    for (const filepath of filesToSearch) {
      const fileIndex = await ctx.loadFileIndex(filepath);
      if (!fileIndex)
        continue;
      const moduleData = fileIndex.moduleData;
      if (!moduleData?.embeddings)
        continue;
      for (let i = 0; i < fileIndex.chunks.length; i++) {
        const chunk = fileIndex.chunks[i];
        const embedding = moduleData.embeddings[i];
        if (!embedding)
          continue;
        allChunksData.push({
          filepath: fileIndex.filepath,
          chunk,
          embedding
        });
        bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
      }
    }
    const bm25Results = bm25Index.search(query, topK * 3);
    const bm25Scores = new Map();
    for (const result of bm25Results) {
      bm25Scores.set(result.id, normalizeScore(result.score, 3));
    }
    const queryTerms = extractQueryTerms(query);
    // Per-file boost when query terms overlap the stored path context.
    const pathBoosts = new Map();
    for (const filepath of filesToSearch) {
      const summary = symbolicIndex.getFileSummary(filepath);
      if (summary?.pathContext) {
        let boost = 0;
        const ctx2 = summary.pathContext;
        if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
          boost += 0.1;
        }
        if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
          boost += 0.05;
        }
        const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
        if (segmentMatch) {
          boost += 0.05;
        }
        pathBoosts.set(filepath, boost);
      }
    }
    const results = [];
    for (const { filepath, chunk, embedding } of allChunksData) {
      const semanticScore = cosineSimilarity(queryEmbedding, embedding);
      const bm25Score = bm25Scores.get(chunk.id) || 0;
      const pathBoost = pathBoosts.get(filepath) || 0;
      const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
      const chunkTypeBoost = calculateChunkTypeBoost(chunk);
      const exportBoost = calculateExportBoost(chunk);
      const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
      const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
      // A strong keyword hit is kept even when the blended score is below the threshold.
      if (hybridScore >= minScore || bm25Score > 0.3) {
        results.push({
          filepath,
          chunk,
          score: hybridScore,
          moduleId: this.id,
          context: {
            semanticScore,
            bm25Score,
            pathBoost,
            fileTypeBoost,
            chunkTypeBoost,
            exportBoost
          }
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }

  /**
   * Collects the relative module specifiers referenced by a source file,
   * resolved against the file's own directory.
   *
   * Recognised forms: `import ... from "x"` (the clause may span several
   * lines), side-effect `import "x"`, `export * from "x"` /
   * `export { ... } from "x"`, `require("x")` and dynamic `import("x")`.
   * Only specifiers starting with "." are returned. Static import/export
   * references come first in source order, followed by call-style ones.
   * The scan is regex-based, so matches inside comments or strings are
   * not filtered out.
   * @param {string} content - Source text to scan.
   * @param {string} filepath - Path of the file the text came from.
   * @returns {string[]} Normalized referenced paths (duplicates preserved).
   */
  extractReferences(content, filepath) {
    // `[^'";]` (unlike `.`) crosses newlines, so multi-line import clauses are matched.
    const staticSpecifier = /\bimport\b\s*(?:[^'";]*?\bfrom\s*)?['"]([^'"]+)['"]|\bexport\b\s*(?:type\s+)?(?:\*(?:\s*as\s+[\w$]+)?|\{[^}]*\})\s*from\s*['"]([^'"]+)['"]/g;
    const callSpecifier = /(?:require|\bimport)\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
    const baseDir = path9.dirname(filepath);
    const references = [];
    const record = (specifier) => {
      if (specifier.startsWith(".")) {
        references.push(path9.normalize(path9.join(baseDir, specifier)));
      }
    };
    for (const match of content.matchAll(staticSpecifier)) {
      record(match[1] ?? match[2]);
    }
    for (const match of content.matchAll(callSpecifier)) {
      record(match[1]);
    }
    return references;
  }
}
3149
// Search defaults and hybrid-score weights for the TypeScript module.
var DEFAULT_MIN_SCORE2 = 0.15;
var DEFAULT_TOP_K2 = 10;
var SEMANTIC_WEIGHT = 0.7;
var BM25_WEIGHT = 0.3;
// Assigned by init_typescript() below.
var TYPESCRIPT_EXTENSIONS;
var init_typescript = __esm(function () {
  init_embeddings();
  init_services();
  init_config2();
  init_parseCode();
  init_storage();
  TYPESCRIPT_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs", ".mts", ".cts"];
});
3167
+
3168
+ // src/modules/data/json/index.ts
3169
// Export table of the JSON module; each entry is a getter for the live binding.
var exports_json = {};
__export(exports_json, {
  isJsonFile() {
    return isJsonFile;
  },
  JsonModule() {
    return JsonModule;
  },
  JSON_EXTENSIONS() {
    return JSON_EXTENSIONS;
  },
  DEFAULT_TOP_K() {
    return DEFAULT_TOP_K3;
  },
  DEFAULT_MIN_SCORE() {
    return DEFAULT_MIN_SCORE3;
  }
});
3177
+ import * as path10 from "path";
3178
/**
 * Tells whether a path ends in one of the extensions listed in
 * JSON_EXTENSIONS (compared case-insensitively).
 * @param {string} filepath - Path to test.
 * @returns {boolean} true when the extension is listed.
 */
function isJsonFile(filepath) {
  const extension = path10.extname(filepath).toLowerCase();
  return JSON_EXTENSIONS.some((known) => known === extension);
}
3182
/**
 * Collects every property name found anywhere inside a parsed JSON value,
 * in depth-first document order. Duplicates are preserved; array indices
 * and primitive values contribute nothing.
 *
 * Keys are appended to one shared accumulator rather than spread into
 * `push(...)`, so a subtree with a very large number of keys cannot
 * exceed the engine's argument-count limit.
 * @param {*} obj - Parsed JSON value (object, array, primitive or null).
 * @param {string} [prefix] - Accepted for call compatibility; it does not
 *   influence the returned keys.
 * @returns {string[]} Property names in traversal order.
 */
function extractJsonKeys(obj, prefix = "") {
  const keys = [];
  const visit = (node) => {
    if (node === null || node === undefined) {
      return;
    }
    if (Array.isArray(node)) {
      for (const item of node) {
        visit(item);
      }
      return;
    }
    if (typeof node === "object") {
      for (const [key, value] of Object.entries(node)) {
        keys.push(key);
        visit(value);
      }
    }
  };
  visit(obj);
  return keys;
}
3200
/**
 * Derives search keywords from JSON text: every property name plus every
 * whitespace-separated word longer than two characters found in string
 * values (camelCase boundaries are split first, words are lower-cased).
 *
 * Words are added straight to a Set instead of being spread into
 * `push(...)`, so a very long string value cannot exceed the engine's
 * argument-count limit and silently produce an empty keyword list.
 * @param {string} content - Raw JSON text.
 * @returns {string[]} De-duplicated keywords (keys first, then words in
 *   traversal order); empty when the text cannot be processed.
 */
function extractJsonKeywords(content) {
  try {
    const parsed = JSON.parse(content);
    const keywords = new Set(extractJsonKeys(parsed));
    const collectWords = (node) => {
      if (typeof node === "string") {
        const words = node.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/);
        for (const word of words) {
          if (word.length > 2) {
            keywords.add(word);
          }
        }
      } else if (Array.isArray(node)) {
        node.forEach((item) => collectWords(item));
      } else if (node && typeof node === "object") {
        Object.values(node).forEach((value) => collectWords(value));
      }
    };
    collectWords(parsed);
    return [...keywords];
  } catch {
    // Best-effort: invalid JSON (or any other failure) yields no keywords.
    return [];
  }
}
3221
+
3222
/**
 * Index module for JSON files. Files are split into overlapping
 * line-based chunks that are embedded at index time and ranked at query
 * time with a blend of embedding similarity and BM25.
 */
class JsonModule {
  id = "data/json";
  name = "JSON Search";
  description = "JSON file search with structure-aware indexing";
  version = "1.0.0";
  embeddingConfig = null;
  symbolicIndex = null;
  // Summaries collected by indexFile() and flushed to disk by finalize().
  pendingSummaries = new Map();
  rootDir = "";
  logger = undefined;

  /**
   * Configures the embedding backend from the module config and resets
   * any summaries left over from a previous run.
   * @param {object} config - Module configuration; `config.options.logger` is optional.
   */
  async initialize(config) {
    this.embeddingConfig = getEmbeddingConfigFromModule(config);
    this.logger = config.options?.logger;
    if (this.logger) {
      this.embeddingConfig = {
        ...this.embeddingConfig,
        logger: this.logger
      };
    }
    configureEmbeddings(this.embeddingConfig);
    this.pendingSummaries.clear();
  }

  /**
   * Chunks and embeds one JSON file and records its summary.
   * @param {string} filepath - Path of the file being indexed.
   * @param {string} content - File content.
   * @param {object} ctx - Index context providing `rootDir` and `getFileStats`.
   * @returns {Promise<object|null>} The file index entry, or null when the
   *   file is not a JSON file or yields no chunks.
   */
  async indexFile(filepath, content, ctx) {
    if (!isJsonFile(filepath)) {
      return null;
    }
    this.rootDir = ctx.rootDir;
    const textChunks = createLineBasedChunks(content, {
      chunkSize: 50,
      overlap: 10
    });
    if (textChunks.length === 0) {
      return null;
    }
    // Every chunk's embedded text is prefixed with the file name.
    const filename = path10.basename(filepath);
    const chunkContents = textChunks.map((c) => `${filename}: ${c.content}`);
    const embeddings = await getEmbeddings(chunkContents);
    const chunks = textChunks.map((tc) => ({
      id: generateChunkId(filepath, tc.startLine, tc.endLine),
      content: tc.content,
      startLine: tc.startLine,
      endLine: tc.endLine,
      type: tc.type
    }));
    // Invalid JSON is still indexed as text; it just contributes no keys.
    let parsed;
    try {
      parsed = JSON.parse(content);
    } catch {
      parsed = {};
    }
    const jsonKeys = extractJsonKeys(parsed);
    const stats = await ctx.getFileStats(filepath);
    const currentConfig = getEmbeddingConfig();
    const moduleData = {
      embeddings,
      embeddingModel: currentConfig.model,
      jsonKeys
    };
    const keywords = extractJsonKeywords(content);
    const fileSummary = {
      filepath,
      chunkCount: chunks.length,
      chunkTypes: ["file"],
      keywords,
      exports: [],
      lastModified: stats.lastModified
    };
    this.pendingSummaries.set(filepath, fileSummary);
    return {
      filepath,
      lastModified: stats.lastModified,
      chunks,
      moduleData
    };
  }

  /**
   * Flushes the summaries collected during indexing into the symbolic
   * index, rebuilds its BM25 data and saves it.
   * @param {object} ctx - Index context providing `rootDir` and `config`.
   */
  async finalize(ctx) {
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
    await this.symbolicIndex.initialize();
    for (const summary of this.pendingSummaries.values()) {
      this.symbolicIndex.addFile(summary);
    }
    this.symbolicIndex.buildBM25Index();
    await this.symbolicIndex.save();
    this.pendingSummaries.clear();
  }

  /**
   * Ranks indexed JSON chunks against a query.
   * @param {string} query - Free-text query.
   * @param {object} ctx - Search context (`rootDir`, `config`,
   *   `listIndexedFiles`, `loadFileIndex`).
   * @param {{topK?: number, minScore?: number, filePatterns?: string[]}} [options]
   * @returns {Promise<object[]>} At most `topK` results, best score first.
   */
  async search(query, ctx, options = {}) {
    const {
      topK = DEFAULT_TOP_K3,
      minScore = DEFAULT_MIN_SCORE3,
      filePatterns
    } = options;
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    const symbolicIndex = new SymbolicIndex(indexDir, this.id);
    let allFiles;
    try {
      await symbolicIndex.initialize();
      allFiles = symbolicIndex.getAllFiles();
    } catch {
      // Without a usable symbolic index, fall back to the context's file list.
      allFiles = await ctx.listIndexedFiles();
    }
    let filesToSearch = allFiles.filter((f) => isJsonFile(f));
    if (filePatterns && filePatterns.length > 0) {
      filesToSearch = filesToSearch.filter((filepath) => {
        return filePatterns.some((pattern) => {
          // "*.ext" is a suffix match; anything else is a substring match.
          if (pattern.startsWith("*.")) {
            const ext = pattern.slice(1);
            return filepath.endsWith(ext);
          }
          return filepath.includes(pattern);
        });
      });
    }
    const queryEmbedding = await getEmbedding(query);
    const bm25Index = new BM25Index();
    const allChunksData = [];
    for (const filepath of filesToSearch) {
      const fileIndex = await ctx.loadFileIndex(filepath);
      if (!fileIndex)
        continue;
      const moduleData = fileIndex.moduleData;
      if (!moduleData?.embeddings)
        continue;
      for (let i = 0; i < fileIndex.chunks.length; i++) {
        const chunk = fileIndex.chunks[i];
        const embedding = moduleData.embeddings[i];
        if (!embedding)
          continue;
        allChunksData.push({
          filepath: fileIndex.filepath,
          chunk,
          embedding
        });
        bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
      }
    }
    const bm25Results = bm25Index.search(query, topK * 3);
    const bm25Scores = new Map();
    for (const result of bm25Results) {
      bm25Scores.set(result.id, normalizeScore(result.score, 3));
    }
    const results = [];
    for (const { filepath, chunk, embedding } of allChunksData) {
      const semanticScore = cosineSimilarity(queryEmbedding, embedding);
      const bm25Score = bm25Scores.get(chunk.id) || 0;
      const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
      // A strong keyword hit is kept even when the blended score is below the threshold.
      if (hybridScore >= minScore || bm25Score > 0.3) {
        results.push({
          filepath,
          chunk,
          score: hybridScore,
          moduleId: this.id,
          context: {
            semanticScore,
            bm25Score
          }
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }
}
3388
// Search defaults and hybrid-score weights for the JSON module.
var DEFAULT_MIN_SCORE3 = 0.15;
var DEFAULT_TOP_K3 = 10;
var SEMANTIC_WEIGHT2 = 0.7;
var BM25_WEIGHT2 = 0.3;
// Assigned by init_json() below.
var JSON_EXTENSIONS;
var init_json = __esm(function () {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  JSON_EXTENSIONS = [".json"];
});
3396
+
3397
+ // src/modules/docs/markdown/index.ts
3398
// Export table of the Markdown module; each entry is a getter for the live binding.
var exports_markdown = {};
__export(exports_markdown, {
  isMarkdownFile() {
    return isMarkdownFile;
  },
  MarkdownModule() {
    return MarkdownModule;
  },
  MARKDOWN_EXTENSIONS() {
    return MARKDOWN_EXTENSIONS;
  },
  DEFAULT_TOP_K() {
    return DEFAULT_TOP_K4;
  },
  DEFAULT_MIN_SCORE() {
    return DEFAULT_MIN_SCORE4;
  }
});
3406
+ import * as path11 from "path";
3407
/**
 * Tells whether a path ends in one of the extensions listed in
 * MARKDOWN_EXTENSIONS (compared case-insensitively).
 * @param {string} filepath - Path to test.
 * @returns {boolean} true when the extension is listed.
 */
function isMarkdownFile(filepath) {
  const extension = path11.extname(filepath).toLowerCase();
  return MARKDOWN_EXTENSIONS.some((known) => known === extension);
}
3411
/**
 * Splits Markdown text into sections delimited by ATX headings
 * (`#` to `######`).
 *
 * Each section records its heading text, heading level, trimmed body and
 * 1-based inclusive line range (the heading line is the section's first
 * line). Text before the first heading becomes a section with an empty
 * heading and level 0, but only when it contains something other than
 * whitespace, so an empty document yields no sections.
 *
 * Both LF and CRLF line endings are accepted. Lines inside fenced code
 * blocks (``` or ~~~) are never treated as headings; an unclosed fence
 * runs to the end of the document.
 * @param {string} content - Markdown source.
 * @returns {{heading: string, level: number, content: string, startLine: number, endLine: number}[]}
 */
function parseMarkdownSections(content) {
  const HEADING = /^(#{1,6})\s+(.+)$/;
  const FENCE = /^ {0,3}(`{3,}|~{3,})/;
  // Splitting on \r?\n keeps a trailing "\r" from defeating the heading pattern.
  const lines = content.split(/\r?\n/);
  const sections = [];
  let currentSection = null;
  let currentContent = [];
  let openFence = null;

  // Closes whatever is being accumulated, ending at 1-based line `endLine`.
  const flush = (endLine) => {
    const text = currentContent.join("\n").trim();
    if (currentSection) {
      currentSection.content = text;
      currentSection.endLine = endLine;
      if (currentSection.content || currentSection.heading) {
        sections.push(currentSection);
      }
    } else if (text) {
      sections.push({
        heading: "",
        level: 0,
        content: text,
        startLine: 1,
        endLine
      });
    }
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const fence = line.match(FENCE);
    if (openFence !== null) {
      // A fence closes on a bare marker of the same character, at least as long as the opener.
      const closes = fence !== null && fence[1][0] === openFence[0] && fence[1].length >= openFence.length && line.slice(fence[0].length).trim() === "";
      if (closes) {
        openFence = null;
      }
      currentContent.push(line);
      continue;
    }
    if (fence !== null) {
      openFence = fence[1];
      currentContent.push(line);
      continue;
    }
    const headingMatch = line.match(HEADING);
    if (headingMatch === null) {
      currentContent.push(line);
      continue;
    }
    // The heading sits on 1-based line i + 1, so the previous section ends on line i.
    flush(i);
    currentSection = {
      heading: headingMatch[2],
      level: headingMatch[1].length,
      content: "",
      startLine: i + 1,
      endLine: lines.length
    };
    currentContent = [];
  }
  flush(lines.length);
  return sections;
}
2755
- function calculateExportBoost(chunk) {
2756
- return chunk.isExported ? 0.03 : 0;
3470
/**
 * Gathers lower-cased keywords from Markdown text, in this order:
 * words from heading lines, words from `*emphasis*` / `**strong**` spans,
 * whole inline-code spans (3 to 49 characters long), and words from
 * bracketed `[...]` text. Words shorter than three characters are dropped
 * and duplicates keep their first position.
 * @param {string} content - Markdown source.
 * @returns {string[]} De-duplicated keywords.
 */
function extractMarkdownKeywords(content) {
  const seen = new Set();
  const addWords = (text) => {
    for (const word of text.toLowerCase().split(/\s+/)) {
      if (word.length > 2) {
        seen.add(word);
      }
    }
  };

  for (const [, headingText] of content.matchAll(/^#{1,6}\s+(.+)$/gm)) {
    addWords(headingText);
  }
  for (const [, strong, emphasized] of content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g)) {
    addWords(strong || emphasized || "");
  }
  for (const [, snippet] of content.matchAll(/`([^`]+)`/g)) {
    const code = snippet.toLowerCase();
    if (code.length > 2 && code.length < 50) {
      seen.add(code);
    }
  }
  for (const [, label] of content.matchAll(/\[([^\]]+)\]/g)) {
    addWords(label);
  }
  return [...seen];
}
2758
3499
 
2759
- class TypeScriptModule {
2760
- id = "language/typescript";
2761
- name = "TypeScript Search";
2762
- description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3500
+ class MarkdownModule {
3501
+ id = "docs/markdown";
3502
+ name = "Markdown Search";
3503
+ description = "Markdown documentation search with section-aware indexing";
2763
3504
  version = "1.0.0";
2764
3505
  embeddingConfig = null;
2765
3506
  symbolicIndex = null;
2766
3507
  pendingSummaries = new Map;
2767
3508
  rootDir = "";
3509
+ logger = undefined;
2768
3510
  async initialize(config) {
2769
3511
  this.embeddingConfig = getEmbeddingConfigFromModule(config);
3512
+ this.logger = config.options?.logger;
3513
+ if (this.logger) {
3514
+ this.embeddingConfig = {
3515
+ ...this.embeddingConfig,
3516
+ logger: this.logger
3517
+ };
3518
+ }
2770
3519
  configureEmbeddings(this.embeddingConfig);
2771
3520
  this.pendingSummaries.clear();
2772
3521
  }
2773
3522
  async indexFile(filepath, content, ctx) {
3523
+ if (!isMarkdownFile(filepath)) {
3524
+ return null;
3525
+ }
2774
3526
  this.rootDir = ctx.rootDir;
2775
- const parsedChunks = parseCode(content, filepath);
2776
- if (parsedChunks.length === 0) {
3527
+ const sections = parseMarkdownSections(content);
3528
+ if (sections.length === 0) {
2777
3529
  return null;
2778
3530
  }
2779
- const pathContext = parsePathContext(filepath);
2780
- const pathPrefix = formatPathContextForEmbedding(pathContext);
2781
- const chunkContents = parsedChunks.map((c) => {
2782
- const namePrefix = c.name ? `${c.name}: ` : "";
2783
- return `${pathPrefix} ${namePrefix}${c.content}`;
3531
+ const chunkContents = sections.map((s) => {
3532
+ const filename = path11.basename(filepath);
3533
+ const headingContext = s.heading ? `${s.heading}: ` : "";
3534
+ return `${filename} ${headingContext}${s.content}`;
2784
3535
  });
2785
3536
  const embeddings = await getEmbeddings(chunkContents);
2786
- const chunks = parsedChunks.map((pc) => ({
2787
- id: generateChunkId(filepath, pc.startLine, pc.endLine),
2788
- content: pc.content,
2789
- startLine: pc.startLine,
2790
- endLine: pc.endLine,
2791
- type: pc.type,
2792
- name: pc.name,
2793
- isExported: pc.isExported,
2794
- jsDoc: pc.jsDoc
3537
+ const chunks = sections.map((section, i) => ({
3538
+ id: generateChunkId(filepath, section.startLine, section.endLine),
3539
+ content: section.heading ? `## ${section.heading}
3540
+
3541
+ ${section.content}` : section.content,
3542
+ startLine: section.startLine,
3543
+ endLine: section.endLine,
3544
+ type: "block",
3545
+ name: section.heading || undefined
2795
3546
  }));
2796
- const references = this.extractReferences(content, filepath);
3547
+ const headings = sections.filter((s) => s.heading).map((s) => s.heading);
2797
3548
  const stats = await ctx.getFileStats(filepath);
2798
3549
  const currentConfig = getEmbeddingConfig();
2799
3550
  const moduleData = {
2800
3551
  embeddings,
2801
- embeddingModel: currentConfig.model
3552
+ embeddingModel: currentConfig.model,
3553
+ headings
2802
3554
  };
2803
- const chunkTypes = [
2804
- ...new Set(parsedChunks.map((pc) => pc.type))
2805
- ];
2806
- const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
2807
- const allKeywords = new Set;
2808
- for (const pc of parsedChunks) {
2809
- const keywords = extractKeywords(pc.content, pc.name);
2810
- keywords.forEach((k) => allKeywords.add(k));
2811
- }
2812
- pathContext.keywords.forEach((k) => allKeywords.add(k));
3555
+ const keywords = extractMarkdownKeywords(content);
2813
3556
  const fileSummary = {
2814
3557
  filepath,
2815
3558
  chunkCount: chunks.length,
2816
- chunkTypes,
2817
- keywords: Array.from(allKeywords),
2818
- exports,
2819
- lastModified: stats.lastModified,
2820
- pathContext: {
2821
- segments: pathContext.segments,
2822
- layer: pathContext.layer,
2823
- domain: pathContext.domain,
2824
- depth: pathContext.depth
2825
- }
3559
+ chunkTypes: ["block"],
3560
+ keywords,
3561
+ exports: headings,
3562
+ lastModified: stats.lastModified
2826
3563
  };
2827
3564
  this.pendingSummaries.set(filepath, fileSummary);
2828
3565
  return {
2829
3566
  filepath,
2830
3567
  lastModified: stats.lastModified,
2831
3568
  chunks,
2832
- moduleData,
2833
- references
3569
+ moduleData
2834
3570
  };
2835
3571
  }
2836
3572
  async finalize(ctx) {
@@ -2846,8 +3582,8 @@ class TypeScriptModule {
2846
3582
  }
2847
3583
  async search(query, ctx, options = {}) {
2848
3584
  const {
2849
- topK = DEFAULT_TOP_K2,
2850
- minScore = DEFAULT_MIN_SCORE2,
3585
+ topK = DEFAULT_TOP_K4,
3586
+ minScore = DEFAULT_MIN_SCORE4,
2851
3587
  filePatterns
2852
3588
  } = options;
2853
3589
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
@@ -2859,9 +3595,9 @@ class TypeScriptModule {
2859
3595
  } catch {
2860
3596
  allFiles = await ctx.listIndexedFiles();
2861
3597
  }
2862
- let filesToSearch = allFiles;
3598
+ let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
2863
3599
  if (filePatterns && filePatterns.length > 0) {
2864
- filesToSearch = allFiles.filter((filepath) => {
3600
+ filesToSearch = filesToSearch.filter((filepath) => {
2865
3601
  return filePatterns.some((pattern) => {
2866
3602
  if (pattern.startsWith("*.")) {
2867
3603
  const ext = pattern.slice(1);
@@ -2899,36 +3635,24 @@ class TypeScriptModule {
2899
3635
  for (const result of bm25Results) {
2900
3636
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
2901
3637
  }
2902
- const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
2903
- const pathBoosts = new Map;
2904
- for (const filepath of filesToSearch) {
2905
- const summary = symbolicIndex.getFileSummary(filepath);
2906
- if (summary?.pathContext) {
2907
- let boost = 0;
2908
- const ctx2 = summary.pathContext;
2909
- if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
2910
- boost += 0.1;
2911
- }
2912
- if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
2913
- boost += 0.05;
2914
- }
2915
- const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
2916
- if (segmentMatch) {
2917
- boost += 0.05;
2918
- }
2919
- pathBoosts.set(filepath, boost);
2920
- }
2921
- }
3638
+ const queryTerms = extractQueryTerms(query);
2922
3639
  const results = [];
2923
3640
  for (const { filepath, chunk, embedding } of allChunksData) {
2924
3641
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
2925
3642
  const bm25Score = bm25Scores.get(chunk.id) || 0;
2926
- const pathBoost = pathBoosts.get(filepath) || 0;
2927
- const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
2928
- const chunkTypeBoost = calculateChunkTypeBoost(chunk);
2929
- const exportBoost = calculateExportBoost(chunk);
2930
- const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
2931
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
3643
+ let docBoost = 0;
3644
+ if (queryTerms.some((t) => [
3645
+ "docs",
3646
+ "documentation",
3647
+ "readme",
3648
+ "guide",
3649
+ "how",
3650
+ "what",
3651
+ "explain"
3652
+ ].includes(t))) {
3653
+ docBoost = 0.05;
3654
+ }
3655
+ const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
2932
3656
  if (hybridScore >= minScore || bm25Score > 0.3) {
2933
3657
  results.push({
2934
3658
  filepath,
@@ -2938,10 +3662,7 @@ class TypeScriptModule {
2938
3662
  context: {
2939
3663
  semanticScore,
2940
3664
  bm25Score,
2941
- pathBoost,
2942
- fileTypeBoost,
2943
- chunkTypeBoost,
2944
- exportBoost
3665
+ docBoost
2945
3666
  }
2946
3667
  });
2947
3668
  }
@@ -2949,91 +3670,21 @@ class TypeScriptModule {
2949
3670
  results.sort((a, b) => b.score - a.score);
2950
3671
  return results.slice(0, topK);
2951
3672
  }
2952
- extractReferences(content, filepath) {
2953
- const references = [];
2954
- const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
2955
- const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
2956
- let match;
2957
- while ((match = importRegex.exec(content)) !== null) {
2958
- const importPath = match[1];
2959
- if (importPath.startsWith(".")) {
2960
- const dir = path8.dirname(filepath);
2961
- const resolved = path8.normalize(path8.join(dir, importPath));
2962
- references.push(resolved);
2963
- }
2964
- }
2965
- while ((match = requireRegex.exec(content)) !== null) {
2966
- const importPath = match[1];
2967
- if (importPath.startsWith(".")) {
2968
- const dir = path8.dirname(filepath);
2969
- const resolved = path8.normalize(path8.join(dir, importPath));
2970
- references.push(resolved);
2971
- }
2972
- }
2973
- return references;
2974
- }
2975
3673
  }
2976
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2977
- var init_typescript = __esm(() => {
3674
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
3675
+ var init_markdown = __esm(() => {
2978
3676
  init_embeddings();
3677
+ init_services();
2979
3678
  init_config2();
2980
- init_parseCode();
2981
3679
  init_storage();
2982
- init_keywords();
2983
- init_keywords();
2984
- IMPLEMENTATION_TERMS = [
2985
- "function",
2986
- "method",
2987
- "class",
2988
- "interface",
2989
- "implement",
2990
- "implementation",
2991
- "endpoint",
2992
- "route",
2993
- "handler",
2994
- "controller",
2995
- "module",
2996
- "code"
2997
- ];
2998
- DOCUMENTATION_TERMS = [
2999
- "documentation",
3000
- "docs",
3001
- "guide",
3002
- "tutorial",
3003
- "readme",
3004
- "how",
3005
- "what",
3006
- "why",
3007
- "explain",
3008
- "overview",
3009
- "getting",
3010
- "started",
3011
- "requirements",
3012
- "setup",
3013
- "install",
3014
- "configure",
3015
- "configuration"
3016
- ];
3017
- SOURCE_CODE_EXTENSIONS = [
3018
- ".ts",
3019
- ".tsx",
3020
- ".js",
3021
- ".jsx",
3022
- ".mjs",
3023
- ".cjs",
3024
- ".py",
3025
- ".go",
3026
- ".rs",
3027
- ".java"
3028
- ];
3029
- DOC_EXTENSIONS = [".md", ".txt", ".rst"];
3680
+ MARKDOWN_EXTENSIONS = [".md", ".txt"];
3030
3681
  });
3031
3682
 
3032
3683
  // src/app/indexer/index.ts
3033
3684
  init_config2();
3034
3685
  import { glob } from "glob";
3035
3686
  import * as fs6 from "fs/promises";
3036
- import * as path11 from "path";
3687
+ import * as path14 from "path";
3037
3688
 
3038
3689
  // src/modules/registry.ts
3039
3690
  class ModuleRegistryImpl {
@@ -3058,16 +3709,20 @@ var registry = new ModuleRegistryImpl;
3058
3709
  async function registerBuiltInModules() {
3059
3710
  const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
3060
3711
  const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
3712
+ const { JsonModule: JsonModule2 } = await Promise.resolve().then(() => (init_json(), exports_json));
3713
+ const { MarkdownModule: MarkdownModule2 } = await Promise.resolve().then(() => (init_markdown(), exports_markdown));
3061
3714
  registry.register(new CoreModule2);
3062
3715
  registry.register(new TypeScriptModule2);
3716
+ registry.register(new JsonModule2);
3717
+ registry.register(new MarkdownModule2);
3063
3718
  }
3064
3719
 
3065
3720
  // src/infrastructure/introspection/IntrospectionIndex.ts
3066
- import * as path10 from "path";
3721
+ import * as path13 from "path";
3067
3722
  import * as fs5 from "fs/promises";
3068
3723
 
3069
3724
  // src/infrastructure/introspection/projectDetector.ts
3070
- import * as path9 from "path";
3725
+ import * as path12 from "path";
3071
3726
  import * as fs4 from "fs/promises";
3072
3727
  var MAX_SCAN_DEPTH = 4;
3073
3728
  var SKIP_DIRS = new Set([
@@ -3084,7 +3739,7 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3084
3739
  if (depth > MAX_SCAN_DEPTH)
3085
3740
  return [];
3086
3741
  const results = [];
3087
- const fullDir = currentDir ? path9.join(rootDir, currentDir) : rootDir;
3742
+ const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
3088
3743
  try {
3089
3744
  const entries = await fs4.readdir(fullDir, { withFileTypes: true });
3090
3745
  const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
@@ -3107,10 +3762,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
3107
3762
  }
3108
3763
  async function parsePackageJson(rootDir, relativePath) {
3109
3764
  try {
3110
- const packageJsonPath = path9.join(rootDir, relativePath, "package.json");
3765
+ const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
3111
3766
  const content = await fs4.readFile(packageJsonPath, "utf-8");
3112
3767
  const pkg = JSON.parse(content);
3113
- const name = pkg.name || path9.basename(relativePath);
3768
+ const name = pkg.name || path12.basename(relativePath);
3114
3769
  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
3115
3770
  let type = "unknown";
3116
3771
  if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
@@ -3155,7 +3810,7 @@ async function detectProjectStructure(rootDir) {
3155
3810
  for (const pattern of monorepoPatterns) {
3156
3811
  if (!dirNames.includes(pattern))
3157
3812
  continue;
3158
- const patternDir = path9.join(rootDir, pattern);
3813
+ const patternDir = path12.join(rootDir, pattern);
3159
3814
  try {
3160
3815
  const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
3161
3816
  for (const subDir of subDirs) {
@@ -3186,7 +3841,7 @@ async function detectProjectStructure(rootDir) {
3186
3841
  }
3187
3842
  let rootType = "unknown";
3188
3843
  try {
3189
- const rootPkgPath = path9.join(rootDir, "package.json");
3844
+ const rootPkgPath = path12.join(rootDir, "package.json");
3190
3845
  const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
3191
3846
  if (rootPkg.workspaces)
3192
3847
  isMonorepo = true;
@@ -3227,7 +3882,7 @@ class IntrospectionIndex {
3227
3882
  async initialize() {
3228
3883
  this.structure = await detectProjectStructure(this.rootDir);
3229
3884
  try {
3230
- const configPath = path10.join(this.rootDir, ".raggrep", "config.json");
3885
+ const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
3231
3886
  const configContent = await fs5.readFile(configPath, "utf-8");
3232
3887
  const config = JSON.parse(configContent);
3233
3888
  this.config = config.introspection || {};
@@ -3267,28 +3922,28 @@ class IntrospectionIndex {
3267
3922
  }
3268
3923
  }
3269
3924
  async save(config) {
3270
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
3925
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3271
3926
  await fs5.mkdir(introDir, { recursive: true });
3272
- const projectPath = path10.join(introDir, "_project.json");
3927
+ const projectPath = path13.join(introDir, "_project.json");
3273
3928
  await fs5.writeFile(projectPath, JSON.stringify({
3274
3929
  version: "1.0.0",
3275
3930
  lastUpdated: new Date().toISOString(),
3276
3931
  structure: this.structure
3277
3932
  }, null, 2));
3278
3933
  for (const [filepath, intro] of this.files) {
3279
- const introFilePath = path10.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
3280
- await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
3934
+ const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
3935
+ await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
3281
3936
  await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
3282
3937
  }
3283
3938
  }
3284
3939
  async load(config) {
3285
- const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
3940
+ const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
3286
3941
  try {
3287
- const projectPath = path10.join(introDir, "_project.json");
3942
+ const projectPath = path13.join(introDir, "_project.json");
3288
3943
  const projectContent = await fs5.readFile(projectPath, "utf-8");
3289
3944
  const projectData = JSON.parse(projectContent);
3290
3945
  this.structure = projectData.structure;
3291
- await this.loadFilesRecursive(path10.join(introDir, "files"), "");
3946
+ await this.loadFilesRecursive(path13.join(introDir, "files"), "");
3292
3947
  } catch {
3293
3948
  this.structure = null;
3294
3949
  this.files.clear();
@@ -3298,7 +3953,7 @@ class IntrospectionIndex {
3298
3953
  try {
3299
3954
  const entries = await fs5.readdir(basePath, { withFileTypes: true });
3300
3955
  for (const entry of entries) {
3301
- const entryPath = path10.join(basePath, entry.name);
3956
+ const entryPath = path13.join(basePath, entry.name);
3302
3957
  const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
3303
3958
  if (entry.isDirectory()) {
3304
3959
  await this.loadFilesRecursive(entryPath, relativePath);
@@ -3315,51 +3970,148 @@ class IntrospectionIndex {
3315
3970
  this.structure = null;
3316
3971
  }
3317
3972
  }
3973
+ // src/infrastructure/logger/loggers.ts
3974
+ class ConsoleLogger {
3975
+ verbose;
3976
+ constructor(options) {
3977
+ this.verbose = options?.verbose ?? false;
3978
+ }
3979
+ info(message) {
3980
+ console.log(message);
3981
+ }
3982
+ warn(message) {
3983
+ console.warn(message);
3984
+ }
3985
+ error(message) {
3986
+ console.error(message);
3987
+ }
3988
+ debug(message) {
3989
+ if (this.verbose) {
3990
+ console.log(message);
3991
+ }
3992
+ }
3993
+ progress(message) {
3994
+ console.log(message);
3995
+ }
3996
+ clearProgress() {}
3997
+ }
3998
+
3999
+ class InlineProgressLogger {
4000
+ verbose;
4001
+ lastProgressLength = 0;
4002
+ hasProgress = false;
4003
+ constructor(options) {
4004
+ this.verbose = options?.verbose ?? false;
4005
+ }
4006
+ info(message) {
4007
+ this.clearProgress();
4008
+ console.log(message);
4009
+ }
4010
+ warn(message) {
4011
+ this.clearProgress();
4012
+ console.warn(message);
4013
+ }
4014
+ error(message) {
4015
+ this.clearProgress();
4016
+ console.error(message);
4017
+ }
4018
+ debug(message) {
4019
+ if (this.verbose) {
4020
+ this.clearProgress();
4021
+ console.log(message);
4022
+ }
4023
+ }
4024
+ progress(message) {
4025
+ process.stdout.write(`\r${message}`);
4026
+ const padding = Math.max(0, this.lastProgressLength - message.length);
4027
+ if (padding > 0) {
4028
+ process.stdout.write(" ".repeat(padding));
4029
+ }
4030
+ this.lastProgressLength = message.length;
4031
+ this.hasProgress = true;
4032
+ }
4033
+ clearProgress() {
4034
+ if (this.hasProgress && this.lastProgressLength > 0) {
4035
+ process.stdout.write("\r" + " ".repeat(this.lastProgressLength) + "\r");
4036
+ this.lastProgressLength = 0;
4037
+ this.hasProgress = false;
4038
+ }
4039
+ }
4040
+ }
4041
+
4042
+ class SilentLogger {
4043
+ info() {}
4044
+ warn() {}
4045
+ error() {}
4046
+ debug() {}
4047
+ progress() {}
4048
+ clearProgress() {}
4049
+ }
4050
+ function createLogger(options) {
4051
+ return new ConsoleLogger(options);
4052
+ }
4053
+ function createInlineLogger(options) {
4054
+ return new InlineProgressLogger(options);
4055
+ }
4056
+ function createSilentLogger() {
4057
+ return new SilentLogger;
4058
+ }
3318
4059
  // src/app/indexer/watcher.ts
3319
4060
  import { watch } from "chokidar";
3320
4061
  init_config2();
3321
4062
 
3322
4063
  // src/app/indexer/index.ts
4064
+ async function parallelMap(items, processor, concurrency) {
4065
+ const results = new Array(items.length);
4066
+ let nextIndex = 0;
4067
+ async function worker() {
4068
+ while (nextIndex < items.length) {
4069
+ const index = nextIndex++;
4070
+ const item = items[index];
4071
+ try {
4072
+ const value = await processor(item, index);
4073
+ results[index] = { success: true, value };
4074
+ } catch (error) {
4075
+ results[index] = { success: false, error };
4076
+ }
4077
+ }
4078
+ }
4079
+ const workers = Array(Math.min(concurrency, items.length)).fill(null).map(() => worker());
4080
+ await Promise.all(workers);
4081
+ return results;
4082
+ }
3323
4083
  var INDEX_SCHEMA_VERSION = "1.0.0";
4084
+ var DEFAULT_CONCURRENCY = 4;
3324
4085
  async function indexDirectory(rootDir, options = {}) {
3325
4086
  const verbose = options.verbose ?? false;
3326
4087
  const quiet = options.quiet ?? false;
3327
- rootDir = path11.resolve(rootDir);
4088
+ const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
4089
+ const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
4090
+ rootDir = path14.resolve(rootDir);
3328
4091
  const location = getIndexLocation(rootDir);
3329
- if (!quiet) {
3330
- console.log(`Indexing directory: ${rootDir}`);
3331
- console.log(`Index location: ${location.indexDir}`);
3332
- }
4092
+ logger.info(`Indexing directory: ${rootDir}`);
4093
+ logger.info(`Index location: ${location.indexDir}`);
4094
+ logger.debug(`Concurrency: ${concurrency}`);
3333
4095
  const config = await loadConfig(rootDir);
3334
4096
  const introspection = new IntrospectionIndex(rootDir);
3335
4097
  await introspection.initialize();
3336
- if (verbose) {
3337
- const structure = introspection.getStructure();
3338
- if (structure?.isMonorepo) {
3339
- console.log(`Detected monorepo with ${structure.projects.length} projects`);
3340
- }
4098
+ const structure = introspection.getStructure();
4099
+ if (structure?.isMonorepo) {
4100
+ logger.debug(`Detected monorepo with ${structure.projects.length} projects`);
3341
4101
  }
3342
4102
  await registerBuiltInModules();
3343
4103
  const enabledModules = registry.getEnabled(config);
3344
4104
  if (enabledModules.length === 0) {
3345
- if (!quiet) {
3346
- console.log("No modules enabled. Check your configuration.");
3347
- }
4105
+ logger.info("No modules enabled. Check your configuration.");
3348
4106
  return [];
3349
4107
  }
3350
- if (!quiet) {
3351
- console.log(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
3352
- }
4108
+ logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
3353
4109
  const files = await findFiles(rootDir, config);
3354
- if (!quiet) {
3355
- console.log(`Found ${files.length} files to index`);
3356
- }
4110
+ logger.info(`Found ${files.length} files to index`);
3357
4111
  const results = [];
3358
4112
  for (const module of enabledModules) {
3359
- if (!quiet) {
3360
- console.log(`
4113
+ logger.info(`
3361
4114
  [${module.name}] Starting indexing...`);
3362
- }
3363
4115
  const moduleConfig = getModuleConfig(config, module.id);
3364
4116
  if (module.initialize && moduleConfig) {
3365
4117
  const configWithOverrides = { ...moduleConfig };
@@ -3369,32 +4121,32 @@ async function indexDirectory(rootDir, options = {}) {
3369
4121
  embeddingModel: options.model
3370
4122
  };
3371
4123
  }
4124
+ configWithOverrides.options = {
4125
+ ...configWithOverrides.options,
4126
+ logger
4127
+ };
3372
4128
  await module.initialize(configWithOverrides);
3373
4129
  }
3374
- const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
4130
+ const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
3375
4131
  results.push(result);
3376
4132
  if (module.finalize) {
3377
- if (!quiet) {
3378
- console.log(`[${module.name}] Building secondary indexes...`);
3379
- }
4133
+ logger.info(`[${module.name}] Building secondary indexes...`);
3380
4134
  const ctx = {
3381
4135
  rootDir,
3382
4136
  config,
3383
4137
  readFile: async (filepath) => {
3384
- const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
4138
+ const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
3385
4139
  return fs6.readFile(fullPath, "utf-8");
3386
4140
  },
3387
4141
  getFileStats: async (filepath) => {
3388
- const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
4142
+ const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
3389
4143
  const stats = await fs6.stat(fullPath);
3390
4144
  return { lastModified: stats.mtime.toISOString() };
3391
4145
  }
3392
4146
  };
3393
4147
  await module.finalize(ctx);
3394
4148
  }
3395
- if (!quiet) {
3396
- console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
3397
- }
4149
+ logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
3398
4150
  }
3399
4151
  await introspection.save(config);
3400
4152
  await updateGlobalManifest(rootDir, enabledModules, config);
@@ -3417,28 +4169,37 @@ async function deleteIndex(rootDir) {
3417
4169
  await fs6.rm(indexDir, { recursive: true, force: true });
3418
4170
  } catch {}
3419
4171
  }
4172
+ async function resetIndex(rootDir) {
4173
+ rootDir = path14.resolve(rootDir);
4174
+ const status = await getIndexStatus(rootDir);
4175
+ if (!status.exists) {
4176
+ throw new Error(`No index found for ${rootDir}`);
4177
+ }
4178
+ await deleteIndex(rootDir);
4179
+ return {
4180
+ success: true,
4181
+ indexDir: status.indexDir
4182
+ };
4183
+ }
3420
4184
  async function ensureIndexFresh(rootDir, options = {}) {
3421
4185
  const verbose = options.verbose ?? false;
3422
4186
  const quiet = options.quiet ?? false;
3423
- rootDir = path11.resolve(rootDir);
4187
+ const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
4188
+ rootDir = path14.resolve(rootDir);
3424
4189
  const status = await getIndexStatus(rootDir);
3425
4190
  if (!status.exists) {
3426
- if (!quiet) {
3427
- console.log(`No index found. Creating index...
4191
+ logger.info(`No index found. Creating index...
3428
4192
  `);
3429
- }
3430
- const results = await indexDirectory(rootDir, { ...options, quiet });
4193
+ const results = await indexDirectory(rootDir, { ...options, logger });
3431
4194
  const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
3432
4195
  return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
3433
4196
  }
3434
4197
  const versionCompatible = await isIndexVersionCompatible(rootDir);
3435
4198
  if (!versionCompatible) {
3436
- if (!quiet) {
3437
- console.log(`Index version incompatible. Rebuilding...
4199
+ logger.info(`Index version incompatible. Rebuilding...
3438
4200
  `);
3439
- }
3440
4201
  await deleteIndex(rootDir);
3441
- const results = await indexDirectory(rootDir, { ...options, quiet });
4202
+ const results = await indexDirectory(rootDir, { ...options, logger });
3442
4203
  const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
3443
4204
  return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
3444
4205
  }
@@ -3451,7 +4212,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3451
4212
  const introspection = new IntrospectionIndex(rootDir);
3452
4213
  await introspection.initialize();
3453
4214
  const currentFiles = await findFiles(rootDir, config);
3454
- const currentFileSet = new Set(currentFiles.map((f) => path11.relative(rootDir, f)));
4215
+ const currentFileSet = new Set(currentFiles.map((f) => path14.relative(rootDir, f)));
3455
4216
  let totalIndexed = 0;
3456
4217
  let totalRemoved = 0;
3457
4218
  let totalUnchanged = 0;
@@ -3465,6 +4226,10 @@ async function ensureIndexFresh(rootDir, options = {}) {
3465
4226
  embeddingModel: options.model
3466
4227
  };
3467
4228
  }
4229
+ configWithOverrides.options = {
4230
+ ...configWithOverrides.options,
4231
+ logger
4232
+ };
3468
4233
  await module.initialize(configWithOverrides);
3469
4234
  }
3470
4235
  const manifest = await loadModuleManifest(rootDir, module.id, config);
@@ -3476,14 +4241,12 @@ async function ensureIndexFresh(rootDir, options = {}) {
3476
4241
  }
3477
4242
  }
3478
4243
  for (const filepath of filesToRemove) {
3479
- if (verbose) {
3480
- console.log(` Removing stale: ${filepath}`);
3481
- }
3482
- const indexFilePath = path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4244
+ logger.debug(` Removing stale: ${filepath}`);
4245
+ const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3483
4246
  try {
3484
4247
  await fs6.unlink(indexFilePath);
3485
4248
  } catch {}
3486
- const symbolicFilePath = path11.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4249
+ const symbolicFilePath = path14.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3487
4250
  try {
3488
4251
  await fs6.unlink(symbolicFilePath);
3489
4252
  } catch {}
@@ -3494,18 +4257,21 @@ async function ensureIndexFresh(rootDir, options = {}) {
3494
4257
  rootDir,
3495
4258
  config,
3496
4259
  readFile: async (filepath) => {
3497
- const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
4260
+ const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
3498
4261
  return fs6.readFile(fullPath, "utf-8");
3499
4262
  },
3500
4263
  getFileStats: async (filepath) => {
3501
- const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
4264
+ const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
3502
4265
  const stats = await fs6.stat(fullPath);
3503
4266
  return { lastModified: stats.mtime.toISOString() };
3504
4267
  },
3505
4268
  getIntrospection: (filepath) => introspection.getFile(filepath)
3506
4269
  };
3507
- for (const filepath of currentFiles) {
3508
- const relativePath = path11.relative(rootDir, filepath);
4270
+ const totalFiles = currentFiles.length;
4271
+ for (let i = 0;i < currentFiles.length; i++) {
4272
+ const filepath = currentFiles[i];
4273
+ const relativePath = path14.relative(rootDir, filepath);
4274
+ const progress = `[${i + 1}/${totalFiles}]`;
3509
4275
  try {
3510
4276
  const stats = await fs6.stat(filepath);
3511
4277
  const lastModified = stats.mtime.toISOString();
@@ -3514,9 +4280,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3514
4280
  totalUnchanged++;
3515
4281
  continue;
3516
4282
  }
3517
- if (verbose) {
3518
- console.log(` Indexing: ${relativePath}`);
3519
- }
4283
+ logger.progress(` ${progress} Indexing: ${relativePath}`);
3520
4284
  const content = await fs6.readFile(filepath, "utf-8");
3521
4285
  introspection.addFile(relativePath, content);
3522
4286
  const fileIndex = await module.indexFile(relativePath, content, ctx);
@@ -3529,11 +4293,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
3529
4293
  totalIndexed++;
3530
4294
  }
3531
4295
  } catch (error) {
3532
- if (verbose) {
3533
- console.error(` Error indexing ${relativePath}:`, error);
3534
- }
4296
+ logger.clearProgress();
4297
+ logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
3535
4298
  }
3536
4299
  }
4300
+ logger.clearProgress();
3537
4301
  if (totalIndexed > 0 || totalRemoved > 0) {
3538
4302
  manifest.lastUpdated = new Date().toISOString();
3539
4303
  await writeModuleManifest(rootDir, module.id, manifest, config);
@@ -3557,7 +4321,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
3557
4321
  unchanged: totalUnchanged
3558
4322
  };
3559
4323
  }
3560
- async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
4324
+ async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
3561
4325
  const result = {
3562
4326
  moduleId: module.id,
3563
4327
  indexed: 0,
@@ -3565,55 +4329,102 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
3565
4329
  errors: 0
3566
4330
  };
3567
4331
  const manifest = await loadModuleManifest(rootDir, module.id, config);
4332
+ const indexPath = getModuleIndexPath(rootDir, module.id, config);
4333
+ const currentFileSet = new Set(files.map((f) => path14.relative(rootDir, f)));
4334
+ const filesToRemove = [];
4335
+ for (const filepath of Object.keys(manifest.files)) {
4336
+ if (!currentFileSet.has(filepath)) {
4337
+ filesToRemove.push(filepath);
4338
+ }
4339
+ }
4340
+ if (filesToRemove.length > 0) {
4341
+ logger.info(` Removing ${filesToRemove.length} stale entries...`);
4342
+ for (const filepath of filesToRemove) {
4343
+ logger.debug(` Removing: ${filepath}`);
4344
+ const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4345
+ try {
4346
+ await fs6.unlink(indexFilePath);
4347
+ } catch {}
4348
+ const symbolicFilePath = path14.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
4349
+ try {
4350
+ await fs6.unlink(symbolicFilePath);
4351
+ } catch {}
4352
+ delete manifest.files[filepath];
4353
+ }
4354
+ await cleanupEmptyDirectories(indexPath);
4355
+ }
3568
4356
  const ctx = {
3569
4357
  rootDir,
3570
4358
  config,
3571
4359
  readFile: async (filepath) => {
3572
- const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
4360
+ const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
3573
4361
  return fs6.readFile(fullPath, "utf-8");
3574
4362
  },
3575
4363
  getFileStats: async (filepath) => {
3576
- const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
4364
+ const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
3577
4365
  const stats = await fs6.stat(fullPath);
3578
4366
  return { lastModified: stats.mtime.toISOString() };
3579
4367
  },
3580
4368
  getIntrospection: (filepath) => introspection.getFile(filepath)
3581
4369
  };
3582
- for (const filepath of files) {
3583
- const relativePath = path11.relative(rootDir, filepath);
4370
+ const totalFiles = files.length;
4371
+ let completedCount = 0;
4372
+ const processFile = async (filepath, _index) => {
4373
+ const relativePath = path14.relative(rootDir, filepath);
3584
4374
  try {
3585
4375
  const stats = await fs6.stat(filepath);
3586
4376
  const lastModified = stats.mtime.toISOString();
3587
4377
  const existingEntry = manifest.files[relativePath];
3588
4378
  if (existingEntry && existingEntry.lastModified === lastModified) {
3589
- if (verbose) {
3590
- console.log(` Skipped ${relativePath} (unchanged)`);
3591
- }
3592
- result.skipped++;
3593
- continue;
4379
+ completedCount++;
4380
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
4381
+ return { relativePath, status: "skipped" };
3594
4382
  }
3595
4383
  const content = await fs6.readFile(filepath, "utf-8");
3596
4384
  introspection.addFile(relativePath, content);
3597
- if (verbose) {
3598
- console.log(` Processing ${relativePath}...`);
3599
- }
4385
+ completedCount++;
4386
+ logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
3600
4387
  const fileIndex = await module.indexFile(relativePath, content, ctx);
3601
4388
  if (!fileIndex) {
3602
- if (verbose) {
3603
- console.log(` Skipped ${relativePath} (no chunks)`);
3604
- }
3605
- result.skipped++;
3606
- continue;
4389
+ logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
4390
+ return { relativePath, status: "skipped" };
3607
4391
  }
3608
4392
  await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
3609
- manifest.files[relativePath] = {
4393
+ return {
4394
+ relativePath,
4395
+ status: "indexed",
3610
4396
  lastModified,
3611
4397
  chunkCount: fileIndex.chunks.length
3612
4398
  };
3613
- result.indexed++;
3614
4399
  } catch (error) {
3615
- console.error(` Error indexing ${relativePath}:`, error);
4400
+ completedCount++;
4401
+ return { relativePath, status: "error", error };
4402
+ }
4403
+ };
4404
+ logger.debug(` Using concurrency: ${concurrency}`);
4405
+ const results = await parallelMap(files, processFile, concurrency);
4406
+ logger.clearProgress();
4407
+ for (const item of results) {
4408
+ if (!item.success) {
3616
4409
  result.errors++;
4410
+ continue;
4411
+ }
4412
+ const fileResult = item.value;
4413
+ switch (fileResult.status) {
4414
+ case "indexed":
4415
+ manifest.files[fileResult.relativePath] = {
4416
+ lastModified: fileResult.lastModified,
4417
+ chunkCount: fileResult.chunkCount
4418
+ };
4419
+ result.indexed++;
4420
+ break;
4421
+ case "skipped":
4422
+ result.skipped++;
4423
+ break;
4424
+ case "error":
4425
+ logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
4426
+ result.errors++;
4427
+ break;
3617
4428
  }
3618
4429
  }
3619
4430
  manifest.lastUpdated = new Date().toISOString();
@@ -3650,13 +4461,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
3650
4461
  }
3651
4462
  async function writeModuleManifest(rootDir, moduleId, manifest, config) {
3652
4463
  const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
3653
- await fs6.mkdir(path11.dirname(manifestPath), { recursive: true });
4464
+ await fs6.mkdir(path14.dirname(manifestPath), { recursive: true });
3654
4465
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
3655
4466
  }
3656
4467
  async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
3657
4468
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
3658
- const indexFilePath = path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3659
- await fs6.mkdir(path11.dirname(indexFilePath), { recursive: true });
4469
+ const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4470
+ await fs6.mkdir(path14.dirname(indexFilePath), { recursive: true });
3660
4471
  await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
3661
4472
  }
3662
4473
  async function updateGlobalManifest(rootDir, modules, config) {
@@ -3666,31 +4477,32 @@ async function updateGlobalManifest(rootDir, modules, config) {
3666
4477
  lastUpdated: new Date().toISOString(),
3667
4478
  modules: modules.map((m) => m.id)
3668
4479
  };
3669
- await fs6.mkdir(path11.dirname(manifestPath), { recursive: true });
4480
+ await fs6.mkdir(path14.dirname(manifestPath), { recursive: true });
3670
4481
  await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
3671
4482
  }
3672
4483
  async function cleanupIndex(rootDir, options = {}) {
3673
4484
  const verbose = options.verbose ?? false;
3674
- rootDir = path11.resolve(rootDir);
3675
- console.log(`Cleaning up index in: ${rootDir}`);
4485
+ const logger = options.logger ?? createLogger({ verbose });
4486
+ rootDir = path14.resolve(rootDir);
4487
+ logger.info(`Cleaning up index in: ${rootDir}`);
3676
4488
  const config = await loadConfig(rootDir);
3677
4489
  await registerBuiltInModules();
3678
4490
  const enabledModules = registry.getEnabled(config);
3679
4491
  if (enabledModules.length === 0) {
3680
- console.log("No modules enabled.");
4492
+ logger.info("No modules enabled.");
3681
4493
  return [];
3682
4494
  }
3683
4495
  const results = [];
3684
4496
  for (const module of enabledModules) {
3685
- console.log(`
4497
+ logger.info(`
3686
4498
  [${module.name}] Checking for stale entries...`);
3687
- const result = await cleanupModuleIndex(rootDir, module.id, config, verbose);
4499
+ const result = await cleanupModuleIndex(rootDir, module.id, config, logger);
3688
4500
  results.push(result);
3689
- console.log(`[${module.name}] Removed ${result.removed} stale entries, kept ${result.kept} valid entries`);
4501
+ logger.info(`[${module.name}] Removed ${result.removed} stale entries, kept ${result.kept} valid entries`);
3690
4502
  }
3691
4503
  return results;
3692
4504
  }
3693
- async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
4505
+ async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
3694
4506
  const result = {
3695
4507
  moduleId,
3696
4508
  removed: 0,
@@ -3701,7 +4513,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
3701
4513
  const filesToRemove = [];
3702
4514
  const updatedFiles = {};
3703
4515
  for (const [filepath, entry] of Object.entries(manifest.files)) {
3704
- const fullPath = path11.join(rootDir, filepath);
4516
+ const fullPath = path14.join(rootDir, filepath);
3705
4517
  try {
3706
4518
  await fs6.access(fullPath);
3707
4519
  updatedFiles[filepath] = entry;
@@ -3709,13 +4521,11 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
3709
4521
  } catch {
3710
4522
  filesToRemove.push(filepath);
3711
4523
  result.removed++;
3712
- if (verbose) {
3713
- console.log(` Removing stale entry: ${filepath}`);
3714
- }
4524
+ logger.debug(` Removing stale entry: ${filepath}`);
3715
4525
  }
3716
4526
  }
3717
4527
  for (const filepath of filesToRemove) {
3718
- const indexFilePath = path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
4528
+ const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3719
4529
  try {
3720
4530
  await fs6.unlink(indexFilePath);
3721
4531
  } catch {}
@@ -3731,7 +4541,7 @@ async function cleanupEmptyDirectories(dir) {
3731
4541
  const entries = await fs6.readdir(dir, { withFileTypes: true });
3732
4542
  for (const entry of entries) {
3733
4543
  if (entry.isDirectory()) {
3734
- const subDir = path11.join(dir, entry.name);
4544
+ const subDir = path14.join(dir, entry.name);
3735
4545
  await cleanupEmptyDirectories(subDir);
3736
4546
  }
3737
4547
  }
@@ -3746,7 +4556,7 @@ async function cleanupEmptyDirectories(dir) {
3746
4556
  }
3747
4557
  }
3748
4558
  async function getIndexStatus(rootDir) {
3749
- rootDir = path11.resolve(rootDir);
4559
+ rootDir = path14.resolve(rootDir);
3750
4560
  const config = await loadConfig(rootDir);
3751
4561
  const location = getIndexLocation(rootDir);
3752
4562
  const indexDir = location.indexDir;
@@ -3782,7 +4592,7 @@ async function getIndexStatus(rootDir) {
3782
4592
  }
3783
4593
  } catch {
3784
4594
  try {
3785
- const entries = await fs6.readdir(path11.join(indexDir, "index"));
4595
+ const entries = await fs6.readdir(path14.join(indexDir, "index"));
3786
4596
  if (entries.length > 0) {
3787
4597
  status.exists = true;
3788
4598
  for (const entry of entries) {
@@ -3805,7 +4615,7 @@ async function getIndexStatus(rootDir) {
3805
4615
 
3806
4616
  // src/app/search/index.ts
3807
4617
  import * as fs7 from "fs/promises";
3808
- import * as path12 from "path";
4618
+ import * as path15 from "path";
3809
4619
 
3810
4620
  // src/types.ts
3811
4621
  init_entities();
@@ -3813,7 +4623,7 @@ init_entities();
3813
4623
  // src/app/search/index.ts
3814
4624
  init_config2();
3815
4625
  async function search(rootDir, query, options = {}) {
3816
- rootDir = path12.resolve(rootDir);
4626
+ rootDir = path15.resolve(rootDir);
3817
4627
  const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
3818
4628
  if (ensureFresh) {
3819
4629
  await ensureIndexFresh(rootDir, { quiet: true });
@@ -3858,7 +4668,7 @@ function createSearchContext(rootDir, moduleId, config) {
3858
4668
  config,
3859
4669
  loadFileIndex: async (filepath) => {
3860
4670
  const hasExtension = /\.[^./]+$/.test(filepath);
3861
- const indexFilePath = hasExtension ? path12.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path12.join(indexPath, filepath + ".json");
4671
+ const indexFilePath = hasExtension ? path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path15.join(indexPath, filepath + ".json");
3862
4672
  try {
3863
4673
  const content = await fs7.readFile(indexFilePath, "utf-8");
3864
4674
  return JSON.parse(content);
@@ -3870,7 +4680,7 @@ function createSearchContext(rootDir, moduleId, config) {
3870
4680
  const files = [];
3871
4681
  await traverseDirectory(indexPath, files, indexPath);
3872
4682
  return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
3873
- const relative3 = path12.relative(indexPath, f);
4683
+ const relative3 = path15.relative(indexPath, f);
3874
4684
  return relative3.replace(/\.json$/, "");
3875
4685
  });
3876
4686
  }
@@ -3880,7 +4690,7 @@ async function traverseDirectory(dir, files, basePath) {
3880
4690
  try {
3881
4691
  const entries = await fs7.readdir(dir, { withFileTypes: true });
3882
4692
  for (const entry of entries) {
3883
- const fullPath = path12.join(dir, entry.name);
4693
+ const fullPath = path15.join(dir, entry.name);
3884
4694
  if (entry.isDirectory()) {
3885
4695
  await traverseDirectory(fullPath, files, basePath);
3886
4696
  } else if (entry.isFile()) {
@@ -3956,19 +4766,30 @@ async function search2(directory, query, options = {}) {
3956
4766
  async function cleanup(directory, options = {}) {
3957
4767
  return cleanupIndex(directory, options);
3958
4768
  }
4769
+ async function reset(directory) {
4770
+ return resetIndex(directory);
4771
+ }
3959
4772
  var raggrep = {
3960
4773
  index,
3961
4774
  search: search2,
3962
4775
  cleanup,
4776
+ reset,
3963
4777
  formatSearchResults
3964
4778
  };
3965
4779
  var src_default = raggrep;
3966
4780
  export {
3967
4781
  search2 as search,
4782
+ reset,
3968
4783
  index,
3969
4784
  formatSearchResults,
3970
4785
  src_default as default,
3971
- cleanup
4786
+ createSilentLogger,
4787
+ createLogger,
4788
+ createInlineLogger,
4789
+ cleanup,
4790
+ SilentLogger,
4791
+ InlineProgressLogger,
4792
+ ConsoleLogger
3972
4793
  };
3973
4794
 
3974
- //# debugId=25853E0D892AD2D964756E2164756E21
4795
+ //# debugId=984F0AA3FD08D5A664756E2164756E21