raggrep 0.1.3 → 0.1.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/{indexer → app/indexer}/index.d.ts +1 -1
  2. package/dist/{search → app/search}/index.d.ts +1 -1
  3. package/dist/cli/main.js +1364 -220
  4. package/dist/cli/main.js.map +25 -15
  5. package/dist/composition.d.ts +7 -7
  6. package/dist/domain/entities/chunk.d.ts +1 -1
  7. package/dist/domain/entities/fileIndex.d.ts +1 -1
  8. package/dist/domain/entities/index.d.ts +1 -1
  9. package/dist/domain/entities/searchResult.d.ts +47 -2
  10. package/dist/domain/index.d.ts +5 -3
  11. package/dist/domain/ports/embedding.d.ts +1 -5
  12. package/dist/domain/ports/index.d.ts +3 -4
  13. package/dist/domain/services/bm25.d.ts +24 -0
  14. package/dist/domain/services/index.d.ts +3 -2
  15. package/dist/domain/services/similarity.d.ts +23 -0
  16. package/dist/{application → domain}/usecases/cleanupIndex.d.ts +2 -2
  17. package/dist/{application → domain}/usecases/indexDirectory.d.ts +2 -2
  18. package/dist/{application → domain}/usecases/searchIndex.d.ts +2 -2
  19. package/dist/index.d.ts +5 -5
  20. package/dist/index.js +1305 -239
  21. package/dist/index.js.map +25 -15
  22. package/dist/{utils/config.d.ts → infrastructure/config/configLoader.d.ts} +7 -4
  23. package/dist/infrastructure/config/index.d.ts +6 -0
  24. package/dist/infrastructure/embeddings/index.d.ts +3 -1
  25. package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +16 -0
  26. package/dist/infrastructure/index.d.ts +4 -3
  27. package/dist/infrastructure/storage/index.d.ts +4 -1
  28. package/dist/{utils/tieredIndex.d.ts → infrastructure/storage/symbolicIndex.d.ts} +7 -18
  29. package/dist/introspection/fileIntrospector.d.ts +14 -0
  30. package/dist/introspection/index.d.ts +68 -0
  31. package/dist/introspection/introspection.test.d.ts +4 -0
  32. package/dist/introspection/projectDetector.d.ts +27 -0
  33. package/dist/introspection/types.d.ts +70 -0
  34. package/dist/modules/core/index.d.ts +69 -0
  35. package/dist/modules/core/symbols.d.ts +27 -0
  36. package/dist/modules/core/symbols.test.d.ts +4 -0
  37. package/dist/modules/{semantic → language/typescript}/index.d.ts +11 -12
  38. package/dist/types.d.ts +4 -1
  39. package/package.json +5 -5
  40. package/dist/application/index.d.ts +0 -7
  41. package/dist/utils/bm25.d.ts +0 -9
  42. package/dist/utils/embeddings.d.ts +0 -46
  43. /package/dist/{cli → app/cli}/main.d.ts +0 -0
  44. /package/dist/{indexer → app/indexer}/watcher.d.ts +0 -0
  45. /package/dist/{application → domain}/usecases/index.d.ts +0 -0
  46. /package/dist/{utils → infrastructure/embeddings}/embeddings.test.d.ts +0 -0
  47. /package/dist/modules/{semantic → language/typescript}/parseCode.d.ts +0 -0
  48. /package/dist/modules/{semantic → language/typescript}/parseCode.test.d.ts +0 -0
package/dist/cli/main.js CHANGED
@@ -11,117 +11,144 @@ var __export = (target, all) => {
11
11
  };
12
12
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
13
13
 
14
- // src/utils/embeddings.ts
14
+ // src/infrastructure/embeddings/transformersEmbedding.ts
15
15
  import { pipeline, env } from "@xenova/transformers";
16
16
  import * as path from "path";
17
17
  import * as os from "os";
18
- function configureEmbeddings(config) {
19
- const newConfig = { ...currentConfig, ...config };
20
- if (newConfig.model !== currentConfig.model) {
21
- embeddingPipeline = null;
22
- currentModelName = null;
23
- }
24
- currentConfig = newConfig;
25
- }
26
- async function initializePipeline() {
27
- if (embeddingPipeline && currentModelName === currentConfig.model) {
28
- return;
18
+
19
+ class TransformersEmbeddingProvider {
20
+ pipeline = null;
21
+ config;
22
+ isInitializing = false;
23
+ initPromise = null;
24
+ constructor(config) {
25
+ this.config = {
26
+ model: config?.model ?? "all-MiniLM-L6-v2",
27
+ showProgress: config?.showProgress ?? true
28
+ };
29
29
  }
30
- if (isInitializing && initPromise) {
31
- return initPromise;
30
+ async initialize(config) {
31
+ if (config) {
32
+ if (config.model !== this.config.model) {
33
+ this.pipeline = null;
34
+ }
35
+ this.config = { ...this.config, ...config };
36
+ }
37
+ await this.ensurePipeline();
32
38
  }
33
- isInitializing = true;
34
- initPromise = (async () => {
35
- const modelId = EMBEDDING_MODELS[currentConfig.model];
36
- if (currentConfig.showProgress) {
37
- console.log(`
38
- Loading embedding model: ${currentConfig.model}`);
39
- console.log(` Cache: ${CACHE_DIR}`);
39
+ async ensurePipeline() {
40
+ if (this.pipeline) {
41
+ return;
40
42
  }
41
- try {
42
- embeddingPipeline = await pipeline("feature-extraction", modelId, {
43
- progress_callback: currentConfig.showProgress ? (progress) => {
44
- if (progress.status === "progress" && progress.file) {
45
- const pct = progress.progress ? Math.round(progress.progress) : 0;
46
- process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
47
- } else if (progress.status === "done" && progress.file) {
48
- process.stdout.write(`\r Downloaded ${progress.file}
43
+ if (this.isInitializing && this.initPromise) {
44
+ return this.initPromise;
45
+ }
46
+ this.isInitializing = true;
47
+ this.initPromise = (async () => {
48
+ const modelId = EMBEDDING_MODELS[this.config.model];
49
+ if (this.config.showProgress) {
50
+ console.log(`
51
+ Loading embedding model: ${this.config.model}`);
52
+ console.log(` Cache: ${CACHE_DIR}`);
53
+ }
54
+ try {
55
+ this.pipeline = await pipeline("feature-extraction", modelId, {
56
+ progress_callback: this.config.showProgress ? (progress) => {
57
+ if (progress.status === "progress" && progress.file) {
58
+ const pct = progress.progress ? Math.round(progress.progress) : 0;
59
+ process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
60
+ } else if (progress.status === "done" && progress.file) {
61
+ process.stdout.write(`\r Downloaded ${progress.file}
49
62
  `);
50
- } else if (progress.status === "ready") {}
51
- } : undefined
52
- });
53
- currentModelName = currentConfig.model;
54
- if (currentConfig.showProgress) {
55
- console.log(` Model ready.
63
+ }
64
+ } : undefined
65
+ });
66
+ if (this.config.showProgress) {
67
+ console.log(` Model ready.
56
68
  `);
69
+ }
70
+ } catch (error) {
71
+ this.pipeline = null;
72
+ throw new Error(`Failed to load embedding model: ${error}`);
73
+ } finally {
74
+ this.isInitializing = false;
75
+ this.initPromise = null;
57
76
  }
58
- } catch (error) {
59
- embeddingPipeline = null;
60
- currentModelName = null;
61
- throw new Error(`Failed to load embedding model: ${error}`);
62
- } finally {
63
- isInitializing = false;
64
- initPromise = null;
77
+ })();
78
+ return this.initPromise;
79
+ }
80
+ async getEmbedding(text) {
81
+ await this.ensurePipeline();
82
+ if (!this.pipeline) {
83
+ throw new Error("Embedding pipeline not initialized");
65
84
  }
66
- })();
67
- return initPromise;
68
- }
69
- async function getEmbedding(text) {
70
- await initializePipeline();
71
- if (!embeddingPipeline) {
72
- throw new Error("Embedding pipeline not initialized");
85
+ const output = await this.pipeline(text, {
86
+ pooling: "mean",
87
+ normalize: true
88
+ });
89
+ return Array.from(output.data);
73
90
  }
74
- const output = await embeddingPipeline(text, {
75
- pooling: "mean",
76
- normalize: true
77
- });
78
- return Array.from(output.data);
79
- }
80
- async function getEmbeddings(texts) {
81
- if (texts.length === 0)
82
- return [];
83
- await initializePipeline();
84
- if (!embeddingPipeline) {
85
- throw new Error("Embedding pipeline not initialized");
91
+ async getEmbeddings(texts) {
92
+ if (texts.length === 0)
93
+ return [];
94
+ await this.ensurePipeline();
95
+ if (!this.pipeline) {
96
+ throw new Error("Embedding pipeline not initialized");
97
+ }
98
+ const results = [];
99
+ for (let i = 0;i < texts.length; i += BATCH_SIZE) {
100
+ const batch = texts.slice(i, i + BATCH_SIZE);
101
+ const outputs = await Promise.all(batch.map(async (text) => {
102
+ const output = await this.pipeline(text, {
103
+ pooling: "mean",
104
+ normalize: true
105
+ });
106
+ return Array.from(output.data);
107
+ }));
108
+ results.push(...outputs);
109
+ }
110
+ return results;
86
111
  }
87
- const results = [];
88
- for (let i = 0;i < texts.length; i += BATCH_SIZE) {
89
- const batch = texts.slice(i, i + BATCH_SIZE);
90
- const outputs = await Promise.all(batch.map(async (text) => {
91
- const output = await embeddingPipeline(text, {
92
- pooling: "mean",
93
- normalize: true
94
- });
95
- return Array.from(output.data);
96
- }));
97
- results.push(...outputs);
112
+ getDimension() {
113
+ return EMBEDDING_DIMENSION;
98
114
  }
99
- return results;
100
- }
101
- function cosineSimilarity(a, b) {
102
- if (a.length !== b.length) {
103
- throw new Error("Vectors must have the same length");
115
+ getModelName() {
116
+ return this.config.model;
104
117
  }
105
- let dotProduct = 0;
106
- let normA = 0;
107
- let normB = 0;
108
- for (let i = 0;i < a.length; i++) {
109
- dotProduct += a[i] * b[i];
110
- normA += a[i] * a[i];
111
- normB += b[i] * b[i];
118
+ async dispose() {
119
+ this.pipeline = null;
112
120
  }
113
- if (normA === 0 || normB === 0)
114
- return 0;
115
- return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
116
- }
117
- function getEmbeddingConfig() {
118
- return { ...currentConfig };
119
121
  }
120
122
  function getCacheDir() {
121
123
  return CACHE_DIR;
122
124
  }
123
- var CACHE_DIR, EMBEDDING_MODELS, embeddingPipeline = null, currentModelName = null, isInitializing = false, initPromise = null, DEFAULT_CONFIG, currentConfig, BATCH_SIZE = 32;
124
- var init_embeddings = __esm(() => {
125
+ function configureEmbeddings(config) {
126
+ const newConfig = { ...globalConfig, ...config };
127
+ if (newConfig.model !== globalConfig.model) {
128
+ globalProvider = null;
129
+ }
130
+ globalConfig = newConfig;
131
+ }
132
+ function getEmbeddingConfig() {
133
+ return { ...globalConfig };
134
+ }
135
+ async function ensureGlobalProvider() {
136
+ if (!globalProvider) {
137
+ globalProvider = new TransformersEmbeddingProvider(globalConfig);
138
+ await globalProvider.initialize();
139
+ }
140
+ return globalProvider;
141
+ }
142
+ async function getEmbedding(text) {
143
+ const provider = await ensureGlobalProvider();
144
+ return provider.getEmbedding(text);
145
+ }
146
+ async function getEmbeddings(texts) {
147
+ const provider = await ensureGlobalProvider();
148
+ return provider.getEmbeddings(texts);
149
+ }
150
+ var CACHE_DIR, EMBEDDING_MODELS, EMBEDDING_DIMENSION = 384, BATCH_SIZE = 32, globalProvider = null, globalConfig;
151
+ var init_transformersEmbedding = __esm(() => {
125
152
  CACHE_DIR = path.join(os.homedir(), ".cache", "raggrep", "models");
126
153
  env.cacheDir = CACHE_DIR;
127
154
  env.allowLocalModels = true;
@@ -131,11 +158,15 @@ var init_embeddings = __esm(() => {
131
158
  "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
132
159
  "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
133
160
  };
134
- DEFAULT_CONFIG = {
161
+ globalConfig = {
135
162
  model: "all-MiniLM-L6-v2",
136
163
  showProgress: true
137
164
  };
138
- currentConfig = { ...DEFAULT_CONFIG };
165
+ });
166
+
167
+ // src/infrastructure/embeddings/index.ts
168
+ var init_embeddings = __esm(() => {
169
+ init_transformersEmbedding();
139
170
  });
140
171
  // src/domain/entities/searchResult.ts
141
172
  var init_searchResult = () => {};
@@ -149,7 +180,12 @@ function createDefaultConfig() {
149
180
  ignorePaths: DEFAULT_IGNORE_PATHS,
150
181
  modules: [
151
182
  {
152
- id: "semantic",
183
+ id: "core",
184
+ enabled: true,
185
+ options: {}
186
+ },
187
+ {
188
+ id: "language/typescript",
153
189
  enabled: true,
154
190
  options: {
155
191
  embeddingModel: "all-MiniLM-L6-v2"
@@ -209,32 +245,32 @@ var init_entities = __esm(() => {
209
245
  init_config();
210
246
  });
211
247
 
212
- // src/utils/config.ts
248
+ // src/infrastructure/config/configLoader.ts
213
249
  import * as path2 from "path";
214
250
  import * as fs from "fs/promises";
215
- function getRaggrepDir(rootDir, config = DEFAULT_CONFIG2) {
251
+ function getRaggrepDir(rootDir, config = DEFAULT_CONFIG) {
216
252
  return path2.join(rootDir, config.indexDir);
217
253
  }
218
- function getModuleIndexPath(rootDir, moduleId, config = DEFAULT_CONFIG2) {
254
+ function getModuleIndexPath(rootDir, moduleId, config = DEFAULT_CONFIG) {
219
255
  return path2.join(rootDir, config.indexDir, "index", moduleId);
220
256
  }
221
- function getModuleManifestPath(rootDir, moduleId, config = DEFAULT_CONFIG2) {
257
+ function getModuleManifestPath(rootDir, moduleId, config = DEFAULT_CONFIG) {
222
258
  return path2.join(rootDir, config.indexDir, "index", moduleId, "manifest.json");
223
259
  }
224
- function getGlobalManifestPath(rootDir, config = DEFAULT_CONFIG2) {
260
+ function getGlobalManifestPath(rootDir, config = DEFAULT_CONFIG) {
225
261
  return path2.join(rootDir, config.indexDir, "manifest.json");
226
262
  }
227
- function getConfigPath(rootDir, config = DEFAULT_CONFIG2) {
263
+ function getConfigPath(rootDir, config = DEFAULT_CONFIG) {
228
264
  return path2.join(rootDir, config.indexDir, "config.json");
229
265
  }
230
266
  async function loadConfig(rootDir) {
231
- const configPath = getConfigPath(rootDir, DEFAULT_CONFIG2);
267
+ const configPath = getConfigPath(rootDir, DEFAULT_CONFIG);
232
268
  try {
233
269
  const content = await fs.readFile(configPath, "utf-8");
234
270
  const savedConfig = JSON.parse(content);
235
- return { ...DEFAULT_CONFIG2, ...savedConfig };
271
+ return { ...DEFAULT_CONFIG, ...savedConfig };
236
272
  } catch {
237
- return DEFAULT_CONFIG2;
273
+ return DEFAULT_CONFIG;
238
274
  }
239
275
  }
240
276
  function getModuleConfig(config, moduleId) {
@@ -243,7 +279,7 @@ function getModuleConfig(config, moduleId) {
243
279
  function getEmbeddingConfigFromModule(moduleConfig) {
244
280
  const options = moduleConfig.options || {};
245
281
  const modelName = options.embeddingModel || "all-MiniLM-L6-v2";
246
- if (!(modelName in EMBEDDING_MODELS)) {
282
+ if (!(modelName in EMBEDDING_MODELS2)) {
247
283
  console.warn(`Unknown embedding model: ${modelName}, falling back to all-MiniLM-L6-v2`);
248
284
  return { model: "all-MiniLM-L6-v2" };
249
285
  }
@@ -252,11 +288,21 @@ function getEmbeddingConfigFromModule(moduleConfig) {
252
288
  showProgress: options.showProgress !== false
253
289
  };
254
290
  }
255
- var DEFAULT_CONFIG2;
256
- var init_config2 = __esm(() => {
291
+ var DEFAULT_CONFIG, EMBEDDING_MODELS2;
292
+ var init_configLoader = __esm(() => {
257
293
  init_entities();
258
- init_embeddings();
259
- DEFAULT_CONFIG2 = createDefaultConfig();
294
+ DEFAULT_CONFIG = createDefaultConfig();
295
+ EMBEDDING_MODELS2 = {
296
+ "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
297
+ "all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
298
+ "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
299
+ "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
300
+ };
301
+ });
302
+
303
+ // src/infrastructure/config/index.ts
304
+ var init_config2 = __esm(() => {
305
+ init_configLoader();
260
306
  });
261
307
 
262
308
  // src/domain/services/bm25.ts
@@ -330,16 +376,449 @@ class BM25Index {
330
376
  this.avgDocLength = 0;
331
377
  this.totalDocs = 0;
332
378
  }
379
+ addDocument(id, tokens) {
380
+ this.addDocuments([{ id, content: "", tokens }]);
381
+ }
382
+ serialize() {
383
+ const documents = {};
384
+ for (const [id, { tokens }] of this.documents) {
385
+ documents[id] = tokens;
386
+ }
387
+ return {
388
+ documents,
389
+ avgDocLength: this.avgDocLength,
390
+ documentFrequencies: Object.fromEntries(this.documentFrequencies),
391
+ totalDocs: this.totalDocs
392
+ };
393
+ }
394
+ static deserialize(data) {
395
+ const index = new BM25Index;
396
+ index.avgDocLength = data.avgDocLength;
397
+ index.totalDocs = data.totalDocs;
398
+ index.documentFrequencies = new Map(Object.entries(data.documentFrequencies));
399
+ for (const [id, tokens] of Object.entries(data.documents)) {
400
+ index.documents.set(id, { content: "", tokens });
401
+ }
402
+ return index;
403
+ }
333
404
  }
334
405
  function normalizeScore(score, midpoint = 5) {
335
406
  return 1 / (1 + Math.exp(-score / midpoint + 1));
336
407
  }
337
408
  var BM25_K1 = 1.5, BM25_B = 0.75;
338
409
 
339
- // src/utils/bm25.ts
340
- var init_bm25 = () => {};
410
+ // src/modules/core/symbols.ts
411
+ function extractSymbols(content) {
412
+ const symbols = [];
413
+ const seenSymbols = new Set;
414
+ const lines = content.split(`
415
+ `);
416
+ for (const { type, pattern, exported } of SYMBOL_PATTERNS) {
417
+ pattern.lastIndex = 0;
418
+ let match;
419
+ while ((match = pattern.exec(content)) !== null) {
420
+ const name = match[1];
421
+ const symbolKey = `${name}:${type}`;
422
+ if (seenSymbols.has(symbolKey))
423
+ continue;
424
+ seenSymbols.add(symbolKey);
425
+ const beforeMatch = content.substring(0, match.index);
426
+ const line = beforeMatch.split(`
427
+ `).length;
428
+ symbols.push({
429
+ name,
430
+ type,
431
+ line,
432
+ isExported: exported
433
+ });
434
+ }
435
+ }
436
+ return symbols.sort((a, b) => a.line - b.line);
437
+ }
438
+ function symbolsToKeywords(symbols) {
439
+ const keywords = new Set;
440
+ for (const symbol of symbols) {
441
+ keywords.add(symbol.name.toLowerCase());
442
+ const parts = symbol.name.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/);
443
+ for (const part of parts) {
444
+ if (part.length > 2) {
445
+ keywords.add(part);
446
+ }
447
+ }
448
+ }
449
+ return Array.from(keywords);
450
+ }
451
+ var SYMBOL_PATTERNS;
452
+ var init_symbols = __esm(() => {
453
+ SYMBOL_PATTERNS = [
454
+ {
455
+ type: "function",
456
+ pattern: /^export\s+(?:async\s+)?function\s+(\w+)/gm,
457
+ exported: true
458
+ },
459
+ {
460
+ type: "function",
461
+ pattern: /^export\s+(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
462
+ exported: true
463
+ },
464
+ {
465
+ type: "class",
466
+ pattern: /^export\s+(?:abstract\s+)?class\s+(\w+)/gm,
467
+ exported: true
468
+ },
469
+ {
470
+ type: "interface",
471
+ pattern: /^export\s+interface\s+(\w+)/gm,
472
+ exported: true
473
+ },
474
+ {
475
+ type: "type",
476
+ pattern: /^export\s+type\s+(\w+)/gm,
477
+ exported: true
478
+ },
479
+ {
480
+ type: "enum",
481
+ pattern: /^export\s+(?:const\s+)?enum\s+(\w+)/gm,
482
+ exported: true
483
+ },
484
+ {
485
+ type: "variable",
486
+ pattern: /^export\s+(?:const|let|var)\s+(\w+)\s*(?::|=)/gm,
487
+ exported: true
488
+ },
489
+ {
490
+ type: "function",
491
+ pattern: /^export\s+default\s+(?:async\s+)?function\s+(\w+)/gm,
492
+ exported: true
493
+ },
494
+ {
495
+ type: "class",
496
+ pattern: /^export\s+default\s+class\s+(\w+)/gm,
497
+ exported: true
498
+ },
499
+ {
500
+ type: "function",
501
+ pattern: /^(?:async\s+)?function\s+(\w+)/gm,
502
+ exported: false
503
+ },
504
+ {
505
+ type: "function",
506
+ pattern: /^(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
507
+ exported: false
508
+ },
509
+ {
510
+ type: "class",
511
+ pattern: /^(?:abstract\s+)?class\s+(\w+)/gm,
512
+ exported: false
513
+ },
514
+ {
515
+ type: "interface",
516
+ pattern: /^interface\s+(\w+)/gm,
517
+ exported: false
518
+ },
519
+ {
520
+ type: "type",
521
+ pattern: /^type\s+(\w+)/gm,
522
+ exported: false
523
+ },
524
+ {
525
+ type: "enum",
526
+ pattern: /^(?:const\s+)?enum\s+(\w+)/gm,
527
+ exported: false
528
+ },
529
+ {
530
+ type: "function",
531
+ pattern: /^def\s+(\w+)\s*\(/gm,
532
+ exported: false
533
+ },
534
+ {
535
+ type: "class",
536
+ pattern: /^class\s+(\w+)(?:\s*\(|:)/gm,
537
+ exported: false
538
+ },
539
+ {
540
+ type: "function",
541
+ pattern: /^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/gm,
542
+ exported: false
543
+ },
544
+ {
545
+ type: "type",
546
+ pattern: /^type\s+(\w+)\s+(?:struct|interface)/gm,
547
+ exported: false
548
+ },
549
+ {
550
+ type: "function",
551
+ pattern: /^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/gm,
552
+ exported: false
553
+ },
554
+ {
555
+ type: "type",
556
+ pattern: /^(?:pub\s+)?struct\s+(\w+)/gm,
557
+ exported: false
558
+ },
559
+ {
560
+ type: "enum",
561
+ pattern: /^(?:pub\s+)?enum\s+(\w+)/gm,
562
+ exported: false
563
+ },
564
+ {
565
+ type: "interface",
566
+ pattern: /^(?:pub\s+)?trait\s+(\w+)/gm,
567
+ exported: false
568
+ }
569
+ ];
570
+ });
571
+
572
+ // src/modules/core/index.ts
573
+ var exports_core = {};
574
+ __export(exports_core, {
575
+ CoreModule: () => CoreModule
576
+ });
577
+ import * as path3 from "path";
578
+ import * as fs2 from "fs/promises";
341
579
 
342
- // src/modules/semantic/parseCode.ts
580
+ class CoreModule {
581
+ id = "core";
582
+ name = "Core Search";
583
+ description = "Language-agnostic text search with symbol extraction";
584
+ version = "1.0.0";
585
+ symbolIndex = new Map;
586
+ bm25Index = null;
587
+ rootDir = "";
588
+ async initialize(_config) {}
589
+ async indexFile(filepath, content, ctx) {
590
+ this.rootDir = ctx.rootDir;
591
+ const symbols = extractSymbols(content);
592
+ const symbolKeywords = symbolsToKeywords(symbols);
593
+ const contentTokens = tokenize(content);
594
+ const allTokens = [...new Set([...contentTokens, ...symbolKeywords])];
595
+ const chunks = this.createChunks(filepath, content, symbols);
596
+ const stats = await ctx.getFileStats(filepath);
597
+ this.symbolIndex.set(filepath, {
598
+ filepath,
599
+ symbols,
600
+ tokens: allTokens
601
+ });
602
+ const moduleData = {
603
+ symbols,
604
+ tokens: allTokens
605
+ };
606
+ return {
607
+ filepath,
608
+ lastModified: stats.lastModified,
609
+ chunks,
610
+ moduleData
611
+ };
612
+ }
613
+ createChunks(filepath, content, symbols) {
614
+ const lines = content.split(`
615
+ `);
616
+ const chunks = [];
617
+ for (let start = 0;start < lines.length; start += LINES_PER_CHUNK - CHUNK_OVERLAP) {
618
+ const end = Math.min(start + LINES_PER_CHUNK, lines.length);
619
+ const chunkLines = lines.slice(start, end);
620
+ const chunkContent = chunkLines.join(`
621
+ `);
622
+ const chunkSymbols = symbols.filter((s) => s.line >= start + 1 && s.line <= end);
623
+ let chunkType = "block";
624
+ let chunkName;
625
+ let isExported = false;
626
+ if (chunkSymbols.length > 0) {
627
+ const primarySymbol = chunkSymbols[0];
628
+ chunkType = this.symbolTypeToChunkType(primarySymbol.type);
629
+ chunkName = primarySymbol.name;
630
+ isExported = primarySymbol.isExported;
631
+ }
632
+ const chunkId = `${filepath}:${start + 1}-${end}`;
633
+ chunks.push({
634
+ id: chunkId,
635
+ content: chunkContent,
636
+ startLine: start + 1,
637
+ endLine: end,
638
+ type: chunkType,
639
+ name: chunkName,
640
+ isExported
641
+ });
642
+ if (end >= lines.length)
643
+ break;
644
+ }
645
+ return chunks;
646
+ }
647
+ symbolTypeToChunkType(symbolType) {
648
+ switch (symbolType) {
649
+ case "function":
650
+ case "method":
651
+ return "function";
652
+ case "class":
653
+ return "class";
654
+ case "interface":
655
+ return "interface";
656
+ case "type":
657
+ return "type";
658
+ case "enum":
659
+ return "enum";
660
+ case "variable":
661
+ return "variable";
662
+ default:
663
+ return "block";
664
+ }
665
+ }
666
+ async finalize(ctx) {
667
+ const config = ctx.config;
668
+ const coreDir = path3.join(getRaggrepDir(ctx.rootDir, config), "index", "core");
669
+ await fs2.mkdir(coreDir, { recursive: true });
670
+ this.bm25Index = new BM25Index;
671
+ for (const [filepath, entry] of this.symbolIndex) {
672
+ this.bm25Index.addDocument(filepath, entry.tokens);
673
+ }
674
+ const symbolIndexData = {
675
+ version: this.version,
676
+ lastUpdated: new Date().toISOString(),
677
+ files: Object.fromEntries(this.symbolIndex),
678
+ bm25Data: this.bm25Index.serialize()
679
+ };
680
+ await fs2.writeFile(path3.join(coreDir, "symbols.json"), JSON.stringify(symbolIndexData, null, 2));
681
+ console.log(` [Core] Symbol index built with ${this.symbolIndex.size} files`);
682
+ }
683
+ async search(query, ctx, options) {
684
+ const config = ctx.config;
685
+ const topK = options?.topK ?? DEFAULT_TOP_K;
686
+ const minScore = options?.minScore ?? DEFAULT_MIN_SCORE;
687
+ if (this.symbolIndex.size === 0) {
688
+ await this.loadSymbolIndex(ctx.rootDir, config);
689
+ }
690
+ if (!this.bm25Index || this.symbolIndex.size === 0) {
691
+ return [];
692
+ }
693
+ const queryTokens = tokenize(query);
694
+ const bm25Results = this.bm25Index.search(query, topK * 2);
695
+ const bm25Scores = new Map(bm25Results.map((r) => [r.id, r.score]));
696
+ const symbolMatches = this.findSymbolMatches(queryTokens);
697
+ const results = [];
698
+ for (const filepath of this.symbolIndex.keys()) {
699
+ const entry = this.symbolIndex.get(filepath);
700
+ const bm25Score = bm25Scores.get(filepath) ?? 0;
701
+ const symbolScore = symbolMatches.get(filepath) ?? 0;
702
+ if (bm25Score === 0 && symbolScore === 0)
703
+ continue;
704
+ const combinedScore = 0.6 * normalizeScore(bm25Score) + 0.4 * symbolScore;
705
+ if (combinedScore >= minScore) {
706
+ const fileIndex = await ctx.loadFileIndex(filepath);
707
+ if (!fileIndex)
708
+ continue;
709
+ const bestChunk = this.findBestChunk(fileIndex.chunks, queryTokens, entry.symbols);
710
+ results.push({
711
+ filepath,
712
+ chunk: bestChunk,
713
+ score: combinedScore,
714
+ moduleId: this.id,
715
+ context: {
716
+ bm25Score: normalizeScore(bm25Score),
717
+ symbolScore
718
+ }
719
+ });
720
+ }
721
+ }
722
+ return results.sort((a, b) => b.score - a.score).slice(0, topK);
723
+ }
724
+ findSymbolMatches(queryTokens) {
725
+ const matches = new Map;
726
+ for (const [filepath, entry] of this.symbolIndex) {
727
+ let matchScore = 0;
728
+ for (const symbol of entry.symbols) {
729
+ const symbolName = symbol.name.toLowerCase();
730
+ const symbolParts = symbolsToKeywords([symbol]);
731
+ for (const token of queryTokens) {
732
+ if (symbolName === token) {
733
+ matchScore += symbol.isExported ? 1 : 0.8;
734
+ } else if (symbolName.includes(token) || token.includes(symbolName)) {
735
+ matchScore += symbol.isExported ? 0.5 : 0.4;
736
+ } else if (symbolParts.some((p) => p === token)) {
737
+ matchScore += symbol.isExported ? 0.3 : 0.2;
738
+ }
739
+ }
740
+ }
741
+ if (matchScore > 0) {
742
+ matches.set(filepath, Math.min(1, matchScore / queryTokens.length));
743
+ }
744
+ }
745
+ return matches;
746
+ }
747
+ findBestChunk(chunks, queryTokens, symbols) {
748
+ let bestChunk = chunks[0];
749
+ let bestScore = 0;
750
+ for (const chunk of chunks) {
751
+ let score = 0;
752
+ const chunkContent = chunk.content.toLowerCase();
753
+ for (const token of queryTokens) {
754
+ if (chunkContent.includes(token)) {
755
+ score += 1;
756
+ }
757
+ }
758
+ if (chunk.name) {
759
+ const nameLower = chunk.name.toLowerCase();
760
+ for (const token of queryTokens) {
761
+ if (nameLower.includes(token)) {
762
+ score += 2;
763
+ }
764
+ }
765
+ }
766
+ if (chunk.isExported) {
767
+ score += 0.5;
768
+ }
769
+ if (score > bestScore) {
770
+ bestScore = score;
771
+ bestChunk = chunk;
772
+ }
773
+ }
774
+ return bestChunk;
775
+ }
776
+ async loadSymbolIndex(rootDir, config) {
777
+ const coreDir = path3.join(getRaggrepDir(rootDir, config), "index", "core");
778
+ const symbolsPath = path3.join(coreDir, "symbols.json");
779
+ try {
780
+ const content = await fs2.readFile(symbolsPath, "utf-8");
781
+ const data = JSON.parse(content);
782
+ this.symbolIndex = new Map(Object.entries(data.files));
783
+ if (data.bm25Data) {
784
+ this.bm25Index = BM25Index.deserialize(data.bm25Data);
785
+ }
786
+ } catch (error) {
787
+ this.symbolIndex = new Map;
788
+ this.bm25Index = null;
789
+ }
790
+ }
791
+ async dispose() {
792
+ this.symbolIndex.clear();
793
+ this.bm25Index = null;
794
+ }
795
+ }
796
+ var DEFAULT_MIN_SCORE = 0.1, DEFAULT_TOP_K = 20, LINES_PER_CHUNK = 50, CHUNK_OVERLAP = 10;
797
+ var init_core = __esm(() => {
798
+ init_config2();
799
+ init_symbols();
800
+ });
801
+
802
+ // src/domain/services/similarity.ts
803
+ function cosineSimilarity(a, b) {
804
+ if (a.length !== b.length) {
805
+ throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
806
+ }
807
+ let dotProduct = 0;
808
+ let normA = 0;
809
+ let normB = 0;
810
+ for (let i = 0;i < a.length; i++) {
811
+ dotProduct += a[i] * b[i];
812
+ normA += a[i] * a[i];
813
+ normB += b[i] * b[i];
814
+ }
815
+ const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
816
+ if (magnitude === 0)
817
+ return 0;
818
+ return dotProduct / magnitude;
819
+ }
820
+
821
+ // src/modules/language/typescript/parseCode.ts
343
822
  import * as ts from "typescript";
344
823
  function parseCode(content, filepath) {
345
824
  const ext = filepath.split(".").pop()?.toLowerCase();
@@ -530,6 +1009,11 @@ function generateChunkId(filepath, startLine, endLine) {
530
1009
  }
531
1010
  var init_parseCode = () => {};
532
1011
 
1012
+ // src/infrastructure/storage/fileIndexStorage.ts
1013
+ var init_fileIndexStorage = __esm(() => {
1014
+ init_entities();
1015
+ });
1016
+
533
1017
  // src/domain/services/keywords.ts
534
1018
  function extractKeywords(content, name, maxKeywords = 50) {
535
1019
  const keywords = new Set;
@@ -718,9 +1202,9 @@ var init_keywords = __esm(() => {
718
1202
  };
719
1203
  });
720
1204
 
721
- // src/utils/tieredIndex.ts
722
- import * as fs2 from "fs/promises";
723
- import * as path3 from "path";
1205
+ // src/infrastructure/storage/symbolicIndex.ts
1206
+ import * as fs3 from "fs/promises";
1207
+ import * as path4 from "path";
724
1208
 
725
1209
  class SymbolicIndex {
726
1210
  meta = null;
@@ -729,7 +1213,7 @@ class SymbolicIndex {
729
1213
  symbolicPath;
730
1214
  moduleId;
731
1215
  constructor(indexDir, moduleId) {
732
- this.symbolicPath = path3.join(indexDir, "index", moduleId, "symbolic");
1216
+ this.symbolicPath = path4.join(indexDir, "index", moduleId, "symbolic");
733
1217
  this.moduleId = moduleId;
734
1218
  }
735
1219
  async initialize() {
@@ -789,18 +1273,18 @@ class SymbolicIndex {
789
1273
  throw new Error("Index not initialized");
790
1274
  this.meta.lastUpdated = new Date().toISOString();
791
1275
  this.meta.fileCount = this.fileSummaries.size;
792
- await fs2.mkdir(this.symbolicPath, { recursive: true });
793
- const metaPath = path3.join(this.symbolicPath, "_meta.json");
794
- await fs2.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
1276
+ await fs3.mkdir(this.symbolicPath, { recursive: true });
1277
+ const metaPath = path4.join(this.symbolicPath, "_meta.json");
1278
+ await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
795
1279
  for (const [filepath, summary] of this.fileSummaries) {
796
1280
  const summaryPath = this.getFileSummaryPath(filepath);
797
- await fs2.mkdir(path3.dirname(summaryPath), { recursive: true });
798
- await fs2.writeFile(summaryPath, JSON.stringify(summary, null, 2));
1281
+ await fs3.mkdir(path4.dirname(summaryPath), { recursive: true });
1282
+ await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
799
1283
  }
800
1284
  }
801
1285
  async load() {
802
- const metaPath = path3.join(this.symbolicPath, "_meta.json");
803
- const metaContent = await fs2.readFile(metaPath, "utf-8");
1286
+ const metaPath = path4.join(this.symbolicPath, "_meta.json");
1287
+ const metaContent = await fs3.readFile(metaPath, "utf-8");
804
1288
  this.meta = JSON.parse(metaContent);
805
1289
  this.fileSummaries.clear();
806
1290
  await this.loadFileSummariesRecursive(this.symbolicPath);
@@ -808,14 +1292,14 @@ class SymbolicIndex {
808
1292
  }
809
1293
  async loadFileSummariesRecursive(dir) {
810
1294
  try {
811
- const entries = await fs2.readdir(dir, { withFileTypes: true });
1295
+ const entries = await fs3.readdir(dir, { withFileTypes: true });
812
1296
  for (const entry of entries) {
813
- const fullPath = path3.join(dir, entry.name);
1297
+ const fullPath = path4.join(dir, entry.name);
814
1298
  if (entry.isDirectory()) {
815
1299
  await this.loadFileSummariesRecursive(fullPath);
816
1300
  } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
817
1301
  try {
818
- const content = await fs2.readFile(fullPath, "utf-8");
1302
+ const content = await fs3.readFile(fullPath, "utf-8");
819
1303
  const summary = JSON.parse(content);
820
1304
  if (summary.filepath) {
821
1305
  this.fileSummaries.set(summary.filepath, summary);
@@ -827,18 +1311,18 @@ class SymbolicIndex {
827
1311
  }
828
1312
  getFileSummaryPath(filepath) {
829
1313
  const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
830
- return path3.join(this.symbolicPath, jsonPath);
1314
+ return path4.join(this.symbolicPath, jsonPath);
831
1315
  }
832
1316
  async deleteFileSummary(filepath) {
833
1317
  try {
834
- await fs2.unlink(this.getFileSummaryPath(filepath));
1318
+ await fs3.unlink(this.getFileSummaryPath(filepath));
835
1319
  } catch {}
836
1320
  this.fileSummaries.delete(filepath);
837
1321
  }
838
1322
  async exists() {
839
1323
  try {
840
- const metaPath = path3.join(this.symbolicPath, "_meta.json");
841
- await fs2.access(metaPath);
1324
+ const metaPath = path4.join(this.symbolicPath, "_meta.json");
1325
+ await fs3.access(metaPath);
842
1326
  return true;
843
1327
  } catch {
844
1328
  return false;
@@ -860,24 +1344,29 @@ class SymbolicIndex {
860
1344
  this.bm25Index = new BM25Index;
861
1345
  }
862
1346
  }
863
- var init_tieredIndex = __esm(() => {
864
- init_keywords();
1347
+ var init_symbolicIndex = __esm(() => {
865
1348
  init_keywords();
866
1349
  });
867
1350
 
868
- // src/modules/semantic/index.ts
869
- var exports_semantic = {};
870
- __export(exports_semantic, {
871
- SemanticModule: () => SemanticModule,
872
- DEFAULT_TOP_K: () => DEFAULT_TOP_K,
873
- DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE
1351
+ // src/infrastructure/storage/index.ts
1352
+ var init_storage = __esm(() => {
1353
+ init_fileIndexStorage();
1354
+ init_symbolicIndex();
874
1355
  });
875
- import * as path4 from "path";
876
1356
 
877
- class SemanticModule {
878
- id = "semantic";
879
- name = "Semantic Search";
880
- description = "Natural language code search using local text embeddings";
1357
+ // src/modules/language/typescript/index.ts
1358
+ var exports_typescript = {};
1359
+ __export(exports_typescript, {
1360
+ TypeScriptModule: () => TypeScriptModule,
1361
+ DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
1362
+ DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
1363
+ });
1364
+ import * as path5 from "path";
1365
+
1366
+ class TypeScriptModule {
1367
+ id = "language/typescript";
1368
+ name = "TypeScript Search";
1369
+ description = "TypeScript-aware code search with AST parsing and semantic embeddings";
881
1370
  version = "1.0.0";
882
1371
  embeddingConfig = null;
883
1372
  symbolicIndex = null;
@@ -913,10 +1402,10 @@ class SemanticModule {
913
1402
  }));
914
1403
  const references = this.extractReferences(content, filepath);
915
1404
  const stats = await ctx.getFileStats(filepath);
916
- const currentConfig2 = getEmbeddingConfig();
1405
+ const currentConfig = getEmbeddingConfig();
917
1406
  const moduleData = {
918
1407
  embeddings,
919
- embeddingModel: currentConfig2.model
1408
+ embeddingModel: currentConfig.model
920
1409
  };
921
1410
  const chunkTypes = [...new Set(parsedChunks.map((pc) => pc.type))];
922
1411
  const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
@@ -962,7 +1451,7 @@ class SemanticModule {
962
1451
  this.pendingSummaries.clear();
963
1452
  }
964
1453
  async search(query, ctx, options = {}) {
965
- const { topK = DEFAULT_TOP_K, minScore = DEFAULT_MIN_SCORE, filePatterns } = options;
1454
+ const { topK = DEFAULT_TOP_K2, minScore = DEFAULT_MIN_SCORE2, filePatterns } = options;
966
1455
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
967
1456
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
968
1457
  let candidateFiles;
@@ -1066,29 +1555,29 @@ class SemanticModule {
1066
1555
  while ((match = importRegex.exec(content)) !== null) {
1067
1556
  const importPath = match[1];
1068
1557
  if (importPath.startsWith(".")) {
1069
- const dir = path4.dirname(filepath);
1070
- const resolved = path4.normalize(path4.join(dir, importPath));
1558
+ const dir = path5.dirname(filepath);
1559
+ const resolved = path5.normalize(path5.join(dir, importPath));
1071
1560
  references.push(resolved);
1072
1561
  }
1073
1562
  }
1074
1563
  while ((match = requireRegex.exec(content)) !== null) {
1075
1564
  const importPath = match[1];
1076
1565
  if (importPath.startsWith(".")) {
1077
- const dir = path4.dirname(filepath);
1078
- const resolved = path4.normalize(path4.join(dir, importPath));
1566
+ const dir = path5.dirname(filepath);
1567
+ const resolved = path5.normalize(path5.join(dir, importPath));
1079
1568
  references.push(resolved);
1080
1569
  }
1081
1570
  }
1082
1571
  return references;
1083
1572
  }
1084
1573
  }
1085
- var DEFAULT_MIN_SCORE = 0.15, DEFAULT_TOP_K = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TIER1_CANDIDATE_MULTIPLIER = 3;
1086
- var init_semantic = __esm(() => {
1574
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TIER1_CANDIDATE_MULTIPLIER = 3;
1575
+ var init_typescript = __esm(() => {
1087
1576
  init_embeddings();
1088
- init_bm25();
1089
1577
  init_config2();
1090
1578
  init_parseCode();
1091
- init_tieredIndex();
1579
+ init_storage();
1580
+ init_keywords();
1092
1581
  init_keywords();
1093
1582
  });
1094
1583
 
@@ -1113,17 +1602,593 @@ class ModuleRegistryImpl {
1113
1602
  }
1114
1603
  }
1115
1604
  async function registerBuiltInModules() {
1116
- const { SemanticModule: SemanticModule2 } = await Promise.resolve().then(() => (init_semantic(), exports_semantic));
1117
- registry.register(new SemanticModule2);
1605
+ const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
1606
+ const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
1607
+ registry.register(new CoreModule2);
1608
+ registry.register(new TypeScriptModule2);
1118
1609
  }
1119
1610
  var registry;
1120
1611
  var init_registry = __esm(() => {
1121
1612
  registry = new ModuleRegistryImpl;
1122
1613
  });
1123
1614
 
1124
- // src/indexer/watcher.ts
1615
+ // src/introspection/projectDetector.ts
1616
+ import * as path6 from "path";
1617
+ import * as fs4 from "fs/promises";
1618
+ function detectScopeFromName(name) {
1619
+ const nameLower = name.toLowerCase();
1620
+ for (const [scope, keywords] of Object.entries(SCOPE_KEYWORDS)) {
1621
+ if (scope === "unknown")
1622
+ continue;
1623
+ for (const keyword of keywords) {
1624
+ if (nameLower.includes(keyword)) {
1625
+ return scope;
1626
+ }
1627
+ }
1628
+ }
1629
+ return "unknown";
1630
+ }
1631
+ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
1632
+ if (depth > MAX_SCAN_DEPTH)
1633
+ return [];
1634
+ const results = [];
1635
+ const fullDir = currentDir ? path6.join(rootDir, currentDir) : rootDir;
1636
+ try {
1637
+ const entries = await fs4.readdir(fullDir, { withFileTypes: true });
1638
+ const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
1639
+ if (hasPackageJson && currentDir) {
1640
+ const info = await parsePackageJson(rootDir, currentDir);
1641
+ if (info) {
1642
+ results.push(info);
1643
+ }
1644
+ }
1645
+ for (const entry of entries) {
1646
+ if (!entry.isDirectory())
1647
+ continue;
1648
+ if (SKIP_DIRS.has(entry.name))
1649
+ continue;
1650
+ const subPath = currentDir ? `${currentDir}/${entry.name}` : entry.name;
1651
+ const subResults = await scanForPackageJsons(rootDir, subPath, depth + 1);
1652
+ results.push(...subResults);
1653
+ }
1654
+ } catch {}
1655
+ return results;
1656
+ }
1657
+ async function parsePackageJson(rootDir, relativePath) {
1658
+ try {
1659
+ const packageJsonPath = path6.join(rootDir, relativePath, "package.json");
1660
+ const content = await fs4.readFile(packageJsonPath, "utf-8");
1661
+ const pkg = JSON.parse(content);
1662
+ const name = pkg.name || path6.basename(relativePath);
1663
+ const deps = { ...pkg.dependencies, ...pkg.devDependencies };
1664
+ let type = "unknown";
1665
+ if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
1666
+ type = "app";
1667
+ } else if (deps["express"] || deps["fastify"] || deps["koa"] || deps["hono"]) {
1668
+ type = "service";
1669
+ } else if (pkg.main || pkg.exports) {
1670
+ type = "library";
1671
+ }
1672
+ const hasWorkspaces = Boolean(pkg.workspaces);
1673
+ return { name, relativePath, type, hasWorkspaces };
1674
+ } catch {
1675
+ return null;
1676
+ }
1677
+ }
1678
+ async function detectProjectStructure(rootDir) {
1679
+ const projectMap = new Map;
1680
+ let isMonorepo = false;
1681
+ try {
1682
+ const entries = await fs4.readdir(rootDir, { withFileTypes: true });
1683
+ const dirNames = entries.filter((e) => e.isDirectory()).map((e) => e.name);
1684
+ const monorepoPatterns = ["apps", "packages", "libs", "services"];
1685
+ const hasMonorepoStructure = monorepoPatterns.some((p) => dirNames.includes(p));
1686
+ if (hasMonorepoStructure) {
1687
+ isMonorepo = true;
1688
+ for (const pattern of monorepoPatterns) {
1689
+ if (!dirNames.includes(pattern))
1690
+ continue;
1691
+ const patternDir = path6.join(rootDir, pattern);
1692
+ try {
1693
+ const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
1694
+ for (const subDir of subDirs) {
1695
+ if (!subDir.isDirectory())
1696
+ continue;
1697
+ const projectRoot = `${pattern}/${subDir.name}`;
1698
+ const type = getProjectType(pattern);
1699
+ projectMap.set(projectRoot, {
1700
+ name: subDir.name,
1701
+ root: projectRoot,
1702
+ type
1703
+ });
1704
+ }
1705
+ } catch {}
1706
+ }
1707
+ }
1708
+ const packageJsons = await scanForPackageJsons(rootDir);
1709
+ for (const pkg of packageJsons) {
1710
+ if (pkg.hasWorkspaces) {
1711
+ isMonorepo = true;
1712
+ }
1713
+ if (packageJsons.length > 1) {
1714
+ isMonorepo = true;
1715
+ }
1716
+ projectMap.set(pkg.relativePath, {
1717
+ name: pkg.name,
1718
+ root: pkg.relativePath,
1719
+ type: pkg.type
1720
+ });
1721
+ }
1722
+ let rootType = "unknown";
1723
+ try {
1724
+ const rootPkgPath = path6.join(rootDir, "package.json");
1725
+ const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
1726
+ if (rootPkg.workspaces) {
1727
+ isMonorepo = true;
1728
+ }
1729
+ const deps = { ...rootPkg.dependencies, ...rootPkg.devDependencies };
1730
+ if (deps["next"] || deps["react"] || deps["vue"]) {
1731
+ rootType = "app";
1732
+ } else if (deps["express"] || deps["fastify"] || deps["koa"]) {
1733
+ rootType = "service";
1734
+ }
1735
+ } catch {}
1736
+ const projects = Array.from(projectMap.values()).sort((a, b) => a.root.length - b.root.length);
1737
+ return {
1738
+ projects,
1739
+ isMonorepo,
1740
+ rootType: isMonorepo ? undefined : rootType
1741
+ };
1742
+ } catch {
1743
+ return {
1744
+ projects: [],
1745
+ isMonorepo: false,
1746
+ rootType: "unknown"
1747
+ };
1748
+ }
1749
+ }
1750
+ function getProjectType(patternDir) {
1751
+ switch (patternDir) {
1752
+ case "apps":
1753
+ return "app";
1754
+ case "packages":
1755
+ case "libs":
1756
+ return "library";
1757
+ case "services":
1758
+ return "service";
1759
+ case "scripts":
1760
+ case "tools":
1761
+ return "script";
1762
+ default:
1763
+ return "unknown";
1764
+ }
1765
+ }
1766
+ function findProjectForFile(filepath, structure) {
1767
+ const normalizedPath = filepath.replace(/\\/g, "/");
1768
+ const matches = [];
1769
+ for (const project of structure.projects) {
1770
+ if (normalizedPath === project.root || normalizedPath.startsWith(project.root + "/")) {
1771
+ matches.push(project);
1772
+ }
1773
+ }
1774
+ if (matches.length > 0) {
1775
+ return matches.reduce((best, current) => current.root.length > best.root.length ? current : best);
1776
+ }
1777
+ for (const { pattern, type } of PROJECT_PATTERNS) {
1778
+ const match = normalizedPath.match(pattern);
1779
+ if (match) {
1780
+ return {
1781
+ name: match[1],
1782
+ root: match[0],
1783
+ type
1784
+ };
1785
+ }
1786
+ }
1787
+ return {
1788
+ name: "root",
1789
+ root: "",
1790
+ type: structure.rootType ?? "unknown"
1791
+ };
1792
+ }
1793
+ var MAX_SCAN_DEPTH = 4, SKIP_DIRS, PROJECT_PATTERNS, SCOPE_KEYWORDS;
1794
+ var init_projectDetector = __esm(() => {
1795
+ SKIP_DIRS = new Set([
1796
+ "node_modules",
1797
+ ".git",
1798
+ "dist",
1799
+ "build",
1800
+ ".next",
1801
+ ".nuxt",
1802
+ "coverage",
1803
+ ".raggrep"
1804
+ ]);
1805
+ PROJECT_PATTERNS = [
1806
+ { pattern: /^apps\/([^/]+)/, type: "app", defaultScope: "unknown" },
1807
+ { pattern: /^packages\/([^/]+)/, type: "library", defaultScope: "shared" },
1808
+ { pattern: /^libs\/([^/]+)/, type: "library", defaultScope: "shared" },
1809
+ { pattern: /^services\/([^/]+)/, type: "service", defaultScope: "backend" },
1810
+ { pattern: /^scripts\/([^/]+)/, type: "script", defaultScope: "tooling" },
1811
+ { pattern: /^tools\/([^/]+)/, type: "script", defaultScope: "tooling" }
1812
+ ];
1813
+ SCOPE_KEYWORDS = {
1814
+ frontend: [
1815
+ "web",
1816
+ "webapp",
1817
+ "frontend",
1818
+ "client",
1819
+ "ui",
1820
+ "app",
1821
+ "mobile",
1822
+ "react",
1823
+ "vue",
1824
+ "angular",
1825
+ "next",
1826
+ "nuxt"
1827
+ ],
1828
+ backend: [
1829
+ "api",
1830
+ "server",
1831
+ "backend",
1832
+ "service",
1833
+ "worker",
1834
+ "lambda",
1835
+ "functions"
1836
+ ],
1837
+ shared: ["shared", "common", "utils", "lib", "core", "types", "models"],
1838
+ tooling: [
1839
+ "scripts",
1840
+ "tools",
1841
+ "cli",
1842
+ "devtools",
1843
+ "build",
1844
+ "config",
1845
+ "infra"
1846
+ ],
1847
+ unknown: []
1848
+ };
1849
+ });
1850
+
1851
+ // src/introspection/fileIntrospector.ts
1852
+ import * as path7 from "path";
1853
+ function introspectFile(filepath, structure, fileContent) {
1854
+ const normalizedPath = filepath.replace(/\\/g, "/");
1855
+ const segments = normalizedPath.split("/").filter((s) => s.length > 0);
1856
+ const filename = segments[segments.length - 1] || "";
1857
+ const ext = path7.extname(filename);
1858
+ const project = findProjectForFile(normalizedPath, structure);
1859
+ const language = EXTENSION_TO_LANGUAGE[ext] || "unknown";
1860
+ const layer = detectLayer(segments, filename);
1861
+ const domain = detectDomain(segments);
1862
+ const scope = detectScope(segments, project, layer);
1863
+ let framework;
1864
+ if (fileContent) {
1865
+ framework = detectFramework(fileContent);
1866
+ }
1867
+ return {
1868
+ filepath: normalizedPath,
1869
+ project,
1870
+ scope,
1871
+ layer,
1872
+ domain,
1873
+ language,
1874
+ framework,
1875
+ depth: segments.length - 1,
1876
+ pathSegments: segments.slice(0, -1)
1877
+ };
1878
+ }
1879
+ function detectLayer(segments, filename) {
1880
+ const filenameLower = filename.toLowerCase();
1881
+ for (const [layer, patterns] of Object.entries(LAYER_PATTERNS2)) {
1882
+ for (const pattern of patterns) {
1883
+ if (filenameLower.includes(pattern)) {
1884
+ return layer;
1885
+ }
1886
+ }
1887
+ }
1888
+ for (let i = segments.length - 2;i >= 0; i--) {
1889
+ const segment = segments[i].toLowerCase();
1890
+ for (const [layer, patterns] of Object.entries(LAYER_PATTERNS2)) {
1891
+ if (patterns.includes(segment)) {
1892
+ return layer;
1893
+ }
1894
+ }
1895
+ }
1896
+ return;
1897
+ }
1898
+ function detectDomain(segments) {
1899
+ const skipSegments = new Set([
1900
+ "src",
1901
+ "lib",
1902
+ "app",
1903
+ "apps",
1904
+ "packages",
1905
+ "services",
1906
+ "modules",
1907
+ "features",
1908
+ ...Object.values(LAYER_PATTERNS2).flat()
1909
+ ]);
1910
+ for (const segment of segments) {
1911
+ const segmentLower = segment.toLowerCase();
1912
+ if (skipSegments.has(segmentLower))
1913
+ continue;
1914
+ if (DOMAIN_PATTERNS.includes(segmentLower)) {
1915
+ return segmentLower;
1916
+ }
1917
+ for (const domain of DOMAIN_PATTERNS) {
1918
+ if (segmentLower.startsWith(domain) || segmentLower.endsWith(domain)) {
1919
+ return domain;
1920
+ }
1921
+ }
1922
+ }
1923
+ return;
1924
+ }
1925
+ function detectScope(segments, project, layer) {
1926
+ const projectScope = detectScopeFromName(project.name);
1927
+ if (projectScope !== "unknown") {
1928
+ return projectScope;
1929
+ }
1930
+ if (layer) {
1931
+ switch (layer) {
1932
+ case "controller":
1933
+ case "repository":
1934
+ case "middleware":
1935
+ return "backend";
1936
+ case "presentation":
1937
+ return "frontend";
1938
+ case "util":
1939
+ case "model":
1940
+ return "shared";
1941
+ case "test":
1942
+ return "tooling";
1943
+ }
1944
+ }
1945
+ for (const segment of segments) {
1946
+ const segmentLower = segment.toLowerCase();
1947
+ if (["server", "api", "backend"].includes(segmentLower)) {
1948
+ return "backend";
1949
+ }
1950
+ if (["client", "web", "frontend", "ui"].includes(segmentLower)) {
1951
+ return "frontend";
1952
+ }
1953
+ if (["shared", "common", "lib", "libs"].includes(segmentLower)) {
1954
+ return "shared";
1955
+ }
1956
+ }
1957
+ return "unknown";
1958
+ }
1959
+ function detectFramework(content) {
1960
+ for (const [framework, indicators] of Object.entries(FRAMEWORK_INDICATORS)) {
1961
+ for (const indicator of indicators) {
1962
+ if (content.includes(`from '${indicator}`) || content.includes(`from "${indicator}`) || content.includes(`require('${indicator}`) || content.includes(`require("${indicator}`)) {
1963
+ return framework;
1964
+ }
1965
+ }
1966
+ }
1967
+ return;
1968
+ }
1969
+ var LAYER_PATTERNS2, DOMAIN_PATTERNS, FRAMEWORK_INDICATORS, EXTENSION_TO_LANGUAGE;
1970
+ var init_fileIntrospector = __esm(() => {
1971
+ init_projectDetector();
1972
+ LAYER_PATTERNS2 = {
1973
+ controller: ["controller", "api", "routes", "route", "handler"],
1974
+ service: ["service", "logic", "usecase", "usecases", "handler"],
1975
+ repository: ["repository", "repo", "dao", "store", "persistence"],
1976
+ model: ["model", "models", "entity", "entities", "schema", "schemas", "types", "type"],
1977
+ util: ["util", "utils", "helper", "helpers", "common", "lib"],
1978
+ config: ["config", "configuration", "settings"],
1979
+ middleware: ["middleware", "middlewares"],
1980
+ domain: ["domain"],
1981
+ infrastructure: ["infrastructure", "infra"],
1982
+ application: ["application", "app"],
1983
+ presentation: ["presentation", "ui", "views", "view", "component", "components"],
1984
+ test: ["test", "tests", "spec", "specs", "__tests__", "e2e"]
1985
+ };
1986
+ DOMAIN_PATTERNS = [
1987
+ "auth",
1988
+ "authentication",
1989
+ "user",
1990
+ "users",
1991
+ "account",
1992
+ "accounts",
1993
+ "profile",
1994
+ "profiles",
1995
+ "product",
1996
+ "products",
1997
+ "item",
1998
+ "items",
1999
+ "catalog",
2000
+ "order",
2001
+ "orders",
2002
+ "cart",
2003
+ "checkout",
2004
+ "payment",
2005
+ "payments",
2006
+ "billing",
2007
+ "subscription",
2008
+ "subscriptions",
2009
+ "notification",
2010
+ "notifications",
2011
+ "email",
2012
+ "sms",
2013
+ "report",
2014
+ "reports",
2015
+ "analytics",
2016
+ "metrics",
2017
+ "dashboard",
2018
+ "admin",
2019
+ "settings",
2020
+ "search",
2021
+ "chat",
2022
+ "message",
2023
+ "messages",
2024
+ "feed",
2025
+ "post",
2026
+ "posts",
2027
+ "comment",
2028
+ "comments",
2029
+ "media",
2030
+ "upload",
2031
+ "file",
2032
+ "files",
2033
+ "storage",
2034
+ "cache",
2035
+ "session",
2036
+ "log",
2037
+ "logs",
2038
+ "audit"
2039
+ ];
2040
+ FRAMEWORK_INDICATORS = {
2041
+ nextjs: ["next", "next/"],
2042
+ express: ["express"],
2043
+ fastify: ["fastify"],
2044
+ react: ["react"],
2045
+ vue: ["vue"],
2046
+ angular: ["@angular/"],
2047
+ nestjs: ["@nestjs/"],
2048
+ koa: ["koa"]
2049
+ };
2050
+ EXTENSION_TO_LANGUAGE = {
2051
+ ".ts": "typescript",
2052
+ ".tsx": "typescript",
2053
+ ".js": "javascript",
2054
+ ".jsx": "javascript",
2055
+ ".mjs": "javascript",
2056
+ ".cjs": "javascript",
2057
+ ".py": "python",
2058
+ ".go": "go",
2059
+ ".rs": "rust",
2060
+ ".java": "java",
2061
+ ".kt": "kotlin",
2062
+ ".swift": "swift",
2063
+ ".rb": "ruby",
2064
+ ".php": "php",
2065
+ ".cs": "csharp",
2066
+ ".cpp": "cpp",
2067
+ ".c": "c",
2068
+ ".h": "c",
2069
+ ".hpp": "cpp",
2070
+ ".md": "markdown",
2071
+ ".json": "json",
2072
+ ".yaml": "yaml",
2073
+ ".yml": "yaml"
2074
+ };
2075
+ });
2076
+
2077
+ // src/introspection/index.ts
2078
+ import * as path8 from "path";
2079
+ import * as fs5 from "fs/promises";
2080
+
2081
+ class IntrospectionIndex {
2082
+ rootDir;
2083
+ structure = null;
2084
+ files = new Map;
2085
+ config = {};
2086
+ constructor(rootDir) {
2087
+ this.rootDir = rootDir;
2088
+ }
2089
+ async initialize() {
2090
+ this.structure = await detectProjectStructure(this.rootDir);
2091
+ try {
2092
+ const configPath = path8.join(this.rootDir, ".raggrep", "config.json");
2093
+ const configContent = await fs5.readFile(configPath, "utf-8");
2094
+ const config = JSON.parse(configContent);
2095
+ this.config = config.introspection || {};
2096
+ } catch {}
2097
+ }
2098
+ getStructure() {
2099
+ return this.structure;
2100
+ }
2101
+ addFile(filepath, content) {
2102
+ if (!this.structure) {
2103
+ throw new Error("IntrospectionIndex not initialized");
2104
+ }
2105
+ const intro = introspectFile(filepath, this.structure, content);
2106
+ this.applyOverrides(intro);
2107
+ this.files.set(filepath, intro);
2108
+ return intro;
2109
+ }
2110
+ getFile(filepath) {
2111
+ return this.files.get(filepath);
2112
+ }
2113
+ getAllFiles() {
2114
+ return Array.from(this.files.values());
2115
+ }
2116
+ applyOverrides(intro) {
2117
+ if (!this.config.projects)
2118
+ return;
2119
+ for (const [projectPath, overrides] of Object.entries(this.config.projects)) {
2120
+ if (intro.filepath.startsWith(projectPath + "/") || intro.project.root === projectPath) {
2121
+ if (overrides.scope) {
2122
+ intro.scope = overrides.scope;
2123
+ }
2124
+ if (overrides.framework) {
2125
+ intro.framework = overrides.framework;
2126
+ }
2127
+ break;
2128
+ }
2129
+ }
2130
+ }
2131
+ async save(config) {
2132
+ const introDir = path8.join(getRaggrepDir(this.rootDir, config), "introspection");
2133
+ await fs5.mkdir(introDir, { recursive: true });
2134
+ const projectPath = path8.join(introDir, "_project.json");
2135
+ await fs5.writeFile(projectPath, JSON.stringify({
2136
+ version: "1.0.0",
2137
+ lastUpdated: new Date().toISOString(),
2138
+ structure: this.structure
2139
+ }, null, 2));
2140
+ for (const [filepath, intro] of this.files) {
2141
+ const introFilePath = path8.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
2142
+ await fs5.mkdir(path8.dirname(introFilePath), { recursive: true });
2143
+ await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
2144
+ }
2145
+ console.log(` [Introspection] Saved metadata for ${this.files.size} files`);
2146
+ }
2147
+ async load(config) {
2148
+ const introDir = path8.join(getRaggrepDir(this.rootDir, config), "introspection");
2149
+ try {
2150
+ const projectPath = path8.join(introDir, "_project.json");
2151
+ const projectContent = await fs5.readFile(projectPath, "utf-8");
2152
+ const projectData = JSON.parse(projectContent);
2153
+ this.structure = projectData.structure;
2154
+ await this.loadFilesRecursive(path8.join(introDir, "files"), "");
2155
+ } catch {
2156
+ this.structure = null;
2157
+ this.files.clear();
2158
+ }
2159
+ }
2160
+ async loadFilesRecursive(basePath, prefix) {
2161
+ try {
2162
+ const entries = await fs5.readdir(basePath, { withFileTypes: true });
2163
+ for (const entry of entries) {
2164
+ const entryPath = path8.join(basePath, entry.name);
2165
+ const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
2166
+ if (entry.isDirectory()) {
2167
+ await this.loadFilesRecursive(entryPath, relativePath);
2168
+ } else if (entry.name.endsWith(".json")) {
2169
+ const content = await fs5.readFile(entryPath, "utf-8");
2170
+ const intro = JSON.parse(content);
2171
+ this.files.set(intro.filepath, intro);
2172
+ }
2173
+ }
2174
+ } catch {}
2175
+ }
2176
+ clear() {
2177
+ this.files.clear();
2178
+ this.structure = null;
2179
+ }
2180
+ }
2181
+ var init_introspection = __esm(() => {
2182
+ init_projectDetector();
2183
+ init_fileIntrospector();
2184
+ init_config2();
2185
+ init_fileIntrospector();
2186
+ init_projectDetector();
2187
+ });
2188
+
2189
+ // src/app/indexer/watcher.ts
1125
2190
  import { watch } from "chokidar";
1126
- import * as path5 from "path";
2191
+ import * as path9 from "path";
1127
2192
  async function watchDirectory(rootDir, options = {}) {
1128
2193
  const {
1129
2194
  debounceMs = DEFAULT_DEBOUNCE_MS,
@@ -1134,7 +2199,7 @@ async function watchDirectory(rootDir, options = {}) {
1134
2199
  onFileChange,
1135
2200
  onError
1136
2201
  } = options;
1137
- rootDir = path5.resolve(rootDir);
2202
+ rootDir = path9.resolve(rootDir);
1138
2203
  const config = await loadConfig(rootDir);
1139
2204
  const watchPatterns = config.extensions.map((ext) => `**/*${ext}`);
1140
2205
  const ignorePatterns = [
@@ -1220,7 +2285,7 @@ async function watchDirectory(rootDir, options = {}) {
1220
2285
  function handleFileEvent(event, filepath) {
1221
2286
  if (!isRunning)
1222
2287
  return;
1223
- const relativePath = path5.relative(rootDir, filepath);
2288
+ const relativePath = path9.relative(rootDir, filepath);
1224
2289
  for (const ignorePath of config.ignorePaths) {
1225
2290
  if (relativePath.startsWith(ignorePath) || relativePath.includes(`/${ignorePath}/`)) {
1226
2291
  return;
@@ -1248,9 +2313,9 @@ async function watchDirectory(rootDir, options = {}) {
1248
2313
  usePolling: false,
1249
2314
  atomic: true
1250
2315
  });
1251
- watcher.on("add", (filepath) => handleFileEvent("add", path5.join(rootDir, filepath)));
1252
- watcher.on("change", (filepath) => handleFileEvent("change", path5.join(rootDir, filepath)));
1253
- watcher.on("unlink", (filepath) => handleFileEvent("unlink", path5.join(rootDir, filepath)));
2316
+ watcher.on("add", (filepath) => handleFileEvent("add", path9.join(rootDir, filepath)));
2317
+ watcher.on("change", (filepath) => handleFileEvent("change", path9.join(rootDir, filepath)));
2318
+ watcher.on("unlink", (filepath) => handleFileEvent("unlink", path9.join(rootDir, filepath)));
1254
2319
  watcher.on("error", (error) => {
1255
2320
  const err = error instanceof Error ? error : new Error(String(error));
1256
2321
  console.error("[Watch] Watcher error:", err);
@@ -1284,7 +2349,7 @@ var init_watcher = __esm(() => {
1284
2349
  init_indexer();
1285
2350
  });
1286
2351
 
1287
- // src/indexer/index.ts
2352
+ // src/app/indexer/index.ts
1288
2353
  var exports_indexer = {};
1289
2354
  __export(exports_indexer, {
1290
2355
  watchDirectory: () => watchDirectory,
@@ -1293,13 +2358,21 @@ __export(exports_indexer, {
1293
2358
  cleanupIndex: () => cleanupIndex
1294
2359
  });
1295
2360
  import { glob } from "glob";
1296
- import * as fs3 from "fs/promises";
1297
- import * as path6 from "path";
2361
+ import * as fs6 from "fs/promises";
2362
+ import * as path10 from "path";
1298
2363
  async function indexDirectory(rootDir, options = {}) {
1299
2364
  const verbose = options.verbose ?? false;
1300
- rootDir = path6.resolve(rootDir);
2365
+ rootDir = path10.resolve(rootDir);
1301
2366
  console.log(`Indexing directory: ${rootDir}`);
1302
2367
  const config = await loadConfig(rootDir);
2368
+ const introspection = new IntrospectionIndex(rootDir);
2369
+ await introspection.initialize();
2370
+ if (verbose) {
2371
+ const structure = introspection.getStructure();
2372
+ if (structure?.isMonorepo) {
2373
+ console.log(`Detected monorepo with ${structure.projects.length} projects`);
2374
+ }
2375
+ }
1303
2376
  await registerBuiltInModules();
1304
2377
  const enabledModules = registry.getEnabled(config);
1305
2378
  if (enabledModules.length === 0) {
@@ -1316,7 +2389,7 @@ async function indexDirectory(rootDir, options = {}) {
1316
2389
  const moduleConfig = getModuleConfig(config, module.id);
1317
2390
  if (module.initialize && moduleConfig) {
1318
2391
  const configWithOverrides = { ...moduleConfig };
1319
- if (options.model && module.id === "semantic") {
2392
+ if (options.model && module.id === "language/typescript") {
1320
2393
  configWithOverrides.options = {
1321
2394
  ...configWithOverrides.options,
1322
2395
  embeddingModel: options.model
@@ -1324,7 +2397,7 @@ async function indexDirectory(rootDir, options = {}) {
1324
2397
  }
1325
2398
  await module.initialize(configWithOverrides);
1326
2399
  }
1327
- const result = await indexWithModule(rootDir, files, module, config, verbose);
2400
+ const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
1328
2401
  results.push(result);
1329
2402
  if (module.finalize) {
1330
2403
  console.log(`[${module.name}] Building secondary indexes...`);
@@ -1332,12 +2405,12 @@ async function indexDirectory(rootDir, options = {}) {
1332
2405
  rootDir,
1333
2406
  config,
1334
2407
  readFile: async (filepath) => {
1335
- const fullPath = path6.isAbsolute(filepath) ? filepath : path6.join(rootDir, filepath);
1336
- return fs3.readFile(fullPath, "utf-8");
2408
+ const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
2409
+ return fs6.readFile(fullPath, "utf-8");
1337
2410
  },
1338
2411
  getFileStats: async (filepath) => {
1339
- const fullPath = path6.isAbsolute(filepath) ? filepath : path6.join(rootDir, filepath);
1340
- const stats = await fs3.stat(fullPath);
2412
+ const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
2413
+ const stats = await fs6.stat(fullPath);
1341
2414
  return { lastModified: stats.mtime.toISOString() };
1342
2415
  }
1343
2416
  };
@@ -1345,10 +2418,11 @@ async function indexDirectory(rootDir, options = {}) {
1345
2418
  }
1346
2419
  console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
1347
2420
  }
2421
+ await introspection.save(config);
1348
2422
  await updateGlobalManifest(rootDir, enabledModules, config);
1349
2423
  return results;
1350
2424
  }
1351
- async function indexWithModule(rootDir, files, module, config, verbose) {
2425
+ async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
1352
2426
  const result = {
1353
2427
  moduleId: module.id,
1354
2428
  indexed: 0,
@@ -1360,19 +2434,20 @@ async function indexWithModule(rootDir, files, module, config, verbose) {
1360
2434
  rootDir,
1361
2435
  config,
1362
2436
  readFile: async (filepath) => {
1363
- const fullPath = path6.isAbsolute(filepath) ? filepath : path6.join(rootDir, filepath);
1364
- return fs3.readFile(fullPath, "utf-8");
2437
+ const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
2438
+ return fs6.readFile(fullPath, "utf-8");
1365
2439
  },
1366
2440
  getFileStats: async (filepath) => {
1367
- const fullPath = path6.isAbsolute(filepath) ? filepath : path6.join(rootDir, filepath);
1368
- const stats = await fs3.stat(fullPath);
2441
+ const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
2442
+ const stats = await fs6.stat(fullPath);
1369
2443
  return { lastModified: stats.mtime.toISOString() };
1370
- }
2444
+ },
2445
+ getIntrospection: (filepath) => introspection.getFile(filepath)
1371
2446
  };
1372
2447
  for (const filepath of files) {
1373
- const relativePath = path6.relative(rootDir, filepath);
2448
+ const relativePath = path10.relative(rootDir, filepath);
1374
2449
  try {
1375
- const stats = await fs3.stat(filepath);
2450
+ const stats = await fs6.stat(filepath);
1376
2451
  const lastModified = stats.mtime.toISOString();
1377
2452
  const existingEntry = manifest.files[relativePath];
1378
2453
  if (existingEntry && existingEntry.lastModified === lastModified) {
@@ -1382,7 +2457,8 @@ async function indexWithModule(rootDir, files, module, config, verbose) {
1382
2457
  result.skipped++;
1383
2458
  continue;
1384
2459
  }
1385
- const content = await fs3.readFile(filepath, "utf-8");
2460
+ const content = await fs6.readFile(filepath, "utf-8");
2461
+ introspection.addFile(relativePath, content);
1386
2462
  if (verbose) {
1387
2463
  console.log(` Processing ${relativePath}...`);
1388
2464
  }
@@ -1426,7 +2502,7 @@ async function findFiles(rootDir, config) {
1426
2502
  async function loadModuleManifest(rootDir, moduleId, config) {
1427
2503
  const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
1428
2504
  try {
1429
- const content = await fs3.readFile(manifestPath, "utf-8");
2505
+ const content = await fs6.readFile(manifestPath, "utf-8");
1430
2506
  return JSON.parse(content);
1431
2507
  } catch {
1432
2508
  return {
@@ -1439,14 +2515,14 @@ async function loadModuleManifest(rootDir, moduleId, config) {
1439
2515
  }
1440
2516
  async function writeModuleManifest(rootDir, moduleId, manifest, config) {
1441
2517
  const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
1442
- await fs3.mkdir(path6.dirname(manifestPath), { recursive: true });
1443
- await fs3.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
2518
+ await fs6.mkdir(path10.dirname(manifestPath), { recursive: true });
2519
+ await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
1444
2520
  }
1445
2521
  async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
1446
2522
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
1447
- const indexFilePath = path6.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
1448
- await fs3.mkdir(path6.dirname(indexFilePath), { recursive: true });
1449
- await fs3.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
2523
+ const indexFilePath = path10.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
2524
+ await fs6.mkdir(path10.dirname(indexFilePath), { recursive: true });
2525
+ await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
1450
2526
  }
1451
2527
  async function updateGlobalManifest(rootDir, modules, config) {
1452
2528
  const manifestPath = getGlobalManifestPath(rootDir, config);
@@ -1455,12 +2531,12 @@ async function updateGlobalManifest(rootDir, modules, config) {
1455
2531
  lastUpdated: new Date().toISOString(),
1456
2532
  modules: modules.map((m) => m.id)
1457
2533
  };
1458
- await fs3.mkdir(path6.dirname(manifestPath), { recursive: true });
1459
- await fs3.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
2534
+ await fs6.mkdir(path10.dirname(manifestPath), { recursive: true });
2535
+ await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
1460
2536
  }
1461
2537
  async function cleanupIndex(rootDir, options = {}) {
1462
2538
  const verbose = options.verbose ?? false;
1463
- rootDir = path6.resolve(rootDir);
2539
+ rootDir = path10.resolve(rootDir);
1464
2540
  console.log(`Cleaning up index in: ${rootDir}`);
1465
2541
  const config = await loadConfig(rootDir);
1466
2542
  await registerBuiltInModules();
@@ -1490,9 +2566,9 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
1490
2566
  const filesToRemove = [];
1491
2567
  const updatedFiles = {};
1492
2568
  for (const [filepath, entry] of Object.entries(manifest.files)) {
1493
- const fullPath = path6.join(rootDir, filepath);
2569
+ const fullPath = path10.join(rootDir, filepath);
1494
2570
  try {
1495
- await fs3.access(fullPath);
2571
+ await fs6.access(fullPath);
1496
2572
  updatedFiles[filepath] = entry;
1497
2573
  result.kept++;
1498
2574
  } catch {
@@ -1504,9 +2580,9 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
1504
2580
  }
1505
2581
  }
1506
2582
  for (const filepath of filesToRemove) {
1507
- const indexFilePath = path6.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
2583
+ const indexFilePath = path10.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
1508
2584
  try {
1509
- await fs3.unlink(indexFilePath);
2585
+ await fs6.unlink(indexFilePath);
1510
2586
  } catch {}
1511
2587
  }
1512
2588
  manifest.files = updatedFiles;
@@ -1517,16 +2593,16 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
1517
2593
  }
1518
2594
  async function cleanupEmptyDirectories(dir) {
1519
2595
  try {
1520
- const entries = await fs3.readdir(dir, { withFileTypes: true });
2596
+ const entries = await fs6.readdir(dir, { withFileTypes: true });
1521
2597
  for (const entry of entries) {
1522
2598
  if (entry.isDirectory()) {
1523
- const subDir = path6.join(dir, entry.name);
2599
+ const subDir = path10.join(dir, entry.name);
1524
2600
  await cleanupEmptyDirectories(subDir);
1525
2601
  }
1526
2602
  }
1527
- const remainingEntries = await fs3.readdir(dir);
2603
+ const remainingEntries = await fs6.readdir(dir);
1528
2604
  if (remainingEntries.length === 0) {
1529
- await fs3.rmdir(dir);
2605
+ await fs6.rmdir(dir);
1530
2606
  return true;
1531
2607
  }
1532
2608
  return false;
@@ -1535,9 +2611,9 @@ async function cleanupEmptyDirectories(dir) {
1535
2611
  }
1536
2612
  }
1537
2613
  async function getIndexStatus(rootDir) {
1538
- rootDir = path6.resolve(rootDir);
2614
+ rootDir = path10.resolve(rootDir);
1539
2615
  const config = await loadConfig(rootDir);
1540
- const indexDir = path6.join(rootDir, config.indexDir);
2616
+ const indexDir = path10.join(rootDir, config.indexDir);
1541
2617
  const status = {
1542
2618
  exists: false,
1543
2619
  rootDir,
@@ -1546,13 +2622,13 @@ async function getIndexStatus(rootDir) {
1546
2622
  totalFiles: 0
1547
2623
  };
1548
2624
  try {
1549
- await fs3.access(indexDir);
2625
+ await fs6.access(indexDir);
1550
2626
  } catch {
1551
2627
  return status;
1552
2628
  }
1553
2629
  try {
1554
2630
  const globalManifestPath = getGlobalManifestPath(rootDir, config);
1555
- const content = await fs3.readFile(globalManifestPath, "utf-8");
2631
+ const content = await fs6.readFile(globalManifestPath, "utf-8");
1556
2632
  const globalManifest = JSON.parse(content);
1557
2633
  status.exists = true;
1558
2634
  status.lastUpdated = globalManifest.lastUpdated;
@@ -1570,7 +2646,7 @@ async function getIndexStatus(rootDir) {
1570
2646
  }
1571
2647
  } catch {
1572
2648
  try {
1573
- const entries = await fs3.readdir(path6.join(indexDir, "index"));
2649
+ const entries = await fs6.readdir(path10.join(indexDir, "index"));
1574
2650
  if (entries.length > 0) {
1575
2651
  status.exists = true;
1576
2652
  for (const entry of entries) {
@@ -1593,19 +2669,20 @@ async function getIndexStatus(rootDir) {
1593
2669
  var init_indexer = __esm(() => {
1594
2670
  init_config2();
1595
2671
  init_registry();
2672
+ init_introspection();
1596
2673
  init_watcher();
1597
2674
  });
1598
2675
 
1599
- // src/search/index.ts
2676
+ // src/app/search/index.ts
1600
2677
  var exports_search = {};
1601
2678
  __export(exports_search, {
1602
2679
  search: () => search,
1603
2680
  formatSearchResults: () => formatSearchResults
1604
2681
  });
1605
- import * as fs4 from "fs/promises";
1606
- import * as path7 from "path";
2682
+ import * as fs7 from "fs/promises";
2683
+ import * as path11 from "path";
1607
2684
  async function search(rootDir, query, options = {}) {
1608
- rootDir = path7.resolve(rootDir);
2685
+ rootDir = path11.resolve(rootDir);
1609
2686
  console.log(`Searching for: "${query}"`);
1610
2687
  const config = await loadConfig(rootDir);
1611
2688
  await registerBuiltInModules();
@@ -1646,9 +2723,9 @@ function createSearchContext(rootDir, moduleId, config) {
1646
2723
  config,
1647
2724
  loadFileIndex: async (filepath) => {
1648
2725
  const hasExtension = /\.[^./]+$/.test(filepath);
1649
- const indexFilePath = hasExtension ? path7.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path7.join(indexPath, filepath + ".json");
2726
+ const indexFilePath = hasExtension ? path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path11.join(indexPath, filepath + ".json");
1650
2727
  try {
1651
- const content = await fs4.readFile(indexFilePath, "utf-8");
2728
+ const content = await fs7.readFile(indexFilePath, "utf-8");
1652
2729
  return JSON.parse(content);
1653
2730
  } catch {
1654
2731
  return null;
@@ -1658,7 +2735,7 @@ function createSearchContext(rootDir, moduleId, config) {
1658
2735
  const files = [];
1659
2736
  await traverseDirectory(indexPath, files, indexPath);
1660
2737
  return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
1661
- const relative4 = path7.relative(indexPath, f);
2738
+ const relative4 = path11.relative(indexPath, f);
1662
2739
  return relative4.replace(/\.json$/, "");
1663
2740
  });
1664
2741
  }
@@ -1666,9 +2743,9 @@ function createSearchContext(rootDir, moduleId, config) {
1666
2743
  }
1667
2744
  async function traverseDirectory(dir, files, basePath) {
1668
2745
  try {
1669
- const entries = await fs4.readdir(dir, { withFileTypes: true });
2746
+ const entries = await fs7.readdir(dir, { withFileTypes: true });
1670
2747
  for (const entry of entries) {
1671
- const fullPath = path7.join(dir, entry.name);
2748
+ const fullPath = path11.join(dir, entry.name);
1672
2749
  if (entry.isDirectory()) {
1673
2750
  await traverseDirectory(fullPath, files, basePath);
1674
2751
  } else if (entry.isFile()) {
@@ -1680,7 +2757,7 @@ async function traverseDirectory(dir, files, basePath) {
1680
2757
  async function loadGlobalManifest(rootDir, config) {
1681
2758
  const manifestPath = getGlobalManifestPath(rootDir, config);
1682
2759
  try {
1683
- const content = await fs4.readFile(manifestPath, "utf-8");
2760
+ const content = await fs7.readFile(manifestPath, "utf-8");
1684
2761
  return JSON.parse(content);
1685
2762
  } catch {
1686
2763
  return null;
@@ -1723,12 +2800,79 @@ var init_search = __esm(() => {
1723
2800
  init_registry();
1724
2801
  });
1725
2802
 
1726
- // src/cli/main.ts
2803
+ // src/app/cli/main.ts
1727
2804
  init_embeddings();
1728
- import { createRequire } from "module";
1729
- var require2 = createRequire(import.meta.url);
1730
- var pkg = require2("../../package.json");
1731
- var VERSION = pkg.version;
2805
+ // package.json
2806
+ var package_default = {
2807
+ name: "raggrep",
2808
+ version: "0.1.5",
2809
+ description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
2810
+ type: "module",
2811
+ main: "./dist/index.js",
2812
+ types: "./dist/index.d.ts",
2813
+ exports: {
2814
+ ".": {
2815
+ import: "./dist/index.js",
2816
+ types: "./dist/index.d.ts"
2817
+ }
2818
+ },
2819
+ bin: {
2820
+ raggrep: "dist/cli/main.js"
2821
+ },
2822
+ files: [
2823
+ "dist",
2824
+ "README.md",
2825
+ "LICENSE"
2826
+ ],
2827
+ scripts: {
2828
+ build: "bun run build:clean && bun run build:bundle && bun run build:types && bun run build:shebang",
2829
+ "build:clean": "rm -rf dist",
2830
+ "build:bundle": "bun build src/index.ts --outdir dist --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript' --external 'chokidar' && bun build src/app/cli/main.ts --outdir dist/cli --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript' --external 'chokidar'",
2831
+ "build:types": "tsc --emitDeclarationOnly --outDir dist",
2832
+ "build:shebang": "echo '#!/usr/bin/env node' | cat - dist/cli/main.js > temp && mv temp dist/cli/main.js && chmod +x dist/cli/main.js",
2833
+ prepublishOnly: "bun run build",
2834
+ raggrep: "bun run src/app/cli/main.ts",
2835
+ test: "bun test",
2836
+ dev: "bun run src/app/cli/main.ts"
2837
+ },
2838
+ keywords: [
2839
+ "rag",
2840
+ "search",
2841
+ "semantic-search",
2842
+ "embeddings",
2843
+ "codebase",
2844
+ "local",
2845
+ "ai",
2846
+ "code-search",
2847
+ "transformers"
2848
+ ],
2849
+ author: "",
2850
+ license: "MIT",
2851
+ repository: {
2852
+ type: "git",
2853
+ url: "git+https://github.com/conradkoh/raggrep.git"
2854
+ },
2855
+ bugs: {
2856
+ url: "https://github.com/conradkoh/raggrep/issues"
2857
+ },
2858
+ homepage: "https://github.com/conradkoh/raggrep#readme",
2859
+ engines: {
2860
+ node: ">=18.0.0"
2861
+ },
2862
+ dependencies: {
2863
+ "@xenova/transformers": "^2.17.0",
2864
+ chokidar: "^5.0.0",
2865
+ glob: "^10.0.0",
2866
+ typescript: "^5.0.0"
2867
+ },
2868
+ devDependencies: {
2869
+ "@types/bun": "latest",
2870
+ "@types/node": "^20.0.0"
2871
+ }
2872
+ };
2873
+
2874
+ // src/app/cli/main.ts
2875
+ var VERSION = package_default.version;
1732
2876
  var args = process.argv.slice(2);
1733
2877
  var command = args[0];
1734
2878
  if (command === "--version" || command === "-v") {
@@ -2096,4 +3240,4 @@ Run 'raggrep <command> --help' for more information.
2096
3240
  }
2097
3241
  main();
2098
3242
 
2099
- //# debugId=0D3D8495D3A140B664756E2164756E21
3243
+ //# debugId=70A5CEDDD33322C164756E2164756E21