raggrep 0.1.0 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -6
- package/dist/{indexer → app/indexer}/index.d.ts +26 -1
- package/dist/app/indexer/watcher.d.ts +33 -0
- package/dist/{search → app/search}/index.d.ts +1 -1
- package/dist/cli/main.js +1802 -222
- package/dist/cli/main.js.map +26 -15
- package/dist/composition.d.ts +7 -7
- package/dist/domain/entities/fileSummary.d.ts +18 -0
- package/dist/domain/entities/index.d.ts +1 -1
- package/dist/domain/entities/searchResult.d.ts +47 -2
- package/dist/domain/index.d.ts +5 -3
- package/dist/domain/ports/embedding.d.ts +0 -4
- package/dist/domain/ports/index.d.ts +3 -4
- package/dist/domain/services/bm25.d.ts +24 -0
- package/dist/domain/services/index.d.ts +3 -2
- package/dist/domain/services/keywords.d.ts +45 -0
- package/dist/domain/services/similarity.d.ts +23 -0
- package/dist/{application → domain}/usecases/cleanupIndex.d.ts +2 -2
- package/dist/{application → domain}/usecases/indexDirectory.d.ts +2 -2
- package/dist/{application → domain}/usecases/searchIndex.d.ts +2 -2
- package/dist/index.d.ts +5 -5
- package/dist/index.js +1444 -244
- package/dist/index.js.map +26 -15
- package/dist/{utils/config.d.ts → infrastructure/config/configLoader.d.ts} +7 -4
- package/dist/infrastructure/config/index.d.ts +6 -0
- package/dist/infrastructure/embeddings/index.d.ts +3 -1
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +16 -0
- package/dist/infrastructure/index.d.ts +4 -3
- package/dist/infrastructure/storage/index.d.ts +4 -1
- package/dist/{utils/tieredIndex.d.ts → infrastructure/storage/symbolicIndex.d.ts} +7 -18
- package/dist/introspection/fileIntrospector.d.ts +14 -0
- package/dist/introspection/index.d.ts +68 -0
- package/dist/introspection/introspection.test.d.ts +4 -0
- package/dist/introspection/projectDetector.d.ts +27 -0
- package/dist/introspection/types.d.ts +70 -0
- package/dist/modules/core/index.d.ts +69 -0
- package/dist/modules/core/symbols.d.ts +27 -0
- package/dist/modules/core/symbols.test.d.ts +4 -0
- package/dist/modules/{semantic → language/typescript}/index.d.ts +11 -12
- package/dist/types.d.ts +4 -1
- package/package.json +7 -6
- package/dist/application/index.d.ts +0 -7
- package/dist/utils/bm25.d.ts +0 -9
- package/dist/utils/embeddings.d.ts +0 -46
- /package/dist/{cli → app/cli}/main.d.ts +0 -0
- /package/dist/{application → domain}/usecases/index.d.ts +0 -0
- /package/dist/{utils → infrastructure/embeddings}/embeddings.test.d.ts +0 -0
- /package/dist/modules/{semantic → language/typescript}/parseCode.d.ts +0 -0
- /package/dist/modules/{semantic → language/typescript}/parseCode.test.d.ts +0 -0
package/dist/index.js
CHANGED
|
@@ -21,7 +21,12 @@ function createDefaultConfig() {
|
|
|
21
21
|
ignorePaths: DEFAULT_IGNORE_PATHS,
|
|
22
22
|
modules: [
|
|
23
23
|
{
|
|
24
|
-
id: "
|
|
24
|
+
id: "core",
|
|
25
|
+
enabled: true,
|
|
26
|
+
options: {}
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
id: "language/typescript",
|
|
25
30
|
enabled: true,
|
|
26
31
|
options: {
|
|
27
32
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
@@ -81,156 +86,32 @@ var init_entities = __esm(() => {
|
|
|
81
86
|
init_config();
|
|
82
87
|
});
|
|
83
88
|
|
|
84
|
-
// src/
|
|
85
|
-
import { pipeline, env } from "@xenova/transformers";
|
|
89
|
+
// src/infrastructure/config/configLoader.ts
|
|
86
90
|
import * as path from "path";
|
|
87
|
-
import * as os from "os";
|
|
88
|
-
function configureEmbeddings(config) {
|
|
89
|
-
const newConfig = { ...currentConfig, ...config };
|
|
90
|
-
if (newConfig.model !== currentConfig.model) {
|
|
91
|
-
embeddingPipeline = null;
|
|
92
|
-
currentModelName = null;
|
|
93
|
-
}
|
|
94
|
-
currentConfig = newConfig;
|
|
95
|
-
}
|
|
96
|
-
async function initializePipeline() {
|
|
97
|
-
if (embeddingPipeline && currentModelName === currentConfig.model) {
|
|
98
|
-
return;
|
|
99
|
-
}
|
|
100
|
-
if (isInitializing && initPromise) {
|
|
101
|
-
return initPromise;
|
|
102
|
-
}
|
|
103
|
-
isInitializing = true;
|
|
104
|
-
initPromise = (async () => {
|
|
105
|
-
const modelId = EMBEDDING_MODELS[currentConfig.model];
|
|
106
|
-
if (currentConfig.showProgress) {
|
|
107
|
-
console.log(`
|
|
108
|
-
Loading embedding model: ${currentConfig.model}`);
|
|
109
|
-
console.log(` Cache: ${CACHE_DIR}`);
|
|
110
|
-
}
|
|
111
|
-
try {
|
|
112
|
-
embeddingPipeline = await pipeline("feature-extraction", modelId, {
|
|
113
|
-
progress_callback: currentConfig.showProgress ? (progress) => {
|
|
114
|
-
if (progress.status === "progress" && progress.file) {
|
|
115
|
-
const pct = progress.progress ? Math.round(progress.progress) : 0;
|
|
116
|
-
process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
|
|
117
|
-
} else if (progress.status === "done" && progress.file) {
|
|
118
|
-
process.stdout.write(`\r Downloaded ${progress.file}
|
|
119
|
-
`);
|
|
120
|
-
} else if (progress.status === "ready") {}
|
|
121
|
-
} : undefined
|
|
122
|
-
});
|
|
123
|
-
currentModelName = currentConfig.model;
|
|
124
|
-
if (currentConfig.showProgress) {
|
|
125
|
-
console.log(` Model ready.
|
|
126
|
-
`);
|
|
127
|
-
}
|
|
128
|
-
} catch (error) {
|
|
129
|
-
embeddingPipeline = null;
|
|
130
|
-
currentModelName = null;
|
|
131
|
-
throw new Error(`Failed to load embedding model: ${error}`);
|
|
132
|
-
} finally {
|
|
133
|
-
isInitializing = false;
|
|
134
|
-
initPromise = null;
|
|
135
|
-
}
|
|
136
|
-
})();
|
|
137
|
-
return initPromise;
|
|
138
|
-
}
|
|
139
|
-
async function getEmbedding(text) {
|
|
140
|
-
await initializePipeline();
|
|
141
|
-
if (!embeddingPipeline) {
|
|
142
|
-
throw new Error("Embedding pipeline not initialized");
|
|
143
|
-
}
|
|
144
|
-
const output = await embeddingPipeline(text, {
|
|
145
|
-
pooling: "mean",
|
|
146
|
-
normalize: true
|
|
147
|
-
});
|
|
148
|
-
return Array.from(output.data);
|
|
149
|
-
}
|
|
150
|
-
async function getEmbeddings(texts) {
|
|
151
|
-
if (texts.length === 0)
|
|
152
|
-
return [];
|
|
153
|
-
await initializePipeline();
|
|
154
|
-
if (!embeddingPipeline) {
|
|
155
|
-
throw new Error("Embedding pipeline not initialized");
|
|
156
|
-
}
|
|
157
|
-
const results = [];
|
|
158
|
-
for (let i = 0;i < texts.length; i += BATCH_SIZE) {
|
|
159
|
-
const batch = texts.slice(i, i + BATCH_SIZE);
|
|
160
|
-
const outputs = await Promise.all(batch.map(async (text) => {
|
|
161
|
-
const output = await embeddingPipeline(text, {
|
|
162
|
-
pooling: "mean",
|
|
163
|
-
normalize: true
|
|
164
|
-
});
|
|
165
|
-
return Array.from(output.data);
|
|
166
|
-
}));
|
|
167
|
-
results.push(...outputs);
|
|
168
|
-
}
|
|
169
|
-
return results;
|
|
170
|
-
}
|
|
171
|
-
function cosineSimilarity(a, b) {
|
|
172
|
-
if (a.length !== b.length) {
|
|
173
|
-
throw new Error("Vectors must have the same length");
|
|
174
|
-
}
|
|
175
|
-
let dotProduct = 0;
|
|
176
|
-
let normA = 0;
|
|
177
|
-
let normB = 0;
|
|
178
|
-
for (let i = 0;i < a.length; i++) {
|
|
179
|
-
dotProduct += a[i] * b[i];
|
|
180
|
-
normA += a[i] * a[i];
|
|
181
|
-
normB += b[i] * b[i];
|
|
182
|
-
}
|
|
183
|
-
if (normA === 0 || normB === 0)
|
|
184
|
-
return 0;
|
|
185
|
-
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
186
|
-
}
|
|
187
|
-
function getEmbeddingConfig() {
|
|
188
|
-
return { ...currentConfig };
|
|
189
|
-
}
|
|
190
|
-
var CACHE_DIR, EMBEDDING_MODELS, embeddingPipeline = null, currentModelName = null, isInitializing = false, initPromise = null, DEFAULT_CONFIG, currentConfig, BATCH_SIZE = 32;
|
|
191
|
-
var init_embeddings = __esm(() => {
|
|
192
|
-
CACHE_DIR = path.join(os.homedir(), ".cache", "raggrep", "models");
|
|
193
|
-
env.cacheDir = CACHE_DIR;
|
|
194
|
-
env.allowLocalModels = true;
|
|
195
|
-
EMBEDDING_MODELS = {
|
|
196
|
-
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
|
|
197
|
-
"all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
|
|
198
|
-
"bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
|
|
199
|
-
"paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
|
|
200
|
-
};
|
|
201
|
-
DEFAULT_CONFIG = {
|
|
202
|
-
model: "all-MiniLM-L6-v2",
|
|
203
|
-
showProgress: true
|
|
204
|
-
};
|
|
205
|
-
currentConfig = { ...DEFAULT_CONFIG };
|
|
206
|
-
});
|
|
207
|
-
|
|
208
|
-
// src/utils/config.ts
|
|
209
|
-
import * as path2 from "path";
|
|
210
91
|
import * as fs from "fs/promises";
|
|
211
|
-
function getRaggrepDir(rootDir, config =
|
|
212
|
-
return
|
|
92
|
+
function getRaggrepDir(rootDir, config = DEFAULT_CONFIG) {
|
|
93
|
+
return path.join(rootDir, config.indexDir);
|
|
213
94
|
}
|
|
214
|
-
function getModuleIndexPath(rootDir, moduleId, config =
|
|
215
|
-
return
|
|
95
|
+
function getModuleIndexPath(rootDir, moduleId, config = DEFAULT_CONFIG) {
|
|
96
|
+
return path.join(rootDir, config.indexDir, "index", moduleId);
|
|
216
97
|
}
|
|
217
|
-
function getModuleManifestPath(rootDir, moduleId, config =
|
|
218
|
-
return
|
|
98
|
+
function getModuleManifestPath(rootDir, moduleId, config = DEFAULT_CONFIG) {
|
|
99
|
+
return path.join(rootDir, config.indexDir, "index", moduleId, "manifest.json");
|
|
219
100
|
}
|
|
220
|
-
function getGlobalManifestPath(rootDir, config =
|
|
221
|
-
return
|
|
101
|
+
function getGlobalManifestPath(rootDir, config = DEFAULT_CONFIG) {
|
|
102
|
+
return path.join(rootDir, config.indexDir, "manifest.json");
|
|
222
103
|
}
|
|
223
|
-
function getConfigPath(rootDir, config =
|
|
224
|
-
return
|
|
104
|
+
function getConfigPath(rootDir, config = DEFAULT_CONFIG) {
|
|
105
|
+
return path.join(rootDir, config.indexDir, "config.json");
|
|
225
106
|
}
|
|
226
107
|
async function loadConfig(rootDir) {
|
|
227
|
-
const configPath = getConfigPath(rootDir,
|
|
108
|
+
const configPath = getConfigPath(rootDir, DEFAULT_CONFIG);
|
|
228
109
|
try {
|
|
229
110
|
const content = await fs.readFile(configPath, "utf-8");
|
|
230
111
|
const savedConfig = JSON.parse(content);
|
|
231
|
-
return { ...
|
|
112
|
+
return { ...DEFAULT_CONFIG, ...savedConfig };
|
|
232
113
|
} catch {
|
|
233
|
-
return
|
|
114
|
+
return DEFAULT_CONFIG;
|
|
234
115
|
}
|
|
235
116
|
}
|
|
236
117
|
function getModuleConfig(config, moduleId) {
|
|
@@ -248,11 +129,21 @@ function getEmbeddingConfigFromModule(moduleConfig) {
|
|
|
248
129
|
showProgress: options.showProgress !== false
|
|
249
130
|
};
|
|
250
131
|
}
|
|
251
|
-
var
|
|
252
|
-
var
|
|
132
|
+
var DEFAULT_CONFIG, EMBEDDING_MODELS;
|
|
133
|
+
var init_configLoader = __esm(() => {
|
|
253
134
|
init_entities();
|
|
254
|
-
|
|
255
|
-
|
|
135
|
+
DEFAULT_CONFIG = createDefaultConfig();
|
|
136
|
+
EMBEDDING_MODELS = {
|
|
137
|
+
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
|
|
138
|
+
"all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
|
|
139
|
+
"bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
|
|
140
|
+
"paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
|
|
141
|
+
};
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
// src/infrastructure/config/index.ts
|
|
145
|
+
var init_config2 = __esm(() => {
|
|
146
|
+
init_configLoader();
|
|
256
147
|
});
|
|
257
148
|
|
|
258
149
|
// src/domain/services/bm25.ts
|
|
@@ -326,16 +217,604 @@ class BM25Index {
|
|
|
326
217
|
this.avgDocLength = 0;
|
|
327
218
|
this.totalDocs = 0;
|
|
328
219
|
}
|
|
220
|
+
addDocument(id, tokens) {
|
|
221
|
+
this.addDocuments([{ id, content: "", tokens }]);
|
|
222
|
+
}
|
|
223
|
+
serialize() {
|
|
224
|
+
const documents = {};
|
|
225
|
+
for (const [id, { tokens }] of this.documents) {
|
|
226
|
+
documents[id] = tokens;
|
|
227
|
+
}
|
|
228
|
+
return {
|
|
229
|
+
documents,
|
|
230
|
+
avgDocLength: this.avgDocLength,
|
|
231
|
+
documentFrequencies: Object.fromEntries(this.documentFrequencies),
|
|
232
|
+
totalDocs: this.totalDocs
|
|
233
|
+
};
|
|
234
|
+
}
|
|
235
|
+
static deserialize(data) {
|
|
236
|
+
const index = new BM25Index;
|
|
237
|
+
index.avgDocLength = data.avgDocLength;
|
|
238
|
+
index.totalDocs = data.totalDocs;
|
|
239
|
+
index.documentFrequencies = new Map(Object.entries(data.documentFrequencies));
|
|
240
|
+
for (const [id, tokens] of Object.entries(data.documents)) {
|
|
241
|
+
index.documents.set(id, { content: "", tokens });
|
|
242
|
+
}
|
|
243
|
+
return index;
|
|
244
|
+
}
|
|
329
245
|
}
|
|
330
246
|
function normalizeScore(score, midpoint = 5) {
|
|
331
247
|
return 1 / (1 + Math.exp(-score / midpoint + 1));
|
|
332
248
|
}
|
|
333
249
|
var BM25_K1 = 1.5, BM25_B = 0.75;
|
|
334
250
|
|
|
335
|
-
// src/
|
|
336
|
-
|
|
251
|
+
// src/modules/core/symbols.ts
|
|
252
|
+
function extractSymbols(content) {
|
|
253
|
+
const symbols = [];
|
|
254
|
+
const seenSymbols = new Set;
|
|
255
|
+
const lines = content.split(`
|
|
256
|
+
`);
|
|
257
|
+
for (const { type, pattern, exported } of SYMBOL_PATTERNS) {
|
|
258
|
+
pattern.lastIndex = 0;
|
|
259
|
+
let match;
|
|
260
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
261
|
+
const name = match[1];
|
|
262
|
+
const symbolKey = `${name}:${type}`;
|
|
263
|
+
if (seenSymbols.has(symbolKey))
|
|
264
|
+
continue;
|
|
265
|
+
seenSymbols.add(symbolKey);
|
|
266
|
+
const beforeMatch = content.substring(0, match.index);
|
|
267
|
+
const line = beforeMatch.split(`
|
|
268
|
+
`).length;
|
|
269
|
+
symbols.push({
|
|
270
|
+
name,
|
|
271
|
+
type,
|
|
272
|
+
line,
|
|
273
|
+
isExported: exported
|
|
274
|
+
});
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
return symbols.sort((a, b) => a.line - b.line);
|
|
278
|
+
}
|
|
279
|
+
function symbolsToKeywords(symbols) {
|
|
280
|
+
const keywords = new Set;
|
|
281
|
+
for (const symbol of symbols) {
|
|
282
|
+
keywords.add(symbol.name.toLowerCase());
|
|
283
|
+
const parts = symbol.name.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/);
|
|
284
|
+
for (const part of parts) {
|
|
285
|
+
if (part.length > 2) {
|
|
286
|
+
keywords.add(part);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
return Array.from(keywords);
|
|
291
|
+
}
|
|
292
|
+
var SYMBOL_PATTERNS;
|
|
293
|
+
var init_symbols = __esm(() => {
|
|
294
|
+
SYMBOL_PATTERNS = [
|
|
295
|
+
{
|
|
296
|
+
type: "function",
|
|
297
|
+
pattern: /^export\s+(?:async\s+)?function\s+(\w+)/gm,
|
|
298
|
+
exported: true
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
type: "function",
|
|
302
|
+
pattern: /^export\s+(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
|
|
303
|
+
exported: true
|
|
304
|
+
},
|
|
305
|
+
{
|
|
306
|
+
type: "class",
|
|
307
|
+
pattern: /^export\s+(?:abstract\s+)?class\s+(\w+)/gm,
|
|
308
|
+
exported: true
|
|
309
|
+
},
|
|
310
|
+
{
|
|
311
|
+
type: "interface",
|
|
312
|
+
pattern: /^export\s+interface\s+(\w+)/gm,
|
|
313
|
+
exported: true
|
|
314
|
+
},
|
|
315
|
+
{
|
|
316
|
+
type: "type",
|
|
317
|
+
pattern: /^export\s+type\s+(\w+)/gm,
|
|
318
|
+
exported: true
|
|
319
|
+
},
|
|
320
|
+
{
|
|
321
|
+
type: "enum",
|
|
322
|
+
pattern: /^export\s+(?:const\s+)?enum\s+(\w+)/gm,
|
|
323
|
+
exported: true
|
|
324
|
+
},
|
|
325
|
+
{
|
|
326
|
+
type: "variable",
|
|
327
|
+
pattern: /^export\s+(?:const|let|var)\s+(\w+)\s*(?::|=)/gm,
|
|
328
|
+
exported: true
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
type: "function",
|
|
332
|
+
pattern: /^export\s+default\s+(?:async\s+)?function\s+(\w+)/gm,
|
|
333
|
+
exported: true
|
|
334
|
+
},
|
|
335
|
+
{
|
|
336
|
+
type: "class",
|
|
337
|
+
pattern: /^export\s+default\s+class\s+(\w+)/gm,
|
|
338
|
+
exported: true
|
|
339
|
+
},
|
|
340
|
+
{
|
|
341
|
+
type: "function",
|
|
342
|
+
pattern: /^(?:async\s+)?function\s+(\w+)/gm,
|
|
343
|
+
exported: false
|
|
344
|
+
},
|
|
345
|
+
{
|
|
346
|
+
type: "function",
|
|
347
|
+
pattern: /^(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
|
|
348
|
+
exported: false
|
|
349
|
+
},
|
|
350
|
+
{
|
|
351
|
+
type: "class",
|
|
352
|
+
pattern: /^(?:abstract\s+)?class\s+(\w+)/gm,
|
|
353
|
+
exported: false
|
|
354
|
+
},
|
|
355
|
+
{
|
|
356
|
+
type: "interface",
|
|
357
|
+
pattern: /^interface\s+(\w+)/gm,
|
|
358
|
+
exported: false
|
|
359
|
+
},
|
|
360
|
+
{
|
|
361
|
+
type: "type",
|
|
362
|
+
pattern: /^type\s+(\w+)/gm,
|
|
363
|
+
exported: false
|
|
364
|
+
},
|
|
365
|
+
{
|
|
366
|
+
type: "enum",
|
|
367
|
+
pattern: /^(?:const\s+)?enum\s+(\w+)/gm,
|
|
368
|
+
exported: false
|
|
369
|
+
},
|
|
370
|
+
{
|
|
371
|
+
type: "function",
|
|
372
|
+
pattern: /^def\s+(\w+)\s*\(/gm,
|
|
373
|
+
exported: false
|
|
374
|
+
},
|
|
375
|
+
{
|
|
376
|
+
type: "class",
|
|
377
|
+
pattern: /^class\s+(\w+)(?:\s*\(|:)/gm,
|
|
378
|
+
exported: false
|
|
379
|
+
},
|
|
380
|
+
{
|
|
381
|
+
type: "function",
|
|
382
|
+
pattern: /^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/gm,
|
|
383
|
+
exported: false
|
|
384
|
+
},
|
|
385
|
+
{
|
|
386
|
+
type: "type",
|
|
387
|
+
pattern: /^type\s+(\w+)\s+(?:struct|interface)/gm,
|
|
388
|
+
exported: false
|
|
389
|
+
},
|
|
390
|
+
{
|
|
391
|
+
type: "function",
|
|
392
|
+
pattern: /^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/gm,
|
|
393
|
+
exported: false
|
|
394
|
+
},
|
|
395
|
+
{
|
|
396
|
+
type: "type",
|
|
397
|
+
pattern: /^(?:pub\s+)?struct\s+(\w+)/gm,
|
|
398
|
+
exported: false
|
|
399
|
+
},
|
|
400
|
+
{
|
|
401
|
+
type: "enum",
|
|
402
|
+
pattern: /^(?:pub\s+)?enum\s+(\w+)/gm,
|
|
403
|
+
exported: false
|
|
404
|
+
},
|
|
405
|
+
{
|
|
406
|
+
type: "interface",
|
|
407
|
+
pattern: /^(?:pub\s+)?trait\s+(\w+)/gm,
|
|
408
|
+
exported: false
|
|
409
|
+
}
|
|
410
|
+
];
|
|
411
|
+
});
|
|
412
|
+
|
|
413
|
+
// src/modules/core/index.ts
|
|
414
|
+
var exports_core = {};
|
|
415
|
+
__export(exports_core, {
|
|
416
|
+
CoreModule: () => CoreModule
|
|
417
|
+
});
|
|
418
|
+
import * as path2 from "path";
|
|
419
|
+
import * as fs2 from "fs/promises";
|
|
337
420
|
|
|
338
|
-
|
|
421
|
+
class CoreModule {
|
|
422
|
+
id = "core";
|
|
423
|
+
name = "Core Search";
|
|
424
|
+
description = "Language-agnostic text search with symbol extraction";
|
|
425
|
+
version = "1.0.0";
|
|
426
|
+
symbolIndex = new Map;
|
|
427
|
+
bm25Index = null;
|
|
428
|
+
rootDir = "";
|
|
429
|
+
async initialize(_config) {}
|
|
430
|
+
async indexFile(filepath, content, ctx) {
|
|
431
|
+
this.rootDir = ctx.rootDir;
|
|
432
|
+
const symbols = extractSymbols(content);
|
|
433
|
+
const symbolKeywords = symbolsToKeywords(symbols);
|
|
434
|
+
const contentTokens = tokenize(content);
|
|
435
|
+
const allTokens = [...new Set([...contentTokens, ...symbolKeywords])];
|
|
436
|
+
const chunks = this.createChunks(filepath, content, symbols);
|
|
437
|
+
const stats = await ctx.getFileStats(filepath);
|
|
438
|
+
this.symbolIndex.set(filepath, {
|
|
439
|
+
filepath,
|
|
440
|
+
symbols,
|
|
441
|
+
tokens: allTokens
|
|
442
|
+
});
|
|
443
|
+
const moduleData = {
|
|
444
|
+
symbols,
|
|
445
|
+
tokens: allTokens
|
|
446
|
+
};
|
|
447
|
+
return {
|
|
448
|
+
filepath,
|
|
449
|
+
lastModified: stats.lastModified,
|
|
450
|
+
chunks,
|
|
451
|
+
moduleData
|
|
452
|
+
};
|
|
453
|
+
}
|
|
454
|
+
createChunks(filepath, content, symbols) {
|
|
455
|
+
const lines = content.split(`
|
|
456
|
+
`);
|
|
457
|
+
const chunks = [];
|
|
458
|
+
for (let start = 0;start < lines.length; start += LINES_PER_CHUNK - CHUNK_OVERLAP) {
|
|
459
|
+
const end = Math.min(start + LINES_PER_CHUNK, lines.length);
|
|
460
|
+
const chunkLines = lines.slice(start, end);
|
|
461
|
+
const chunkContent = chunkLines.join(`
|
|
462
|
+
`);
|
|
463
|
+
const chunkSymbols = symbols.filter((s) => s.line >= start + 1 && s.line <= end);
|
|
464
|
+
let chunkType = "block";
|
|
465
|
+
let chunkName;
|
|
466
|
+
let isExported = false;
|
|
467
|
+
if (chunkSymbols.length > 0) {
|
|
468
|
+
const primarySymbol = chunkSymbols[0];
|
|
469
|
+
chunkType = this.symbolTypeToChunkType(primarySymbol.type);
|
|
470
|
+
chunkName = primarySymbol.name;
|
|
471
|
+
isExported = primarySymbol.isExported;
|
|
472
|
+
}
|
|
473
|
+
const chunkId = `${filepath}:${start + 1}-${end}`;
|
|
474
|
+
chunks.push({
|
|
475
|
+
id: chunkId,
|
|
476
|
+
content: chunkContent,
|
|
477
|
+
startLine: start + 1,
|
|
478
|
+
endLine: end,
|
|
479
|
+
type: chunkType,
|
|
480
|
+
name: chunkName,
|
|
481
|
+
isExported
|
|
482
|
+
});
|
|
483
|
+
if (end >= lines.length)
|
|
484
|
+
break;
|
|
485
|
+
}
|
|
486
|
+
return chunks;
|
|
487
|
+
}
|
|
488
|
+
symbolTypeToChunkType(symbolType) {
|
|
489
|
+
switch (symbolType) {
|
|
490
|
+
case "function":
|
|
491
|
+
case "method":
|
|
492
|
+
return "function";
|
|
493
|
+
case "class":
|
|
494
|
+
return "class";
|
|
495
|
+
case "interface":
|
|
496
|
+
return "interface";
|
|
497
|
+
case "type":
|
|
498
|
+
return "type";
|
|
499
|
+
case "enum":
|
|
500
|
+
return "enum";
|
|
501
|
+
case "variable":
|
|
502
|
+
return "variable";
|
|
503
|
+
default:
|
|
504
|
+
return "block";
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
async finalize(ctx) {
|
|
508
|
+
const config = ctx.config;
|
|
509
|
+
const coreDir = path2.join(getRaggrepDir(ctx.rootDir, config), "index", "core");
|
|
510
|
+
await fs2.mkdir(coreDir, { recursive: true });
|
|
511
|
+
this.bm25Index = new BM25Index;
|
|
512
|
+
for (const [filepath, entry] of this.symbolIndex) {
|
|
513
|
+
this.bm25Index.addDocument(filepath, entry.tokens);
|
|
514
|
+
}
|
|
515
|
+
const symbolIndexData = {
|
|
516
|
+
version: this.version,
|
|
517
|
+
lastUpdated: new Date().toISOString(),
|
|
518
|
+
files: Object.fromEntries(this.symbolIndex),
|
|
519
|
+
bm25Data: this.bm25Index.serialize()
|
|
520
|
+
};
|
|
521
|
+
await fs2.writeFile(path2.join(coreDir, "symbols.json"), JSON.stringify(symbolIndexData, null, 2));
|
|
522
|
+
console.log(` [Core] Symbol index built with ${this.symbolIndex.size} files`);
|
|
523
|
+
}
|
|
524
|
+
async search(query, ctx, options) {
|
|
525
|
+
const config = ctx.config;
|
|
526
|
+
const topK = options?.topK ?? DEFAULT_TOP_K;
|
|
527
|
+
const minScore = options?.minScore ?? DEFAULT_MIN_SCORE;
|
|
528
|
+
if (this.symbolIndex.size === 0) {
|
|
529
|
+
await this.loadSymbolIndex(ctx.rootDir, config);
|
|
530
|
+
}
|
|
531
|
+
if (!this.bm25Index || this.symbolIndex.size === 0) {
|
|
532
|
+
return [];
|
|
533
|
+
}
|
|
534
|
+
const queryTokens = tokenize(query);
|
|
535
|
+
const bm25Results = this.bm25Index.search(query, topK * 2);
|
|
536
|
+
const bm25Scores = new Map(bm25Results.map((r) => [r.id, r.score]));
|
|
537
|
+
const symbolMatches = this.findSymbolMatches(queryTokens);
|
|
538
|
+
const results = [];
|
|
539
|
+
for (const filepath of this.symbolIndex.keys()) {
|
|
540
|
+
const entry = this.symbolIndex.get(filepath);
|
|
541
|
+
const bm25Score = bm25Scores.get(filepath) ?? 0;
|
|
542
|
+
const symbolScore = symbolMatches.get(filepath) ?? 0;
|
|
543
|
+
if (bm25Score === 0 && symbolScore === 0)
|
|
544
|
+
continue;
|
|
545
|
+
const combinedScore = 0.6 * normalizeScore(bm25Score) + 0.4 * symbolScore;
|
|
546
|
+
if (combinedScore >= minScore) {
|
|
547
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
548
|
+
if (!fileIndex)
|
|
549
|
+
continue;
|
|
550
|
+
const bestChunk = this.findBestChunk(fileIndex.chunks, queryTokens, entry.symbols);
|
|
551
|
+
results.push({
|
|
552
|
+
filepath,
|
|
553
|
+
chunk: bestChunk,
|
|
554
|
+
score: combinedScore,
|
|
555
|
+
moduleId: this.id,
|
|
556
|
+
context: {
|
|
557
|
+
bm25Score: normalizeScore(bm25Score),
|
|
558
|
+
symbolScore
|
|
559
|
+
}
|
|
560
|
+
});
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
return results.sort((a, b) => b.score - a.score).slice(0, topK);
|
|
564
|
+
}
|
|
565
|
+
findSymbolMatches(queryTokens) {
|
|
566
|
+
const matches = new Map;
|
|
567
|
+
for (const [filepath, entry] of this.symbolIndex) {
|
|
568
|
+
let matchScore = 0;
|
|
569
|
+
for (const symbol of entry.symbols) {
|
|
570
|
+
const symbolName = symbol.name.toLowerCase();
|
|
571
|
+
const symbolParts = symbolsToKeywords([symbol]);
|
|
572
|
+
for (const token of queryTokens) {
|
|
573
|
+
if (symbolName === token) {
|
|
574
|
+
matchScore += symbol.isExported ? 1 : 0.8;
|
|
575
|
+
} else if (symbolName.includes(token) || token.includes(symbolName)) {
|
|
576
|
+
matchScore += symbol.isExported ? 0.5 : 0.4;
|
|
577
|
+
} else if (symbolParts.some((p) => p === token)) {
|
|
578
|
+
matchScore += symbol.isExported ? 0.3 : 0.2;
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
if (matchScore > 0) {
|
|
583
|
+
matches.set(filepath, Math.min(1, matchScore / queryTokens.length));
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
return matches;
|
|
587
|
+
}
|
|
588
|
+
findBestChunk(chunks, queryTokens, symbols) {
|
|
589
|
+
let bestChunk = chunks[0];
|
|
590
|
+
let bestScore = 0;
|
|
591
|
+
for (const chunk of chunks) {
|
|
592
|
+
let score = 0;
|
|
593
|
+
const chunkContent = chunk.content.toLowerCase();
|
|
594
|
+
for (const token of queryTokens) {
|
|
595
|
+
if (chunkContent.includes(token)) {
|
|
596
|
+
score += 1;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
if (chunk.name) {
|
|
600
|
+
const nameLower = chunk.name.toLowerCase();
|
|
601
|
+
for (const token of queryTokens) {
|
|
602
|
+
if (nameLower.includes(token)) {
|
|
603
|
+
score += 2;
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
if (chunk.isExported) {
|
|
608
|
+
score += 0.5;
|
|
609
|
+
}
|
|
610
|
+
if (score > bestScore) {
|
|
611
|
+
bestScore = score;
|
|
612
|
+
bestChunk = chunk;
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
return bestChunk;
|
|
616
|
+
}
|
|
617
|
+
async loadSymbolIndex(rootDir, config) {
|
|
618
|
+
const coreDir = path2.join(getRaggrepDir(rootDir, config), "index", "core");
|
|
619
|
+
const symbolsPath = path2.join(coreDir, "symbols.json");
|
|
620
|
+
try {
|
|
621
|
+
const content = await fs2.readFile(symbolsPath, "utf-8");
|
|
622
|
+
const data = JSON.parse(content);
|
|
623
|
+
this.symbolIndex = new Map(Object.entries(data.files));
|
|
624
|
+
if (data.bm25Data) {
|
|
625
|
+
this.bm25Index = BM25Index.deserialize(data.bm25Data);
|
|
626
|
+
}
|
|
627
|
+
} catch (error) {
|
|
628
|
+
this.symbolIndex = new Map;
|
|
629
|
+
this.bm25Index = null;
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
async dispose() {
|
|
633
|
+
this.symbolIndex.clear();
|
|
634
|
+
this.bm25Index = null;
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
var DEFAULT_MIN_SCORE = 0.1, DEFAULT_TOP_K = 20, LINES_PER_CHUNK = 50, CHUNK_OVERLAP = 10;
|
|
638
|
+
var init_core = __esm(() => {
|
|
639
|
+
init_config2();
|
|
640
|
+
init_symbols();
|
|
641
|
+
});
|
|
642
|
+
|
|
643
|
+
// src/infrastructure/embeddings/transformersEmbedding.ts
|
|
644
|
+
import { pipeline, env } from "@xenova/transformers";
|
|
645
|
+
import * as path3 from "path";
|
|
646
|
+
import * as os from "os";
|
|
647
|
+
|
|
648
|
+
class TransformersEmbeddingProvider {
|
|
649
|
+
pipeline = null;
|
|
650
|
+
config;
|
|
651
|
+
isInitializing = false;
|
|
652
|
+
initPromise = null;
|
|
653
|
+
constructor(config) {
|
|
654
|
+
this.config = {
|
|
655
|
+
model: config?.model ?? "all-MiniLM-L6-v2",
|
|
656
|
+
showProgress: config?.showProgress ?? true
|
|
657
|
+
};
|
|
658
|
+
}
|
|
659
|
+
async initialize(config) {
|
|
660
|
+
if (config) {
|
|
661
|
+
if (config.model !== this.config.model) {
|
|
662
|
+
this.pipeline = null;
|
|
663
|
+
}
|
|
664
|
+
this.config = { ...this.config, ...config };
|
|
665
|
+
}
|
|
666
|
+
await this.ensurePipeline();
|
|
667
|
+
}
|
|
668
|
+
async ensurePipeline() {
|
|
669
|
+
if (this.pipeline) {
|
|
670
|
+
return;
|
|
671
|
+
}
|
|
672
|
+
if (this.isInitializing && this.initPromise) {
|
|
673
|
+
return this.initPromise;
|
|
674
|
+
}
|
|
675
|
+
this.isInitializing = true;
|
|
676
|
+
this.initPromise = (async () => {
|
|
677
|
+
const modelId = EMBEDDING_MODELS2[this.config.model];
|
|
678
|
+
if (this.config.showProgress) {
|
|
679
|
+
console.log(`
|
|
680
|
+
Loading embedding model: ${this.config.model}`);
|
|
681
|
+
console.log(` Cache: ${CACHE_DIR}`);
|
|
682
|
+
}
|
|
683
|
+
try {
|
|
684
|
+
this.pipeline = await pipeline("feature-extraction", modelId, {
|
|
685
|
+
progress_callback: this.config.showProgress ? (progress) => {
|
|
686
|
+
if (progress.status === "progress" && progress.file) {
|
|
687
|
+
const pct = progress.progress ? Math.round(progress.progress) : 0;
|
|
688
|
+
process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
|
|
689
|
+
} else if (progress.status === "done" && progress.file) {
|
|
690
|
+
process.stdout.write(`\r Downloaded ${progress.file}
|
|
691
|
+
`);
|
|
692
|
+
}
|
|
693
|
+
} : undefined
|
|
694
|
+
});
|
|
695
|
+
if (this.config.showProgress) {
|
|
696
|
+
console.log(` Model ready.
|
|
697
|
+
`);
|
|
698
|
+
}
|
|
699
|
+
} catch (error) {
|
|
700
|
+
this.pipeline = null;
|
|
701
|
+
throw new Error(`Failed to load embedding model: ${error}`);
|
|
702
|
+
} finally {
|
|
703
|
+
this.isInitializing = false;
|
|
704
|
+
this.initPromise = null;
|
|
705
|
+
}
|
|
706
|
+
})();
|
|
707
|
+
return this.initPromise;
|
|
708
|
+
}
|
|
709
|
+
async getEmbedding(text) {
|
|
710
|
+
await this.ensurePipeline();
|
|
711
|
+
if (!this.pipeline) {
|
|
712
|
+
throw new Error("Embedding pipeline not initialized");
|
|
713
|
+
}
|
|
714
|
+
const output = await this.pipeline(text, {
|
|
715
|
+
pooling: "mean",
|
|
716
|
+
normalize: true
|
|
717
|
+
});
|
|
718
|
+
return Array.from(output.data);
|
|
719
|
+
}
|
|
720
|
+
async getEmbeddings(texts) {
|
|
721
|
+
if (texts.length === 0)
|
|
722
|
+
return [];
|
|
723
|
+
await this.ensurePipeline();
|
|
724
|
+
if (!this.pipeline) {
|
|
725
|
+
throw new Error("Embedding pipeline not initialized");
|
|
726
|
+
}
|
|
727
|
+
const results = [];
|
|
728
|
+
for (let i = 0;i < texts.length; i += BATCH_SIZE) {
|
|
729
|
+
const batch = texts.slice(i, i + BATCH_SIZE);
|
|
730
|
+
const outputs = await Promise.all(batch.map(async (text) => {
|
|
731
|
+
const output = await this.pipeline(text, {
|
|
732
|
+
pooling: "mean",
|
|
733
|
+
normalize: true
|
|
734
|
+
});
|
|
735
|
+
return Array.from(output.data);
|
|
736
|
+
}));
|
|
737
|
+
results.push(...outputs);
|
|
738
|
+
}
|
|
739
|
+
return results;
|
|
740
|
+
}
|
|
741
|
+
getDimension() {
|
|
742
|
+
return EMBEDDING_DIMENSION;
|
|
743
|
+
}
|
|
744
|
+
getModelName() {
|
|
745
|
+
return this.config.model;
|
|
746
|
+
}
|
|
747
|
+
async dispose() {
|
|
748
|
+
this.pipeline = null;
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
function configureEmbeddings(config) {
|
|
752
|
+
const newConfig = { ...globalConfig, ...config };
|
|
753
|
+
if (newConfig.model !== globalConfig.model) {
|
|
754
|
+
globalProvider = null;
|
|
755
|
+
}
|
|
756
|
+
globalConfig = newConfig;
|
|
757
|
+
}
|
|
758
|
+
function getEmbeddingConfig() {
|
|
759
|
+
return { ...globalConfig };
|
|
760
|
+
}
|
|
761
|
+
async function ensureGlobalProvider() {
|
|
762
|
+
if (!globalProvider) {
|
|
763
|
+
globalProvider = new TransformersEmbeddingProvider(globalConfig);
|
|
764
|
+
await globalProvider.initialize();
|
|
765
|
+
}
|
|
766
|
+
return globalProvider;
|
|
767
|
+
}
|
|
768
|
+
async function getEmbedding(text) {
|
|
769
|
+
const provider = await ensureGlobalProvider();
|
|
770
|
+
return provider.getEmbedding(text);
|
|
771
|
+
}
|
|
772
|
+
async function getEmbeddings(texts) {
|
|
773
|
+
const provider = await ensureGlobalProvider();
|
|
774
|
+
return provider.getEmbeddings(texts);
|
|
775
|
+
}
|
|
776
|
+
var CACHE_DIR, EMBEDDING_MODELS2, EMBEDDING_DIMENSION = 384, BATCH_SIZE = 32, globalProvider = null, globalConfig;
|
|
777
|
+
var init_transformersEmbedding = __esm(() => {
|
|
778
|
+
CACHE_DIR = path3.join(os.homedir(), ".cache", "raggrep", "models");
|
|
779
|
+
env.cacheDir = CACHE_DIR;
|
|
780
|
+
env.allowLocalModels = true;
|
|
781
|
+
EMBEDDING_MODELS2 = {
|
|
782
|
+
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
|
|
783
|
+
"all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
|
|
784
|
+
"bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
|
|
785
|
+
"paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
|
|
786
|
+
};
|
|
787
|
+
globalConfig = {
|
|
788
|
+
model: "all-MiniLM-L6-v2",
|
|
789
|
+
showProgress: true
|
|
790
|
+
};
|
|
791
|
+
});
|
|
792
|
+
|
|
793
|
+
// src/infrastructure/embeddings/index.ts
|
|
794
|
+
var init_embeddings = __esm(() => {
|
|
795
|
+
init_transformersEmbedding();
|
|
796
|
+
});
|
|
797
|
+
|
|
798
|
+
// src/domain/services/similarity.ts
|
|
799
|
+
function cosineSimilarity(a, b) {
|
|
800
|
+
if (a.length !== b.length) {
|
|
801
|
+
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
802
|
+
}
|
|
803
|
+
let dotProduct = 0;
|
|
804
|
+
let normA = 0;
|
|
805
|
+
let normB = 0;
|
|
806
|
+
for (let i = 0;i < a.length; i++) {
|
|
807
|
+
dotProduct += a[i] * b[i];
|
|
808
|
+
normA += a[i] * a[i];
|
|
809
|
+
normB += b[i] * b[i];
|
|
810
|
+
}
|
|
811
|
+
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
812
|
+
if (magnitude === 0)
|
|
813
|
+
return 0;
|
|
814
|
+
return dotProduct / magnitude;
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
// src/modules/language/typescript/parseCode.ts
|
|
339
818
|
import * as ts from "typescript";
|
|
340
819
|
function parseCode(content, filepath) {
|
|
341
820
|
const ext = filepath.split(".").pop()?.toLowerCase();
|
|
@@ -526,6 +1005,11 @@ function generateChunkId(filepath, startLine, endLine) {
|
|
|
526
1005
|
}
|
|
527
1006
|
var init_parseCode = () => {};
|
|
528
1007
|
|
|
1008
|
+
// src/infrastructure/storage/fileIndexStorage.ts
|
|
1009
|
+
var init_fileIndexStorage = __esm(() => {
|
|
1010
|
+
init_entities();
|
|
1011
|
+
});
|
|
1012
|
+
|
|
529
1013
|
// src/domain/services/keywords.ts
|
|
530
1014
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
531
1015
|
const keywords = new Set;
|
|
@@ -544,10 +1028,92 @@ function extractKeywords(content, name, maxKeywords = 50) {
|
|
|
544
1028
|
}
|
|
545
1029
|
return Array.from(keywords).slice(0, maxKeywords);
|
|
546
1030
|
}
|
|
1031
|
+
function splitIdentifier(str) {
|
|
1032
|
+
return str.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[_-]/g, " ").split(/\s+/).map((s) => s.toLowerCase()).filter((s) => s.length > 1);
|
|
1033
|
+
}
|
|
547
1034
|
function extractPathKeywords(filepath) {
|
|
548
|
-
|
|
1035
|
+
const keywords = new Set;
|
|
1036
|
+
const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
|
|
1037
|
+
const segments = pathWithoutExt.split(/[/\\]/);
|
|
1038
|
+
for (const segment of segments) {
|
|
1039
|
+
if (segment.length < 2)
|
|
1040
|
+
continue;
|
|
1041
|
+
const lower = segment.toLowerCase();
|
|
1042
|
+
if (!COMMON_KEYWORDS.has(lower) && lower.length > 2) {
|
|
1043
|
+
keywords.add(lower);
|
|
1044
|
+
}
|
|
1045
|
+
const parts = splitIdentifier(segment);
|
|
1046
|
+
for (const part of parts) {
|
|
1047
|
+
if (!COMMON_KEYWORDS.has(part) && part.length > 2) {
|
|
1048
|
+
keywords.add(part);
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1052
|
+
return Array.from(keywords);
|
|
549
1053
|
}
|
|
550
|
-
|
|
1054
|
+
function parsePathContext(filepath) {
|
|
1055
|
+
const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
|
|
1056
|
+
const allSegments = pathWithoutExt.split(/[/\\]/);
|
|
1057
|
+
const filename = allSegments[allSegments.length - 1];
|
|
1058
|
+
const dirSegments = allSegments.slice(0, -1);
|
|
1059
|
+
const keywords = extractPathKeywords(filepath);
|
|
1060
|
+
let layer;
|
|
1061
|
+
const allLower = [...dirSegments, filename].map((s) => s.toLowerCase()).join(" ");
|
|
1062
|
+
const filenameLower = filename.toLowerCase();
|
|
1063
|
+
for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS)) {
|
|
1064
|
+
for (const pattern of patterns) {
|
|
1065
|
+
if (filenameLower.includes(pattern)) {
|
|
1066
|
+
layer = layerName;
|
|
1067
|
+
break;
|
|
1068
|
+
}
|
|
1069
|
+
if (dirSegments.some((s) => s.toLowerCase() === pattern)) {
|
|
1070
|
+
layer = layerName;
|
|
1071
|
+
break;
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
if (layer)
|
|
1075
|
+
break;
|
|
1076
|
+
}
|
|
1077
|
+
let domain;
|
|
1078
|
+
const layerPatternSet = new Set(Object.values(LAYER_PATTERNS).flat());
|
|
1079
|
+
const reversedSegments = [...dirSegments].reverse();
|
|
1080
|
+
for (const segment of reversedSegments) {
|
|
1081
|
+
const lower = segment.toLowerCase();
|
|
1082
|
+
if (["src", "lib", "app", "packages", "modules"].includes(lower))
|
|
1083
|
+
continue;
|
|
1084
|
+
if (layerPatternSet.has(lower))
|
|
1085
|
+
continue;
|
|
1086
|
+
if (lower.length > 2) {
|
|
1087
|
+
domain = lower;
|
|
1088
|
+
break;
|
|
1089
|
+
}
|
|
1090
|
+
}
|
|
1091
|
+
return {
|
|
1092
|
+
segments: dirSegments,
|
|
1093
|
+
layer,
|
|
1094
|
+
domain,
|
|
1095
|
+
depth: dirSegments.length,
|
|
1096
|
+
keywords
|
|
1097
|
+
};
|
|
1098
|
+
}
|
|
1099
|
+
function formatPathContextForEmbedding(pathContext) {
|
|
1100
|
+
const parts = [];
|
|
1101
|
+
if (pathContext.domain) {
|
|
1102
|
+
parts.push(pathContext.domain);
|
|
1103
|
+
}
|
|
1104
|
+
if (pathContext.layer) {
|
|
1105
|
+
parts.push(pathContext.layer);
|
|
1106
|
+
}
|
|
1107
|
+
const significantSegments = pathContext.segments.slice(-3).filter((s) => s.length > 2 && !["src", "lib", "app"].includes(s.toLowerCase()));
|
|
1108
|
+
if (significantSegments.length > 0) {
|
|
1109
|
+
parts.push(...significantSegments.map((s) => s.toLowerCase()));
|
|
1110
|
+
}
|
|
1111
|
+
if (parts.length === 0)
|
|
1112
|
+
return "";
|
|
1113
|
+
const unique = [...new Set(parts)];
|
|
1114
|
+
return `[${unique.join(" ")}]`;
|
|
1115
|
+
}
|
|
1116
|
+
var COMMON_KEYWORDS, LAYER_PATTERNS;
|
|
551
1117
|
var init_keywords = __esm(() => {
|
|
552
1118
|
COMMON_KEYWORDS = new Set([
|
|
553
1119
|
"const",
|
|
@@ -617,11 +1183,24 @@ var init_keywords = __esm(() => {
|
|
|
617
1183
|
"has",
|
|
618
1184
|
"have"
|
|
619
1185
|
]);
|
|
1186
|
+
LAYER_PATTERNS = {
|
|
1187
|
+
controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
|
|
1188
|
+
service: ["service", "services", "usecase", "usecases", "application"],
|
|
1189
|
+
repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
|
|
1190
|
+
model: ["model", "models", "entity", "entities", "schema", "schemas"],
|
|
1191
|
+
util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
|
|
1192
|
+
config: ["config", "configs", "configuration", "settings"],
|
|
1193
|
+
middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
|
|
1194
|
+
domain: ["domain", "core", "business"],
|
|
1195
|
+
infrastructure: ["infrastructure", "infra", "external", "adapters"],
|
|
1196
|
+
presentation: ["presentation", "view", "views", "component", "components", "ui"],
|
|
1197
|
+
test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
|
|
1198
|
+
};
|
|
620
1199
|
});
|
|
621
1200
|
|
|
622
|
-
// src/
|
|
623
|
-
import * as
|
|
624
|
-
import * as
|
|
1201
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
1202
|
+
import * as fs3 from "fs/promises";
|
|
1203
|
+
import * as path4 from "path";
|
|
625
1204
|
|
|
626
1205
|
class SymbolicIndex {
|
|
627
1206
|
meta = null;
|
|
@@ -630,7 +1209,7 @@ class SymbolicIndex {
|
|
|
630
1209
|
symbolicPath;
|
|
631
1210
|
moduleId;
|
|
632
1211
|
constructor(indexDir, moduleId) {
|
|
633
|
-
this.symbolicPath =
|
|
1212
|
+
this.symbolicPath = path4.join(indexDir, "index", moduleId, "symbolic");
|
|
634
1213
|
this.moduleId = moduleId;
|
|
635
1214
|
}
|
|
636
1215
|
async initialize() {
|
|
@@ -690,18 +1269,18 @@ class SymbolicIndex {
|
|
|
690
1269
|
throw new Error("Index not initialized");
|
|
691
1270
|
this.meta.lastUpdated = new Date().toISOString();
|
|
692
1271
|
this.meta.fileCount = this.fileSummaries.size;
|
|
693
|
-
await
|
|
694
|
-
const metaPath =
|
|
695
|
-
await
|
|
1272
|
+
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
1273
|
+
const metaPath = path4.join(this.symbolicPath, "_meta.json");
|
|
1274
|
+
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
696
1275
|
for (const [filepath, summary] of this.fileSummaries) {
|
|
697
1276
|
const summaryPath = this.getFileSummaryPath(filepath);
|
|
698
|
-
await
|
|
699
|
-
await
|
|
1277
|
+
await fs3.mkdir(path4.dirname(summaryPath), { recursive: true });
|
|
1278
|
+
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
700
1279
|
}
|
|
701
1280
|
}
|
|
702
1281
|
async load() {
|
|
703
|
-
const metaPath =
|
|
704
|
-
const metaContent = await
|
|
1282
|
+
const metaPath = path4.join(this.symbolicPath, "_meta.json");
|
|
1283
|
+
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
705
1284
|
this.meta = JSON.parse(metaContent);
|
|
706
1285
|
this.fileSummaries.clear();
|
|
707
1286
|
await this.loadFileSummariesRecursive(this.symbolicPath);
|
|
@@ -709,14 +1288,14 @@ class SymbolicIndex {
|
|
|
709
1288
|
}
|
|
710
1289
|
async loadFileSummariesRecursive(dir) {
|
|
711
1290
|
try {
|
|
712
|
-
const entries = await
|
|
1291
|
+
const entries = await fs3.readdir(dir, { withFileTypes: true });
|
|
713
1292
|
for (const entry of entries) {
|
|
714
|
-
const fullPath =
|
|
1293
|
+
const fullPath = path4.join(dir, entry.name);
|
|
715
1294
|
if (entry.isDirectory()) {
|
|
716
1295
|
await this.loadFileSummariesRecursive(fullPath);
|
|
717
1296
|
} else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
|
|
718
1297
|
try {
|
|
719
|
-
const content = await
|
|
1298
|
+
const content = await fs3.readFile(fullPath, "utf-8");
|
|
720
1299
|
const summary = JSON.parse(content);
|
|
721
1300
|
if (summary.filepath) {
|
|
722
1301
|
this.fileSummaries.set(summary.filepath, summary);
|
|
@@ -728,18 +1307,18 @@ class SymbolicIndex {
|
|
|
728
1307
|
}
|
|
729
1308
|
getFileSummaryPath(filepath) {
|
|
730
1309
|
const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
|
|
731
|
-
return
|
|
1310
|
+
return path4.join(this.symbolicPath, jsonPath);
|
|
732
1311
|
}
|
|
733
1312
|
async deleteFileSummary(filepath) {
|
|
734
1313
|
try {
|
|
735
|
-
await
|
|
1314
|
+
await fs3.unlink(this.getFileSummaryPath(filepath));
|
|
736
1315
|
} catch {}
|
|
737
1316
|
this.fileSummaries.delete(filepath);
|
|
738
1317
|
}
|
|
739
1318
|
async exists() {
|
|
740
1319
|
try {
|
|
741
|
-
const metaPath =
|
|
742
|
-
await
|
|
1320
|
+
const metaPath = path4.join(this.symbolicPath, "_meta.json");
|
|
1321
|
+
await fs3.access(metaPath);
|
|
743
1322
|
return true;
|
|
744
1323
|
} catch {
|
|
745
1324
|
return false;
|
|
@@ -761,24 +1340,29 @@ class SymbolicIndex {
|
|
|
761
1340
|
this.bm25Index = new BM25Index;
|
|
762
1341
|
}
|
|
763
1342
|
}
|
|
764
|
-
var
|
|
765
|
-
init_keywords();
|
|
1343
|
+
var init_symbolicIndex = __esm(() => {
|
|
766
1344
|
init_keywords();
|
|
767
1345
|
});
|
|
768
1346
|
|
|
769
|
-
// src/
|
|
770
|
-
var
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
DEFAULT_TOP_K: () => DEFAULT_TOP_K,
|
|
774
|
-
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE
|
|
1347
|
+
// src/infrastructure/storage/index.ts
|
|
1348
|
+
var init_storage = __esm(() => {
|
|
1349
|
+
init_fileIndexStorage();
|
|
1350
|
+
init_symbolicIndex();
|
|
775
1351
|
});
|
|
776
|
-
import * as path4 from "path";
|
|
777
1352
|
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
1353
|
+
// src/modules/language/typescript/index.ts
|
|
1354
|
+
var exports_typescript = {};
|
|
1355
|
+
__export(exports_typescript, {
|
|
1356
|
+
TypeScriptModule: () => TypeScriptModule,
|
|
1357
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
1358
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
1359
|
+
});
|
|
1360
|
+
import * as path5 from "path";
|
|
1361
|
+
|
|
1362
|
+
class TypeScriptModule {
|
|
1363
|
+
id = "language/typescript";
|
|
1364
|
+
name = "TypeScript Search";
|
|
1365
|
+
description = "TypeScript-aware code search with AST parsing and semantic embeddings";
|
|
782
1366
|
version = "1.0.0";
|
|
783
1367
|
embeddingConfig = null;
|
|
784
1368
|
symbolicIndex = null;
|
|
@@ -795,7 +1379,12 @@ class SemanticModule {
|
|
|
795
1379
|
if (parsedChunks.length === 0) {
|
|
796
1380
|
return null;
|
|
797
1381
|
}
|
|
798
|
-
const
|
|
1382
|
+
const pathContext = parsePathContext(filepath);
|
|
1383
|
+
const pathPrefix = formatPathContextForEmbedding(pathContext);
|
|
1384
|
+
const chunkContents = parsedChunks.map((c) => {
|
|
1385
|
+
const namePrefix = c.name ? `${c.name}: ` : "";
|
|
1386
|
+
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
1387
|
+
});
|
|
799
1388
|
const embeddings = await getEmbeddings(chunkContents);
|
|
800
1389
|
const chunks = parsedChunks.map((pc) => ({
|
|
801
1390
|
id: generateChunkId(filepath, pc.startLine, pc.endLine),
|
|
@@ -809,10 +1398,10 @@ class SemanticModule {
|
|
|
809
1398
|
}));
|
|
810
1399
|
const references = this.extractReferences(content, filepath);
|
|
811
1400
|
const stats = await ctx.getFileStats(filepath);
|
|
812
|
-
const
|
|
1401
|
+
const currentConfig = getEmbeddingConfig();
|
|
813
1402
|
const moduleData = {
|
|
814
1403
|
embeddings,
|
|
815
|
-
embeddingModel:
|
|
1404
|
+
embeddingModel: currentConfig.model
|
|
816
1405
|
};
|
|
817
1406
|
const chunkTypes = [...new Set(parsedChunks.map((pc) => pc.type))];
|
|
818
1407
|
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
@@ -821,13 +1410,20 @@ class SemanticModule {
|
|
|
821
1410
|
const keywords = extractKeywords(pc.content, pc.name);
|
|
822
1411
|
keywords.forEach((k) => allKeywords.add(k));
|
|
823
1412
|
}
|
|
1413
|
+
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
824
1414
|
const fileSummary = {
|
|
825
1415
|
filepath,
|
|
826
1416
|
chunkCount: chunks.length,
|
|
827
1417
|
chunkTypes,
|
|
828
1418
|
keywords: Array.from(allKeywords),
|
|
829
1419
|
exports,
|
|
830
|
-
lastModified: stats.lastModified
|
|
1420
|
+
lastModified: stats.lastModified,
|
|
1421
|
+
pathContext: {
|
|
1422
|
+
segments: pathContext.segments,
|
|
1423
|
+
layer: pathContext.layer,
|
|
1424
|
+
domain: pathContext.domain,
|
|
1425
|
+
depth: pathContext.depth
|
|
1426
|
+
}
|
|
831
1427
|
};
|
|
832
1428
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
833
1429
|
return {
|
|
@@ -851,7 +1447,7 @@ class SemanticModule {
|
|
|
851
1447
|
this.pendingSummaries.clear();
|
|
852
1448
|
}
|
|
853
1449
|
async search(query, ctx, options = {}) {
|
|
854
|
-
const { topK =
|
|
1450
|
+
const { topK = DEFAULT_TOP_K2, minScore = DEFAULT_MIN_SCORE2, filePatterns } = options;
|
|
855
1451
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
856
1452
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
857
1453
|
let candidateFiles;
|
|
@@ -904,11 +1500,32 @@ class SemanticModule {
|
|
|
904
1500
|
for (const result of bm25Results) {
|
|
905
1501
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
906
1502
|
}
|
|
1503
|
+
const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
|
|
1504
|
+
const pathBoosts = new Map;
|
|
1505
|
+
for (const filepath of candidateFiles) {
|
|
1506
|
+
const summary = symbolicIndex.getFileSummary(filepath);
|
|
1507
|
+
if (summary?.pathContext) {
|
|
1508
|
+
let boost = 0;
|
|
1509
|
+
const ctx2 = summary.pathContext;
|
|
1510
|
+
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
1511
|
+
boost += 0.1;
|
|
1512
|
+
}
|
|
1513
|
+
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
1514
|
+
boost += 0.05;
|
|
1515
|
+
}
|
|
1516
|
+
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
1517
|
+
if (segmentMatch) {
|
|
1518
|
+
boost += 0.05;
|
|
1519
|
+
}
|
|
1520
|
+
pathBoosts.set(filepath, boost);
|
|
1521
|
+
}
|
|
1522
|
+
}
|
|
907
1523
|
const results = [];
|
|
908
1524
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
909
1525
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
910
1526
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
911
|
-
const
|
|
1527
|
+
const pathBoost = pathBoosts.get(filepath) || 0;
|
|
1528
|
+
const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + pathBoost;
|
|
912
1529
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
913
1530
|
results.push({
|
|
914
1531
|
filepath,
|
|
@@ -917,7 +1534,8 @@ class SemanticModule {
|
|
|
917
1534
|
moduleId: this.id,
|
|
918
1535
|
context: {
|
|
919
1536
|
semanticScore,
|
|
920
|
-
bm25Score
|
|
1537
|
+
bm25Score,
|
|
1538
|
+
pathBoost
|
|
921
1539
|
}
|
|
922
1540
|
});
|
|
923
1541
|
}
|
|
@@ -933,36 +1551,37 @@ class SemanticModule {
|
|
|
933
1551
|
while ((match = importRegex.exec(content)) !== null) {
|
|
934
1552
|
const importPath = match[1];
|
|
935
1553
|
if (importPath.startsWith(".")) {
|
|
936
|
-
const dir =
|
|
937
|
-
const resolved =
|
|
1554
|
+
const dir = path5.dirname(filepath);
|
|
1555
|
+
const resolved = path5.normalize(path5.join(dir, importPath));
|
|
938
1556
|
references.push(resolved);
|
|
939
1557
|
}
|
|
940
1558
|
}
|
|
941
1559
|
while ((match = requireRegex.exec(content)) !== null) {
|
|
942
1560
|
const importPath = match[1];
|
|
943
1561
|
if (importPath.startsWith(".")) {
|
|
944
|
-
const dir =
|
|
945
|
-
const resolved =
|
|
1562
|
+
const dir = path5.dirname(filepath);
|
|
1563
|
+
const resolved = path5.normalize(path5.join(dir, importPath));
|
|
946
1564
|
references.push(resolved);
|
|
947
1565
|
}
|
|
948
1566
|
}
|
|
949
1567
|
return references;
|
|
950
1568
|
}
|
|
951
1569
|
}
|
|
952
|
-
var
|
|
953
|
-
var
|
|
1570
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TIER1_CANDIDATE_MULTIPLIER = 3;
|
|
1571
|
+
var init_typescript = __esm(() => {
|
|
954
1572
|
init_embeddings();
|
|
955
|
-
init_bm25();
|
|
956
1573
|
init_config2();
|
|
957
1574
|
init_parseCode();
|
|
958
|
-
|
|
1575
|
+
init_storage();
|
|
1576
|
+
init_keywords();
|
|
1577
|
+
init_keywords();
|
|
959
1578
|
});
|
|
960
1579
|
|
|
961
|
-
// src/indexer/index.ts
|
|
1580
|
+
// src/app/indexer/index.ts
|
|
962
1581
|
init_config2();
|
|
963
1582
|
import { glob } from "glob";
|
|
964
|
-
import * as
|
|
965
|
-
import * as
|
|
1583
|
+
import * as fs6 from "fs/promises";
|
|
1584
|
+
import * as path9 from "path";
|
|
966
1585
|
|
|
967
1586
|
// src/modules/registry.ts
|
|
968
1587
|
class ModuleRegistryImpl {
|
|
@@ -986,16 +1605,594 @@ class ModuleRegistryImpl {
|
|
|
986
1605
|
}
|
|
987
1606
|
var registry = new ModuleRegistryImpl;
|
|
988
1607
|
async function registerBuiltInModules() {
|
|
989
|
-
const {
|
|
990
|
-
|
|
1608
|
+
const { CoreModule: CoreModule2 } = await Promise.resolve().then(() => (init_core(), exports_core));
|
|
1609
|
+
const { TypeScriptModule: TypeScriptModule2 } = await Promise.resolve().then(() => (init_typescript(), exports_typescript));
|
|
1610
|
+
registry.register(new CoreModule2);
|
|
1611
|
+
registry.register(new TypeScriptModule2);
|
|
1612
|
+
}
|
|
1613
|
+
|
|
1614
|
+
// src/introspection/index.ts
|
|
1615
|
+
import * as path8 from "path";
|
|
1616
|
+
import * as fs5 from "fs/promises";
|
|
1617
|
+
|
|
1618
|
+
// src/introspection/projectDetector.ts
|
|
1619
|
+
import * as path6 from "path";
|
|
1620
|
+
import * as fs4 from "fs/promises";
|
|
1621
|
+
var MAX_SCAN_DEPTH = 4;
|
|
1622
|
+
var SKIP_DIRS = new Set([
|
|
1623
|
+
"node_modules",
|
|
1624
|
+
".git",
|
|
1625
|
+
"dist",
|
|
1626
|
+
"build",
|
|
1627
|
+
".next",
|
|
1628
|
+
".nuxt",
|
|
1629
|
+
"coverage",
|
|
1630
|
+
".raggrep"
|
|
1631
|
+
]);
|
|
1632
|
+
var PROJECT_PATTERNS = [
|
|
1633
|
+
{ pattern: /^apps\/([^/]+)/, type: "app", defaultScope: "unknown" },
|
|
1634
|
+
{ pattern: /^packages\/([^/]+)/, type: "library", defaultScope: "shared" },
|
|
1635
|
+
{ pattern: /^libs\/([^/]+)/, type: "library", defaultScope: "shared" },
|
|
1636
|
+
{ pattern: /^services\/([^/]+)/, type: "service", defaultScope: "backend" },
|
|
1637
|
+
{ pattern: /^scripts\/([^/]+)/, type: "script", defaultScope: "tooling" },
|
|
1638
|
+
{ pattern: /^tools\/([^/]+)/, type: "script", defaultScope: "tooling" }
|
|
1639
|
+
];
|
|
1640
|
+
var SCOPE_KEYWORDS = {
|
|
1641
|
+
frontend: [
|
|
1642
|
+
"web",
|
|
1643
|
+
"webapp",
|
|
1644
|
+
"frontend",
|
|
1645
|
+
"client",
|
|
1646
|
+
"ui",
|
|
1647
|
+
"app",
|
|
1648
|
+
"mobile",
|
|
1649
|
+
"react",
|
|
1650
|
+
"vue",
|
|
1651
|
+
"angular",
|
|
1652
|
+
"next",
|
|
1653
|
+
"nuxt"
|
|
1654
|
+
],
|
|
1655
|
+
backend: [
|
|
1656
|
+
"api",
|
|
1657
|
+
"server",
|
|
1658
|
+
"backend",
|
|
1659
|
+
"service",
|
|
1660
|
+
"worker",
|
|
1661
|
+
"lambda",
|
|
1662
|
+
"functions"
|
|
1663
|
+
],
|
|
1664
|
+
shared: ["shared", "common", "utils", "lib", "core", "types", "models"],
|
|
1665
|
+
tooling: [
|
|
1666
|
+
"scripts",
|
|
1667
|
+
"tools",
|
|
1668
|
+
"cli",
|
|
1669
|
+
"devtools",
|
|
1670
|
+
"build",
|
|
1671
|
+
"config",
|
|
1672
|
+
"infra"
|
|
1673
|
+
],
|
|
1674
|
+
unknown: []
|
|
1675
|
+
};
|
|
1676
|
+
function detectScopeFromName(name) {
|
|
1677
|
+
const nameLower = name.toLowerCase();
|
|
1678
|
+
for (const [scope, keywords] of Object.entries(SCOPE_KEYWORDS)) {
|
|
1679
|
+
if (scope === "unknown")
|
|
1680
|
+
continue;
|
|
1681
|
+
for (const keyword of keywords) {
|
|
1682
|
+
if (nameLower.includes(keyword)) {
|
|
1683
|
+
return scope;
|
|
1684
|
+
}
|
|
1685
|
+
}
|
|
1686
|
+
}
|
|
1687
|
+
return "unknown";
|
|
1688
|
+
}
|
|
1689
|
+
async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
1690
|
+
if (depth > MAX_SCAN_DEPTH)
|
|
1691
|
+
return [];
|
|
1692
|
+
const results = [];
|
|
1693
|
+
const fullDir = currentDir ? path6.join(rootDir, currentDir) : rootDir;
|
|
1694
|
+
try {
|
|
1695
|
+
const entries = await fs4.readdir(fullDir, { withFileTypes: true });
|
|
1696
|
+
const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
|
|
1697
|
+
if (hasPackageJson && currentDir) {
|
|
1698
|
+
const info = await parsePackageJson(rootDir, currentDir);
|
|
1699
|
+
if (info) {
|
|
1700
|
+
results.push(info);
|
|
1701
|
+
}
|
|
1702
|
+
}
|
|
1703
|
+
for (const entry of entries) {
|
|
1704
|
+
if (!entry.isDirectory())
|
|
1705
|
+
continue;
|
|
1706
|
+
if (SKIP_DIRS.has(entry.name))
|
|
1707
|
+
continue;
|
|
1708
|
+
const subPath = currentDir ? `${currentDir}/${entry.name}` : entry.name;
|
|
1709
|
+
const subResults = await scanForPackageJsons(rootDir, subPath, depth + 1);
|
|
1710
|
+
results.push(...subResults);
|
|
1711
|
+
}
|
|
1712
|
+
} catch {}
|
|
1713
|
+
return results;
|
|
1714
|
+
}
|
|
1715
|
+
async function parsePackageJson(rootDir, relativePath) {
|
|
1716
|
+
try {
|
|
1717
|
+
const packageJsonPath = path6.join(rootDir, relativePath, "package.json");
|
|
1718
|
+
const content = await fs4.readFile(packageJsonPath, "utf-8");
|
|
1719
|
+
const pkg = JSON.parse(content);
|
|
1720
|
+
const name = pkg.name || path6.basename(relativePath);
|
|
1721
|
+
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
1722
|
+
let type = "unknown";
|
|
1723
|
+
if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
|
|
1724
|
+
type = "app";
|
|
1725
|
+
} else if (deps["express"] || deps["fastify"] || deps["koa"] || deps["hono"]) {
|
|
1726
|
+
type = "service";
|
|
1727
|
+
} else if (pkg.main || pkg.exports) {
|
|
1728
|
+
type = "library";
|
|
1729
|
+
}
|
|
1730
|
+
const hasWorkspaces = Boolean(pkg.workspaces);
|
|
1731
|
+
return { name, relativePath, type, hasWorkspaces };
|
|
1732
|
+
} catch {
|
|
1733
|
+
return null;
|
|
1734
|
+
}
|
|
1735
|
+
}
|
|
1736
|
+
async function detectProjectStructure(rootDir) {
|
|
1737
|
+
const projectMap = new Map;
|
|
1738
|
+
let isMonorepo = false;
|
|
1739
|
+
try {
|
|
1740
|
+
const entries = await fs4.readdir(rootDir, { withFileTypes: true });
|
|
1741
|
+
const dirNames = entries.filter((e) => e.isDirectory()).map((e) => e.name);
|
|
1742
|
+
const monorepoPatterns = ["apps", "packages", "libs", "services"];
|
|
1743
|
+
const hasMonorepoStructure = monorepoPatterns.some((p) => dirNames.includes(p));
|
|
1744
|
+
if (hasMonorepoStructure) {
|
|
1745
|
+
isMonorepo = true;
|
|
1746
|
+
for (const pattern of monorepoPatterns) {
|
|
1747
|
+
if (!dirNames.includes(pattern))
|
|
1748
|
+
continue;
|
|
1749
|
+
const patternDir = path6.join(rootDir, pattern);
|
|
1750
|
+
try {
|
|
1751
|
+
const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
|
|
1752
|
+
for (const subDir of subDirs) {
|
|
1753
|
+
if (!subDir.isDirectory())
|
|
1754
|
+
continue;
|
|
1755
|
+
const projectRoot = `${pattern}/${subDir.name}`;
|
|
1756
|
+
const type = getProjectType(pattern);
|
|
1757
|
+
projectMap.set(projectRoot, {
|
|
1758
|
+
name: subDir.name,
|
|
1759
|
+
root: projectRoot,
|
|
1760
|
+
type
|
|
1761
|
+
});
|
|
1762
|
+
}
|
|
1763
|
+
} catch {}
|
|
1764
|
+
}
|
|
1765
|
+
}
|
|
1766
|
+
const packageJsons = await scanForPackageJsons(rootDir);
|
|
1767
|
+
for (const pkg of packageJsons) {
|
|
1768
|
+
if (pkg.hasWorkspaces) {
|
|
1769
|
+
isMonorepo = true;
|
|
1770
|
+
}
|
|
1771
|
+
if (packageJsons.length > 1) {
|
|
1772
|
+
isMonorepo = true;
|
|
1773
|
+
}
|
|
1774
|
+
projectMap.set(pkg.relativePath, {
|
|
1775
|
+
name: pkg.name,
|
|
1776
|
+
root: pkg.relativePath,
|
|
1777
|
+
type: pkg.type
|
|
1778
|
+
});
|
|
1779
|
+
}
|
|
1780
|
+
let rootType = "unknown";
|
|
1781
|
+
try {
|
|
1782
|
+
const rootPkgPath = path6.join(rootDir, "package.json");
|
|
1783
|
+
const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
|
|
1784
|
+
if (rootPkg.workspaces) {
|
|
1785
|
+
isMonorepo = true;
|
|
1786
|
+
}
|
|
1787
|
+
const deps = { ...rootPkg.dependencies, ...rootPkg.devDependencies };
|
|
1788
|
+
if (deps["next"] || deps["react"] || deps["vue"]) {
|
|
1789
|
+
rootType = "app";
|
|
1790
|
+
} else if (deps["express"] || deps["fastify"] || deps["koa"]) {
|
|
1791
|
+
rootType = "service";
|
|
1792
|
+
}
|
|
1793
|
+
} catch {}
|
|
1794
|
+
const projects = Array.from(projectMap.values()).sort((a, b) => a.root.length - b.root.length);
|
|
1795
|
+
return {
|
|
1796
|
+
projects,
|
|
1797
|
+
isMonorepo,
|
|
1798
|
+
rootType: isMonorepo ? undefined : rootType
|
|
1799
|
+
};
|
|
1800
|
+
} catch {
|
|
1801
|
+
return {
|
|
1802
|
+
projects: [],
|
|
1803
|
+
isMonorepo: false,
|
|
1804
|
+
rootType: "unknown"
|
|
1805
|
+
};
|
|
1806
|
+
}
|
|
1807
|
+
}
|
|
1808
|
+
function getProjectType(patternDir) {
|
|
1809
|
+
switch (patternDir) {
|
|
1810
|
+
case "apps":
|
|
1811
|
+
return "app";
|
|
1812
|
+
case "packages":
|
|
1813
|
+
case "libs":
|
|
1814
|
+
return "library";
|
|
1815
|
+
case "services":
|
|
1816
|
+
return "service";
|
|
1817
|
+
case "scripts":
|
|
1818
|
+
case "tools":
|
|
1819
|
+
return "script";
|
|
1820
|
+
default:
|
|
1821
|
+
return "unknown";
|
|
1822
|
+
}
|
|
1823
|
+
}
|
|
1824
|
+
function findProjectForFile(filepath, structure) {
|
|
1825
|
+
const normalizedPath = filepath.replace(/\\/g, "/");
|
|
1826
|
+
const matches = [];
|
|
1827
|
+
for (const project of structure.projects) {
|
|
1828
|
+
if (normalizedPath === project.root || normalizedPath.startsWith(project.root + "/")) {
|
|
1829
|
+
matches.push(project);
|
|
1830
|
+
}
|
|
1831
|
+
}
|
|
1832
|
+
if (matches.length > 0) {
|
|
1833
|
+
return matches.reduce((best, current) => current.root.length > best.root.length ? current : best);
|
|
1834
|
+
}
|
|
1835
|
+
for (const { pattern, type } of PROJECT_PATTERNS) {
|
|
1836
|
+
const match = normalizedPath.match(pattern);
|
|
1837
|
+
if (match) {
|
|
1838
|
+
return {
|
|
1839
|
+
name: match[1],
|
|
1840
|
+
root: match[0],
|
|
1841
|
+
type
|
|
1842
|
+
};
|
|
1843
|
+
}
|
|
1844
|
+
}
|
|
1845
|
+
return {
|
|
1846
|
+
name: "root",
|
|
1847
|
+
root: "",
|
|
1848
|
+
type: structure.rootType ?? "unknown"
|
|
1849
|
+
};
|
|
991
1850
|
}
|
|
992
1851
|
|
|
993
|
-
// src/
|
|
1852
|
+
// src/introspection/fileIntrospector.ts
|
|
1853
|
+
import * as path7 from "path";
|
|
1854
|
+
var LAYER_PATTERNS2 = {
|
|
1855
|
+
controller: ["controller", "api", "routes", "route", "handler"],
|
|
1856
|
+
service: ["service", "logic", "usecase", "usecases", "handler"],
|
|
1857
|
+
repository: ["repository", "repo", "dao", "store", "persistence"],
|
|
1858
|
+
model: ["model", "models", "entity", "entities", "schema", "schemas", "types", "type"],
|
|
1859
|
+
util: ["util", "utils", "helper", "helpers", "common", "lib"],
|
|
1860
|
+
config: ["config", "configuration", "settings"],
|
|
1861
|
+
middleware: ["middleware", "middlewares"],
|
|
1862
|
+
domain: ["domain"],
|
|
1863
|
+
infrastructure: ["infrastructure", "infra"],
|
|
1864
|
+
application: ["application", "app"],
|
|
1865
|
+
presentation: ["presentation", "ui", "views", "view", "component", "components"],
|
|
1866
|
+
test: ["test", "tests", "spec", "specs", "__tests__", "e2e"]
|
|
1867
|
+
};
|
|
1868
|
+
var DOMAIN_PATTERNS = [
|
|
1869
|
+
"auth",
|
|
1870
|
+
"authentication",
|
|
1871
|
+
"user",
|
|
1872
|
+
"users",
|
|
1873
|
+
"account",
|
|
1874
|
+
"accounts",
|
|
1875
|
+
"profile",
|
|
1876
|
+
"profiles",
|
|
1877
|
+
"product",
|
|
1878
|
+
"products",
|
|
1879
|
+
"item",
|
|
1880
|
+
"items",
|
|
1881
|
+
"catalog",
|
|
1882
|
+
"order",
|
|
1883
|
+
"orders",
|
|
1884
|
+
"cart",
|
|
1885
|
+
"checkout",
|
|
1886
|
+
"payment",
|
|
1887
|
+
"payments",
|
|
1888
|
+
"billing",
|
|
1889
|
+
"subscription",
|
|
1890
|
+
"subscriptions",
|
|
1891
|
+
"notification",
|
|
1892
|
+
"notifications",
|
|
1893
|
+
"email",
|
|
1894
|
+
"sms",
|
|
1895
|
+
"report",
|
|
1896
|
+
"reports",
|
|
1897
|
+
"analytics",
|
|
1898
|
+
"metrics",
|
|
1899
|
+
"dashboard",
|
|
1900
|
+
"admin",
|
|
1901
|
+
"settings",
|
|
1902
|
+
"search",
|
|
1903
|
+
"chat",
|
|
1904
|
+
"message",
|
|
1905
|
+
"messages",
|
|
1906
|
+
"feed",
|
|
1907
|
+
"post",
|
|
1908
|
+
"posts",
|
|
1909
|
+
"comment",
|
|
1910
|
+
"comments",
|
|
1911
|
+
"media",
|
|
1912
|
+
"upload",
|
|
1913
|
+
"file",
|
|
1914
|
+
"files",
|
|
1915
|
+
"storage",
|
|
1916
|
+
"cache",
|
|
1917
|
+
"session",
|
|
1918
|
+
"log",
|
|
1919
|
+
"logs",
|
|
1920
|
+
"audit"
|
|
1921
|
+
];
|
|
1922
|
+
var FRAMEWORK_INDICATORS = {
|
|
1923
|
+
nextjs: ["next", "next/"],
|
|
1924
|
+
express: ["express"],
|
|
1925
|
+
fastify: ["fastify"],
|
|
1926
|
+
react: ["react"],
|
|
1927
|
+
vue: ["vue"],
|
|
1928
|
+
angular: ["@angular/"],
|
|
1929
|
+
nestjs: ["@nestjs/"],
|
|
1930
|
+
koa: ["koa"]
|
|
1931
|
+
};
|
|
1932
|
+
var EXTENSION_TO_LANGUAGE = {
|
|
1933
|
+
".ts": "typescript",
|
|
1934
|
+
".tsx": "typescript",
|
|
1935
|
+
".js": "javascript",
|
|
1936
|
+
".jsx": "javascript",
|
|
1937
|
+
".mjs": "javascript",
|
|
1938
|
+
".cjs": "javascript",
|
|
1939
|
+
".py": "python",
|
|
1940
|
+
".go": "go",
|
|
1941
|
+
".rs": "rust",
|
|
1942
|
+
".java": "java",
|
|
1943
|
+
".kt": "kotlin",
|
|
1944
|
+
".swift": "swift",
|
|
1945
|
+
".rb": "ruby",
|
|
1946
|
+
".php": "php",
|
|
1947
|
+
".cs": "csharp",
|
|
1948
|
+
".cpp": "cpp",
|
|
1949
|
+
".c": "c",
|
|
1950
|
+
".h": "c",
|
|
1951
|
+
".hpp": "cpp",
|
|
1952
|
+
".md": "markdown",
|
|
1953
|
+
".json": "json",
|
|
1954
|
+
".yaml": "yaml",
|
|
1955
|
+
".yml": "yaml"
|
|
1956
|
+
};
|
|
1957
|
+
/**
 * Derives path-based metadata for a single file.
 *
 * @param {string} filepath - File path; backslashes are converted to "/".
 * @param {object} structure - Project structure passed through to findProjectForFile.
 * @param {string} [fileContent] - File text; when non-empty it is scanned for framework imports.
 * @returns {object} Record with `filepath` (normalised), `project`, `scope`,
 *   `layer`, `domain`, `language`, `framework`, `depth` (number of directory
 *   segments) and `pathSegments` (the directory segments without the filename).
 */
function introspectFile(filepath, structure, fileContent) {
  const normalizedPath = filepath.replace(/\\/g, "/");
  // Drop empty segments so leading, trailing or doubled slashes do not count.
  const segments = normalizedPath.split("/").filter((s) => s.length > 0);
  const filename = segments[segments.length - 1] || "";
  // Extensions are matched case-insensitively: "Login.TS" and "README.MD"
  // must resolve to the same language as their lower-case spellings.
  const ext = path7.extname(filename).toLowerCase();
  const project = findProjectForFile(normalizedPath, structure);
  const language = EXTENSION_TO_LANGUAGE[ext] || "unknown";
  const layer = detectLayer(segments, filename);
  const domain = detectDomain(segments);
  const scope = detectScope(segments, project, layer);
  // Framework detection needs file text; without it the field stays undefined.
  const framework = fileContent ? detectFramework(fileContent) : undefined;
  return {
    filepath: normalizedPath,
    project,
    scope,
    layer,
    domain,
    language,
    framework,
    depth: segments.length - 1,
    pathSegments: segments.slice(0, -1)
  };
}
|
|
1983
|
+
/**
 * Guesses the architectural layer of a file.
 *
 * The filename is tested first: a layer matches when any of its patterns is a
 * substring of the lower-cased filename. Otherwise the directories are
 * examined from the one closest to the file up to the root, and a layer
 * matches when a lower-cased directory name equals one of its patterns.
 *
 * @param {string[]} segments - Path segments; the last one is the filename and is skipped in the directory pass.
 * @param {string} filename - The file's basename.
 * @returns {string|undefined} The matching LAYER_PATTERNS2 key, or undefined.
 */
function detectLayer(segments, filename) {
  const layerEntries = Object.entries(LAYER_PATTERNS2);
  const lowerName = filename.toLowerCase();

  const matchedByName = layerEntries.find(([, patterns]) =>
    patterns.some((pattern) => lowerName.includes(pattern))
  );
  if (matchedByName) {
    return matchedByName[0];
  }

  // Nearest directory first, so the most specific folder decides.
  const directories = segments.slice(0, -1).reverse();
  for (const directory of directories) {
    const lowerDir = directory.toLowerCase();
    const matchedByDir = layerEntries.find(([, patterns]) => patterns.includes(lowerDir));
    if (matchedByDir) {
      return matchedByDir[0];
    }
  }
  return undefined;
}
|
|
2002
|
+
/**
 * Guesses the business domain of a file from its path segments.
 *
 * Segments are scanned in path order. Structural folder names and every
 * LAYER_PATTERNS2 pattern are ignored. For the remaining segments an exact
 * (case-insensitive) DOMAIN_PATTERNS match wins; failing that, the first
 * domain that the segment starts or ends with is returned.
 *
 * @param {string[]} segments - Path segments, filename included.
 * @returns {string|undefined} The detected domain, or undefined when nothing matches.
 */
function detectDomain(segments) {
  const structuralNames = ["src", "lib", "app", "apps", "packages", "services", "modules", "features"];
  const layerNames = Object.values(LAYER_PATTERNS2).flat();
  const ignored = new Set([...structuralNames, ...layerNames]);

  for (const rawSegment of segments) {
    const name = rawSegment.toLowerCase();
    if (ignored.has(name)) {
      continue;
    }
    if (DOMAIN_PATTERNS.includes(name)) {
      return name;
    }
    // Prefix/suffix match, e.g. a segment that merely starts with a domain name.
    const partial = DOMAIN_PATTERNS.find(
      (candidate) => name.startsWith(candidate) || name.endsWith(candidate)
    );
    if (partial !== undefined) {
      return partial;
    }
  }
  return undefined;
}
|
|
2029
|
+
/**
 * Classifies a file as "backend", "frontend", "shared", "tooling" or "unknown".
 *
 * Precedence: the scope derived from the project name, then the scope implied
 * by the detected layer, then the first path segment that is a well-known
 * scope folder name.
 *
 * @param {string[]} segments - Path segments, filename included.
 * @param {{name: string}} project - Project the file belongs to; only `name` is read here.
 * @param {string} [layer] - Layer detected for the file, if any.
 * @returns {string} The detected scope, "unknown" when nothing applies.
 */
function detectScope(segments, project, layer) {
  const scopeFromProject = detectScopeFromName(project.name);
  if (scopeFromProject !== "unknown") {
    return scopeFromProject;
  }

  // Layers that imply a scope; any other layer falls through to the path check.
  const scopeByLayer = new Map([
    ["controller", "backend"],
    ["repository", "backend"],
    ["middleware", "backend"],
    ["presentation", "frontend"],
    ["util", "shared"],
    ["model", "shared"],
    ["test", "tooling"]
  ]);
  const scopeFromLayer = layer ? scopeByLayer.get(layer) : undefined;
  if (scopeFromLayer !== undefined) {
    return scopeFromLayer;
  }

  // Checked in this order for every segment; the first matching segment wins.
  const folderScopes = [
    ["backend", ["server", "api", "backend"]],
    ["frontend", ["client", "web", "frontend", "ui"]],
    ["shared", ["shared", "common", "lib", "libs"]]
  ];
  for (const rawSegment of segments) {
    const name = rawSegment.toLowerCase();
    const matched = folderScopes.find(([, folderNames]) => folderNames.includes(name));
    if (matched) {
      return matched[0];
    }
  }
  return "unknown";
}
|
|
2063
|
+
/**
 * Detects the framework a file targets by looking for import/require
 * statements whose specifier begins with a known indicator.
 *
 * Frameworks are tried in FRAMEWORK_INDICATORS order and the first hit wins.
 * NOTE(review): matching is prefix-based (no closing quote is required), so a
 * specifier such as 'react-native' also counts as "react" - confirm intended.
 *
 * @param {string} content - Source text of the file.
 * @returns {string|undefined} Framework id, or undefined when none is referenced.
 */
function detectFramework(content) {
  // True when the text contains an ESM or CommonJS import starting with the prefix.
  const referencesPrefix = (prefix) =>
    [
      `from '${prefix}`,
      `from "${prefix}`,
      `require('${prefix}`,
      `require("${prefix}`
    ].some((needle) => content.includes(needle));

  const hit = Object.entries(FRAMEWORK_INDICATORS).find(([, indicators]) =>
    indicators.some((indicator) => referencesPrefix(indicator))
  );
  return hit ? hit[0] : undefined;
}
|
|
2073
|
+
|
|
2074
|
+
// src/introspection/index.ts
|
|
2075
|
+
init_config2();
|
|
2076
|
+
|
|
2077
|
+
/**
 * In-memory registry of per-file introspection records for one root
 * directory, with JSON persistence under "<raggrep dir>/introspection".
 *
 * Records are keyed by their normalised (forward-slash) `filepath`, both when
 * added and when loaded from disk, so lookups behave the same before and
 * after a save/load round trip.
 */
class IntrospectionIndex {
  rootDir;
  structure = null;
  files = new Map();
  config = {};

  /**
   * @param {string} rootDir - Directory whose files are introspected.
   */
  constructor(rootDir) {
    this.rootDir = rootDir;
  }

  /**
   * Detects the project structure and reads the optional `introspection`
   * section of "<rootDir>/.raggrep/config.json".
   */
  async initialize() {
    this.structure = await detectProjectStructure(this.rootDir);
    try {
      const configPath = path8.join(this.rootDir, ".raggrep", "config.json");
      const configContent = await fs5.readFile(configPath, "utf-8");
      const config = JSON.parse(configContent);
      this.config = config.introspection || {};
    } catch {
      // Best-effort: a missing or unreadable config leaves the overrides empty.
    }
  }

  /**
   * @returns {object|null} The detected project structure, or null before
   *   initialize()/load() succeeded or after clear().
   */
  getStructure() {
    return this.structure;
  }

  /**
   * Introspects a file, applies configured overrides and stores the record.
   *
   * @param {string} filepath - File path passed to introspectFile.
   * @param {string} [content] - File text, forwarded to introspectFile.
   * @returns {object} The stored introspection record.
   * @throws {Error} When the index has no project structure yet.
   */
  addFile(filepath, content) {
    if (!this.structure) {
      throw new Error("IntrospectionIndex not initialized");
    }
    const intro = introspectFile(filepath, this.structure, content);
    this.applyOverrides(intro);
    // Key by the normalised path, exactly as loadFilesRecursive does, so a
    // backslash path added here resolves identically after a reload.
    this.files.set(intro.filepath, intro);
    return intro;
  }

  /**
   * Looks up a record by path; backslash and forward-slash spellings are
   * both accepted.
   *
   * @param {string} filepath
   * @returns {object|undefined}
   */
  getFile(filepath) {
    const direct = this.files.get(filepath);
    if (direct !== undefined || typeof filepath !== "string") {
      return direct;
    }
    return this.files.get(filepath.replace(/\\/g, "/"));
  }

  /**
   * @returns {object[]} All stored records, in insertion order.
   */
  getAllFiles() {
    return Array.from(this.files.values());
  }

  /**
   * Applies the first matching per-project override (scope and/or framework)
   * from the configuration to the record, mutating it in place.
   *
   * @param {object} intro - Record produced by introspectFile.
   */
  applyOverrides(intro) {
    const projects = this.config.projects;
    if (!projects) {
      return;
    }
    for (const [projectPath, overrides] of Object.entries(projects)) {
      // Tolerate a trailing slash in the configured path ("apps/web/").
      const base = projectPath.replace(/\/+$/, "");
      const matches =
        intro.filepath.startsWith(`${base}/`) ||
        intro.project.root === projectPath ||
        intro.project.root === base;
      if (!matches) {
        continue;
      }
      if (overrides.scope) {
        intro.scope = overrides.scope;
      }
      if (overrides.framework) {
        intro.framework = overrides.framework;
      }
      break;
    }
  }

  /**
   * Writes the project structure to "_project.json" and one JSON file per
   * record under "files/", mirroring the source tree.
   *
   * @param {object} config - Configuration forwarded to getRaggrepDir.
   */
  async save(config) {
    const introDir = path8.join(getRaggrepDir(this.rootDir, config), "introspection");
    await fs5.mkdir(introDir, { recursive: true });
    const projectPath = path8.join(introDir, "_project.json");
    await fs5.writeFile(projectPath, JSON.stringify({
      version: "1.0.0",
      lastUpdated: new Date().toISOString(),
      structure: this.structure
    }, null, 2));
    for (const [filepath, intro] of this.files) {
      // Swap only the basename's extension for ".json". Files without one
      // ("Makefile") get ".json" appended so load() picks them up, and dots
      // in directory names ("src/.config/x") no longer truncate the path.
      // NOTE(review): "a.ts" and "a.js" still share "a.json", as before.
      const ext = path8.extname(filepath);
      const stem = ext ? filepath.slice(0, -ext.length) : filepath;
      const introFilePath = path8.join(introDir, "files", `${stem}.json`);
      await fs5.mkdir(path8.dirname(introFilePath), { recursive: true });
      await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
    }
    console.log(` [Introspection] Saved metadata for ${this.files.size} files`);
  }

  /**
   * Restores the structure and records written by save(). If the project
   * file cannot be read or parsed, the index is reset to its empty state.
   *
   * @param {object} config - Configuration forwarded to getRaggrepDir.
   */
  async load(config) {
    const introDir = path8.join(getRaggrepDir(this.rootDir, config), "introspection");
    try {
      const projectPath = path8.join(introDir, "_project.json");
      const projectContent = await fs5.readFile(projectPath, "utf-8");
      const projectData = JSON.parse(projectContent);
      this.structure = projectData.structure;
      await this.loadFilesRecursive(path8.join(introDir, "files"), "");
    } catch {
      this.structure = null;
      this.files.clear();
    }
  }

  /**
   * Recursively reads every "*.json" record below basePath into the index,
   * keyed by the record's own `filepath`.
   *
   * @param {string} basePath - Directory to scan.
   * @param {string} prefix - Path of basePath relative to the scan root; only
   *   threaded through the recursion, not used for keying.
   */
  async loadFilesRecursive(basePath, prefix) {
    let entries;
    try {
      entries = await fs5.readdir(basePath, { withFileTypes: true });
    } catch {
      // Missing or unreadable directory: nothing to load from here.
      return;
    }
    for (const entry of entries) {
      const entryPath = path8.join(basePath, entry.name);
      const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        await this.loadFilesRecursive(entryPath, relativePath);
      } else if (entry.name.endsWith(".json")) {
        try {
          const content = await fs5.readFile(entryPath, "utf-8");
          const intro = JSON.parse(content);
          if (intro && typeof intro.filepath === "string") {
            this.files.set(intro.filepath, intro);
          }
        } catch {
          // Skip a corrupt or unreadable record instead of abandoning the
          // remaining entries of this directory.
        }
      }
    }
  }

  /**
   * Drops all records and the project structure.
   */
  clear() {
    this.files.clear();
    this.structure = null;
  }
}
|
|
2177
|
+
|
|
2178
|
+
// src/app/indexer/watcher.ts
|
|
2179
|
+
import { watch } from "chokidar";
|
|
2180
|
+
init_config2();
|
|
2181
|
+
|
|
2182
|
+
// src/app/indexer/index.ts
|
|
994
2183
|
async function indexDirectory(rootDir, options = {}) {
|
|
995
2184
|
const verbose = options.verbose ?? false;
|
|
996
|
-
rootDir =
|
|
2185
|
+
rootDir = path9.resolve(rootDir);
|
|
997
2186
|
console.log(`Indexing directory: ${rootDir}`);
|
|
998
2187
|
const config = await loadConfig(rootDir);
|
|
2188
|
+
const introspection = new IntrospectionIndex(rootDir);
|
|
2189
|
+
await introspection.initialize();
|
|
2190
|
+
if (verbose) {
|
|
2191
|
+
const structure = introspection.getStructure();
|
|
2192
|
+
if (structure?.isMonorepo) {
|
|
2193
|
+
console.log(`Detected monorepo with ${structure.projects.length} projects`);
|
|
2194
|
+
}
|
|
2195
|
+
}
|
|
999
2196
|
await registerBuiltInModules();
|
|
1000
2197
|
const enabledModules = registry.getEnabled(config);
|
|
1001
2198
|
if (enabledModules.length === 0) {
|
|
@@ -1012,7 +2209,7 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1012
2209
|
const moduleConfig = getModuleConfig(config, module.id);
|
|
1013
2210
|
if (module.initialize && moduleConfig) {
|
|
1014
2211
|
const configWithOverrides = { ...moduleConfig };
|
|
1015
|
-
if (options.model && module.id === "
|
|
2212
|
+
if (options.model && module.id === "language/typescript") {
|
|
1016
2213
|
configWithOverrides.options = {
|
|
1017
2214
|
...configWithOverrides.options,
|
|
1018
2215
|
embeddingModel: options.model
|
|
@@ -1020,7 +2217,7 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1020
2217
|
}
|
|
1021
2218
|
await module.initialize(configWithOverrides);
|
|
1022
2219
|
}
|
|
1023
|
-
const result = await indexWithModule(rootDir, files, module, config, verbose);
|
|
2220
|
+
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
|
|
1024
2221
|
results.push(result);
|
|
1025
2222
|
if (module.finalize) {
|
|
1026
2223
|
console.log(`[${module.name}] Building secondary indexes...`);
|
|
@@ -1028,12 +2225,12 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1028
2225
|
rootDir,
|
|
1029
2226
|
config,
|
|
1030
2227
|
readFile: async (filepath) => {
|
|
1031
|
-
const fullPath =
|
|
1032
|
-
return
|
|
2228
|
+
const fullPath = path9.isAbsolute(filepath) ? filepath : path9.join(rootDir, filepath);
|
|
2229
|
+
return fs6.readFile(fullPath, "utf-8");
|
|
1033
2230
|
},
|
|
1034
2231
|
getFileStats: async (filepath) => {
|
|
1035
|
-
const fullPath =
|
|
1036
|
-
const stats = await
|
|
2232
|
+
const fullPath = path9.isAbsolute(filepath) ? filepath : path9.join(rootDir, filepath);
|
|
2233
|
+
const stats = await fs6.stat(fullPath);
|
|
1037
2234
|
return { lastModified: stats.mtime.toISOString() };
|
|
1038
2235
|
}
|
|
1039
2236
|
};
|
|
@@ -1041,10 +2238,11 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1041
2238
|
}
|
|
1042
2239
|
console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
1043
2240
|
}
|
|
2241
|
+
await introspection.save(config);
|
|
1044
2242
|
await updateGlobalManifest(rootDir, enabledModules, config);
|
|
1045
2243
|
return results;
|
|
1046
2244
|
}
|
|
1047
|
-
async function indexWithModule(rootDir, files, module, config, verbose) {
|
|
2245
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
|
|
1048
2246
|
const result = {
|
|
1049
2247
|
moduleId: module.id,
|
|
1050
2248
|
indexed: 0,
|
|
@@ -1056,19 +2254,20 @@ async function indexWithModule(rootDir, files, module, config, verbose) {
|
|
|
1056
2254
|
rootDir,
|
|
1057
2255
|
config,
|
|
1058
2256
|
readFile: async (filepath) => {
|
|
1059
|
-
const fullPath =
|
|
1060
|
-
return
|
|
2257
|
+
const fullPath = path9.isAbsolute(filepath) ? filepath : path9.join(rootDir, filepath);
|
|
2258
|
+
return fs6.readFile(fullPath, "utf-8");
|
|
1061
2259
|
},
|
|
1062
2260
|
getFileStats: async (filepath) => {
|
|
1063
|
-
const fullPath =
|
|
1064
|
-
const stats = await
|
|
2261
|
+
const fullPath = path9.isAbsolute(filepath) ? filepath : path9.join(rootDir, filepath);
|
|
2262
|
+
const stats = await fs6.stat(fullPath);
|
|
1065
2263
|
return { lastModified: stats.mtime.toISOString() };
|
|
1066
|
-
}
|
|
2264
|
+
},
|
|
2265
|
+
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
1067
2266
|
};
|
|
1068
2267
|
for (const filepath of files) {
|
|
1069
|
-
const relativePath =
|
|
2268
|
+
const relativePath = path9.relative(rootDir, filepath);
|
|
1070
2269
|
try {
|
|
1071
|
-
const stats = await
|
|
2270
|
+
const stats = await fs6.stat(filepath);
|
|
1072
2271
|
const lastModified = stats.mtime.toISOString();
|
|
1073
2272
|
const existingEntry = manifest.files[relativePath];
|
|
1074
2273
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
@@ -1078,7 +2277,8 @@ async function indexWithModule(rootDir, files, module, config, verbose) {
|
|
|
1078
2277
|
result.skipped++;
|
|
1079
2278
|
continue;
|
|
1080
2279
|
}
|
|
1081
|
-
const content = await
|
|
2280
|
+
const content = await fs6.readFile(filepath, "utf-8");
|
|
2281
|
+
introspection.addFile(relativePath, content);
|
|
1082
2282
|
if (verbose) {
|
|
1083
2283
|
console.log(` Processing ${relativePath}...`);
|
|
1084
2284
|
}
|
|
@@ -1122,7 +2322,7 @@ async function findFiles(rootDir, config) {
|
|
|
1122
2322
|
async function loadModuleManifest(rootDir, moduleId, config) {
|
|
1123
2323
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
1124
2324
|
try {
|
|
1125
|
-
const content = await
|
|
2325
|
+
const content = await fs6.readFile(manifestPath, "utf-8");
|
|
1126
2326
|
return JSON.parse(content);
|
|
1127
2327
|
} catch {
|
|
1128
2328
|
return {
|
|
@@ -1135,14 +2335,14 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
1135
2335
|
}
|
|
1136
2336
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
1137
2337
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
1138
|
-
await
|
|
1139
|
-
await
|
|
2338
|
+
await fs6.mkdir(path9.dirname(manifestPath), { recursive: true });
|
|
2339
|
+
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
1140
2340
|
}
|
|
1141
2341
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
1142
2342
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
1143
|
-
const indexFilePath =
|
|
1144
|
-
await
|
|
1145
|
-
await
|
|
2343
|
+
const indexFilePath = path9.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
2344
|
+
await fs6.mkdir(path9.dirname(indexFilePath), { recursive: true });
|
|
2345
|
+
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
1146
2346
|
}
|
|
1147
2347
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
1148
2348
|
const manifestPath = getGlobalManifestPath(rootDir, config);
|
|
@@ -1151,12 +2351,12 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
1151
2351
|
lastUpdated: new Date().toISOString(),
|
|
1152
2352
|
modules: modules.map((m) => m.id)
|
|
1153
2353
|
};
|
|
1154
|
-
await
|
|
1155
|
-
await
|
|
2354
|
+
await fs6.mkdir(path9.dirname(manifestPath), { recursive: true });
|
|
2355
|
+
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
1156
2356
|
}
|
|
1157
2357
|
async function cleanupIndex(rootDir, options = {}) {
|
|
1158
2358
|
const verbose = options.verbose ?? false;
|
|
1159
|
-
rootDir =
|
|
2359
|
+
rootDir = path9.resolve(rootDir);
|
|
1160
2360
|
console.log(`Cleaning up index in: ${rootDir}`);
|
|
1161
2361
|
const config = await loadConfig(rootDir);
|
|
1162
2362
|
await registerBuiltInModules();
|
|
@@ -1186,9 +2386,9 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
1186
2386
|
const filesToRemove = [];
|
|
1187
2387
|
const updatedFiles = {};
|
|
1188
2388
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
1189
|
-
const fullPath =
|
|
2389
|
+
const fullPath = path9.join(rootDir, filepath);
|
|
1190
2390
|
try {
|
|
1191
|
-
await
|
|
2391
|
+
await fs6.access(fullPath);
|
|
1192
2392
|
updatedFiles[filepath] = entry;
|
|
1193
2393
|
result.kept++;
|
|
1194
2394
|
} catch {
|
|
@@ -1200,9 +2400,9 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
1200
2400
|
}
|
|
1201
2401
|
}
|
|
1202
2402
|
for (const filepath of filesToRemove) {
|
|
1203
|
-
const indexFilePath =
|
|
2403
|
+
const indexFilePath = path9.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
1204
2404
|
try {
|
|
1205
|
-
await
|
|
2405
|
+
await fs6.unlink(indexFilePath);
|
|
1206
2406
|
} catch {}
|
|
1207
2407
|
}
|
|
1208
2408
|
manifest.files = updatedFiles;
|
|
@@ -1213,16 +2413,16 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
1213
2413
|
}
|
|
1214
2414
|
async function cleanupEmptyDirectories(dir) {
|
|
1215
2415
|
try {
|
|
1216
|
-
const entries = await
|
|
2416
|
+
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
1217
2417
|
for (const entry of entries) {
|
|
1218
2418
|
if (entry.isDirectory()) {
|
|
1219
|
-
const subDir =
|
|
2419
|
+
const subDir = path9.join(dir, entry.name);
|
|
1220
2420
|
await cleanupEmptyDirectories(subDir);
|
|
1221
2421
|
}
|
|
1222
2422
|
}
|
|
1223
|
-
const remainingEntries = await
|
|
2423
|
+
const remainingEntries = await fs6.readdir(dir);
|
|
1224
2424
|
if (remainingEntries.length === 0) {
|
|
1225
|
-
await
|
|
2425
|
+
await fs6.rmdir(dir);
|
|
1226
2426
|
return true;
|
|
1227
2427
|
}
|
|
1228
2428
|
return false;
|
|
@@ -1231,12 +2431,12 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
1231
2431
|
}
|
|
1232
2432
|
}
|
|
1233
2433
|
|
|
1234
|
-
// src/search/index.ts
|
|
2434
|
+
// src/app/search/index.ts
|
|
1235
2435
|
init_config2();
|
|
1236
|
-
import * as
|
|
1237
|
-
import * as
|
|
2436
|
+
import * as fs7 from "fs/promises";
|
|
2437
|
+
import * as path10 from "path";
|
|
1238
2438
|
async function search(rootDir, query, options = {}) {
|
|
1239
|
-
rootDir =
|
|
2439
|
+
rootDir = path10.resolve(rootDir);
|
|
1240
2440
|
console.log(`Searching for: "${query}"`);
|
|
1241
2441
|
const config = await loadConfig(rootDir);
|
|
1242
2442
|
await registerBuiltInModules();
|
|
@@ -1277,9 +2477,9 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
1277
2477
|
config,
|
|
1278
2478
|
loadFileIndex: async (filepath) => {
|
|
1279
2479
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
1280
|
-
const indexFilePath = hasExtension ?
|
|
2480
|
+
const indexFilePath = hasExtension ? path10.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path10.join(indexPath, filepath + ".json");
|
|
1281
2481
|
try {
|
|
1282
|
-
const content = await
|
|
2482
|
+
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
1283
2483
|
return JSON.parse(content);
|
|
1284
2484
|
} catch {
|
|
1285
2485
|
return null;
|
|
@@ -1289,7 +2489,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
1289
2489
|
const files = [];
|
|
1290
2490
|
await traverseDirectory(indexPath, files, indexPath);
|
|
1291
2491
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
1292
|
-
const relative3 =
|
|
2492
|
+
const relative3 = path10.relative(indexPath, f);
|
|
1293
2493
|
return relative3.replace(/\.json$/, "");
|
|
1294
2494
|
});
|
|
1295
2495
|
}
|
|
@@ -1297,9 +2497,9 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
1297
2497
|
}
|
|
1298
2498
|
async function traverseDirectory(dir, files, basePath) {
|
|
1299
2499
|
try {
|
|
1300
|
-
const entries = await
|
|
2500
|
+
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
1301
2501
|
for (const entry of entries) {
|
|
1302
|
-
const fullPath =
|
|
2502
|
+
const fullPath = path10.join(dir, entry.name);
|
|
1303
2503
|
if (entry.isDirectory()) {
|
|
1304
2504
|
await traverseDirectory(fullPath, files, basePath);
|
|
1305
2505
|
} else if (entry.isFile()) {
|
|
@@ -1311,7 +2511,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
1311
2511
|
async function loadGlobalManifest(rootDir, config) {
|
|
1312
2512
|
const manifestPath = getGlobalManifestPath(rootDir, config);
|
|
1313
2513
|
try {
|
|
1314
|
-
const content = await
|
|
2514
|
+
const content = await fs7.readFile(manifestPath, "utf-8");
|
|
1315
2515
|
return JSON.parse(content);
|
|
1316
2516
|
} catch {
|
|
1317
2517
|
return null;
|
|
@@ -1375,4 +2575,4 @@ export {
|
|
|
1375
2575
|
cleanup
|
|
1376
2576
|
};
|
|
1377
2577
|
|
|
1378
|
-
//# debugId=
|
|
2578
|
+
//# debugId=0E3E5BCA1147AB0A64756E2164756E21
|