raggrep 0.1.0 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -6
- package/dist/{indexer → app/indexer}/index.d.ts +26 -1
- package/dist/app/indexer/watcher.d.ts +33 -0
- package/dist/{search → app/search}/index.d.ts +1 -1
- package/dist/cli/main.js +1802 -222
- package/dist/cli/main.js.map +26 -15
- package/dist/composition.d.ts +7 -7
- package/dist/domain/entities/fileSummary.d.ts +18 -0
- package/dist/domain/entities/index.d.ts +1 -1
- package/dist/domain/entities/searchResult.d.ts +47 -2
- package/dist/domain/index.d.ts +5 -3
- package/dist/domain/ports/embedding.d.ts +0 -4
- package/dist/domain/ports/index.d.ts +3 -4
- package/dist/domain/services/bm25.d.ts +24 -0
- package/dist/domain/services/index.d.ts +3 -2
- package/dist/domain/services/keywords.d.ts +45 -0
- package/dist/domain/services/similarity.d.ts +23 -0
- package/dist/{application → domain}/usecases/cleanupIndex.d.ts +2 -2
- package/dist/{application → domain}/usecases/indexDirectory.d.ts +2 -2
- package/dist/{application → domain}/usecases/searchIndex.d.ts +2 -2
- package/dist/index.d.ts +5 -5
- package/dist/index.js +1444 -244
- package/dist/index.js.map +26 -15
- package/dist/{utils/config.d.ts → infrastructure/config/configLoader.d.ts} +7 -4
- package/dist/infrastructure/config/index.d.ts +6 -0
- package/dist/infrastructure/embeddings/index.d.ts +3 -1
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +16 -0
- package/dist/infrastructure/index.d.ts +4 -3
- package/dist/infrastructure/storage/index.d.ts +4 -1
- package/dist/{utils/tieredIndex.d.ts → infrastructure/storage/symbolicIndex.d.ts} +7 -18
- package/dist/introspection/fileIntrospector.d.ts +14 -0
- package/dist/introspection/index.d.ts +68 -0
- package/dist/introspection/introspection.test.d.ts +4 -0
- package/dist/introspection/projectDetector.d.ts +27 -0
- package/dist/introspection/types.d.ts +70 -0
- package/dist/modules/core/index.d.ts +69 -0
- package/dist/modules/core/symbols.d.ts +27 -0
- package/dist/modules/core/symbols.test.d.ts +4 -0
- package/dist/modules/{semantic → language/typescript}/index.d.ts +11 -12
- package/dist/types.d.ts +4 -1
- package/package.json +7 -6
- package/dist/application/index.d.ts +0 -7
- package/dist/utils/bm25.d.ts +0 -9
- package/dist/utils/embeddings.d.ts +0 -46
- /package/dist/{cli → app/cli}/main.d.ts +0 -0
- /package/dist/{application → domain}/usecases/index.d.ts +0 -0
- /package/dist/{utils → infrastructure/embeddings}/embeddings.test.d.ts +0 -0
- /package/dist/modules/{semantic → language/typescript}/parseCode.d.ts +0 -0
- /package/dist/modules/{semantic → language/typescript}/parseCode.test.d.ts +0 -0
package/dist/cli/main.js
CHANGED
|
@@ -11,117 +11,144 @@ var __export = (target, all) => {
|
|
|
11
11
|
};
|
|
12
12
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
13
13
|
|
|
14
|
-
// src/
|
|
14
|
+
// src/infrastructure/embeddings/transformersEmbedding.ts
|
|
15
15
|
import { pipeline, env } from "@xenova/transformers";
|
|
16
16
|
import * as path from "path";
|
|
17
17
|
import * as os from "os";
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
18
|
+
|
|
19
|
+
class TransformersEmbeddingProvider {
|
|
20
|
+
pipeline = null;
|
|
21
|
+
config;
|
|
22
|
+
isInitializing = false;
|
|
23
|
+
initPromise = null;
|
|
24
|
+
constructor(config) {
|
|
25
|
+
this.config = {
|
|
26
|
+
model: config?.model ?? "all-MiniLM-L6-v2",
|
|
27
|
+
showProgress: config?.showProgress ?? true
|
|
28
|
+
};
|
|
29
29
|
}
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
async initialize(config) {
|
|
31
|
+
if (config) {
|
|
32
|
+
if (config.model !== this.config.model) {
|
|
33
|
+
this.pipeline = null;
|
|
34
|
+
}
|
|
35
|
+
this.config = { ...this.config, ...config };
|
|
36
|
+
}
|
|
37
|
+
await this.ensurePipeline();
|
|
32
38
|
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if (currentConfig.showProgress) {
|
|
37
|
-
console.log(`
|
|
38
|
-
Loading embedding model: ${currentConfig.model}`);
|
|
39
|
-
console.log(` Cache: ${CACHE_DIR}`);
|
|
39
|
+
async ensurePipeline() {
|
|
40
|
+
if (this.pipeline) {
|
|
41
|
+
return;
|
|
40
42
|
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
43
|
+
if (this.isInitializing && this.initPromise) {
|
|
44
|
+
return this.initPromise;
|
|
45
|
+
}
|
|
46
|
+
this.isInitializing = true;
|
|
47
|
+
this.initPromise = (async () => {
|
|
48
|
+
const modelId = EMBEDDING_MODELS[this.config.model];
|
|
49
|
+
if (this.config.showProgress) {
|
|
50
|
+
console.log(`
|
|
51
|
+
Loading embedding model: ${this.config.model}`);
|
|
52
|
+
console.log(` Cache: ${CACHE_DIR}`);
|
|
53
|
+
}
|
|
54
|
+
try {
|
|
55
|
+
this.pipeline = await pipeline("feature-extraction", modelId, {
|
|
56
|
+
progress_callback: this.config.showProgress ? (progress) => {
|
|
57
|
+
if (progress.status === "progress" && progress.file) {
|
|
58
|
+
const pct = progress.progress ? Math.round(progress.progress) : 0;
|
|
59
|
+
process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
|
|
60
|
+
} else if (progress.status === "done" && progress.file) {
|
|
61
|
+
process.stdout.write(`\r Downloaded ${progress.file}
|
|
49
62
|
`);
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
console.log(` Model ready.
|
|
63
|
+
}
|
|
64
|
+
} : undefined
|
|
65
|
+
});
|
|
66
|
+
if (this.config.showProgress) {
|
|
67
|
+
console.log(` Model ready.
|
|
56
68
|
`);
|
|
69
|
+
}
|
|
70
|
+
} catch (error) {
|
|
71
|
+
this.pipeline = null;
|
|
72
|
+
throw new Error(`Failed to load embedding model: ${error}`);
|
|
73
|
+
} finally {
|
|
74
|
+
this.isInitializing = false;
|
|
75
|
+
this.initPromise = null;
|
|
57
76
|
}
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
77
|
+
})();
|
|
78
|
+
return this.initPromise;
|
|
79
|
+
}
|
|
80
|
+
async getEmbedding(text) {
|
|
81
|
+
await this.ensurePipeline();
|
|
82
|
+
if (!this.pipeline) {
|
|
83
|
+
throw new Error("Embedding pipeline not initialized");
|
|
65
84
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
if (!embeddingPipeline) {
|
|
72
|
-
throw new Error("Embedding pipeline not initialized");
|
|
85
|
+
const output = await this.pipeline(text, {
|
|
86
|
+
pooling: "mean",
|
|
87
|
+
normalize: true
|
|
88
|
+
});
|
|
89
|
+
return Array.from(output.data);
|
|
73
90
|
}
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
91
|
+
async getEmbeddings(texts) {
|
|
92
|
+
if (texts.length === 0)
|
|
93
|
+
return [];
|
|
94
|
+
await this.ensurePipeline();
|
|
95
|
+
if (!this.pipeline) {
|
|
96
|
+
throw new Error("Embedding pipeline not initialized");
|
|
97
|
+
}
|
|
98
|
+
const results = [];
|
|
99
|
+
for (let i = 0;i < texts.length; i += BATCH_SIZE) {
|
|
100
|
+
const batch = texts.slice(i, i + BATCH_SIZE);
|
|
101
|
+
const outputs = await Promise.all(batch.map(async (text) => {
|
|
102
|
+
const output = await this.pipeline(text, {
|
|
103
|
+
pooling: "mean",
|
|
104
|
+
normalize: true
|
|
105
|
+
});
|
|
106
|
+
return Array.from(output.data);
|
|
107
|
+
}));
|
|
108
|
+
results.push(...outputs);
|
|
109
|
+
}
|
|
110
|
+
return results;
|
|
86
111
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
const batch = texts.slice(i, i + BATCH_SIZE);
|
|
90
|
-
const outputs = await Promise.all(batch.map(async (text) => {
|
|
91
|
-
const output = await embeddingPipeline(text, {
|
|
92
|
-
pooling: "mean",
|
|
93
|
-
normalize: true
|
|
94
|
-
});
|
|
95
|
-
return Array.from(output.data);
|
|
96
|
-
}));
|
|
97
|
-
results.push(...outputs);
|
|
112
|
+
getDimension() {
|
|
113
|
+
return EMBEDDING_DIMENSION;
|
|
98
114
|
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
function cosineSimilarity(a, b) {
|
|
102
|
-
if (a.length !== b.length) {
|
|
103
|
-
throw new Error("Vectors must have the same length");
|
|
115
|
+
getModelName() {
|
|
116
|
+
return this.config.model;
|
|
104
117
|
}
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
let normB = 0;
|
|
108
|
-
for (let i = 0;i < a.length; i++) {
|
|
109
|
-
dotProduct += a[i] * b[i];
|
|
110
|
-
normA += a[i] * a[i];
|
|
111
|
-
normB += b[i] * b[i];
|
|
118
|
+
async dispose() {
|
|
119
|
+
this.pipeline = null;
|
|
112
120
|
}
|
|
113
|
-
if (normA === 0 || normB === 0)
|
|
114
|
-
return 0;
|
|
115
|
-
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
116
|
-
}
|
|
117
|
-
function getEmbeddingConfig() {
|
|
118
|
-
return { ...currentConfig };
|
|
119
121
|
}
|
|
120
122
|
function getCacheDir() {
|
|
121
123
|
return CACHE_DIR;
|
|
122
124
|
}
|
|
123
|
-
|
|
124
|
-
|
|
125
|
+
function configureEmbeddings(config) {
|
|
126
|
+
const newConfig = { ...globalConfig, ...config };
|
|
127
|
+
if (newConfig.model !== globalConfig.model) {
|
|
128
|
+
globalProvider = null;
|
|
129
|
+
}
|
|
130
|
+
globalConfig = newConfig;
|
|
131
|
+
}
|
|
132
|
+
function getEmbeddingConfig() {
|
|
133
|
+
return { ...globalConfig };
|
|
134
|
+
}
|
|
135
|
+
async function ensureGlobalProvider() {
|
|
136
|
+
if (!globalProvider) {
|
|
137
|
+
globalProvider = new TransformersEmbeddingProvider(globalConfig);
|
|
138
|
+
await globalProvider.initialize();
|
|
139
|
+
}
|
|
140
|
+
return globalProvider;
|
|
141
|
+
}
|
|
142
|
+
async function getEmbedding(text) {
|
|
143
|
+
const provider = await ensureGlobalProvider();
|
|
144
|
+
return provider.getEmbedding(text);
|
|
145
|
+
}
|
|
146
|
+
async function getEmbeddings(texts) {
|
|
147
|
+
const provider = await ensureGlobalProvider();
|
|
148
|
+
return provider.getEmbeddings(texts);
|
|
149
|
+
}
|
|
150
|
+
var CACHE_DIR, EMBEDDING_MODELS, EMBEDDING_DIMENSION = 384, BATCH_SIZE = 32, globalProvider = null, globalConfig;
|
|
151
|
+
var init_transformersEmbedding = __esm(() => {
|
|
125
152
|
CACHE_DIR = path.join(os.homedir(), ".cache", "raggrep", "models");
|
|
126
153
|
env.cacheDir = CACHE_DIR;
|
|
127
154
|
env.allowLocalModels = true;
|
|
@@ -131,11 +158,15 @@ var init_embeddings = __esm(() => {
|
|
|
131
158
|
"bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
|
|
132
159
|
"paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
|
|
133
160
|
};
|
|
134
|
-
|
|
161
|
+
globalConfig = {
|
|
135
162
|
model: "all-MiniLM-L6-v2",
|
|
136
163
|
showProgress: true
|
|
137
164
|
};
|
|
138
|
-
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
// src/infrastructure/embeddings/index.ts
|
|
168
|
+
var init_embeddings = __esm(() => {
|
|
169
|
+
init_transformersEmbedding();
|
|
139
170
|
});
|
|
140
171
|
// src/domain/entities/searchResult.ts
|
|
141
172
|
var init_searchResult = () => {};
|
|
@@ -149,7 +180,12 @@ function createDefaultConfig() {
|
|
|
149
180
|
ignorePaths: DEFAULT_IGNORE_PATHS,
|
|
150
181
|
modules: [
|
|
151
182
|
{
|
|
152
|
-
id: "
|
|
183
|
+
id: "core",
|
|
184
|
+
enabled: true,
|
|
185
|
+
options: {}
|
|
186
|
+
},
|
|
187
|
+
{
|
|
188
|
+
id: "language/typescript",
|
|
153
189
|
enabled: true,
|
|
154
190
|
options: {
|
|
155
191
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
@@ -209,32 +245,32 @@ var init_entities = __esm(() => {
|
|
|
209
245
|
init_config();
|
|
210
246
|
});
|
|
211
247
|
|
|
212
|
-
// src/
|
|
248
|
+
// src/infrastructure/config/configLoader.ts
|
|
213
249
|
import * as path2 from "path";
|
|
214
250
|
import * as fs from "fs/promises";
|
|
215
|
-
function getRaggrepDir(rootDir, config =
|
|
251
|
+
function getRaggrepDir(rootDir, config = DEFAULT_CONFIG) {
|
|
216
252
|
return path2.join(rootDir, config.indexDir);
|
|
217
253
|
}
|
|
218
|
-
function getModuleIndexPath(rootDir, moduleId, config =
|
|
254
|
+
function getModuleIndexPath(rootDir, moduleId, config = DEFAULT_CONFIG) {
|
|
219
255
|
return path2.join(rootDir, config.indexDir, "index", moduleId);
|
|
220
256
|
}
|
|
221
|
-
function getModuleManifestPath(rootDir, moduleId, config =
|
|
257
|
+
function getModuleManifestPath(rootDir, moduleId, config = DEFAULT_CONFIG) {
|
|
222
258
|
return path2.join(rootDir, config.indexDir, "index", moduleId, "manifest.json");
|
|
223
259
|
}
|
|
224
|
-
function getGlobalManifestPath(rootDir, config =
|
|
260
|
+
function getGlobalManifestPath(rootDir, config = DEFAULT_CONFIG) {
|
|
225
261
|
return path2.join(rootDir, config.indexDir, "manifest.json");
|
|
226
262
|
}
|
|
227
|
-
function getConfigPath(rootDir, config =
|
|
263
|
+
function getConfigPath(rootDir, config = DEFAULT_CONFIG) {
|
|
228
264
|
return path2.join(rootDir, config.indexDir, "config.json");
|
|
229
265
|
}
|
|
230
266
|
async function loadConfig(rootDir) {
|
|
231
|
-
const configPath = getConfigPath(rootDir,
|
|
267
|
+
const configPath = getConfigPath(rootDir, DEFAULT_CONFIG);
|
|
232
268
|
try {
|
|
233
269
|
const content = await fs.readFile(configPath, "utf-8");
|
|
234
270
|
const savedConfig = JSON.parse(content);
|
|
235
|
-
return { ...
|
|
271
|
+
return { ...DEFAULT_CONFIG, ...savedConfig };
|
|
236
272
|
} catch {
|
|
237
|
-
return
|
|
273
|
+
return DEFAULT_CONFIG;
|
|
238
274
|
}
|
|
239
275
|
}
|
|
240
276
|
function getModuleConfig(config, moduleId) {
|
|
@@ -243,7 +279,7 @@ function getModuleConfig(config, moduleId) {
|
|
|
243
279
|
function getEmbeddingConfigFromModule(moduleConfig) {
|
|
244
280
|
const options = moduleConfig.options || {};
|
|
245
281
|
const modelName = options.embeddingModel || "all-MiniLM-L6-v2";
|
|
246
|
-
if (!(modelName in
|
|
282
|
+
if (!(modelName in EMBEDDING_MODELS2)) {
|
|
247
283
|
console.warn(`Unknown embedding model: ${modelName}, falling back to all-MiniLM-L6-v2`);
|
|
248
284
|
return { model: "all-MiniLM-L6-v2" };
|
|
249
285
|
}
|
|
@@ -252,11 +288,21 @@ function getEmbeddingConfigFromModule(moduleConfig) {
|
|
|
252
288
|
showProgress: options.showProgress !== false
|
|
253
289
|
};
|
|
254
290
|
}
|
|
255
|
-
var
|
|
256
|
-
var
|
|
291
|
+
var DEFAULT_CONFIG, EMBEDDING_MODELS2;
|
|
292
|
+
var init_configLoader = __esm(() => {
|
|
257
293
|
init_entities();
|
|
258
|
-
|
|
259
|
-
|
|
294
|
+
DEFAULT_CONFIG = createDefaultConfig();
|
|
295
|
+
EMBEDDING_MODELS2 = {
|
|
296
|
+
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
|
|
297
|
+
"all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
|
|
298
|
+
"bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
|
|
299
|
+
"paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
|
|
300
|
+
};
|
|
301
|
+
});
|
|
302
|
+
|
|
303
|
+
// src/infrastructure/config/index.ts
|
|
304
|
+
var init_config2 = __esm(() => {
|
|
305
|
+
init_configLoader();
|
|
260
306
|
});
|
|
261
307
|
|
|
262
308
|
// src/domain/services/bm25.ts
|
|
@@ -330,16 +376,449 @@ class BM25Index {
|
|
|
330
376
|
this.avgDocLength = 0;
|
|
331
377
|
this.totalDocs = 0;
|
|
332
378
|
}
|
|
379
|
+
addDocument(id, tokens) {
|
|
380
|
+
this.addDocuments([{ id, content: "", tokens }]);
|
|
381
|
+
}
|
|
382
|
+
serialize() {
|
|
383
|
+
const documents = {};
|
|
384
|
+
for (const [id, { tokens }] of this.documents) {
|
|
385
|
+
documents[id] = tokens;
|
|
386
|
+
}
|
|
387
|
+
return {
|
|
388
|
+
documents,
|
|
389
|
+
avgDocLength: this.avgDocLength,
|
|
390
|
+
documentFrequencies: Object.fromEntries(this.documentFrequencies),
|
|
391
|
+
totalDocs: this.totalDocs
|
|
392
|
+
};
|
|
393
|
+
}
|
|
394
|
+
static deserialize(data) {
|
|
395
|
+
const index = new BM25Index;
|
|
396
|
+
index.avgDocLength = data.avgDocLength;
|
|
397
|
+
index.totalDocs = data.totalDocs;
|
|
398
|
+
index.documentFrequencies = new Map(Object.entries(data.documentFrequencies));
|
|
399
|
+
for (const [id, tokens] of Object.entries(data.documents)) {
|
|
400
|
+
index.documents.set(id, { content: "", tokens });
|
|
401
|
+
}
|
|
402
|
+
return index;
|
|
403
|
+
}
|
|
333
404
|
}
|
|
334
405
|
function normalizeScore(score, midpoint = 5) {
|
|
335
406
|
return 1 / (1 + Math.exp(-score / midpoint + 1));
|
|
336
407
|
}
|
|
337
408
|
var BM25_K1 = 1.5, BM25_B = 0.75;
|
|
338
409
|
|
|
339
|
-
// src/
|
|
340
|
-
|
|
410
|
+
// src/modules/core/symbols.ts
|
|
411
|
+
function extractSymbols(content) {
|
|
412
|
+
const symbols = [];
|
|
413
|
+
const seenSymbols = new Set;
|
|
414
|
+
const lines = content.split(`
|
|
415
|
+
`);
|
|
416
|
+
for (const { type, pattern, exported } of SYMBOL_PATTERNS) {
|
|
417
|
+
pattern.lastIndex = 0;
|
|
418
|
+
let match;
|
|
419
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
420
|
+
const name = match[1];
|
|
421
|
+
const symbolKey = `${name}:${type}`;
|
|
422
|
+
if (seenSymbols.has(symbolKey))
|
|
423
|
+
continue;
|
|
424
|
+
seenSymbols.add(symbolKey);
|
|
425
|
+
const beforeMatch = content.substring(0, match.index);
|
|
426
|
+
const line = beforeMatch.split(`
|
|
427
|
+
`).length;
|
|
428
|
+
symbols.push({
|
|
429
|
+
name,
|
|
430
|
+
type,
|
|
431
|
+
line,
|
|
432
|
+
isExported: exported
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
return symbols.sort((a, b) => a.line - b.line);
|
|
437
|
+
}
|
|
438
|
+
function symbolsToKeywords(symbols) {
|
|
439
|
+
const keywords = new Set;
|
|
440
|
+
for (const symbol of symbols) {
|
|
441
|
+
keywords.add(symbol.name.toLowerCase());
|
|
442
|
+
const parts = symbol.name.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/);
|
|
443
|
+
for (const part of parts) {
|
|
444
|
+
if (part.length > 2) {
|
|
445
|
+
keywords.add(part);
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
return Array.from(keywords);
|
|
450
|
+
}
|
|
451
|
+
var SYMBOL_PATTERNS;
|
|
452
|
+
var init_symbols = __esm(() => {
|
|
453
|
+
SYMBOL_PATTERNS = [
|
|
454
|
+
{
|
|
455
|
+
type: "function",
|
|
456
|
+
pattern: /^export\s+(?:async\s+)?function\s+(\w+)/gm,
|
|
457
|
+
exported: true
|
|
458
|
+
},
|
|
459
|
+
{
|
|
460
|
+
type: "function",
|
|
461
|
+
pattern: /^export\s+(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
|
|
462
|
+
exported: true
|
|
463
|
+
},
|
|
464
|
+
{
|
|
465
|
+
type: "class",
|
|
466
|
+
pattern: /^export\s+(?:abstract\s+)?class\s+(\w+)/gm,
|
|
467
|
+
exported: true
|
|
468
|
+
},
|
|
469
|
+
{
|
|
470
|
+
type: "interface",
|
|
471
|
+
pattern: /^export\s+interface\s+(\w+)/gm,
|
|
472
|
+
exported: true
|
|
473
|
+
},
|
|
474
|
+
{
|
|
475
|
+
type: "type",
|
|
476
|
+
pattern: /^export\s+type\s+(\w+)/gm,
|
|
477
|
+
exported: true
|
|
478
|
+
},
|
|
479
|
+
{
|
|
480
|
+
type: "enum",
|
|
481
|
+
pattern: /^export\s+(?:const\s+)?enum\s+(\w+)/gm,
|
|
482
|
+
exported: true
|
|
483
|
+
},
|
|
484
|
+
{
|
|
485
|
+
type: "variable",
|
|
486
|
+
pattern: /^export\s+(?:const|let|var)\s+(\w+)\s*(?::|=)/gm,
|
|
487
|
+
exported: true
|
|
488
|
+
},
|
|
489
|
+
{
|
|
490
|
+
type: "function",
|
|
491
|
+
pattern: /^export\s+default\s+(?:async\s+)?function\s+(\w+)/gm,
|
|
492
|
+
exported: true
|
|
493
|
+
},
|
|
494
|
+
{
|
|
495
|
+
type: "class",
|
|
496
|
+
pattern: /^export\s+default\s+class\s+(\w+)/gm,
|
|
497
|
+
exported: true
|
|
498
|
+
},
|
|
499
|
+
{
|
|
500
|
+
type: "function",
|
|
501
|
+
pattern: /^(?:async\s+)?function\s+(\w+)/gm,
|
|
502
|
+
exported: false
|
|
503
|
+
},
|
|
504
|
+
{
|
|
505
|
+
type: "function",
|
|
506
|
+
pattern: /^(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/gm,
|
|
507
|
+
exported: false
|
|
508
|
+
},
|
|
509
|
+
{
|
|
510
|
+
type: "class",
|
|
511
|
+
pattern: /^(?:abstract\s+)?class\s+(\w+)/gm,
|
|
512
|
+
exported: false
|
|
513
|
+
},
|
|
514
|
+
{
|
|
515
|
+
type: "interface",
|
|
516
|
+
pattern: /^interface\s+(\w+)/gm,
|
|
517
|
+
exported: false
|
|
518
|
+
},
|
|
519
|
+
{
|
|
520
|
+
type: "type",
|
|
521
|
+
pattern: /^type\s+(\w+)/gm,
|
|
522
|
+
exported: false
|
|
523
|
+
},
|
|
524
|
+
{
|
|
525
|
+
type: "enum",
|
|
526
|
+
pattern: /^(?:const\s+)?enum\s+(\w+)/gm,
|
|
527
|
+
exported: false
|
|
528
|
+
},
|
|
529
|
+
{
|
|
530
|
+
type: "function",
|
|
531
|
+
pattern: /^def\s+(\w+)\s*\(/gm,
|
|
532
|
+
exported: false
|
|
533
|
+
},
|
|
534
|
+
{
|
|
535
|
+
type: "class",
|
|
536
|
+
pattern: /^class\s+(\w+)(?:\s*\(|:)/gm,
|
|
537
|
+
exported: false
|
|
538
|
+
},
|
|
539
|
+
{
|
|
540
|
+
type: "function",
|
|
541
|
+
pattern: /^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/gm,
|
|
542
|
+
exported: false
|
|
543
|
+
},
|
|
544
|
+
{
|
|
545
|
+
type: "type",
|
|
546
|
+
pattern: /^type\s+(\w+)\s+(?:struct|interface)/gm,
|
|
547
|
+
exported: false
|
|
548
|
+
},
|
|
549
|
+
{
|
|
550
|
+
type: "function",
|
|
551
|
+
pattern: /^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/gm,
|
|
552
|
+
exported: false
|
|
553
|
+
},
|
|
554
|
+
{
|
|
555
|
+
type: "type",
|
|
556
|
+
pattern: /^(?:pub\s+)?struct\s+(\w+)/gm,
|
|
557
|
+
exported: false
|
|
558
|
+
},
|
|
559
|
+
{
|
|
560
|
+
type: "enum",
|
|
561
|
+
pattern: /^(?:pub\s+)?enum\s+(\w+)/gm,
|
|
562
|
+
exported: false
|
|
563
|
+
},
|
|
564
|
+
{
|
|
565
|
+
type: "interface",
|
|
566
|
+
pattern: /^(?:pub\s+)?trait\s+(\w+)/gm,
|
|
567
|
+
exported: false
|
|
568
|
+
}
|
|
569
|
+
];
|
|
570
|
+
});
|
|
571
|
+
|
|
572
|
+
// src/modules/core/index.ts
|
|
573
|
+
var exports_core = {};
|
|
574
|
+
__export(exports_core, {
|
|
575
|
+
CoreModule: () => CoreModule
|
|
576
|
+
});
|
|
577
|
+
import * as path3 from "path";
|
|
578
|
+
import * as fs2 from "fs/promises";
|
|
579
|
+
|
|
580
|
+
class CoreModule {
|
|
581
|
+
id = "core";
|
|
582
|
+
name = "Core Search";
|
|
583
|
+
description = "Language-agnostic text search with symbol extraction";
|
|
584
|
+
version = "1.0.0";
|
|
585
|
+
symbolIndex = new Map;
|
|
586
|
+
bm25Index = null;
|
|
587
|
+
rootDir = "";
|
|
588
|
+
async initialize(_config) {}
|
|
589
|
+
async indexFile(filepath, content, ctx) {
|
|
590
|
+
this.rootDir = ctx.rootDir;
|
|
591
|
+
const symbols = extractSymbols(content);
|
|
592
|
+
const symbolKeywords = symbolsToKeywords(symbols);
|
|
593
|
+
const contentTokens = tokenize(content);
|
|
594
|
+
const allTokens = [...new Set([...contentTokens, ...symbolKeywords])];
|
|
595
|
+
const chunks = this.createChunks(filepath, content, symbols);
|
|
596
|
+
const stats = await ctx.getFileStats(filepath);
|
|
597
|
+
this.symbolIndex.set(filepath, {
|
|
598
|
+
filepath,
|
|
599
|
+
symbols,
|
|
600
|
+
tokens: allTokens
|
|
601
|
+
});
|
|
602
|
+
const moduleData = {
|
|
603
|
+
symbols,
|
|
604
|
+
tokens: allTokens
|
|
605
|
+
};
|
|
606
|
+
return {
|
|
607
|
+
filepath,
|
|
608
|
+
lastModified: stats.lastModified,
|
|
609
|
+
chunks,
|
|
610
|
+
moduleData
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
createChunks(filepath, content, symbols) {
|
|
614
|
+
const lines = content.split(`
|
|
615
|
+
`);
|
|
616
|
+
const chunks = [];
|
|
617
|
+
for (let start = 0;start < lines.length; start += LINES_PER_CHUNK - CHUNK_OVERLAP) {
|
|
618
|
+
const end = Math.min(start + LINES_PER_CHUNK, lines.length);
|
|
619
|
+
const chunkLines = lines.slice(start, end);
|
|
620
|
+
const chunkContent = chunkLines.join(`
|
|
621
|
+
`);
|
|
622
|
+
const chunkSymbols = symbols.filter((s) => s.line >= start + 1 && s.line <= end);
|
|
623
|
+
let chunkType = "block";
|
|
624
|
+
let chunkName;
|
|
625
|
+
let isExported = false;
|
|
626
|
+
if (chunkSymbols.length > 0) {
|
|
627
|
+
const primarySymbol = chunkSymbols[0];
|
|
628
|
+
chunkType = this.symbolTypeToChunkType(primarySymbol.type);
|
|
629
|
+
chunkName = primarySymbol.name;
|
|
630
|
+
isExported = primarySymbol.isExported;
|
|
631
|
+
}
|
|
632
|
+
const chunkId = `${filepath}:${start + 1}-${end}`;
|
|
633
|
+
chunks.push({
|
|
634
|
+
id: chunkId,
|
|
635
|
+
content: chunkContent,
|
|
636
|
+
startLine: start + 1,
|
|
637
|
+
endLine: end,
|
|
638
|
+
type: chunkType,
|
|
639
|
+
name: chunkName,
|
|
640
|
+
isExported
|
|
641
|
+
});
|
|
642
|
+
if (end >= lines.length)
|
|
643
|
+
break;
|
|
644
|
+
}
|
|
645
|
+
return chunks;
|
|
646
|
+
}
|
|
647
|
+
symbolTypeToChunkType(symbolType) {
|
|
648
|
+
switch (symbolType) {
|
|
649
|
+
case "function":
|
|
650
|
+
case "method":
|
|
651
|
+
return "function";
|
|
652
|
+
case "class":
|
|
653
|
+
return "class";
|
|
654
|
+
case "interface":
|
|
655
|
+
return "interface";
|
|
656
|
+
case "type":
|
|
657
|
+
return "type";
|
|
658
|
+
case "enum":
|
|
659
|
+
return "enum";
|
|
660
|
+
case "variable":
|
|
661
|
+
return "variable";
|
|
662
|
+
default:
|
|
663
|
+
return "block";
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
async finalize(ctx) {
|
|
667
|
+
const config = ctx.config;
|
|
668
|
+
const coreDir = path3.join(getRaggrepDir(ctx.rootDir, config), "index", "core");
|
|
669
|
+
await fs2.mkdir(coreDir, { recursive: true });
|
|
670
|
+
this.bm25Index = new BM25Index;
|
|
671
|
+
for (const [filepath, entry] of this.symbolIndex) {
|
|
672
|
+
this.bm25Index.addDocument(filepath, entry.tokens);
|
|
673
|
+
}
|
|
674
|
+
const symbolIndexData = {
|
|
675
|
+
version: this.version,
|
|
676
|
+
lastUpdated: new Date().toISOString(),
|
|
677
|
+
files: Object.fromEntries(this.symbolIndex),
|
|
678
|
+
bm25Data: this.bm25Index.serialize()
|
|
679
|
+
};
|
|
680
|
+
await fs2.writeFile(path3.join(coreDir, "symbols.json"), JSON.stringify(symbolIndexData, null, 2));
|
|
681
|
+
console.log(` [Core] Symbol index built with ${this.symbolIndex.size} files`);
|
|
682
|
+
}
|
|
683
|
+
async search(query, ctx, options) {
|
|
684
|
+
const config = ctx.config;
|
|
685
|
+
const topK = options?.topK ?? DEFAULT_TOP_K;
|
|
686
|
+
const minScore = options?.minScore ?? DEFAULT_MIN_SCORE;
|
|
687
|
+
if (this.symbolIndex.size === 0) {
|
|
688
|
+
await this.loadSymbolIndex(ctx.rootDir, config);
|
|
689
|
+
}
|
|
690
|
+
if (!this.bm25Index || this.symbolIndex.size === 0) {
|
|
691
|
+
return [];
|
|
692
|
+
}
|
|
693
|
+
const queryTokens = tokenize(query);
|
|
694
|
+
const bm25Results = this.bm25Index.search(query, topK * 2);
|
|
695
|
+
const bm25Scores = new Map(bm25Results.map((r) => [r.id, r.score]));
|
|
696
|
+
const symbolMatches = this.findSymbolMatches(queryTokens);
|
|
697
|
+
const results = [];
|
|
698
|
+
for (const filepath of this.symbolIndex.keys()) {
|
|
699
|
+
const entry = this.symbolIndex.get(filepath);
|
|
700
|
+
const bm25Score = bm25Scores.get(filepath) ?? 0;
|
|
701
|
+
const symbolScore = symbolMatches.get(filepath) ?? 0;
|
|
702
|
+
if (bm25Score === 0 && symbolScore === 0)
|
|
703
|
+
continue;
|
|
704
|
+
const combinedScore = 0.6 * normalizeScore(bm25Score) + 0.4 * symbolScore;
|
|
705
|
+
if (combinedScore >= minScore) {
|
|
706
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
707
|
+
if (!fileIndex)
|
|
708
|
+
continue;
|
|
709
|
+
const bestChunk = this.findBestChunk(fileIndex.chunks, queryTokens, entry.symbols);
|
|
710
|
+
results.push({
|
|
711
|
+
filepath,
|
|
712
|
+
chunk: bestChunk,
|
|
713
|
+
score: combinedScore,
|
|
714
|
+
moduleId: this.id,
|
|
715
|
+
context: {
|
|
716
|
+
bm25Score: normalizeScore(bm25Score),
|
|
717
|
+
symbolScore
|
|
718
|
+
}
|
|
719
|
+
});
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
return results.sort((a, b) => b.score - a.score).slice(0, topK);
|
|
723
|
+
}
|
|
724
|
+
findSymbolMatches(queryTokens) {
|
|
725
|
+
const matches = new Map;
|
|
726
|
+
for (const [filepath, entry] of this.symbolIndex) {
|
|
727
|
+
let matchScore = 0;
|
|
728
|
+
for (const symbol of entry.symbols) {
|
|
729
|
+
const symbolName = symbol.name.toLowerCase();
|
|
730
|
+
const symbolParts = symbolsToKeywords([symbol]);
|
|
731
|
+
for (const token of queryTokens) {
|
|
732
|
+
if (symbolName === token) {
|
|
733
|
+
matchScore += symbol.isExported ? 1 : 0.8;
|
|
734
|
+
} else if (symbolName.includes(token) || token.includes(symbolName)) {
|
|
735
|
+
matchScore += symbol.isExported ? 0.5 : 0.4;
|
|
736
|
+
} else if (symbolParts.some((p) => p === token)) {
|
|
737
|
+
matchScore += symbol.isExported ? 0.3 : 0.2;
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
if (matchScore > 0) {
|
|
742
|
+
matches.set(filepath, Math.min(1, matchScore / queryTokens.length));
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
return matches;
|
|
746
|
+
}
|
|
747
|
+
findBestChunk(chunks, queryTokens, symbols) {
|
|
748
|
+
let bestChunk = chunks[0];
|
|
749
|
+
let bestScore = 0;
|
|
750
|
+
for (const chunk of chunks) {
|
|
751
|
+
let score = 0;
|
|
752
|
+
const chunkContent = chunk.content.toLowerCase();
|
|
753
|
+
for (const token of queryTokens) {
|
|
754
|
+
if (chunkContent.includes(token)) {
|
|
755
|
+
score += 1;
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
if (chunk.name) {
|
|
759
|
+
const nameLower = chunk.name.toLowerCase();
|
|
760
|
+
for (const token of queryTokens) {
|
|
761
|
+
if (nameLower.includes(token)) {
|
|
762
|
+
score += 2;
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
if (chunk.isExported) {
|
|
767
|
+
score += 0.5;
|
|
768
|
+
}
|
|
769
|
+
if (score > bestScore) {
|
|
770
|
+
bestScore = score;
|
|
771
|
+
bestChunk = chunk;
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
return bestChunk;
|
|
775
|
+
}
|
|
776
|
+
async loadSymbolIndex(rootDir, config) {
|
|
777
|
+
const coreDir = path3.join(getRaggrepDir(rootDir, config), "index", "core");
|
|
778
|
+
const symbolsPath = path3.join(coreDir, "symbols.json");
|
|
779
|
+
try {
|
|
780
|
+
const content = await fs2.readFile(symbolsPath, "utf-8");
|
|
781
|
+
const data = JSON.parse(content);
|
|
782
|
+
this.symbolIndex = new Map(Object.entries(data.files));
|
|
783
|
+
if (data.bm25Data) {
|
|
784
|
+
this.bm25Index = BM25Index.deserialize(data.bm25Data);
|
|
785
|
+
}
|
|
786
|
+
} catch (error) {
|
|
787
|
+
this.symbolIndex = new Map;
|
|
788
|
+
this.bm25Index = null;
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
async dispose() {
|
|
792
|
+
this.symbolIndex.clear();
|
|
793
|
+
this.bm25Index = null;
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
var DEFAULT_MIN_SCORE = 0.1, DEFAULT_TOP_K = 20, LINES_PER_CHUNK = 50, CHUNK_OVERLAP = 10;
|
|
797
|
+
var init_core = __esm(() => {
|
|
798
|
+
init_config2();
|
|
799
|
+
init_symbols();
|
|
800
|
+
});
|
|
801
|
+
|
|
802
|
+
// src/domain/services/similarity.ts
|
|
803
|
+
function cosineSimilarity(a, b) {
|
|
804
|
+
if (a.length !== b.length) {
|
|
805
|
+
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
806
|
+
}
|
|
807
|
+
let dotProduct = 0;
|
|
808
|
+
let normA = 0;
|
|
809
|
+
let normB = 0;
|
|
810
|
+
for (let i = 0;i < a.length; i++) {
|
|
811
|
+
dotProduct += a[i] * b[i];
|
|
812
|
+
normA += a[i] * a[i];
|
|
813
|
+
normB += b[i] * b[i];
|
|
814
|
+
}
|
|
815
|
+
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
816
|
+
if (magnitude === 0)
|
|
817
|
+
return 0;
|
|
818
|
+
return dotProduct / magnitude;
|
|
819
|
+
}
|
|
341
820
|
|
|
342
|
-
// src/modules/
|
|
821
|
+
// src/modules/language/typescript/parseCode.ts
|
|
343
822
|
import * as ts from "typescript";
|
|
344
823
|
function parseCode(content, filepath) {
|
|
345
824
|
const ext = filepath.split(".").pop()?.toLowerCase();
|
|
@@ -530,6 +1009,11 @@ function generateChunkId(filepath, startLine, endLine) {
|
|
|
530
1009
|
}
|
|
531
1010
|
var init_parseCode = () => {};
|
|
532
1011
|
|
|
1012
|
+
// src/infrastructure/storage/fileIndexStorage.ts
|
|
1013
|
+
var init_fileIndexStorage = __esm(() => {
|
|
1014
|
+
init_entities();
|
|
1015
|
+
});
|
|
1016
|
+
|
|
533
1017
|
// src/domain/services/keywords.ts
|
|
534
1018
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
535
1019
|
const keywords = new Set;
|
|
@@ -548,10 +1032,92 @@ function extractKeywords(content, name, maxKeywords = 50) {
|
|
|
548
1032
|
}
|
|
549
1033
|
return Array.from(keywords).slice(0, maxKeywords);
|
|
550
1034
|
}
|
|
1035
|
+
function splitIdentifier(str) {
|
|
1036
|
+
return str.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[_-]/g, " ").split(/\s+/).map((s) => s.toLowerCase()).filter((s) => s.length > 1);
|
|
1037
|
+
}
|
|
551
1038
|
/**
 * Derives search keywords from a file path.
 *
 * For every path segment of at least two characters, both the lowercased
 * segment and the parts returned by `splitIdentifier` are collected, provided
 * they are longer than two characters and not in `COMMON_KEYWORDS`.
 *
 * @param {string} filepath - Path using `/` or `\` separators.
 * @returns {string[]} Unique keywords in first-seen order.
 */
function extractPathKeywords(filepath) {
  const keywords = new Set();
  // Strip only an extension on the last segment. Excluding separators from the
  // character class stops a dot in a directory name (e.g. "v1.2/Makefile")
  // from swallowing the rest of the path.
  const pathWithoutExt = filepath.replace(/\.[^./\\]+$/, "");
  for (const segment of pathWithoutExt.split(/[/\\]/)) {
    if (segment.length < 2) {
      continue;
    }
    // Whole segment first, then its parts, to keep the original ordering.
    const candidates = [segment.toLowerCase(), ...splitIdentifier(segment)];
    for (const candidate of candidates) {
      if (candidate.length > 2 && !COMMON_KEYWORDS.has(candidate)) {
        keywords.add(candidate);
      }
    }
  }
  return Array.from(keywords);
}
|
|
1058
|
+
/**
 * Extracts structural hints from a file path.
 *
 * @param {string} filepath - Path using `/` or `\` separators.
 * @returns {{segments: string[], layer: (string|undefined), domain: (string|undefined), depth: number, keywords: string[]}}
 *   - `segments`: directory segments (file name excluded, original casing).
 *   - `layer`: first key of `LAYER_PATTERNS` with a pattern that is a substring
 *     of the lowercased file name or equals a lowercased directory segment.
 *   - `domain`: the deepest directory segment (lowercased) that is longer than
 *     two characters and is neither a generic root folder nor a layer pattern.
 *   - `depth`: number of directory segments.
 *   - `keywords`: result of `extractPathKeywords(filepath)`.
 */
function parsePathContext(filepath) {
  // Strip only an extension on the last segment; the character class excludes
  // separators so a dotted directory name cannot truncate the path.
  const pathWithoutExt = filepath.replace(/\.[^./\\]+$/, "");
  const allSegments = pathWithoutExt.split(/[/\\]/);
  const filename = allSegments[allSegments.length - 1];
  const dirSegments = allSegments.slice(0, -1);
  const keywords = extractPathKeywords(filepath);

  const filenameLower = filename.toLowerCase();
  const dirSegmentsLower = dirSegments.map((s) => s.toLowerCase());

  // Layers are tried in declaration order; the first one with any matching
  // pattern wins.
  let layer;
  for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS)) {
    const matches = patterns.some(
      (pattern) => filenameLower.includes(pattern) || dirSegmentsLower.includes(pattern)
    );
    if (matches) {
      layer = layerName;
      break;
    }
  }

  // Walk from the directory closest to the file outwards, skipping folders
  // that carry no domain meaning.
  const genericRoots = ["src", "lib", "app", "packages", "modules"];
  const layerPatternSet = new Set(Object.values(LAYER_PATTERNS).flat());
  let domain;
  for (let i = dirSegmentsLower.length - 1; i >= 0; i--) {
    const lower = dirSegmentsLower[i];
    if (genericRoots.includes(lower) || layerPatternSet.has(lower)) {
      continue;
    }
    if (lower.length > 2) {
      domain = lower;
      break;
    }
  }

  return {
    segments: dirSegments,
    layer,
    domain,
    depth: dirSegments.length,
    keywords
  };
}
|
|
1103
|
+
/**
 * Renders a path context as a short bracketed tag, e.g. "[auth service services]".
 *
 * The tag lists, without duplicates and in this order: the domain, the layer,
 * and the lowercased last three directory segments that are longer than two
 * characters and are not "src", "lib" or "app".
 *
 * @param {{domain?: string, layer?: string, segments: string[]}} pathContext
 * @returns {string} The bracketed tag, or "" when there is nothing to list.
 */
function formatPathContextForEmbedding(pathContext) {
  const tokens = new Set();
  if (pathContext.domain) {
    tokens.add(pathContext.domain);
  }
  if (pathContext.layer) {
    tokens.add(pathContext.layer);
  }
  const ignoredFolders = ["src", "lib", "app"];
  for (const segment of pathContext.segments.slice(-3)) {
    const lowered = segment.toLowerCase();
    if (segment.length > 2 && !ignoredFolders.includes(lowered)) {
      tokens.add(lowered);
    }
  }
  // A Set keeps first-insertion order, which gives the de-duplicated list.
  return tokens.size === 0 ? "" : `[${[...tokens].join(" ")}]`;
}
|
|
554
|
-
var COMMON_KEYWORDS;
|
|
1120
|
+
var COMMON_KEYWORDS, LAYER_PATTERNS;
|
|
555
1121
|
var init_keywords = __esm(() => {
|
|
556
1122
|
COMMON_KEYWORDS = new Set([
|
|
557
1123
|
"const",
|
|
@@ -621,11 +1187,24 @@ var init_keywords = __esm(() => {
|
|
|
621
1187
|
"has",
|
|
622
1188
|
"have"
|
|
623
1189
|
]);
|
|
1190
|
+
LAYER_PATTERNS = {
|
|
1191
|
+
controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
|
|
1192
|
+
service: ["service", "services", "usecase", "usecases", "application"],
|
|
1193
|
+
repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
|
|
1194
|
+
model: ["model", "models", "entity", "entities", "schema", "schemas"],
|
|
1195
|
+
util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
|
|
1196
|
+
config: ["config", "configs", "configuration", "settings"],
|
|
1197
|
+
middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
|
|
1198
|
+
domain: ["domain", "core", "business"],
|
|
1199
|
+
infrastructure: ["infrastructure", "infra", "external", "adapters"],
|
|
1200
|
+
presentation: ["presentation", "view", "views", "component", "components", "ui"],
|
|
1201
|
+
test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
|
|
1202
|
+
};
|
|
624
1203
|
});
|
|
625
1204
|
|
|
626
|
-
// src/
|
|
627
|
-
import * as
|
|
628
|
-
import * as
|
|
1205
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
1206
|
+
import * as fs3 from "fs/promises";
|
|
1207
|
+
import * as path4 from "path";
|
|
629
1208
|
|
|
630
1209
|
class SymbolicIndex {
|
|
631
1210
|
meta = null;
|
|
@@ -634,7 +1213,7 @@ class SymbolicIndex {
|
|
|
634
1213
|
symbolicPath;
|
|
635
1214
|
moduleId;
|
|
636
1215
|
constructor(indexDir, moduleId) {
|
|
637
|
-
this.symbolicPath =
|
|
1216
|
+
this.symbolicPath = path4.join(indexDir, "index", moduleId, "symbolic");
|
|
638
1217
|
this.moduleId = moduleId;
|
|
639
1218
|
}
|
|
640
1219
|
async initialize() {
|
|
@@ -694,18 +1273,18 @@ class SymbolicIndex {
|
|
|
694
1273
|
throw new Error("Index not initialized");
|
|
695
1274
|
this.meta.lastUpdated = new Date().toISOString();
|
|
696
1275
|
this.meta.fileCount = this.fileSummaries.size;
|
|
697
|
-
await
|
|
698
|
-
const metaPath =
|
|
699
|
-
await
|
|
1276
|
+
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
1277
|
+
const metaPath = path4.join(this.symbolicPath, "_meta.json");
|
|
1278
|
+
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
700
1279
|
for (const [filepath, summary] of this.fileSummaries) {
|
|
701
1280
|
const summaryPath = this.getFileSummaryPath(filepath);
|
|
702
|
-
await
|
|
703
|
-
await
|
|
1281
|
+
await fs3.mkdir(path4.dirname(summaryPath), { recursive: true });
|
|
1282
|
+
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
704
1283
|
}
|
|
705
1284
|
}
|
|
706
1285
|
async load() {
|
|
707
|
-
const metaPath =
|
|
708
|
-
const metaContent = await
|
|
1286
|
+
const metaPath = path4.join(this.symbolicPath, "_meta.json");
|
|
1287
|
+
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
709
1288
|
this.meta = JSON.parse(metaContent);
|
|
710
1289
|
this.fileSummaries.clear();
|
|
711
1290
|
await this.loadFileSummariesRecursive(this.symbolicPath);
|
|
@@ -713,14 +1292,14 @@ class SymbolicIndex {
|
|
|
713
1292
|
}
|
|
714
1293
|
async loadFileSummariesRecursive(dir) {
|
|
715
1294
|
try {
|
|
716
|
-
const entries = await
|
|
1295
|
+
const entries = await fs3.readdir(dir, { withFileTypes: true });
|
|
717
1296
|
for (const entry of entries) {
|
|
718
|
-
const fullPath =
|
|
1297
|
+
const fullPath = path4.join(dir, entry.name);
|
|
719
1298
|
if (entry.isDirectory()) {
|
|
720
1299
|
await this.loadFileSummariesRecursive(fullPath);
|
|
721
1300
|
} else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
|
|
722
1301
|
try {
|
|
723
|
-
const content = await
|
|
1302
|
+
const content = await fs3.readFile(fullPath, "utf-8");
|
|
724
1303
|
const summary = JSON.parse(content);
|
|
725
1304
|
if (summary.filepath) {
|
|
726
1305
|
this.fileSummaries.set(summary.filepath, summary);
|
|
@@ -732,18 +1311,18 @@ class SymbolicIndex {
|
|
|
732
1311
|
}
|
|
733
1312
|
getFileSummaryPath(filepath) {
|
|
734
1313
|
const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
|
|
735
|
-
return
|
|
1314
|
+
return path4.join(this.symbolicPath, jsonPath);
|
|
736
1315
|
}
|
|
737
1316
|
async deleteFileSummary(filepath) {
|
|
738
1317
|
try {
|
|
739
|
-
await
|
|
1318
|
+
await fs3.unlink(this.getFileSummaryPath(filepath));
|
|
740
1319
|
} catch {}
|
|
741
1320
|
this.fileSummaries.delete(filepath);
|
|
742
1321
|
}
|
|
743
1322
|
async exists() {
|
|
744
1323
|
try {
|
|
745
|
-
const metaPath =
|
|
746
|
-
await
|
|
1324
|
+
const metaPath = path4.join(this.symbolicPath, "_meta.json");
|
|
1325
|
+
await fs3.access(metaPath);
|
|
747
1326
|
return true;
|
|
748
1327
|
} catch {
|
|
749
1328
|
return false;
|
|
@@ -765,24 +1344,29 @@ class SymbolicIndex {
|
|
|
765
1344
|
this.bm25Index = new BM25Index;
|
|
766
1345
|
}
|
|
767
1346
|
}
|
|
768
|
-
var
|
|
769
|
-
init_keywords();
|
|
1347
|
+
var init_symbolicIndex = __esm(() => {
|
|
770
1348
|
init_keywords();
|
|
771
1349
|
});
|
|
772
1350
|
|
|
773
|
-
// src/
|
|
774
|
-
var
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
DEFAULT_TOP_K: () => DEFAULT_TOP_K,
|
|
778
|
-
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE
|
|
1351
|
+
// src/infrastructure/storage/index.ts
|
|
1352
|
+
var init_storage = __esm(() => {
|
|
1353
|
+
init_fileIndexStorage();
|
|
1354
|
+
init_symbolicIndex();
|
|
779
1355
|
});
|
|
780
|
-
import * as path4 from "path";
|
|
781
1356
|
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
1357
|
+
// src/modules/language/typescript/index.ts
|
|
1358
|
+
var exports_typescript = {};
|
|
1359
|
+
__export(exports_typescript, {
|
|
1360
|
+
TypeScriptModule: () => TypeScriptModule,
|
|
1361
|
+
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
1362
|
+
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
1363
|
+
});
|
|
1364
|
+
import * as path5 from "path";
|
|
1365
|
+
|
|
1366
|
+
class TypeScriptModule {
|
|
1367
|
+
id = "language/typescript";
|
|
1368
|
+
name = "TypeScript Search";
|
|
1369
|
+
description = "TypeScript-aware code search with AST parsing and semantic embeddings";
|
|
786
1370
|
version = "1.0.0";
|
|
787
1371
|
embeddingConfig = null;
|
|
788
1372
|
symbolicIndex = null;
|
|
@@ -799,7 +1383,12 @@ class SemanticModule {
|
|
|
799
1383
|
if (parsedChunks.length === 0) {
|
|
800
1384
|
return null;
|
|
801
1385
|
}
|
|
802
|
-
const
|
|
1386
|
+
const pathContext = parsePathContext(filepath);
|
|
1387
|
+
const pathPrefix = formatPathContextForEmbedding(pathContext);
|
|
1388
|
+
const chunkContents = parsedChunks.map((c) => {
|
|
1389
|
+
const namePrefix = c.name ? `${c.name}: ` : "";
|
|
1390
|
+
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
1391
|
+
});
|
|
803
1392
|
const embeddings = await getEmbeddings(chunkContents);
|
|
804
1393
|
const chunks = parsedChunks.map((pc) => ({
|
|
805
1394
|
id: generateChunkId(filepath, pc.startLine, pc.endLine),
|
|
@@ -813,10 +1402,10 @@ class SemanticModule {
|
|
|
813
1402
|
}));
|
|
814
1403
|
const references = this.extractReferences(content, filepath);
|
|
815
1404
|
const stats = await ctx.getFileStats(filepath);
|
|
816
|
-
const
|
|
1405
|
+
const currentConfig = getEmbeddingConfig();
|
|
817
1406
|
const moduleData = {
|
|
818
1407
|
embeddings,
|
|
819
|
-
embeddingModel:
|
|
1408
|
+
embeddingModel: currentConfig.model
|
|
820
1409
|
};
|
|
821
1410
|
const chunkTypes = [...new Set(parsedChunks.map((pc) => pc.type))];
|
|
822
1411
|
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
@@ -825,13 +1414,20 @@ class SemanticModule {
|
|
|
825
1414
|
const keywords = extractKeywords(pc.content, pc.name);
|
|
826
1415
|
keywords.forEach((k) => allKeywords.add(k));
|
|
827
1416
|
}
|
|
1417
|
+
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
828
1418
|
const fileSummary = {
|
|
829
1419
|
filepath,
|
|
830
1420
|
chunkCount: chunks.length,
|
|
831
1421
|
chunkTypes,
|
|
832
1422
|
keywords: Array.from(allKeywords),
|
|
833
1423
|
exports,
|
|
834
|
-
lastModified: stats.lastModified
|
|
1424
|
+
lastModified: stats.lastModified,
|
|
1425
|
+
pathContext: {
|
|
1426
|
+
segments: pathContext.segments,
|
|
1427
|
+
layer: pathContext.layer,
|
|
1428
|
+
domain: pathContext.domain,
|
|
1429
|
+
depth: pathContext.depth
|
|
1430
|
+
}
|
|
835
1431
|
};
|
|
836
1432
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
837
1433
|
return {
|
|
@@ -855,7 +1451,7 @@ class SemanticModule {
|
|
|
855
1451
|
this.pendingSummaries.clear();
|
|
856
1452
|
}
|
|
857
1453
|
async search(query, ctx, options = {}) {
|
|
858
|
-
const { topK =
|
|
1454
|
+
const { topK = DEFAULT_TOP_K2, minScore = DEFAULT_MIN_SCORE2, filePatterns } = options;
|
|
859
1455
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
860
1456
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
861
1457
|
let candidateFiles;
|
|
@@ -908,11 +1504,32 @@ class SemanticModule {
|
|
|
908
1504
|
for (const result of bm25Results) {
|
|
909
1505
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
910
1506
|
}
|
|
1507
|
+
const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
|
|
1508
|
+
const pathBoosts = new Map;
|
|
1509
|
+
for (const filepath of candidateFiles) {
|
|
1510
|
+
const summary = symbolicIndex.getFileSummary(filepath);
|
|
1511
|
+
if (summary?.pathContext) {
|
|
1512
|
+
let boost = 0;
|
|
1513
|
+
const ctx2 = summary.pathContext;
|
|
1514
|
+
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
1515
|
+
boost += 0.1;
|
|
1516
|
+
}
|
|
1517
|
+
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
1518
|
+
boost += 0.05;
|
|
1519
|
+
}
|
|
1520
|
+
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
1521
|
+
if (segmentMatch) {
|
|
1522
|
+
boost += 0.05;
|
|
1523
|
+
}
|
|
1524
|
+
pathBoosts.set(filepath, boost);
|
|
1525
|
+
}
|
|
1526
|
+
}
|
|
911
1527
|
const results = [];
|
|
912
1528
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
913
1529
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
914
1530
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
915
|
-
const
|
|
1531
|
+
const pathBoost = pathBoosts.get(filepath) || 0;
|
|
1532
|
+
const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + pathBoost;
|
|
916
1533
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
917
1534
|
results.push({
|
|
918
1535
|
filepath,
|
|
@@ -921,7 +1538,8 @@ class SemanticModule {
|
|
|
921
1538
|
moduleId: this.id,
|
|
922
1539
|
context: {
|
|
923
1540
|
semanticScore,
|
|
924
|
-
bm25Score
|
|
1541
|
+
bm25Score,
|
|
1542
|
+
pathBoost
|
|
925
1543
|
}
|
|
926
1544
|
});
|
|
927
1545
|
}
|
|
@@ -937,29 +1555,30 @@ class SemanticModule {
|
|
|
937
1555
|
while ((match = importRegex.exec(content)) !== null) {
|
|
938
1556
|
const importPath = match[1];
|
|
939
1557
|
if (importPath.startsWith(".")) {
|
|
940
|
-
const dir =
|
|
941
|
-
const resolved =
|
|
1558
|
+
const dir = path5.dirname(filepath);
|
|
1559
|
+
const resolved = path5.normalize(path5.join(dir, importPath));
|
|
942
1560
|
references.push(resolved);
|
|
943
1561
|
}
|
|
944
1562
|
}
|
|
945
1563
|
while ((match = requireRegex.exec(content)) !== null) {
|
|
946
1564
|
const importPath = match[1];
|
|
947
1565
|
if (importPath.startsWith(".")) {
|
|
948
|
-
const dir =
|
|
949
|
-
const resolved =
|
|
1566
|
+
const dir = path5.dirname(filepath);
|
|
1567
|
+
const resolved = path5.normalize(path5.join(dir, importPath));
|
|
950
1568
|
references.push(resolved);
|
|
951
1569
|
}
|
|
952
1570
|
}
|
|
953
1571
|
return references;
|
|
954
1572
|
}
|
|
955
1573
|
}
|
|
956
|
-
var
|
|
957
|
-
var
|
|
1574
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TIER1_CANDIDATE_MULTIPLIER = 3;
|
|
1575
|
+
var init_typescript = __esm(() => {
|
|
958
1576
|
init_embeddings();
|
|
959
|
-
init_bm25();
|
|
960
1577
|
init_config2();
|
|
961
1578
|
init_parseCode();
|
|
962
|
-
|
|
1579
|
+
init_storage();
|
|
1580
|
+
init_keywords();
|
|
1581
|
+
init_keywords();
|
|
963
1582
|
});
|
|
964
1583
|
|
|
965
1584
|
// src/modules/registry.ts
|
|
@@ -983,28 +1602,777 @@ class ModuleRegistryImpl {
|
|
|
983
1602
|
}
|
|
984
1603
|
}
|
|
985
1604
|
/**
 * Lazily initialises the core and TypeScript modules and registers one
 * instance of each with `registry`, core first.
 *
 * @returns {Promise<void>}
 */
async function registerBuiltInModules() {
  const { CoreModule: CoreModuleClass } = await Promise.resolve().then(() => {
    init_core();
    return exports_core;
  });
  const { TypeScriptModule: TypeScriptModuleClass } = await Promise.resolve().then(() => {
    init_typescript();
    return exports_typescript;
  });
  registry.register(new CoreModuleClass());
  registry.register(new TypeScriptModuleClass());
}
|
|
989
1610
|
var registry;
|
|
990
1611
|
var init_registry = __esm(() => {
|
|
991
1612
|
registry = new ModuleRegistryImpl;
|
|
992
1613
|
});
|
|
993
1614
|
|
|
994
|
-
// src/
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1615
|
+
// src/introspection/projectDetector.ts
|
|
1616
|
+
import * as path6 from "path";
|
|
1617
|
+
import * as fs4 from "fs/promises";
|
|
1618
|
+
/**
 * Guesses a scope from a project name.
 *
 * Scopes are tried in the declaration order of `SCOPE_KEYWORDS` (the
 * "unknown" entry is skipped); the first scope with a keyword that is a
 * substring of the lowercased name wins.
 *
 * @param {string} name - Project name.
 * @returns {string} The matching scope key, or "unknown".
 */
function detectScopeFromName(name) {
  const needle = name.toLowerCase();
  const hit = Object.entries(SCOPE_KEYWORDS).find(
    ([scope, keywords]) => scope !== "unknown" && keywords.some((keyword) => needle.includes(keyword))
  );
  return hit ? hit[0] : "unknown";
}
|
|
1631
|
+
/**
 * Recursively collects package.json descriptors below `rootDir`.
 *
 * The root directory's own package.json is not reported (only directories
 * with a non-empty relative path are). Directories named in `SKIP_DIRS` are
 * not entered, and recursion stops once `depth` exceeds `MAX_SCAN_DEPTH`.
 *
 * @param {string} rootDir - Directory the scan is relative to.
 * @param {string} [currentDir=""] - "/"-joined path of the directory being scanned, relative to `rootDir`.
 * @param {number} [depth=0] - Current recursion depth.
 * @returns {Promise<object[]>} Non-null results of `parsePackageJson`, parents before children.
 */
async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
  const collected = [];
  if (depth > MAX_SCAN_DEPTH) {
    return collected;
  }
  const absoluteDir = currentDir ? path6.join(rootDir, currentDir) : rootDir;
  try {
    const dirents = await fs4.readdir(absoluteDir, { withFileTypes: true });
    const ownsManifest = dirents.some((d) => d.isFile() && d.name === "package.json");
    if (ownsManifest && currentDir) {
      const info = await parsePackageJson(rootDir, currentDir);
      if (info) {
        collected.push(info);
      }
    }
    const childDirs = dirents.filter((d) => d.isDirectory() && !SKIP_DIRS.has(d.name));
    for (const child of childDirs) {
      const childPath = currentDir ? `${currentDir}/${child.name}` : child.name;
      const nested = await scanForPackageJsons(rootDir, childPath, depth + 1);
      collected.push(...nested);
    }
  } catch {
    // Best effort: a directory that cannot be read contributes whatever was
    // gathered before the failure.
  }
  return collected;
}
|
|
1657
|
+
/**
 * Reads `<rootDir>/<relativePath>/package.json` and summarises it.
 *
 * The type is "app" when next/react/vue/svelte is a (dev)dependency,
 * otherwise "service" for express/fastify/koa/hono, otherwise "library" when
 * `main` or `exports` is set, otherwise "unknown".
 *
 * @param {string} rootDir - Base directory.
 * @param {string} relativePath - Package directory relative to `rootDir`.
 * @returns {Promise<{name: string, relativePath: string, type: string, hasWorkspaces: boolean} | null>}
 *   The summary, or null when the file cannot be read or parsed.
 */
async function parsePackageJson(rootDir, relativePath) {
  try {
    const manifestPath = path6.join(rootDir, relativePath, "package.json");
    const pkg = JSON.parse(await fs4.readFile(manifestPath, "utf-8"));
    const deps = { ...pkg.dependencies, ...pkg.devDependencies };
    const dependsOnAny = (names) => names.some((dep) => deps[dep]);

    let type;
    if (dependsOnAny(["next", "react", "vue", "svelte"])) {
      type = "app";
    } else if (dependsOnAny(["express", "fastify", "koa", "hono"])) {
      type = "service";
    } else if (pkg.main || pkg.exports) {
      type = "library";
    } else {
      type = "unknown";
    }

    return {
      // Fall back to the directory name when the manifest has no name.
      name: pkg.name || path6.basename(relativePath),
      relativePath,
      type,
      hasWorkspaces: Boolean(pkg.workspaces)
    };
  } catch {
    return null;
  }
}
|
|
1678
|
+
/**
 * Inspects `rootDir` and describes the projects found inside it.
 *
 * Detection combines two sources:
 *  1. Conventional top-level folders ("apps", "packages", "libs",
 *     "services"): each immediate sub-directory becomes a project whose type
 *     comes from `getProjectType`.
 *  2. package.json files reported by `scanForPackageJsons`: each one adds or
 *     replaces the project at its relative path.
 *
 * The repository counts as a monorepo when a conventional folder exists,
 * more than one nested package.json is found, or any package.json (including
 * the root one) declares `workspaces`.
 *
 * @param {string} rootDir - Directory to inspect.
 * @returns {Promise<{projects: object[], isMonorepo: boolean, rootType: (string|undefined)}>}
 *   Projects sorted by ascending root-path length; `rootType` is undefined
 *   for monorepos. If `rootDir` cannot be read the result is
 *   `{ projects: [], isMonorepo: false, rootType: "unknown" }`.
 */
async function detectProjectStructure(rootDir) {
  const projectMap = new Map();
  let isMonorepo = false;
  try {
    const entries = await fs4.readdir(rootDir, { withFileTypes: true });
    const dirNames = entries.filter((e) => e.isDirectory()).map((e) => e.name);
    const monorepoPatterns = ["apps", "packages", "libs", "services"];
    const presentPatterns = monorepoPatterns.filter((p) => dirNames.includes(p));
    if (presentPatterns.length > 0) {
      isMonorepo = true;
      for (const pattern of presentPatterns) {
        const patternDir = path6.join(rootDir, pattern);
        try {
          const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
          const type = getProjectType(pattern);
          for (const subDir of subDirs) {
            if (!subDir.isDirectory()) {
              continue;
            }
            const projectRoot = `${pattern}/${subDir.name}`;
            projectMap.set(projectRoot, { name: subDir.name, root: projectRoot, type });
          }
        } catch {
          // Best effort: an unreadable conventional folder contributes no projects.
        }
      }
    }

    const packageJsons = await scanForPackageJsons(rootDir);
    // This check does not depend on the individual package, so it is
    // evaluated once rather than on every loop iteration.
    if (packageJsons.length > 1) {
      isMonorepo = true;
    }
    for (const pkg of packageJsons) {
      if (pkg.hasWorkspaces) {
        isMonorepo = true;
      }
      // package.json is authoritative for the name, but an "unknown" type
      // carries no information: keep the type already inferred from the
      // conventional folder instead of overwriting it.
      const known = projectMap.get(pkg.relativePath);
      const type = pkg.type === "unknown" && known ? known.type : pkg.type;
      projectMap.set(pkg.relativePath, {
        name: pkg.name,
        root: pkg.relativePath,
        type
      });
    }

    let rootType = "unknown";
    try {
      const rootPkgPath = path6.join(rootDir, "package.json");
      const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
      if (rootPkg.workspaces) {
        isMonorepo = true;
      }
      const deps = { ...rootPkg.dependencies, ...rootPkg.devDependencies };
      if (deps["next"] || deps["react"] || deps["vue"]) {
        rootType = "app";
      } else if (deps["express"] || deps["fastify"] || deps["koa"]) {
        rootType = "service";
      }
    } catch {
      // No readable root package.json: rootType stays "unknown".
    }

    const projects = Array.from(projectMap.values()).sort((a, b) => a.root.length - b.root.length);
    return {
      projects,
      isMonorepo,
      rootType: isMonorepo ? undefined : rootType
    };
  } catch {
    return {
      projects: [],
      isMonorepo: false,
      rootType: "unknown"
    };
  }
}
|
|
1750
|
+
/**
 * Maps a conventional top-level folder name to a project type.
 *
 * @param {string} patternDir - Folder name such as "apps" or "libs".
 * @returns {string} "app", "library", "service", "script", or "unknown" for
 *   any other value.
 */
function getProjectType(patternDir) {
  // A Map (rather than a plain object) so that names like "constructor"
  // cannot resolve to inherited properties.
  const typeByFolder = new Map([
    ["apps", "app"],
    ["packages", "library"],
    ["libs", "library"],
    ["services", "service"],
    ["scripts", "script"],
    ["tools", "script"]
  ]);
  return typeByFolder.get(patternDir) ?? "unknown";
}
|
|
1766
|
+
/**
 * Finds the project a file belongs to.
 *
 * Resolution order:
 *  1. The detected project with the longest root that equals the path or is
 *     a directory prefix of it (the earlier project wins on equal length).
 *  2. The first entry of `PROJECT_PATTERNS` whose regex matches the path.
 *  3. A synthetic "root" project typed with `structure.rootType`.
 *
 * @param {string} filepath - File path; backslashes are treated as "/".
 * @param {{projects: Array<{name: string, root: string, type: string}>, rootType?: string}} structure
 * @returns {{name: string, root: string, type: string}}
 */
function findProjectForFile(filepath, structure) {
  const normalizedPath = filepath.replace(/\\/g, "/");

  let deepest;
  for (const project of structure.projects) {
    const contains = normalizedPath === project.root || normalizedPath.startsWith(`${project.root}/`);
    if (!contains) {
      continue;
    }
    // Strict ">" keeps the first project when roots have equal length.
    if (deepest === undefined || project.root.length > deepest.root.length) {
      deepest = project;
    }
  }
  if (deepest !== undefined) {
    return deepest;
  }

  for (const { pattern, type } of PROJECT_PATTERNS) {
    const match = normalizedPath.match(pattern);
    if (match) {
      return { name: match[1], root: match[0], type };
    }
  }

  return {
    name: "root",
    root: "",
    type: structure.rootType ?? "unknown"
  };
}
|
|
1793
|
+
var MAX_SCAN_DEPTH = 4, SKIP_DIRS, PROJECT_PATTERNS, SCOPE_KEYWORDS;
|
|
1794
|
+
var init_projectDetector = __esm(() => {
|
|
1795
|
+
SKIP_DIRS = new Set([
|
|
1796
|
+
"node_modules",
|
|
1797
|
+
".git",
|
|
1798
|
+
"dist",
|
|
1799
|
+
"build",
|
|
1800
|
+
".next",
|
|
1801
|
+
".nuxt",
|
|
1802
|
+
"coverage",
|
|
1803
|
+
".raggrep"
|
|
1804
|
+
]);
|
|
1805
|
+
PROJECT_PATTERNS = [
|
|
1806
|
+
{ pattern: /^apps\/([^/]+)/, type: "app", defaultScope: "unknown" },
|
|
1807
|
+
{ pattern: /^packages\/([^/]+)/, type: "library", defaultScope: "shared" },
|
|
1808
|
+
{ pattern: /^libs\/([^/]+)/, type: "library", defaultScope: "shared" },
|
|
1809
|
+
{ pattern: /^services\/([^/]+)/, type: "service", defaultScope: "backend" },
|
|
1810
|
+
{ pattern: /^scripts\/([^/]+)/, type: "script", defaultScope: "tooling" },
|
|
1811
|
+
{ pattern: /^tools\/([^/]+)/, type: "script", defaultScope: "tooling" }
|
|
1812
|
+
];
|
|
1813
|
+
SCOPE_KEYWORDS = {
|
|
1814
|
+
frontend: [
|
|
1815
|
+
"web",
|
|
1816
|
+
"webapp",
|
|
1817
|
+
"frontend",
|
|
1818
|
+
"client",
|
|
1819
|
+
"ui",
|
|
1820
|
+
"app",
|
|
1821
|
+
"mobile",
|
|
1822
|
+
"react",
|
|
1823
|
+
"vue",
|
|
1824
|
+
"angular",
|
|
1825
|
+
"next",
|
|
1826
|
+
"nuxt"
|
|
1827
|
+
],
|
|
1828
|
+
backend: [
|
|
1829
|
+
"api",
|
|
1830
|
+
"server",
|
|
1831
|
+
"backend",
|
|
1832
|
+
"service",
|
|
1833
|
+
"worker",
|
|
1834
|
+
"lambda",
|
|
1835
|
+
"functions"
|
|
1836
|
+
],
|
|
1837
|
+
shared: ["shared", "common", "utils", "lib", "core", "types", "models"],
|
|
1838
|
+
tooling: [
|
|
1839
|
+
"scripts",
|
|
1840
|
+
"tools",
|
|
1841
|
+
"cli",
|
|
1842
|
+
"devtools",
|
|
1843
|
+
"build",
|
|
1844
|
+
"config",
|
|
1845
|
+
"infra"
|
|
1846
|
+
],
|
|
1847
|
+
unknown: []
|
|
1848
|
+
};
|
|
1849
|
+
});
|
|
1850
|
+
|
|
1851
|
+
// src/introspection/fileIntrospector.ts
|
|
1852
|
+
import * as path7 from "path";
|
|
1853
|
+
/**
 * Builds a metadata record for one file from its path and, optionally, its content.
 *
 * @param {string} filepath - File path; backslashes are normalised to "/".
 * @param {object} structure - Project structure passed to `findProjectForFile`.
 * @param {string} [fileContent] - File text; when it is truthy, it is passed
 *   to `detectFramework`.
 * @returns {object} Record with `filepath` (normalised), `project`, `scope`,
 *   `layer`, `domain`, `language` ("unknown" for unmapped extensions),
 *   `framework` (undefined without content), `depth` (segment count minus
 *   one) and `pathSegments` (all segments but the last).
 */
function introspectFile(filepath, structure, fileContent) {
  const normalizedPath = filepath.replace(/\\/g, "/");
  // Empty segments (leading, trailing or doubled slashes) are dropped.
  const segments = normalizedPath.split("/").filter((part) => part.length > 0);
  const filename = segments[segments.length - 1] || "";
  const project = findProjectForFile(normalizedPath, structure);
  const language = EXTENSION_TO_LANGUAGE[path7.extname(filename)] || "unknown";
  const layer = detectLayer(segments, filename);
  const domain = detectDomain(segments);
  const scope = detectScope(segments, project, layer);
  const framework = fileContent ? detectFramework(fileContent) : undefined;
  return {
    filepath: normalizedPath,
    project,
    scope,
    layer,
    domain,
    language,
    framework,
    depth: segments.length - 1,
    pathSegments: segments.slice(0, -1)
  };
}
|
|
1879
|
+
/**
 * Determines the architectural layer of a file.
 *
 * The file name is checked first: the first layer in `LAYER_PATTERNS2` with a
 * pattern that is a substring of the lowercased file name wins. Otherwise the
 * directories are walked from the one nearest the file outwards, and the
 * first layer listing that lowercased directory name exactly is returned.
 *
 * @param {string[]} segments - Path segments; the last one is the file itself and is not treated as a directory.
 * @param {string} filename - File name.
 * @returns {string|undefined} Layer key, or undefined when nothing matches.
 */
function detectLayer(segments, filename) {
  const layerEntries = Object.entries(LAYER_PATTERNS2);

  const lowerName = filename.toLowerCase();
  const byName = layerEntries.find(([, patterns]) => patterns.some((pattern) => lowerName.includes(pattern)));
  if (byName) {
    return byName[0];
  }

  const dirsNearestFirst = segments.slice(0, -1).reverse();
  for (const dir of dirsNearestFirst) {
    const lowerDir = dir.toLowerCase();
    const byDir = layerEntries.find(([, patterns]) => patterns.includes(lowerDir));
    if (byDir) {
      return byDir[0];
    }
  }
  return undefined;
}
|
|
1898
|
+
/**
 * Finds the first business-domain name mentioned in a path.
 *
 * Segments are examined in order. Generic folders and every layer pattern
 * from `LAYER_PATTERNS2` are ignored. A segment that equals an entry of
 * `DOMAIN_PATTERNS` is returned as is; otherwise the first entry that the
 * segment starts or ends with is returned.
 *
 * @param {string[]} segments - Path segments (compared in lowercase).
 * @returns {string|undefined} The domain, or undefined when none is found.
 */
function detectDomain(segments) {
  const ignored = new Set(["src", "lib", "app", "apps", "packages", "services", "modules", "features"]);
  for (const pattern of Object.values(LAYER_PATTERNS2).flat()) {
    ignored.add(pattern);
  }

  for (const rawSegment of segments) {
    const name = rawSegment.toLowerCase();
    if (ignored.has(name)) {
      continue;
    }
    // The exact match must be tested first: "users" should yield "users",
    // not the earlier prefix entry "user".
    if (DOMAIN_PATTERNS.includes(name)) {
      return name;
    }
    const affix = DOMAIN_PATTERNS.find((domain) => name.startsWith(domain) || name.endsWith(domain));
    if (affix !== undefined) {
      return affix;
    }
  }
  return undefined;
}
|
|
1925
|
+
/**
 * Classifies a file as "frontend", "backend", "shared", "tooling" or "unknown".
 *
 * Precedence: the scope implied by the project name (via
 * `detectScopeFromName`), then the scope implied by the layer, then the first
 * path segment that names a scope. Layers without an implied scope fall
 * through to the segment check.
 *
 * @param {string[]} segments - Path segments (compared in lowercase).
 * @param {{name: string}} project - Owning project.
 * @param {string} [layer] - Detected layer, if any.
 * @returns {string} The scope.
 */
function detectScope(segments, project, layer) {
  const fromProject = detectScopeFromName(project.name);
  if (fromProject !== "unknown") {
    return fromProject;
  }

  const scopeByLayer = new Map([
    ["controller", "backend"],
    ["repository", "backend"],
    ["middleware", "backend"],
    ["presentation", "frontend"],
    ["util", "shared"],
    ["model", "shared"],
    ["test", "tooling"]
  ]);
  if (layer && scopeByLayer.has(layer)) {
    return scopeByLayer.get(layer);
  }

  // Checked per segment in this order, so an earlier segment always wins
  // over a later one regardless of scope.
  const scopeFolders = [
    ["backend", ["server", "api", "backend"]],
    ["frontend", ["client", "web", "frontend", "ui"]],
    ["shared", ["shared", "common", "lib", "libs"]]
  ];
  for (const segment of segments) {
    const lowered = segment.toLowerCase();
    const hit = scopeFolders.find(([, folders]) => folders.includes(lowered));
    if (hit) {
      return hit[0];
    }
  }
  return "unknown";
}
|
|
1959
|
+
/**
 * Detects a framework from import/require statements in file content.
 *
 * Frameworks are tried in the declaration order of `FRAMEWORK_INDICATORS`.
 * An indicator matches when the content contains it directly after
 * `from '`, `from "`, `require('` or `require("` (so "react" also matches an
 * import of "react-dom").
 *
 * @param {string} content - File text.
 * @returns {string|undefined} Framework key, or undefined when none matches.
 */
function detectFramework(content) {
  const isImported = (indicator) =>
    [
      `from '${indicator}`,
      `from "${indicator}`,
      `require('${indicator}`,
      `require("${indicator}`
    ].some((needle) => content.includes(needle));

  for (const [framework, indicators] of Object.entries(FRAMEWORK_INDICATORS)) {
    if (indicators.some((indicator) => isImported(indicator))) {
      return framework;
    }
  }
  return undefined;
}
|
|
1969
|
+
// Lookup tables for file introspection. They are declared here and only
// populated when init_fileIntrospector() runs.
var LAYER_PATTERNS2;
var DOMAIN_PATTERNS;
var FRAMEWORK_INDICATORS;
var EXTENSION_TO_LANGUAGE;
var init_fileIntrospector = __esm(() => {
  init_projectDetector();

  // Layer name -> name fragments associated with that layer.
  // NOTE(review): "handler" is listed under both controller and service; which
  // one wins depends on the matcher, which is outside this block.
  LAYER_PATTERNS2 = {
    controller: ["controller", "api", "routes", "route", "handler"],
    service: ["service", "logic", "usecase", "usecases", "handler"],
    repository: ["repository", "repo", "dao", "store", "persistence"],
    model: ["model", "models", "entity", "entities", "schema", "schemas", "types", "type"],
    util: ["util", "utils", "helper", "helpers", "common", "lib"],
    config: ["config", "configuration", "settings"],
    middleware: ["middleware", "middlewares"],
    domain: ["domain"],
    infrastructure: ["infrastructure", "infra"],
    application: ["application", "app"],
    presentation: ["presentation", "ui", "views", "view", "component", "components"],
    test: ["test", "tests", "spec", "specs", "__tests__", "e2e"]
  };

  // Business-domain keywords. Order is significant: the domain lookup walks
  // this list front to back and returns the first entry a path segment starts
  // or ends with.
  DOMAIN_PATTERNS = [
    // identity
    "auth", "authentication", "user", "users", "account", "accounts", "profile", "profiles",
    // catalogue
    "product", "products", "item", "items", "catalog",
    // commerce
    "order", "orders", "cart", "checkout", "payment", "payments", "billing",
    "subscription", "subscriptions",
    // messaging
    "notification", "notifications", "email", "sms",
    // reporting and administration
    "report", "reports", "analytics", "metrics", "dashboard", "admin", "settings", "search",
    // social
    "chat", "message", "messages", "feed", "post", "posts", "comment", "comments",
    // files and storage
    "media", "upload", "file", "files", "storage", "cache", "session",
    // logging
    "log", "logs", "audit"
  ];

  // Framework key -> module-specifier prefixes that indicate it. Key order is
  // the order in which detectFramework tries the frameworks.
  FRAMEWORK_INDICATORS = {
    nextjs: ["next", "next/"],
    express: ["express"],
    fastify: ["fastify"],
    react: ["react"],
    vue: ["vue"],
    angular: ["@angular/"],
    nestjs: ["@nestjs/"],
    koa: ["koa"]
  };

  // File extension (with leading dot) -> language identifier.
  EXTENSION_TO_LANGUAGE = {
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".py": "python",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".kt": "kotlin",
    ".swift": "swift",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".cpp": "cpp",
    ".c": "c",
    ".h": "c",
    ".hpp": "cpp",
    ".md": "markdown",
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml"
  };
});
|
|
2076
|
+
|
|
2077
|
+
// src/introspection/index.ts
|
|
2078
|
+
import * as path8 from "path";
|
|
2079
|
+
import * as fs5 from "fs/promises";
|
|
2080
|
+
|
|
2081
|
+
/**
 * In-memory collection of per-file introspection records for one project
 * root, with JSON persistence under `<raggrep dir>/introspection`.
 *
 * On disk the layout is:
 *   introspection/_project.json      - detected project structure
 *   introspection/files/<path>.json  - one record per introspected file
 */
class IntrospectionIndex {
  rootDir;
  structure = null;
  files = new Map();
  config = {};

  /**
   * @param {string} rootDir - Project root that file paths are relative to.
   */
  constructor(rootDir) {
    this.rootDir = rootDir;
  }

  /**
   * Detect the project structure and load optional overrides from the
   * `introspection` key of `<rootDir>/.raggrep/config.json`.
   */
  async initialize() {
    this.structure = await detectProjectStructure(this.rootDir);
    try {
      const configPath = path8.join(this.rootDir, ".raggrep", "config.json");
      const configContent = await fs5.readFile(configPath, "utf-8");
      const config = JSON.parse(configContent);
      this.config = config.introspection || {};
    } catch {
      // Deliberately best-effort: a missing or unreadable config file simply
      // means there are no overrides.
    }
  }

  /** @returns {object | null} The detected structure, or null before initialize()/load(). */
  getStructure() {
    return this.structure;
  }

  /**
   * Introspect one file, apply configured overrides and remember the result.
   *
   * @param {string} filepath - Path used as the key for this file.
   * @param {string} content - File contents.
   * @returns {object} The introspection record that was stored.
   * @throws {Error} If no project structure is available yet.
   */
  addFile(filepath, content) {
    if (!this.structure) {
      throw new Error("IntrospectionIndex not initialized");
    }
    const intro = introspectFile(filepath, this.structure, content);
    this.applyOverrides(intro);
    this.files.set(filepath, intro);
    return intro;
  }

  /** @returns {object | undefined} The stored record for `filepath`, if any. */
  getFile(filepath) {
    return this.files.get(filepath);
  }

  /** @returns {object[]} All stored records, in insertion order. */
  getAllFiles() {
    return Array.from(this.files.values());
  }

  /**
   * Overwrite `scope` / `framework` on a record from the first configured
   * project whose path prefixes the file or equals the record's project root.
   *
   * @param {object} intro - Record to mutate in place.
   */
  applyOverrides(intro) {
    if (!this.config.projects) {
      return;
    }
    for (const [projectPath, overrides] of Object.entries(this.config.projects)) {
      const belongsToProject =
        intro.filepath.startsWith(projectPath + "/") || intro.project.root === projectPath;
      if (!belongsToProject) {
        continue;
      }
      if (overrides.scope) {
        intro.scope = overrides.scope;
      }
      if (overrides.framework) {
        intro.framework = overrides.framework;
      }
      break; // only the first matching project applies
    }
  }

  /**
   * Write the project structure and every stored record to disk.
   *
   * @param {object} config - Passed to getRaggrepDir to locate the index directory.
   */
  async save(config) {
    const introDir = path8.join(getRaggrepDir(this.rootDir, config), "introspection");
    await fs5.mkdir(introDir, { recursive: true });
    const projectPath = path8.join(introDir, "_project.json");
    await fs5.writeFile(projectPath, JSON.stringify({
      version: "1.0.0",
      lastUpdated: new Date().toISOString(),
      structure: this.structure
    }, null, 2));

    // An extension is a dot-suffix on the LAST path segment only. Excluding
    // separators keeps a dot in a directory name ("src/v1.2/Makefile") from
    // swallowing the rest of the path, and extensionless files get ".json"
    // appended so that load() - which only reads *.json - can find them again.
    const extensionPattern = /\.[^./\\]+$/;
    for (const [filepath, intro] of this.files) {
      const jsonRelativePath = extensionPattern.test(filepath)
        ? filepath.replace(extensionPattern, ".json")
        : `${filepath}.json`;
      const introFilePath = path8.join(introDir, "files", jsonRelativePath);
      await fs5.mkdir(path8.dirname(introFilePath), { recursive: true });
      await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
    }
    console.log(`  [Introspection] Saved metadata for ${this.files.size} files`);
  }

  /**
   * Restore structure and records previously written by save(). If the
   * project file cannot be read or parsed, the index is reset to empty.
   *
   * @param {object} config - Passed to getRaggrepDir to locate the index directory.
   */
  async load(config) {
    const introDir = path8.join(getRaggrepDir(this.rootDir, config), "introspection");
    try {
      const projectPath = path8.join(introDir, "_project.json");
      const projectContent = await fs5.readFile(projectPath, "utf-8");
      const projectData = JSON.parse(projectContent);
      this.structure = projectData.structure;
      await this.loadFilesRecursive(path8.join(introDir, "files"), "");
    } catch {
      this.structure = null;
      this.files.clear();
    }
  }

  /**
   * Read every `*.json` record under `basePath`, keyed by the `filepath`
   * stored inside each record. Never throws.
   *
   * @param {string} basePath - Directory to scan.
   * @param {string} prefix - Path of `basePath` relative to the scan root.
   */
  async loadFilesRecursive(basePath, prefix) {
    let entries;
    try {
      entries = await fs5.readdir(basePath, { withFileTypes: true });
    } catch {
      return; // directory missing or unreadable: nothing to load
    }
    for (const entry of entries) {
      const entryPath = path8.join(basePath, entry.name);
      if (entry.isDirectory()) {
        const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
        await this.loadFilesRecursive(entryPath, relativePath);
      } else if (entry.name.endsWith(".json")) {
        // Handle failures per file so one corrupt record does not discard
        // the remaining records in the same directory.
        try {
          const intro = JSON.parse(await fs5.readFile(entryPath, "utf-8"));
          this.files.set(intro.filepath, intro);
        } catch {
          // unreadable or malformed record: skip it
        }
      }
    }
  }

  /** Drop all records and the detected structure. */
  clear() {
    this.files.clear();
    this.structure = null;
  }
}
|
|
2181
|
+
// Initializer for the introspection module: runs the initializers of the
// modules it depends on.
// NOTE(review): init_fileIntrospector and init_projectDetector are each called
// twice. Left as-is - presumably these initializers are idempotent; confirm
// against the __esm helper before deduplicating.
var init_introspection = __esm(() => {
  init_projectDetector();
  init_fileIntrospector();
  init_config2();
  init_fileIntrospector();
  init_projectDetector();
});
|
|
2188
|
+
|
|
2189
|
+
// src/app/indexer/watcher.ts
|
|
2190
|
+
import { watch } from "chokidar";
|
|
2191
|
+
import * as path9 from "path";
|
|
2192
|
+
/**
 * Watch a directory and refresh the index whenever matching files change.
 *
 * File events are collected in a map keyed by root-relative path (the latest
 * event per path wins). The batch is flushed after `debounceMs` without new
 * events, or immediately once MAX_BATCH_SIZE paths are pending. A flush runs
 * cleanupIndex() if any file was removed and indexDirectory() if any file was
 * added or changed; both are invoked for the whole root, not per file. Events
 * arriving during a flush are queued and processed afterwards.
 *
 * @param {string} rootDir - Directory to watch; resolved to an absolute path.
 * @param {object} [options]
 * @param {number} [options.debounceMs] - Quiet period before a flush (default DEFAULT_DEBOUNCE_MS).
 * @param {boolean} [options.verbose] - Log each file event and flush step.
 * @param {string} [options.model] - Forwarded to indexDirectory().
 * @param {(files: string[]) => void} [options.onIndexStart] - Called with the changed paths before indexing.
 * @param {(results: object[]) => void} [options.onIndexComplete] - Called with indexDirectory()'s results.
 * @param {(event: string, filepath: string) => void} [options.onFileChange] - Called for every accepted event.
 * @param {(error: Error) => void} [options.onError] - Called for indexing and watcher errors.
 * @returns {Promise<{ stop: () => Promise<void>, isRunning: () => boolean }>}
 *   Resolves once the underlying watcher reports "ready".
 */
async function watchDirectory(rootDir, options = {}) {
  const {
    debounceMs = DEFAULT_DEBOUNCE_MS,
    verbose = false,
    model,
    onIndexStart,
    onIndexComplete,
    onFileChange,
    onError
  } = options;
  rootDir = path9.resolve(rootDir);
  const config = await loadConfig(rootDir);
  const watchPatterns = config.extensions.map((ext) => `**/*${ext}`);
  const ignorePatterns = [
    ...config.ignorePaths.map((p) => `**/${p}/**`),
    `**/${config.indexDir}/**`
  ];

  // path.relative() yields native separators, so normalise both sides to "/"
  // before comparing. Empty entries are dropped: an empty prefix would match
  // every path.
  const toPosix = (value) => value.split(path9.sep).join("/");
  const ignoredPaths = config.ignorePaths
    .map((entry) => toPosix(entry).replace(/^\/+|\/+$/g, ""))
    .filter((entry) => entry.length > 0);

  /**
   * True when `relativePath` is an ignored path itself or lies inside one, at
   * any depth. Matching is on whole path segments, so ignoring "dist" does
   * not also ignore "distribution/".
   */
  function isIgnored(relativePath) {
    const candidate = toPosix(relativePath);
    return ignoredPaths.some(
      (ignored) =>
        candidate === ignored ||
        candidate.startsWith(`${ignored}/`) ||
        candidate.includes(`/${ignored}/`)
    );
  }

  let isRunning = true;
  let isIndexing = false;
  const pendingChanges = new Map();
  let debounceTimer = null;
  let watcher = null;

  /** Flush the queued events: clean up deletions, then re-index changes. */
  async function processPendingChanges() {
    if (!isRunning || isIndexing || pendingChanges.size === 0) {
      return;
    }
    isIndexing = true;
    // Snapshot and clear first so events arriving mid-flush start a new batch.
    const changes = new Map(pendingChanges);
    pendingChanges.clear();
    try {
      const filesToIndex = [];
      const filesToDelete = [];
      for (const [filepath, event] of changes) {
        if (event === "unlink") {
          filesToDelete.push(filepath);
        } else {
          filesToIndex.push(filepath);
        }
      }
      if (filesToDelete.length > 0) {
        if (verbose) {
          console.log(`\n[Watch] Cleaning up ${filesToDelete.length} deleted file(s)...`);
        }
        await cleanupIndex(rootDir, { verbose: false });
      }
      if (filesToIndex.length > 0) {
        if (onIndexStart) {
          onIndexStart(filesToIndex);
        }
        if (verbose) {
          console.log(`\n[Watch] Indexing ${filesToIndex.length} changed file(s)...`);
        }
        const results = await indexDirectory(rootDir, {
          model,
          verbose: false
        });
        if (onIndexComplete) {
          onIndexComplete(results);
        }
        for (const result of results) {
          if (result.indexed > 0 || result.errors > 0) {
            console.log(`[Watch] ${result.moduleId}: ${result.indexed} indexed, ${result.errors} errors`);
          }
        }
      }
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      console.error("[Watch] Error during indexing:", err.message);
      if (onError) {
        onError(err);
      }
    } finally {
      isIndexing = false;
      // Pick up anything that was queued while this flush was running.
      if (pendingChanges.size > 0) {
        scheduleProcessing();
      }
    }
  }

  /** (Re)start the debounce timer, or flush at once when the batch is full. */
  function scheduleProcessing() {
    if (debounceTimer) {
      clearTimeout(debounceTimer);
    }
    if (pendingChanges.size >= MAX_BATCH_SIZE) {
      processPendingChanges();
      return;
    }
    debounceTimer = setTimeout(() => {
      debounceTimer = null;
      processPendingChanges();
    }, debounceMs);
  }

  /** Queue one watcher event unless the watcher is stopped or the path is ignored. */
  function handleFileEvent(event, filepath) {
    if (!isRunning) {
      return;
    }
    const relativePath = path9.relative(rootDir, filepath);
    if (isIgnored(relativePath)) {
      return;
    }
    if (onFileChange) {
      onFileChange(event, relativePath);
    }
    if (verbose) {
      const symbol = event === "add" ? "+" : event === "unlink" ? "-" : "~";
      console.log(`[Watch] ${symbol} ${relativePath}`);
    }
    pendingChanges.set(relativePath, event);
    scheduleProcessing();
  }

  watcher = watch(watchPatterns, {
    cwd: rootDir,
    ignored: ignorePatterns,
    persistent: true,
    ignoreInitial: true,
    awaitWriteFinish: {
      stabilityThreshold: 100,
      pollInterval: 50
    },
    usePolling: false,
    atomic: true
  });
  // The watcher reports paths relative to `cwd`; handleFileEvent takes absolute ones.
  for (const event of ["add", "change", "unlink"]) {
    watcher.on(event, (filepath) => handleFileEvent(event, path9.join(rootDir, filepath)));
  }
  watcher.on("error", (error) => {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error("[Watch] Watcher error:", err);
    if (onError) {
      onError(err);
    }
  });
  await new Promise((resolve2) => {
    watcher.on("ready", () => {
      resolve2();
    });
  });
  return {
    stop: async () => {
      isRunning = false;
      if (debounceTimer) {
        clearTimeout(debounceTimer);
        debounceTimer = null;
      }
      if (watcher) {
        await watcher.close();
        watcher = null;
      }
    },
    isRunning: () => isRunning
  };
}
// Default quiet period (ms) before queued events are flushed.
var DEFAULT_DEBOUNCE_MS = 300;
// Number of pending paths that triggers an immediate flush.
var MAX_BATCH_SIZE = 100;
|
|
2347
|
+
// Initializer for the watcher module: runs the initializers of the config and
// indexer modules, which define loadConfig, indexDirectory and cleanupIndex
// used by watchDirectory.
var init_watcher = __esm(() => {
  init_config2();
  init_indexer();
});
|
|
2351
|
+
|
|
2352
|
+
// src/app/indexer/index.ts
|
|
2353
|
+
var exports_indexer = {};
|
|
2354
|
+
__export(exports_indexer, {
|
|
2355
|
+
watchDirectory: () => watchDirectory,
|
|
2356
|
+
indexDirectory: () => indexDirectory,
|
|
2357
|
+
getIndexStatus: () => getIndexStatus,
|
|
998
2358
|
cleanupIndex: () => cleanupIndex
|
|
999
2359
|
});
|
|
1000
2360
|
import { glob } from "glob";
|
|
1001
|
-
import * as
|
|
1002
|
-
import * as
|
|
2361
|
+
import * as fs6 from "fs/promises";
|
|
2362
|
+
import * as path10 from "path";
|
|
1003
2363
|
async function indexDirectory(rootDir, options = {}) {
|
|
1004
2364
|
const verbose = options.verbose ?? false;
|
|
1005
|
-
rootDir =
|
|
2365
|
+
rootDir = path10.resolve(rootDir);
|
|
1006
2366
|
console.log(`Indexing directory: ${rootDir}`);
|
|
1007
2367
|
const config = await loadConfig(rootDir);
|
|
2368
|
+
const introspection = new IntrospectionIndex(rootDir);
|
|
2369
|
+
await introspection.initialize();
|
|
2370
|
+
if (verbose) {
|
|
2371
|
+
const structure = introspection.getStructure();
|
|
2372
|
+
if (structure?.isMonorepo) {
|
|
2373
|
+
console.log(`Detected monorepo with ${structure.projects.length} projects`);
|
|
2374
|
+
}
|
|
2375
|
+
}
|
|
1008
2376
|
await registerBuiltInModules();
|
|
1009
2377
|
const enabledModules = registry.getEnabled(config);
|
|
1010
2378
|
if (enabledModules.length === 0) {
|
|
@@ -1021,7 +2389,7 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1021
2389
|
const moduleConfig = getModuleConfig(config, module.id);
|
|
1022
2390
|
if (module.initialize && moduleConfig) {
|
|
1023
2391
|
const configWithOverrides = { ...moduleConfig };
|
|
1024
|
-
if (options.model && module.id === "
|
|
2392
|
+
if (options.model && module.id === "language/typescript") {
|
|
1025
2393
|
configWithOverrides.options = {
|
|
1026
2394
|
...configWithOverrides.options,
|
|
1027
2395
|
embeddingModel: options.model
|
|
@@ -1029,7 +2397,7 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1029
2397
|
}
|
|
1030
2398
|
await module.initialize(configWithOverrides);
|
|
1031
2399
|
}
|
|
1032
|
-
const result = await indexWithModule(rootDir, files, module, config, verbose);
|
|
2400
|
+
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
|
|
1033
2401
|
results.push(result);
|
|
1034
2402
|
if (module.finalize) {
|
|
1035
2403
|
console.log(`[${module.name}] Building secondary indexes...`);
|
|
@@ -1037,12 +2405,12 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1037
2405
|
rootDir,
|
|
1038
2406
|
config,
|
|
1039
2407
|
readFile: async (filepath) => {
|
|
1040
|
-
const fullPath =
|
|
1041
|
-
return
|
|
2408
|
+
const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
|
|
2409
|
+
return fs6.readFile(fullPath, "utf-8");
|
|
1042
2410
|
},
|
|
1043
2411
|
getFileStats: async (filepath) => {
|
|
1044
|
-
const fullPath =
|
|
1045
|
-
const stats = await
|
|
2412
|
+
const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
|
|
2413
|
+
const stats = await fs6.stat(fullPath);
|
|
1046
2414
|
return { lastModified: stats.mtime.toISOString() };
|
|
1047
2415
|
}
|
|
1048
2416
|
};
|
|
@@ -1050,10 +2418,11 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
1050
2418
|
}
|
|
1051
2419
|
console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
1052
2420
|
}
|
|
2421
|
+
await introspection.save(config);
|
|
1053
2422
|
await updateGlobalManifest(rootDir, enabledModules, config);
|
|
1054
2423
|
return results;
|
|
1055
2424
|
}
|
|
1056
|
-
async function indexWithModule(rootDir, files, module, config, verbose) {
|
|
2425
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
|
|
1057
2426
|
const result = {
|
|
1058
2427
|
moduleId: module.id,
|
|
1059
2428
|
indexed: 0,
|
|
@@ -1065,19 +2434,20 @@ async function indexWithModule(rootDir, files, module, config, verbose) {
|
|
|
1065
2434
|
rootDir,
|
|
1066
2435
|
config,
|
|
1067
2436
|
readFile: async (filepath) => {
|
|
1068
|
-
const fullPath =
|
|
1069
|
-
return
|
|
2437
|
+
const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
|
|
2438
|
+
return fs6.readFile(fullPath, "utf-8");
|
|
1070
2439
|
},
|
|
1071
2440
|
getFileStats: async (filepath) => {
|
|
1072
|
-
const fullPath =
|
|
1073
|
-
const stats = await
|
|
2441
|
+
const fullPath = path10.isAbsolute(filepath) ? filepath : path10.join(rootDir, filepath);
|
|
2442
|
+
const stats = await fs6.stat(fullPath);
|
|
1074
2443
|
return { lastModified: stats.mtime.toISOString() };
|
|
1075
|
-
}
|
|
2444
|
+
},
|
|
2445
|
+
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
1076
2446
|
};
|
|
1077
2447
|
for (const filepath of files) {
|
|
1078
|
-
const relativePath =
|
|
2448
|
+
const relativePath = path10.relative(rootDir, filepath);
|
|
1079
2449
|
try {
|
|
1080
|
-
const stats = await
|
|
2450
|
+
const stats = await fs6.stat(filepath);
|
|
1081
2451
|
const lastModified = stats.mtime.toISOString();
|
|
1082
2452
|
const existingEntry = manifest.files[relativePath];
|
|
1083
2453
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
@@ -1087,7 +2457,8 @@ async function indexWithModule(rootDir, files, module, config, verbose) {
|
|
|
1087
2457
|
result.skipped++;
|
|
1088
2458
|
continue;
|
|
1089
2459
|
}
|
|
1090
|
-
const content = await
|
|
2460
|
+
const content = await fs6.readFile(filepath, "utf-8");
|
|
2461
|
+
introspection.addFile(relativePath, content);
|
|
1091
2462
|
if (verbose) {
|
|
1092
2463
|
console.log(` Processing ${relativePath}...`);
|
|
1093
2464
|
}
|
|
@@ -1131,7 +2502,7 @@ async function findFiles(rootDir, config) {
|
|
|
1131
2502
|
async function loadModuleManifest(rootDir, moduleId, config) {
|
|
1132
2503
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
1133
2504
|
try {
|
|
1134
|
-
const content = await
|
|
2505
|
+
const content = await fs6.readFile(manifestPath, "utf-8");
|
|
1135
2506
|
return JSON.parse(content);
|
|
1136
2507
|
} catch {
|
|
1137
2508
|
return {
|
|
@@ -1144,14 +2515,14 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
1144
2515
|
}
|
|
1145
2516
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
1146
2517
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
1147
|
-
await
|
|
1148
|
-
await
|
|
2518
|
+
await fs6.mkdir(path10.dirname(manifestPath), { recursive: true });
|
|
2519
|
+
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
1149
2520
|
}
|
|
1150
2521
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
1151
2522
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
1152
|
-
const indexFilePath =
|
|
1153
|
-
await
|
|
1154
|
-
await
|
|
2523
|
+
const indexFilePath = path10.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
2524
|
+
await fs6.mkdir(path10.dirname(indexFilePath), { recursive: true });
|
|
2525
|
+
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
1155
2526
|
}
|
|
1156
2527
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
1157
2528
|
const manifestPath = getGlobalManifestPath(rootDir, config);
|
|
@@ -1160,12 +2531,12 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
1160
2531
|
lastUpdated: new Date().toISOString(),
|
|
1161
2532
|
modules: modules.map((m) => m.id)
|
|
1162
2533
|
};
|
|
1163
|
-
await
|
|
1164
|
-
await
|
|
2534
|
+
await fs6.mkdir(path10.dirname(manifestPath), { recursive: true });
|
|
2535
|
+
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
1165
2536
|
}
|
|
1166
2537
|
async function cleanupIndex(rootDir, options = {}) {
|
|
1167
2538
|
const verbose = options.verbose ?? false;
|
|
1168
|
-
rootDir =
|
|
2539
|
+
rootDir = path10.resolve(rootDir);
|
|
1169
2540
|
console.log(`Cleaning up index in: ${rootDir}`);
|
|
1170
2541
|
const config = await loadConfig(rootDir);
|
|
1171
2542
|
await registerBuiltInModules();
|
|
@@ -1195,9 +2566,9 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
1195
2566
|
const filesToRemove = [];
|
|
1196
2567
|
const updatedFiles = {};
|
|
1197
2568
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
1198
|
-
const fullPath =
|
|
2569
|
+
const fullPath = path10.join(rootDir, filepath);
|
|
1199
2570
|
try {
|
|
1200
|
-
await
|
|
2571
|
+
await fs6.access(fullPath);
|
|
1201
2572
|
updatedFiles[filepath] = entry;
|
|
1202
2573
|
result.kept++;
|
|
1203
2574
|
} catch {
|
|
@@ -1209,9 +2580,9 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
1209
2580
|
}
|
|
1210
2581
|
}
|
|
1211
2582
|
for (const filepath of filesToRemove) {
|
|
1212
|
-
const indexFilePath =
|
|
2583
|
+
const indexFilePath = path10.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
1213
2584
|
try {
|
|
1214
|
-
await
|
|
2585
|
+
await fs6.unlink(indexFilePath);
|
|
1215
2586
|
} catch {}
|
|
1216
2587
|
}
|
|
1217
2588
|
manifest.files = updatedFiles;
|
|
@@ -1222,16 +2593,16 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
1222
2593
|
}
|
|
1223
2594
|
async function cleanupEmptyDirectories(dir) {
|
|
1224
2595
|
try {
|
|
1225
|
-
const entries = await
|
|
2596
|
+
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
1226
2597
|
for (const entry of entries) {
|
|
1227
2598
|
if (entry.isDirectory()) {
|
|
1228
|
-
const subDir =
|
|
2599
|
+
const subDir = path10.join(dir, entry.name);
|
|
1229
2600
|
await cleanupEmptyDirectories(subDir);
|
|
1230
2601
|
}
|
|
1231
2602
|
}
|
|
1232
|
-
const remainingEntries = await
|
|
2603
|
+
const remainingEntries = await fs6.readdir(dir);
|
|
1233
2604
|
if (remainingEntries.length === 0) {
|
|
1234
|
-
await
|
|
2605
|
+
await fs6.rmdir(dir);
|
|
1235
2606
|
return true;
|
|
1236
2607
|
}
|
|
1237
2608
|
return false;
|
|
@@ -1239,21 +2610,79 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
1239
2610
|
return false;
|
|
1240
2611
|
}
|
|
1241
2612
|
}
|
|
2613
|
+
/**
 * Summarise the on-disk index for a project root.
 *
 * Returns `exists: false` when the index directory is absent. Otherwise the
 * module list comes from the global manifest; if that cannot be read, parsed
 * or iterated, the entries of `<indexDir>/index` are used as module ids.
 *
 * @param {string} rootDir - Project root; resolved to an absolute path.
 * @returns {Promise<{ exists: boolean, rootDir: string, indexDir: string,
 *   modules: { id: string, fileCount: number, lastUpdated: string }[],
 *   totalFiles: number, lastUpdated?: string }>}
 */
async function getIndexStatus(rootDir) {
  const absoluteRoot = path10.resolve(rootDir);
  const config = await loadConfig(absoluteRoot);
  const indexDir = path10.join(absoluteRoot, config.indexDir);
  const status = {
    exists: false,
    rootDir: absoluteRoot,
    indexDir,
    modules: [],
    totalFiles: 0
  };

  const indexDirPresent = await fs6.access(indexDir).then(() => true, () => false);
  if (!indexDirPresent) {
    return status;
  }

  // Append one summary per module id; a module whose manifest cannot be
  // summarised is skipped.
  const recordModules = async (moduleIds) => {
    for (const id of moduleIds) {
      try {
        const manifest = await loadModuleManifest(absoluteRoot, id, config);
        const fileCount = Object.keys(manifest.files).length;
        status.modules.push({ id, fileCount, lastUpdated: manifest.lastUpdated });
        status.totalFiles += fileCount;
      } catch {}
    }
  };

  try {
    const manifestText = await fs6.readFile(getGlobalManifestPath(absoluteRoot, config), "utf-8");
    const globalManifest = JSON.parse(manifestText);
    status.exists = true;
    status.lastUpdated = globalManifest.lastUpdated;
    await recordModules(globalManifest.modules);
  } catch {
    // No usable global manifest: fall back to listing the index directory.
    try {
      const entries = await fs6.readdir(path10.join(indexDir, "index"));
      if (entries.length > 0) {
        status.exists = true;
        await recordModules(entries);
      }
    } catch {}
  }
  return status;
}
|
|
1242
2669
|
var init_indexer = __esm(() => {
|
|
1243
2670
|
init_config2();
|
|
1244
2671
|
init_registry();
|
|
2672
|
+
init_introspection();
|
|
2673
|
+
init_watcher();
|
|
1245
2674
|
});
|
|
1246
2675
|
|
|
1247
|
-
// src/search/index.ts
|
|
2676
|
+
// src/app/search/index.ts
|
|
1248
2677
|
var exports_search = {};
|
|
1249
2678
|
__export(exports_search, {
|
|
1250
2679
|
search: () => search,
|
|
1251
2680
|
formatSearchResults: () => formatSearchResults
|
|
1252
2681
|
});
|
|
1253
|
-
import * as
|
|
1254
|
-
import * as
|
|
2682
|
+
import * as fs7 from "fs/promises";
|
|
2683
|
+
import * as path11 from "path";
|
|
1255
2684
|
async function search(rootDir, query, options = {}) {
|
|
1256
|
-
rootDir =
|
|
2685
|
+
rootDir = path11.resolve(rootDir);
|
|
1257
2686
|
console.log(`Searching for: "${query}"`);
|
|
1258
2687
|
const config = await loadConfig(rootDir);
|
|
1259
2688
|
await registerBuiltInModules();
|
|
@@ -1294,9 +2723,9 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
1294
2723
|
config,
|
|
1295
2724
|
loadFileIndex: async (filepath) => {
|
|
1296
2725
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
1297
|
-
const indexFilePath = hasExtension ?
|
|
2726
|
+
const indexFilePath = hasExtension ? path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path11.join(indexPath, filepath + ".json");
|
|
1298
2727
|
try {
|
|
1299
|
-
const content = await
|
|
2728
|
+
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
1300
2729
|
return JSON.parse(content);
|
|
1301
2730
|
} catch {
|
|
1302
2731
|
return null;
|
|
@@ -1306,17 +2735,17 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
1306
2735
|
const files = [];
|
|
1307
2736
|
await traverseDirectory(indexPath, files, indexPath);
|
|
1308
2737
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
1309
|
-
const
|
|
1310
|
-
return
|
|
2738
|
+
const relative4 = path11.relative(indexPath, f);
|
|
2739
|
+
return relative4.replace(/\.json$/, "");
|
|
1311
2740
|
});
|
|
1312
2741
|
}
|
|
1313
2742
|
};
|
|
1314
2743
|
}
|
|
1315
2744
|
async function traverseDirectory(dir, files, basePath) {
|
|
1316
2745
|
try {
|
|
1317
|
-
const entries = await
|
|
2746
|
+
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
1318
2747
|
for (const entry of entries) {
|
|
1319
|
-
const fullPath =
|
|
2748
|
+
const fullPath = path11.join(dir, entry.name);
|
|
1320
2749
|
if (entry.isDirectory()) {
|
|
1321
2750
|
await traverseDirectory(fullPath, files, basePath);
|
|
1322
2751
|
} else if (entry.isFile()) {
|
|
@@ -1328,7 +2757,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
1328
2757
|
async function loadGlobalManifest(rootDir, config) {
|
|
1329
2758
|
const manifestPath = getGlobalManifestPath(rootDir, config);
|
|
1330
2759
|
try {
|
|
1331
|
-
const content = await
|
|
2760
|
+
const content = await fs7.readFile(manifestPath, "utf-8");
|
|
1332
2761
|
return JSON.parse(content);
|
|
1333
2762
|
} catch {
|
|
1334
2763
|
return null;
|
|
@@ -1371,14 +2800,40 @@ var init_search = __esm(() => {
|
|
|
1371
2800
|
init_registry();
|
|
1372
2801
|
});
|
|
1373
2802
|
|
|
1374
|
-
// src/cli/main.ts
|
|
2803
|
+
// src/app/cli/main.ts
|
|
1375
2804
|
init_embeddings();
|
|
2805
|
+
import { createRequire } from "module";
|
|
2806
|
+
// ES modules have no built-in `require`; createRequire provides one that
// resolves relative to this file, used here to read package.json.
var require2 = createRequire(import.meta.url);
// The manifest's relative location depends on where this file sits:
// "../../../package.json" is correct from src/app/cli/, but from the bundled
// dist/cli/ location it points outside the package, where "../../package.json"
// is the right path. Try both and prefer the manifest that actually belongs
// to raggrep. The lookup is best-effort: a missing manifest must not stop the
// CLI from starting, so every failure falls through to the next candidate.
var pkg = (() => {
  let firstLoaded;
  for (const candidate of ["../../../package.json", "../../package.json"]) {
    try {
      const manifest = require2(candidate);
      if (manifest?.name === "raggrep") {
        return manifest;
      }
      firstLoaded ??= manifest;
    } catch {
      // not found or unreadable at this location; try the next one
    }
  }
  return firstLoaded ?? {};
})();
var VERSION = pkg.version ?? "unknown";
var args = process.argv.slice(2);
var command = args[0];
// `-v` means "version" only as the first argument; after a command it is
// parsed as --verbose.
if (command === "--version" || command === "-v") {
  console.log(`raggrep v${VERSION}`);
  process.exit(0);
}
|
|
2815
|
+
/**
 * Render how long ago `date` was, relative to the current time.
 *
 * Each unit is the floor of the previous one: under 60 seconds is "just now",
 * then "<n>m ago", "<n>h ago", "<n>d ago" (up to 6 days), and anything older
 * is the locale date string. Dates in the future also yield "just now".
 *
 * @param {Date} date - The moment to describe.
 * @returns {string} Relative label or a locale-formatted date.
 */
function formatTimeAgo(date) {
  const elapsedMs = Date.now() - date.getTime();

  const seconds = Math.floor(elapsedMs / 1000);
  if (seconds < 60) {
    return "just now";
  }

  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(minutes / 60);
  if (hours < 24) {
    return `${hours}h ago`;
  }

  const days = Math.floor(hours / 24);
  return days < 7 ? `${days}d ago` : date.toLocaleDateString();
}
|
|
1378
2832
|
function parseFlags(args2) {
|
|
1379
2833
|
const flags = {
|
|
1380
2834
|
help: false,
|
|
1381
2835
|
verbose: false,
|
|
2836
|
+
watch: false,
|
|
1382
2837
|
remaining: []
|
|
1383
2838
|
};
|
|
1384
2839
|
for (let i = 0;i < args2.length; i++) {
|
|
@@ -1387,6 +2842,8 @@ function parseFlags(args2) {
|
|
|
1387
2842
|
flags.help = true;
|
|
1388
2843
|
} else if (arg === "--verbose" || arg === "-v") {
|
|
1389
2844
|
flags.verbose = true;
|
|
2845
|
+
} else if (arg === "--watch" || arg === "-w") {
|
|
2846
|
+
flags.watch = true;
|
|
1390
2847
|
} else if (arg === "--model" || arg === "-m") {
|
|
1391
2848
|
const modelName = args2[++i];
|
|
1392
2849
|
if (modelName && modelName in EMBEDDING_MODELS) {
|
|
@@ -1436,6 +2893,7 @@ Usage:
|
|
|
1436
2893
|
raggrep index [options]
|
|
1437
2894
|
|
|
1438
2895
|
Options:
|
|
2896
|
+
-w, --watch Watch for file changes and re-index automatically
|
|
1439
2897
|
-m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
|
|
1440
2898
|
-v, --verbose Show detailed progress
|
|
1441
2899
|
-h, --help Show this help message
|
|
@@ -1447,12 +2905,13 @@ Model Cache: ${getCacheDir()}
|
|
|
1447
2905
|
|
|
1448
2906
|
Examples:
|
|
1449
2907
|
raggrep index
|
|
2908
|
+
raggrep index --watch
|
|
1450
2909
|
raggrep index --model bge-small-en-v1.5
|
|
1451
2910
|
raggrep index --verbose
|
|
1452
2911
|
`);
|
|
1453
2912
|
process.exit(0);
|
|
1454
2913
|
}
|
|
1455
|
-
const { indexDirectory: indexDirectory2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
|
|
2914
|
+
const { indexDirectory: indexDirectory2, watchDirectory: watchDirectory2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
|
|
1456
2915
|
console.log("RAGgrep Indexer");
|
|
1457
2916
|
console.log(`================
|
|
1458
2917
|
`);
|
|
@@ -1471,6 +2930,39 @@ Examples:
|
|
|
1471
2930
|
console.error("Error during indexing:", error);
|
|
1472
2931
|
process.exit(1);
|
|
1473
2932
|
}
|
|
2933
|
+
if (flags.watch) {
|
|
2934
|
+
console.log(`
|
|
2935
|
+
┌─────────────────────────────────────────┐`);
|
|
2936
|
+
console.log("│ Watching for changes... (Ctrl+C to stop) │");
|
|
2937
|
+
console.log(`└─────────────────────────────────────────┘
|
|
2938
|
+
`);
|
|
2939
|
+
try {
|
|
2940
|
+
const watcher = await watchDirectory2(process.cwd(), {
|
|
2941
|
+
model: flags.model,
|
|
2942
|
+
verbose: flags.verbose,
|
|
2943
|
+
onFileChange: (event, filepath) => {
|
|
2944
|
+
if (flags.verbose) {
|
|
2945
|
+
const symbol = event === "add" ? "+" : event === "unlink" ? "-" : "~";
|
|
2946
|
+
console.log(` ${symbol} ${filepath}`);
|
|
2947
|
+
}
|
|
2948
|
+
}
|
|
2949
|
+
});
|
|
2950
|
+
const shutdown = async () => {
|
|
2951
|
+
console.log(`
|
|
2952
|
+
|
|
2953
|
+
Stopping watcher...`);
|
|
2954
|
+
await watcher.stop();
|
|
2955
|
+
console.log("Done.");
|
|
2956
|
+
process.exit(0);
|
|
2957
|
+
};
|
|
2958
|
+
process.on("SIGINT", shutdown);
|
|
2959
|
+
process.on("SIGTERM", shutdown);
|
|
2960
|
+
await new Promise(() => {});
|
|
2961
|
+
} catch (error) {
|
|
2962
|
+
console.error("Error starting watcher:", error);
|
|
2963
|
+
process.exit(1);
|
|
2964
|
+
}
|
|
2965
|
+
}
|
|
1474
2966
|
break;
|
|
1475
2967
|
}
|
|
1476
2968
|
case "query": {
|
|
@@ -1487,6 +2979,10 @@ Options:
|
|
|
1487
2979
|
-t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
|
|
1488
2980
|
-h, --help Show this help message
|
|
1489
2981
|
|
|
2982
|
+
Note:
|
|
2983
|
+
If the current directory has not been indexed, raggrep will
|
|
2984
|
+
automatically index it before searching.
|
|
2985
|
+
|
|
1490
2986
|
Examples:
|
|
1491
2987
|
raggrep query "user authentication"
|
|
1492
2988
|
raggrep query "handle errors" --top 5
|
|
@@ -1496,16 +2992,36 @@ Examples:
|
|
|
1496
2992
|
process.exit(0);
|
|
1497
2993
|
}
|
|
1498
2994
|
const { search: search2, formatSearchResults: formatSearchResults2 } = await Promise.resolve().then(() => (init_search(), exports_search));
|
|
2995
|
+
const { getIndexStatus: getIndexStatus2, indexDirectory: indexDirectory2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
|
|
1499
2996
|
const query = flags.remaining[0];
|
|
1500
2997
|
if (!query) {
|
|
1501
2998
|
console.error("Usage: raggrep query <search query>");
|
|
1502
2999
|
console.error('Run "raggrep query --help" for more information.');
|
|
1503
3000
|
process.exit(1);
|
|
1504
3001
|
}
|
|
1505
|
-
console.log("RAGgrep Search");
|
|
1506
|
-
console.log(`==============
|
|
1507
|
-
`);
|
|
1508
3002
|
try {
|
|
3003
|
+
const status = await getIndexStatus2(process.cwd());
|
|
3004
|
+
if (!status.exists) {
|
|
3005
|
+
console.log(`No index found. Indexing directory first...
|
|
3006
|
+
`);
|
|
3007
|
+
console.log("RAGgrep Indexer");
|
|
3008
|
+
console.log(`================
|
|
3009
|
+
`);
|
|
3010
|
+
const indexResults = await indexDirectory2(process.cwd(), {
|
|
3011
|
+
model: flags.model,
|
|
3012
|
+
verbose: false
|
|
3013
|
+
});
|
|
3014
|
+
console.log(`
|
|
3015
|
+
================`);
|
|
3016
|
+
console.log("Summary:");
|
|
3017
|
+
for (const result of indexResults) {
|
|
3018
|
+
console.log(` ${result.moduleId}: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
3019
|
+
}
|
|
3020
|
+
console.log("");
|
|
3021
|
+
}
|
|
3022
|
+
console.log("RAGgrep Search");
|
|
3023
|
+
console.log(`==============
|
|
3024
|
+
`);
|
|
1509
3025
|
const filePatterns = flags.fileType ? [`*.${flags.fileType}`] : undefined;
|
|
1510
3026
|
const results = await search2(process.cwd(), query, {
|
|
1511
3027
|
topK: flags.topK ?? 10,
|
|
@@ -1561,9 +3077,72 @@ Examples:
|
|
|
1561
3077
|
}
|
|
1562
3078
|
break;
|
|
1563
3079
|
}
|
|
3080
|
+
case "status": {
|
|
3081
|
+
if (flags.help) {
|
|
3082
|
+
console.log(`
|
|
3083
|
+
raggrep status - Show the current state of the index
|
|
3084
|
+
|
|
3085
|
+
Usage:
|
|
3086
|
+
raggrep status [options]
|
|
3087
|
+
|
|
3088
|
+
Options:
|
|
3089
|
+
-h, --help Show this help message
|
|
3090
|
+
|
|
3091
|
+
Description:
|
|
3092
|
+
Displays information about the index in the current directory,
|
|
3093
|
+
including whether it exists, how many files are indexed, and
|
|
3094
|
+
when it was last updated.
|
|
3095
|
+
|
|
3096
|
+
Examples:
|
|
3097
|
+
raggrep status
|
|
3098
|
+
`);
|
|
3099
|
+
process.exit(0);
|
|
3100
|
+
}
|
|
3101
|
+
const { getIndexStatus: getIndexStatus2 } = await Promise.resolve().then(() => (init_indexer(), exports_indexer));
|
|
3102
|
+
try {
|
|
3103
|
+
const status = await getIndexStatus2(process.cwd());
|
|
3104
|
+
if (!status.exists) {
|
|
3105
|
+
console.log(`
|
|
3106
|
+
┌─────────────────────────────────────────┐
|
|
3107
|
+
│ RAGgrep Status │
|
|
3108
|
+
├─────────────────────────────────────────┤
|
|
3109
|
+
│ ○ Not indexed │
|
|
3110
|
+
└─────────────────────────────────────────┘
|
|
3111
|
+
|
|
3112
|
+
Directory: ${status.rootDir}
|
|
3113
|
+
|
|
3114
|
+
Run "raggrep index" to create an index.
|
|
3115
|
+
`);
|
|
3116
|
+
} else {
|
|
3117
|
+
const date = status.lastUpdated ? new Date(status.lastUpdated) : null;
|
|
3118
|
+
const timeAgo = date ? formatTimeAgo(date) : "unknown";
|
|
3119
|
+
console.log(`
|
|
3120
|
+
┌─────────────────────────────────────────┐
|
|
3121
|
+
│ RAGgrep Status │
|
|
3122
|
+
├─────────────────────────────────────────┤
|
|
3123
|
+
│ ● Indexed │
|
|
3124
|
+
└─────────────────────────────────────────┘
|
|
3125
|
+
|
|
3126
|
+
Files: ${status.totalFiles.toString().padEnd(10)} Updated: ${timeAgo}
|
|
3127
|
+
Location: ${status.indexDir}
|
|
3128
|
+
`);
|
|
3129
|
+
if (status.modules.length > 0) {
|
|
3130
|
+
console.log(" Modules:");
|
|
3131
|
+
for (const mod of status.modules) {
|
|
3132
|
+
console.log(` └─ ${mod.id} (${mod.fileCount} files)`);
|
|
3133
|
+
}
|
|
3134
|
+
console.log("");
|
|
3135
|
+
}
|
|
3136
|
+
}
|
|
3137
|
+
} catch (error) {
|
|
3138
|
+
console.error("Error getting status:", error);
|
|
3139
|
+
process.exit(1);
|
|
3140
|
+
}
|
|
3141
|
+
break;
|
|
3142
|
+
}
|
|
1564
3143
|
default:
|
|
1565
3144
|
console.log(`
|
|
1566
|
-
raggrep - Local filesystem-based RAG system for codebases
|
|
3145
|
+
raggrep v${VERSION} - Local filesystem-based RAG system for codebases
|
|
1567
3146
|
|
|
1568
3147
|
Usage:
|
|
1569
3148
|
raggrep <command> [options]
|
|
@@ -1571,16 +3150,17 @@ Usage:
|
|
|
1571
3150
|
Commands:
|
|
1572
3151
|
index Index the current directory
|
|
1573
3152
|
query Search the indexed codebase
|
|
3153
|
+
status Show the current state of the index
|
|
1574
3154
|
cleanup Remove stale index entries for deleted files
|
|
1575
3155
|
|
|
1576
3156
|
Options:
|
|
1577
|
-
-h, --help
|
|
3157
|
+
-h, --help Show help for a command
|
|
3158
|
+
-v, --version Show version number
|
|
1578
3159
|
|
|
1579
3160
|
Examples:
|
|
1580
3161
|
raggrep index
|
|
1581
|
-
raggrep index --model bge-small-en-v1.5
|
|
1582
3162
|
raggrep query "user login"
|
|
1583
|
-
raggrep
|
|
3163
|
+
raggrep status
|
|
1584
3164
|
raggrep cleanup
|
|
1585
3165
|
|
|
1586
3166
|
Run 'raggrep <command> --help' for more information.
|
|
@@ -1593,4 +3173,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
1593
3173
|
}
|
|
1594
3174
|
main();
|
|
1595
3175
|
|
|
1596
|
-
//# debugId=
|
|
3176
|
+
//# debugId=9D236CA9876AFE5464756E2164756E21
|