raggrep 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +32 -4
- package/dist/cli/main.js +1459 -635
- package/dist/cli/main.js.map +17 -11
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/ports/embedding.d.ts +4 -1
- package/dist/domain/ports/index.d.ts +2 -1
- package/dist/domain/ports/logger.d.ts +66 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.d.ts +45 -8
- package/dist/index.js +1500 -679
- package/dist/index.js.map +17 -11
- package/dist/infrastructure/index.d.ts +1 -0
- package/dist/infrastructure/logger/index.d.ts +6 -0
- package/dist/infrastructure/logger/loggers.d.ts +75 -0
- package/dist/modules/data/json/index.d.ts +47 -0
- package/dist/modules/docs/markdown/index.d.ts +47 -0
- package/dist/modules/language/typescript/index.d.ts +10 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,4 +1,20 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
1
4
|
var __defProp = Object.defineProperty;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __toESM = (mod, isNodeMode, target) => {
|
|
8
|
+
target = mod != null ? __create(__getProtoOf(mod)) : {};
|
|
9
|
+
const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
|
|
10
|
+
for (let key of __getOwnPropNames(mod))
|
|
11
|
+
if (!__hasOwnProp.call(to, key))
|
|
12
|
+
__defProp(to, key, {
|
|
13
|
+
get: () => mod[key],
|
|
14
|
+
enumerable: true
|
|
15
|
+
});
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
2
18
|
var __export = (target, all) => {
|
|
3
19
|
for (var name in all)
|
|
4
20
|
__defProp(target, name, {
|
|
@@ -9,6 +25,7 @@ var __export = (target, all) => {
|
|
|
9
25
|
});
|
|
10
26
|
};
|
|
11
27
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
28
|
+
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
12
29
|
// src/domain/entities/searchResult.ts
|
|
13
30
|
var DEFAULT_SEARCH_OPTIONS;
|
|
14
31
|
var init_searchResult = __esm(() => {
|
|
@@ -39,6 +56,20 @@ function createDefaultConfig() {
|
|
|
39
56
|
options: {
|
|
40
57
|
embeddingModel: "all-MiniLM-L6-v2"
|
|
41
58
|
}
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
id: "data/json",
|
|
62
|
+
enabled: true,
|
|
63
|
+
options: {
|
|
64
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
id: "docs/markdown",
|
|
69
|
+
enabled: true,
|
|
70
|
+
options: {
|
|
71
|
+
embeddingModel: "all-MiniLM-L6-v2"
|
|
72
|
+
}
|
|
42
73
|
}
|
|
43
74
|
]
|
|
44
75
|
};
|
|
@@ -82,16 +113,18 @@ var init_config = __esm(() => {
|
|
|
82
113
|
".jsx",
|
|
83
114
|
".mjs",
|
|
84
115
|
".cjs",
|
|
116
|
+
".mts",
|
|
117
|
+
".cts",
|
|
118
|
+
".json",
|
|
119
|
+
".md",
|
|
85
120
|
".py",
|
|
86
121
|
".go",
|
|
87
122
|
".rs",
|
|
88
123
|
".java",
|
|
89
|
-
".json",
|
|
90
124
|
".yaml",
|
|
91
125
|
".yml",
|
|
92
126
|
".toml",
|
|
93
127
|
".sql",
|
|
94
|
-
".md",
|
|
95
128
|
".txt"
|
|
96
129
|
];
|
|
97
130
|
});
|
|
@@ -1999,7 +2032,8 @@ class TransformersEmbeddingProvider {
|
|
|
1999
2032
|
constructor(config) {
|
|
2000
2033
|
this.config = {
|
|
2001
2034
|
model: config?.model ?? "all-MiniLM-L6-v2",
|
|
2002
|
-
showProgress: config?.showProgress ?? false
|
|
2035
|
+
showProgress: config?.showProgress ?? false,
|
|
2036
|
+
logger: config?.logger
|
|
2003
2037
|
};
|
|
2004
2038
|
}
|
|
2005
2039
|
async initialize(config) {
|
|
@@ -2021,29 +2055,55 @@ class TransformersEmbeddingProvider {
|
|
|
2021
2055
|
this.isInitializing = true;
|
|
2022
2056
|
this.initPromise = (async () => {
|
|
2023
2057
|
const modelId = EMBEDDING_MODELS2[this.config.model];
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
}
|
|
2058
|
+
const logger = this.config.logger;
|
|
2059
|
+
const showProgress = this.config.showProgress || !!logger;
|
|
2060
|
+
const isCached = await isModelCached(this.config.model);
|
|
2061
|
+
let hasDownloads = false;
|
|
2029
2062
|
try {
|
|
2030
2063
|
this.pipeline = await pipeline("feature-extraction", modelId, {
|
|
2031
|
-
progress_callback:
|
|
2064
|
+
progress_callback: showProgress && !isCached ? (progress) => {
|
|
2032
2065
|
if (progress.status === "progress" && progress.file) {
|
|
2066
|
+
if (!hasDownloads) {
|
|
2067
|
+
hasDownloads = true;
|
|
2068
|
+
if (logger) {
|
|
2069
|
+
logger.info(`Downloading embedding model: ${this.config.model}`);
|
|
2070
|
+
} else {
|
|
2071
|
+
console.log(`
|
|
2072
|
+
Loading embedding model: ${this.config.model}`);
|
|
2073
|
+
console.log(` Cache: ${CACHE_DIR}`);
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
2033
2076
|
const pct = progress.progress ? Math.round(progress.progress) : 0;
|
|
2034
|
-
|
|
2077
|
+
if (logger) {
|
|
2078
|
+
logger.progress(` Downloading ${progress.file}: ${pct}%`);
|
|
2079
|
+
} else {
|
|
2080
|
+
process.stdout.write(`\r Downloading ${progress.file}: ${pct}% `);
|
|
2081
|
+
}
|
|
2035
2082
|
} else if (progress.status === "done" && progress.file) {
|
|
2036
|
-
|
|
2083
|
+
if (logger) {
|
|
2084
|
+
logger.clearProgress();
|
|
2085
|
+
logger.info(` Downloaded ${progress.file}`);
|
|
2086
|
+
} else if (hasDownloads) {
|
|
2087
|
+
process.stdout.write(`\r Downloaded ${progress.file}
|
|
2037
2088
|
`);
|
|
2089
|
+
}
|
|
2038
2090
|
}
|
|
2039
2091
|
} : undefined
|
|
2040
2092
|
});
|
|
2041
|
-
if (
|
|
2042
|
-
|
|
2093
|
+
if (hasDownloads) {
|
|
2094
|
+
if (logger) {
|
|
2095
|
+
logger.clearProgress();
|
|
2096
|
+
logger.info(`Model ready: ${this.config.model}`);
|
|
2097
|
+
} else {
|
|
2098
|
+
console.log(` Model ready.
|
|
2043
2099
|
`);
|
|
2100
|
+
}
|
|
2044
2101
|
}
|
|
2045
2102
|
} catch (error) {
|
|
2046
2103
|
this.pipeline = null;
|
|
2104
|
+
if (logger) {
|
|
2105
|
+
logger.clearProgress();
|
|
2106
|
+
}
|
|
2047
2107
|
throw new Error(`Failed to load embedding model: ${error}`);
|
|
2048
2108
|
} finally {
|
|
2049
2109
|
this.isInitializing = false;
|
|
@@ -2094,9 +2154,21 @@ class TransformersEmbeddingProvider {
|
|
|
2094
2154
|
this.pipeline = null;
|
|
2095
2155
|
}
|
|
2096
2156
|
}
|
|
2157
|
+
async function isModelCached(model) {
|
|
2158
|
+
const modelId = EMBEDDING_MODELS2[model];
|
|
2159
|
+
const modelPath = path6.join(CACHE_DIR, modelId);
|
|
2160
|
+
try {
|
|
2161
|
+
const fs3 = await import("fs/promises");
|
|
2162
|
+
const onnxPath = path6.join(modelPath, "onnx", "model_quantized.onnx");
|
|
2163
|
+
await fs3.access(onnxPath);
|
|
2164
|
+
return true;
|
|
2165
|
+
} catch {
|
|
2166
|
+
return false;
|
|
2167
|
+
}
|
|
2168
|
+
}
|
|
2097
2169
|
function configureEmbeddings(config) {
|
|
2098
2170
|
const newConfig = { ...globalConfig, ...config };
|
|
2099
|
-
if (newConfig.model !== globalConfig.model) {
|
|
2171
|
+
if (newConfig.model !== globalConfig.model || newConfig.logger !== globalConfig.logger) {
|
|
2100
2172
|
globalProvider = null;
|
|
2101
2173
|
}
|
|
2102
2174
|
globalConfig = newConfig;
|
|
@@ -2132,7 +2204,8 @@ var init_transformersEmbedding = __esm(() => {
|
|
|
2132
2204
|
};
|
|
2133
2205
|
globalConfig = {
|
|
2134
2206
|
model: "all-MiniLM-L6-v2",
|
|
2135
|
-
showProgress: false
|
|
2207
|
+
showProgress: false,
|
|
2208
|
+
logger: undefined
|
|
2136
2209
|
};
|
|
2137
2210
|
});
|
|
2138
2211
|
|
|
@@ -2141,221 +2214,6 @@ var init_embeddings = __esm(() => {
|
|
|
2141
2214
|
init_transformersEmbedding();
|
|
2142
2215
|
});
|
|
2143
2216
|
|
|
2144
|
-
// src/domain/services/similarity.ts
|
|
2145
|
-
function cosineSimilarity(a, b) {
|
|
2146
|
-
if (a.length !== b.length) {
|
|
2147
|
-
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
2148
|
-
}
|
|
2149
|
-
let dotProduct = 0;
|
|
2150
|
-
let normA = 0;
|
|
2151
|
-
let normB = 0;
|
|
2152
|
-
for (let i = 0;i < a.length; i++) {
|
|
2153
|
-
dotProduct += a[i] * b[i];
|
|
2154
|
-
normA += a[i] * a[i];
|
|
2155
|
-
normB += b[i] * b[i];
|
|
2156
|
-
}
|
|
2157
|
-
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
2158
|
-
if (magnitude === 0)
|
|
2159
|
-
return 0;
|
|
2160
|
-
return dotProduct / magnitude;
|
|
2161
|
-
}
|
|
2162
|
-
|
|
2163
|
-
// src/modules/language/typescript/parseCode.ts
|
|
2164
|
-
import * as ts from "typescript";
|
|
2165
|
-
function parseCode(content, filepath) {
|
|
2166
|
-
const ext = filepath.split(".").pop()?.toLowerCase();
|
|
2167
|
-
if (["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"].includes(ext || "")) {
|
|
2168
|
-
return parseTypeScript(content, filepath);
|
|
2169
|
-
}
|
|
2170
|
-
return parseGenericCode(content);
|
|
2171
|
-
}
|
|
2172
|
-
function parseTypeScript(content, filepath) {
|
|
2173
|
-
const chunks = [];
|
|
2174
|
-
const lines = content.split(`
|
|
2175
|
-
`);
|
|
2176
|
-
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
2177
|
-
function getLineNumbers(node) {
|
|
2178
|
-
const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
|
|
2179
|
-
const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
|
|
2180
|
-
return {
|
|
2181
|
-
startLine: start.line + 1,
|
|
2182
|
-
endLine: end.line + 1
|
|
2183
|
-
};
|
|
2184
|
-
}
|
|
2185
|
-
function getNodeText(node) {
|
|
2186
|
-
return node.getText(sourceFile);
|
|
2187
|
-
}
|
|
2188
|
-
function isExported(node) {
|
|
2189
|
-
if (!ts.canHaveModifiers(node))
|
|
2190
|
-
return false;
|
|
2191
|
-
const modifiers = ts.getModifiers(node);
|
|
2192
|
-
return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
|
|
2193
|
-
}
|
|
2194
|
-
function getJSDoc(node) {
|
|
2195
|
-
const jsDocNodes = ts.getJSDocCommentsAndTags(node);
|
|
2196
|
-
if (jsDocNodes.length === 0)
|
|
2197
|
-
return;
|
|
2198
|
-
return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
|
|
2199
|
-
`);
|
|
2200
|
-
}
|
|
2201
|
-
function getFunctionName(node) {
|
|
2202
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2203
|
-
return node.name.text;
|
|
2204
|
-
}
|
|
2205
|
-
if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2206
|
-
return node.name.text;
|
|
2207
|
-
}
|
|
2208
|
-
if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2209
|
-
return node.name.text;
|
|
2210
|
-
}
|
|
2211
|
-
return;
|
|
2212
|
-
}
|
|
2213
|
-
function visit(node) {
|
|
2214
|
-
const { startLine, endLine } = getLineNumbers(node);
|
|
2215
|
-
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2216
|
-
chunks.push({
|
|
2217
|
-
content: getNodeText(node),
|
|
2218
|
-
startLine,
|
|
2219
|
-
endLine,
|
|
2220
|
-
type: "function",
|
|
2221
|
-
name: node.name.text,
|
|
2222
|
-
isExported: isExported(node),
|
|
2223
|
-
jsDoc: getJSDoc(node)
|
|
2224
|
-
});
|
|
2225
|
-
return;
|
|
2226
|
-
}
|
|
2227
|
-
if (ts.isVariableStatement(node)) {
|
|
2228
|
-
for (const decl of node.declarationList.declarations) {
|
|
2229
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2230
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2231
|
-
chunks.push({
|
|
2232
|
-
content: getNodeText(node),
|
|
2233
|
-
startLine,
|
|
2234
|
-
endLine,
|
|
2235
|
-
type: "function",
|
|
2236
|
-
name,
|
|
2237
|
-
isExported: isExported(node),
|
|
2238
|
-
jsDoc: getJSDoc(node)
|
|
2239
|
-
});
|
|
2240
|
-
return;
|
|
2241
|
-
}
|
|
2242
|
-
}
|
|
2243
|
-
}
|
|
2244
|
-
if (ts.isClassDeclaration(node) && node.name) {
|
|
2245
|
-
chunks.push({
|
|
2246
|
-
content: getNodeText(node),
|
|
2247
|
-
startLine,
|
|
2248
|
-
endLine,
|
|
2249
|
-
type: "class",
|
|
2250
|
-
name: node.name.text,
|
|
2251
|
-
isExported: isExported(node),
|
|
2252
|
-
jsDoc: getJSDoc(node)
|
|
2253
|
-
});
|
|
2254
|
-
return;
|
|
2255
|
-
}
|
|
2256
|
-
if (ts.isInterfaceDeclaration(node)) {
|
|
2257
|
-
chunks.push({
|
|
2258
|
-
content: getNodeText(node),
|
|
2259
|
-
startLine,
|
|
2260
|
-
endLine,
|
|
2261
|
-
type: "interface",
|
|
2262
|
-
name: node.name.text,
|
|
2263
|
-
isExported: isExported(node),
|
|
2264
|
-
jsDoc: getJSDoc(node)
|
|
2265
|
-
});
|
|
2266
|
-
return;
|
|
2267
|
-
}
|
|
2268
|
-
if (ts.isTypeAliasDeclaration(node)) {
|
|
2269
|
-
chunks.push({
|
|
2270
|
-
content: getNodeText(node),
|
|
2271
|
-
startLine,
|
|
2272
|
-
endLine,
|
|
2273
|
-
type: "type",
|
|
2274
|
-
name: node.name.text,
|
|
2275
|
-
isExported: isExported(node),
|
|
2276
|
-
jsDoc: getJSDoc(node)
|
|
2277
|
-
});
|
|
2278
|
-
return;
|
|
2279
|
-
}
|
|
2280
|
-
if (ts.isEnumDeclaration(node)) {
|
|
2281
|
-
chunks.push({
|
|
2282
|
-
content: getNodeText(node),
|
|
2283
|
-
startLine,
|
|
2284
|
-
endLine,
|
|
2285
|
-
type: "enum",
|
|
2286
|
-
name: node.name.text,
|
|
2287
|
-
isExported: isExported(node),
|
|
2288
|
-
jsDoc: getJSDoc(node)
|
|
2289
|
-
});
|
|
2290
|
-
return;
|
|
2291
|
-
}
|
|
2292
|
-
if (ts.isVariableStatement(node) && isExported(node)) {
|
|
2293
|
-
for (const decl of node.declarationList.declarations) {
|
|
2294
|
-
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2295
|
-
continue;
|
|
2296
|
-
}
|
|
2297
|
-
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2298
|
-
chunks.push({
|
|
2299
|
-
content: getNodeText(node),
|
|
2300
|
-
startLine,
|
|
2301
|
-
endLine,
|
|
2302
|
-
type: "variable",
|
|
2303
|
-
name,
|
|
2304
|
-
isExported: true,
|
|
2305
|
-
jsDoc: getJSDoc(node)
|
|
2306
|
-
});
|
|
2307
|
-
}
|
|
2308
|
-
return;
|
|
2309
|
-
}
|
|
2310
|
-
ts.forEachChild(node, visit);
|
|
2311
|
-
}
|
|
2312
|
-
ts.forEachChild(sourceFile, visit);
|
|
2313
|
-
if (chunks.length === 0) {
|
|
2314
|
-
return parseGenericCode(content);
|
|
2315
|
-
}
|
|
2316
|
-
return chunks;
|
|
2317
|
-
}
|
|
2318
|
-
function parseGenericCode(content) {
|
|
2319
|
-
const chunks = [];
|
|
2320
|
-
const lines = content.split(`
|
|
2321
|
-
`);
|
|
2322
|
-
const CHUNK_SIZE = 30;
|
|
2323
|
-
const OVERLAP = 5;
|
|
2324
|
-
if (lines.length <= CHUNK_SIZE) {
|
|
2325
|
-
return [
|
|
2326
|
-
{
|
|
2327
|
-
content,
|
|
2328
|
-
startLine: 1,
|
|
2329
|
-
endLine: lines.length,
|
|
2330
|
-
type: "file"
|
|
2331
|
-
}
|
|
2332
|
-
];
|
|
2333
|
-
}
|
|
2334
|
-
for (let i = 0;i < lines.length; i += CHUNK_SIZE - OVERLAP) {
|
|
2335
|
-
const endIdx = Math.min(i + CHUNK_SIZE, lines.length);
|
|
2336
|
-
chunks.push({
|
|
2337
|
-
content: lines.slice(i, endIdx).join(`
|
|
2338
|
-
`),
|
|
2339
|
-
startLine: i + 1,
|
|
2340
|
-
endLine: endIdx,
|
|
2341
|
-
type: "block"
|
|
2342
|
-
});
|
|
2343
|
-
if (endIdx >= lines.length)
|
|
2344
|
-
break;
|
|
2345
|
-
}
|
|
2346
|
-
return chunks;
|
|
2347
|
-
}
|
|
2348
|
-
function generateChunkId(filepath, startLine, endLine) {
|
|
2349
|
-
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2350
|
-
return `${safePath}-${startLine}-${endLine}`;
|
|
2351
|
-
}
|
|
2352
|
-
var init_parseCode = () => {};
|
|
2353
|
-
|
|
2354
|
-
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2355
|
-
var init_fileIndexStorage = __esm(() => {
|
|
2356
|
-
init_entities();
|
|
2357
|
-
});
|
|
2358
|
-
|
|
2359
2217
|
// src/domain/services/keywords.ts
|
|
2360
2218
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2361
2219
|
const keywords = new Set;
|
|
@@ -2544,166 +2402,27 @@ var init_keywords = __esm(() => {
|
|
|
2544
2402
|
};
|
|
2545
2403
|
});
|
|
2546
2404
|
|
|
2547
|
-
// src/
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
class SymbolicIndex {
|
|
2552
|
-
meta = null;
|
|
2553
|
-
fileSummaries = new Map;
|
|
2554
|
-
bm25Index = null;
|
|
2555
|
-
symbolicPath;
|
|
2556
|
-
moduleId;
|
|
2557
|
-
constructor(indexDir, moduleId) {
|
|
2558
|
-
this.symbolicPath = path7.join(indexDir, "index", moduleId, "symbolic");
|
|
2559
|
-
this.moduleId = moduleId;
|
|
2405
|
+
// src/domain/services/similarity.ts
|
|
2406
|
+
function cosineSimilarity(a, b) {
|
|
2407
|
+
if (a.length !== b.length) {
|
|
2408
|
+
throw new Error(`Vector length mismatch: ${a.length} vs ${b.length}`);
|
|
2560
2409
|
}
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
moduleId: this.moduleId,
|
|
2569
|
-
fileCount: 0,
|
|
2570
|
-
bm25Data: {
|
|
2571
|
-
avgDocLength: 0,
|
|
2572
|
-
documentFrequencies: {},
|
|
2573
|
-
totalDocs: 0
|
|
2574
|
-
}
|
|
2575
|
-
};
|
|
2576
|
-
this.bm25Index = new BM25Index;
|
|
2577
|
-
}
|
|
2578
|
-
}
|
|
2579
|
-
addFile(summary) {
|
|
2580
|
-
this.fileSummaries.set(summary.filepath, summary);
|
|
2581
|
-
}
|
|
2582
|
-
removeFile(filepath) {
|
|
2583
|
-
return this.fileSummaries.delete(filepath);
|
|
2584
|
-
}
|
|
2585
|
-
buildBM25Index() {
|
|
2586
|
-
this.bm25Index = new BM25Index;
|
|
2587
|
-
for (const [filepath, summary] of this.fileSummaries) {
|
|
2588
|
-
const content = [
|
|
2589
|
-
...summary.keywords,
|
|
2590
|
-
...summary.exports,
|
|
2591
|
-
...extractPathKeywords(filepath)
|
|
2592
|
-
].join(" ");
|
|
2593
|
-
this.bm25Index.addDocuments([{ id: filepath, content }]);
|
|
2594
|
-
}
|
|
2595
|
-
if (this.meta) {
|
|
2596
|
-
this.meta.fileCount = this.fileSummaries.size;
|
|
2597
|
-
this.meta.bm25Data.totalDocs = this.fileSummaries.size;
|
|
2598
|
-
}
|
|
2599
|
-
}
|
|
2600
|
-
findCandidates(query, maxCandidates = 20) {
|
|
2601
|
-
if (!this.bm25Index) {
|
|
2602
|
-
return Array.from(this.fileSummaries.keys());
|
|
2603
|
-
}
|
|
2604
|
-
const results = this.bm25Index.search(query, maxCandidates);
|
|
2605
|
-
return results.map((r) => r.id);
|
|
2606
|
-
}
|
|
2607
|
-
getAllFiles() {
|
|
2608
|
-
return Array.from(this.fileSummaries.keys());
|
|
2609
|
-
}
|
|
2610
|
-
getFileSummary(filepath) {
|
|
2611
|
-
return this.fileSummaries.get(filepath);
|
|
2612
|
-
}
|
|
2613
|
-
async save() {
|
|
2614
|
-
if (!this.meta)
|
|
2615
|
-
throw new Error("Index not initialized");
|
|
2616
|
-
this.meta.lastUpdated = new Date().toISOString();
|
|
2617
|
-
this.meta.fileCount = this.fileSummaries.size;
|
|
2618
|
-
await fs3.mkdir(this.symbolicPath, { recursive: true });
|
|
2619
|
-
const metaPath = path7.join(this.symbolicPath, "_meta.json");
|
|
2620
|
-
await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
|
|
2621
|
-
for (const [filepath, summary] of this.fileSummaries) {
|
|
2622
|
-
const summaryPath = this.getFileSummaryPath(filepath);
|
|
2623
|
-
await fs3.mkdir(path7.dirname(summaryPath), { recursive: true });
|
|
2624
|
-
await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
2625
|
-
}
|
|
2626
|
-
}
|
|
2627
|
-
async load() {
|
|
2628
|
-
const metaPath = path7.join(this.symbolicPath, "_meta.json");
|
|
2629
|
-
const metaContent = await fs3.readFile(metaPath, "utf-8");
|
|
2630
|
-
this.meta = JSON.parse(metaContent);
|
|
2631
|
-
this.fileSummaries.clear();
|
|
2632
|
-
await this.loadFileSummariesRecursive(this.symbolicPath);
|
|
2633
|
-
this.buildBM25Index();
|
|
2634
|
-
}
|
|
2635
|
-
async loadFileSummariesRecursive(dir) {
|
|
2636
|
-
try {
|
|
2637
|
-
const entries = await fs3.readdir(dir, { withFileTypes: true });
|
|
2638
|
-
for (const entry of entries) {
|
|
2639
|
-
const fullPath = path7.join(dir, entry.name);
|
|
2640
|
-
if (entry.isDirectory()) {
|
|
2641
|
-
await this.loadFileSummariesRecursive(fullPath);
|
|
2642
|
-
} else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
|
|
2643
|
-
try {
|
|
2644
|
-
const content = await fs3.readFile(fullPath, "utf-8");
|
|
2645
|
-
const summary = JSON.parse(content);
|
|
2646
|
-
if (summary.filepath) {
|
|
2647
|
-
this.fileSummaries.set(summary.filepath, summary);
|
|
2648
|
-
}
|
|
2649
|
-
} catch {}
|
|
2650
|
-
}
|
|
2651
|
-
}
|
|
2652
|
-
} catch {}
|
|
2653
|
-
}
|
|
2654
|
-
getFileSummaryPath(filepath) {
|
|
2655
|
-
const jsonPath = filepath.replace(/\.[^.]+$/, ".json");
|
|
2656
|
-
return path7.join(this.symbolicPath, jsonPath);
|
|
2657
|
-
}
|
|
2658
|
-
async deleteFileSummary(filepath) {
|
|
2659
|
-
try {
|
|
2660
|
-
await fs3.unlink(this.getFileSummaryPath(filepath));
|
|
2661
|
-
} catch {}
|
|
2662
|
-
this.fileSummaries.delete(filepath);
|
|
2663
|
-
}
|
|
2664
|
-
async exists() {
|
|
2665
|
-
try {
|
|
2666
|
-
const metaPath = path7.join(this.symbolicPath, "_meta.json");
|
|
2667
|
-
await fs3.access(metaPath);
|
|
2668
|
-
return true;
|
|
2669
|
-
} catch {
|
|
2670
|
-
return false;
|
|
2671
|
-
}
|
|
2672
|
-
}
|
|
2673
|
-
get size() {
|
|
2674
|
-
return this.fileSummaries.size;
|
|
2675
|
-
}
|
|
2676
|
-
clear() {
|
|
2677
|
-
this.fileSummaries.clear();
|
|
2678
|
-
if (this.meta) {
|
|
2679
|
-
this.meta.fileCount = 0;
|
|
2680
|
-
this.meta.bm25Data = {
|
|
2681
|
-
avgDocLength: 0,
|
|
2682
|
-
documentFrequencies: {},
|
|
2683
|
-
totalDocs: 0
|
|
2684
|
-
};
|
|
2685
|
-
}
|
|
2686
|
-
this.bm25Index = new BM25Index;
|
|
2410
|
+
let dotProduct = 0;
|
|
2411
|
+
let normA = 0;
|
|
2412
|
+
let normB = 0;
|
|
2413
|
+
for (let i = 0;i < a.length; i++) {
|
|
2414
|
+
dotProduct += a[i] * b[i];
|
|
2415
|
+
normA += a[i] * a[i];
|
|
2416
|
+
normB += b[i] * b[i];
|
|
2687
2417
|
}
|
|
2418
|
+
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
|
|
2419
|
+
if (magnitude === 0)
|
|
2420
|
+
return 0;
|
|
2421
|
+
return dotProduct / magnitude;
|
|
2688
2422
|
}
|
|
2689
|
-
var init_symbolicIndex = __esm(() => {
|
|
2690
|
-
init_keywords();
|
|
2691
|
-
});
|
|
2692
|
-
|
|
2693
|
-
// src/infrastructure/storage/index.ts
|
|
2694
|
-
var init_storage = __esm(() => {
|
|
2695
|
-
init_fileIndexStorage();
|
|
2696
|
-
init_symbolicIndex();
|
|
2697
|
-
});
|
|
2698
2423
|
|
|
2699
|
-
// src/
|
|
2700
|
-
|
|
2701
|
-
__export(exports_typescript, {
|
|
2702
|
-
TypeScriptModule: () => TypeScriptModule,
|
|
2703
|
-
DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
|
|
2704
|
-
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
2705
|
-
});
|
|
2706
|
-
import * as path8 from "path";
|
|
2424
|
+
// src/domain/services/queryIntent.ts
|
|
2425
|
+
import * as path7 from "path";
|
|
2707
2426
|
function detectQueryIntent(queryTerms) {
|
|
2708
2427
|
const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
|
|
2709
2428
|
const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
|
|
@@ -2715,10 +2434,20 @@ function detectQueryIntent(queryTerms) {
|
|
|
2715
2434
|
}
|
|
2716
2435
|
return "neutral";
|
|
2717
2436
|
}
|
|
2437
|
+
function extractQueryTerms(query) {
|
|
2438
|
+
return query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
|
|
2439
|
+
}
|
|
2440
|
+
function isSourceCodeFile(filepath) {
|
|
2441
|
+
const ext = path7.extname(filepath).toLowerCase();
|
|
2442
|
+
return SOURCE_CODE_EXTENSIONS.includes(ext);
|
|
2443
|
+
}
|
|
2444
|
+
function isDocFile(filepath) {
|
|
2445
|
+
const ext = path7.extname(filepath).toLowerCase();
|
|
2446
|
+
return DOC_EXTENSIONS.includes(ext);
|
|
2447
|
+
}
|
|
2718
2448
|
function calculateFileTypeBoost(filepath, queryTerms) {
|
|
2719
|
-
const
|
|
2720
|
-
const
|
|
2721
|
-
const isDoc = DOC_EXTENSIONS.includes(ext);
|
|
2449
|
+
const isSourceCode = isSourceCodeFile(filepath);
|
|
2450
|
+
const isDoc = isDocFile(filepath);
|
|
2722
2451
|
const intent = detectQueryIntent(queryTerms);
|
|
2723
2452
|
if (intent === "implementation") {
|
|
2724
2453
|
if (isSourceCode) {
|
|
@@ -2734,103 +2463,1110 @@ function calculateFileTypeBoost(filepath, queryTerms) {
|
|
|
2734
2463
|
}
|
|
2735
2464
|
return 0;
|
|
2736
2465
|
}
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2752
|
-
|
|
2466
|
+
var IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
|
|
2467
|
+
var init_queryIntent = __esm(() => {
|
|
2468
|
+
IMPLEMENTATION_TERMS = [
|
|
2469
|
+
"function",
|
|
2470
|
+
"method",
|
|
2471
|
+
"class",
|
|
2472
|
+
"interface",
|
|
2473
|
+
"implement",
|
|
2474
|
+
"implementation",
|
|
2475
|
+
"endpoint",
|
|
2476
|
+
"route",
|
|
2477
|
+
"handler",
|
|
2478
|
+
"controller",
|
|
2479
|
+
"module",
|
|
2480
|
+
"code"
|
|
2481
|
+
];
|
|
2482
|
+
DOCUMENTATION_TERMS = [
|
|
2483
|
+
"documentation",
|
|
2484
|
+
"docs",
|
|
2485
|
+
"guide",
|
|
2486
|
+
"tutorial",
|
|
2487
|
+
"readme",
|
|
2488
|
+
"how",
|
|
2489
|
+
"what",
|
|
2490
|
+
"why",
|
|
2491
|
+
"explain",
|
|
2492
|
+
"overview",
|
|
2493
|
+
"getting",
|
|
2494
|
+
"started",
|
|
2495
|
+
"requirements",
|
|
2496
|
+
"setup",
|
|
2497
|
+
"install",
|
|
2498
|
+
"configure",
|
|
2499
|
+
"configuration"
|
|
2500
|
+
];
|
|
2501
|
+
SOURCE_CODE_EXTENSIONS = [
|
|
2502
|
+
".ts",
|
|
2503
|
+
".tsx",
|
|
2504
|
+
".js",
|
|
2505
|
+
".jsx",
|
|
2506
|
+
".mjs",
|
|
2507
|
+
".cjs",
|
|
2508
|
+
".py",
|
|
2509
|
+
".go",
|
|
2510
|
+
".rs",
|
|
2511
|
+
".java"
|
|
2512
|
+
];
|
|
2513
|
+
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
2514
|
+
});
|
|
2515
|
+
|
|
2516
|
+
// src/domain/services/chunking.ts
|
|
2517
|
+
function createLineBasedChunks(content, options = {}) {
|
|
2518
|
+
const {
|
|
2519
|
+
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
2520
|
+
overlap = DEFAULT_OVERLAP,
|
|
2521
|
+
minLinesForMultipleChunks = chunkSize
|
|
2522
|
+
} = options;
|
|
2523
|
+
const lines = content.split(`
|
|
2524
|
+
`);
|
|
2525
|
+
const chunks = [];
|
|
2526
|
+
if (lines.length <= minLinesForMultipleChunks) {
|
|
2527
|
+
return [
|
|
2528
|
+
{
|
|
2529
|
+
content,
|
|
2530
|
+
startLine: 1,
|
|
2531
|
+
endLine: lines.length,
|
|
2532
|
+
type: "file"
|
|
2533
|
+
}
|
|
2534
|
+
];
|
|
2535
|
+
}
|
|
2536
|
+
for (let i = 0;i < lines.length; i += chunkSize - overlap) {
|
|
2537
|
+
const endIdx = Math.min(i + chunkSize, lines.length);
|
|
2538
|
+
chunks.push({
|
|
2539
|
+
content: lines.slice(i, endIdx).join(`
|
|
2540
|
+
`),
|
|
2541
|
+
startLine: i + 1,
|
|
2542
|
+
endLine: endIdx,
|
|
2543
|
+
type: "block"
|
|
2544
|
+
});
|
|
2545
|
+
if (endIdx >= lines.length)
|
|
2546
|
+
break;
|
|
2547
|
+
}
|
|
2548
|
+
return chunks;
|
|
2549
|
+
}
|
|
2550
|
+
function generateChunkId(filepath, startLine, endLine) {
|
|
2551
|
+
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2552
|
+
return `${safePath}-${startLine}-${endLine}`;
|
|
2553
|
+
}
|
|
2554
|
+
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2555
|
+
|
|
2556
|
+
// src/domain/services/index.ts
|
|
2557
|
+
var init_services = __esm(() => {
|
|
2558
|
+
init_keywords();
|
|
2559
|
+
init_queryIntent();
|
|
2560
|
+
});
|
|
2561
|
+
|
|
2562
|
+
// src/modules/language/typescript/parseCode.ts
|
|
2563
|
+
import * as ts from "typescript";
|
|
2564
|
+
function parseTypeScriptCode(content, filepath) {
|
|
2565
|
+
return parseTypeScript(content, filepath);
|
|
2566
|
+
}
|
|
2567
|
+
function parseTypeScript(content, filepath) {
|
|
2568
|
+
const chunks = [];
|
|
2569
|
+
const lines = content.split(`
|
|
2570
|
+
`);
|
|
2571
|
+
const sourceFile = ts.createSourceFile(filepath, content, ts.ScriptTarget.Latest, true, filepath.endsWith(".tsx") || filepath.endsWith(".jsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS);
|
|
2572
|
+
function getLineNumbers(node) {
|
|
2573
|
+
const start = sourceFile.getLineAndCharacterOfPosition(node.getStart());
|
|
2574
|
+
const end = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
|
|
2575
|
+
return {
|
|
2576
|
+
startLine: start.line + 1,
|
|
2577
|
+
endLine: end.line + 1
|
|
2578
|
+
};
|
|
2579
|
+
}
|
|
2580
|
+
function getNodeText(node) {
|
|
2581
|
+
return node.getText(sourceFile);
|
|
2582
|
+
}
|
|
2583
|
+
function isExported(node) {
|
|
2584
|
+
if (!ts.canHaveModifiers(node))
|
|
2585
|
+
return false;
|
|
2586
|
+
const modifiers = ts.getModifiers(node);
|
|
2587
|
+
return modifiers?.some((m) => m.kind === ts.SyntaxKind.ExportKeyword) ?? false;
|
|
2588
|
+
}
|
|
2589
|
+
function getJSDoc(node) {
|
|
2590
|
+
const jsDocNodes = ts.getJSDocCommentsAndTags(node);
|
|
2591
|
+
if (jsDocNodes.length === 0)
|
|
2592
|
+
return;
|
|
2593
|
+
return jsDocNodes.map((doc) => doc.getText(sourceFile)).join(`
|
|
2594
|
+
`);
|
|
2595
|
+
}
|
|
2596
|
+
function getFunctionName(node) {
|
|
2597
|
+
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2598
|
+
return node.name.text;
|
|
2599
|
+
}
|
|
2600
|
+
if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2601
|
+
return node.name.text;
|
|
2602
|
+
}
|
|
2603
|
+
if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) {
|
|
2604
|
+
return node.name.text;
|
|
2605
|
+
}
|
|
2606
|
+
return;
|
|
2607
|
+
}
|
|
2608
|
+
function visit(node) {
|
|
2609
|
+
const { startLine, endLine } = getLineNumbers(node);
|
|
2610
|
+
if (ts.isFunctionDeclaration(node) && node.name) {
|
|
2611
|
+
chunks.push({
|
|
2612
|
+
content: getNodeText(node),
|
|
2613
|
+
startLine,
|
|
2614
|
+
endLine,
|
|
2615
|
+
type: "function",
|
|
2616
|
+
name: node.name.text,
|
|
2617
|
+
isExported: isExported(node),
|
|
2618
|
+
jsDoc: getJSDoc(node)
|
|
2619
|
+
});
|
|
2620
|
+
return;
|
|
2621
|
+
}
|
|
2622
|
+
if (ts.isVariableStatement(node)) {
|
|
2623
|
+
for (const decl of node.declarationList.declarations) {
|
|
2624
|
+
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2625
|
+
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2626
|
+
chunks.push({
|
|
2627
|
+
content: getNodeText(node),
|
|
2628
|
+
startLine,
|
|
2629
|
+
endLine,
|
|
2630
|
+
type: "function",
|
|
2631
|
+
name,
|
|
2632
|
+
isExported: isExported(node),
|
|
2633
|
+
jsDoc: getJSDoc(node)
|
|
2634
|
+
});
|
|
2635
|
+
return;
|
|
2636
|
+
}
|
|
2637
|
+
}
|
|
2638
|
+
}
|
|
2639
|
+
if (ts.isClassDeclaration(node) && node.name) {
|
|
2640
|
+
chunks.push({
|
|
2641
|
+
content: getNodeText(node),
|
|
2642
|
+
startLine,
|
|
2643
|
+
endLine,
|
|
2644
|
+
type: "class",
|
|
2645
|
+
name: node.name.text,
|
|
2646
|
+
isExported: isExported(node),
|
|
2647
|
+
jsDoc: getJSDoc(node)
|
|
2648
|
+
});
|
|
2649
|
+
return;
|
|
2650
|
+
}
|
|
2651
|
+
if (ts.isInterfaceDeclaration(node)) {
|
|
2652
|
+
chunks.push({
|
|
2653
|
+
content: getNodeText(node),
|
|
2654
|
+
startLine,
|
|
2655
|
+
endLine,
|
|
2656
|
+
type: "interface",
|
|
2657
|
+
name: node.name.text,
|
|
2658
|
+
isExported: isExported(node),
|
|
2659
|
+
jsDoc: getJSDoc(node)
|
|
2660
|
+
});
|
|
2661
|
+
return;
|
|
2662
|
+
}
|
|
2663
|
+
if (ts.isTypeAliasDeclaration(node)) {
|
|
2664
|
+
chunks.push({
|
|
2665
|
+
content: getNodeText(node),
|
|
2666
|
+
startLine,
|
|
2667
|
+
endLine,
|
|
2668
|
+
type: "type",
|
|
2669
|
+
name: node.name.text,
|
|
2670
|
+
isExported: isExported(node),
|
|
2671
|
+
jsDoc: getJSDoc(node)
|
|
2672
|
+
});
|
|
2673
|
+
return;
|
|
2674
|
+
}
|
|
2675
|
+
if (ts.isEnumDeclaration(node)) {
|
|
2676
|
+
chunks.push({
|
|
2677
|
+
content: getNodeText(node),
|
|
2678
|
+
startLine,
|
|
2679
|
+
endLine,
|
|
2680
|
+
type: "enum",
|
|
2681
|
+
name: node.name.text,
|
|
2682
|
+
isExported: isExported(node),
|
|
2683
|
+
jsDoc: getJSDoc(node)
|
|
2684
|
+
});
|
|
2685
|
+
return;
|
|
2686
|
+
}
|
|
2687
|
+
if (ts.isVariableStatement(node) && isExported(node)) {
|
|
2688
|
+
for (const decl of node.declarationList.declarations) {
|
|
2689
|
+
if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
|
|
2690
|
+
continue;
|
|
2691
|
+
}
|
|
2692
|
+
const name = ts.isIdentifier(decl.name) ? decl.name.text : undefined;
|
|
2693
|
+
chunks.push({
|
|
2694
|
+
content: getNodeText(node),
|
|
2695
|
+
startLine,
|
|
2696
|
+
endLine,
|
|
2697
|
+
type: "variable",
|
|
2698
|
+
name,
|
|
2699
|
+
isExported: true,
|
|
2700
|
+
jsDoc: getJSDoc(node)
|
|
2701
|
+
});
|
|
2702
|
+
}
|
|
2703
|
+
return;
|
|
2704
|
+
}
|
|
2705
|
+
ts.forEachChild(node, visit);
|
|
2706
|
+
}
|
|
2707
|
+
ts.forEachChild(sourceFile, visit);
|
|
2708
|
+
if (chunks.length === 0) {
|
|
2709
|
+
const lines2 = content.split(`
|
|
2710
|
+
`);
|
|
2711
|
+
return [
|
|
2712
|
+
{
|
|
2713
|
+
content,
|
|
2714
|
+
startLine: 1,
|
|
2715
|
+
endLine: lines2.length,
|
|
2716
|
+
type: "file"
|
|
2717
|
+
}
|
|
2718
|
+
];
|
|
2719
|
+
}
|
|
2720
|
+
return chunks;
|
|
2721
|
+
}
|
|
2722
|
+
/**
 * Build a chunk identifier from a file path and a line range.
 *
 * Path separators (`/` and `\`) are replaced with `-` and every `.` with `_`,
 * then the start and end lines are appended.
 * Example: ("src/a.ts", 1, 5) -> "src-a_ts-1-5".
 *
 * @param {string} filepath - Path of the file the chunk belongs to.
 * @param {number} startLine - First line of the chunk.
 * @param {number} endLine - Last line of the chunk.
 * @returns {string} Identifier of the form `<safePath>-<startLine>-<endLine>`.
 */
function generateChunkId2(filepath, startLine, endLine) {
  const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
  return `${safePath}-${startLine}-${endLine}`;
}
|
|
2726
|
+
// Initializer for the parseCode module; the function body is empty, so calling it does nothing.
var init_parseCode = () => {};
|
|
2727
|
+
|
|
2728
|
+
// src/infrastructure/storage/fileIndexStorage.ts
|
|
2729
|
+
// Initializer for the fileIndexStorage module. The callback passed to __esm
// calls init_entities(). (__esm is defined outside this section; presumably it
// makes the callback run once on first call — confirm against its definition.)
var init_fileIndexStorage = __esm(() => {
  init_entities();
});
|
|
2732
|
+
|
|
2733
|
+
// src/infrastructure/storage/symbolicIndex.ts
|
|
2734
|
+
import * as fs3 from "fs/promises";
|
|
2735
|
+
import * as path8 from "path";
|
|
2736
|
+
|
|
2737
|
+
/**
 * Per-module store of file summaries, persisted as JSON files, plus an
 * in-memory BM25 index built from each summary's keywords, exports and
 * path keywords.
 *
 * Files live under `<indexDir>/index/<moduleId>/symbolic`:
 *   - `_meta.json`         index metadata
 *   - `<source path>.json` one summary per indexed file
 */
class SymbolicIndex {
  meta = null;
  fileSummaries = new Map();
  bm25Index = null;
  symbolicPath;
  moduleId;

  /**
   * @param {string} indexDir - Root directory of the index.
   * @param {string} moduleId - Identifier of the owning module; used as a path segment.
   */
  constructor(indexDir, moduleId) {
    this.symbolicPath = path8.join(indexDir, "index", moduleId, "symbolic");
    this.moduleId = moduleId;
  }

  /**
   * Load the index from disk; if loading fails for any reason, start from
   * fresh metadata and an empty BM25 index instead. Never rejects because of
   * a load failure.
   */
  async initialize() {
    try {
      await this.load();
    } catch {
      this.meta = {
        version: "1.0.0",
        lastUpdated: new Date().toISOString(),
        moduleId: this.moduleId,
        fileCount: 0,
        bm25Data: {
          avgDocLength: 0,
          documentFrequencies: {},
          totalDocs: 0
        }
      };
      this.bm25Index = new BM25Index;
    }
  }

  /**
   * Add or replace the summary stored under `summary.filepath`.
   * @param {object} summary - File summary; must carry a `filepath` property.
   */
  addFile(summary) {
    this.fileSummaries.set(summary.filepath, summary);
  }

  /**
   * Drop a summary from memory only (the JSON file on disk is untouched).
   * @param {string} filepath
   * @returns {boolean} True when a summary existed and was removed.
   */
  removeFile(filepath) {
    return this.fileSummaries.delete(filepath);
  }

  /**
   * Rebuild the BM25 index from scratch: one document per file whose text is
   * the file's keywords, exports and path keywords joined by spaces. Also
   * refreshes the file/document counts in the metadata when it is present.
   */
  buildBM25Index() {
    this.bm25Index = new BM25Index;
    for (const [filepath, summary] of this.fileSummaries) {
      const content = [
        ...summary.keywords,
        ...summary.exports,
        ...extractPathKeywords(filepath)
      ].join(" ");
      this.bm25Index.addDocuments([{ id: filepath, content }]);
    }
    if (this.meta) {
      this.meta.fileCount = this.fileSummaries.size;
      this.meta.bm25Data.totalDocs = this.fileSummaries.size;
    }
  }

  /**
   * Return candidate file paths for a query.
   * @param {string} query
   * @param {number} [maxCandidates=20] - Limit passed to the BM25 search.
   * @returns {string[]} BM25 hits' ids, or every known file when no BM25 index exists yet.
   */
  findCandidates(query, maxCandidates = 20) {
    if (!this.bm25Index) {
      return Array.from(this.fileSummaries.keys());
    }
    const results = this.bm25Index.search(query, maxCandidates);
    return results.map((r) => r.id);
  }

  /** @returns {string[]} Paths of all files that currently have a summary. */
  getAllFiles() {
    return Array.from(this.fileSummaries.keys());
  }

  /**
   * @param {string} filepath
   * @returns {object|undefined} The stored summary, if any.
   */
  getFileSummary(filepath) {
    return this.fileSummaries.get(filepath);
  }

  /**
   * Write the metadata and every in-memory summary to disk.
   * @throws {Error} "Index not initialized" when no metadata is loaded.
   */
  async save() {
    if (!this.meta) {
      throw new Error("Index not initialized");
    }
    this.meta.lastUpdated = new Date().toISOString();
    this.meta.fileCount = this.fileSummaries.size;
    await fs3.mkdir(this.symbolicPath, { recursive: true });
    const metaPath = path8.join(this.symbolicPath, "_meta.json");
    await fs3.writeFile(metaPath, JSON.stringify(this.meta, null, 2));
    for (const [filepath, summary] of this.fileSummaries) {
      const summaryPath = this.getFileSummaryPath(filepath);
      await fs3.mkdir(path8.dirname(summaryPath), { recursive: true });
      await fs3.writeFile(summaryPath, JSON.stringify(summary, null, 2));
    }
  }

  /**
   * Read the metadata and all summaries from disk, then rebuild the BM25
   * index. Rejects when `_meta.json` is missing or is not valid JSON.
   */
  async load() {
    const metaPath = path8.join(this.symbolicPath, "_meta.json");
    const metaContent = await fs3.readFile(metaPath, "utf-8");
    this.meta = JSON.parse(metaContent);
    this.fileSummaries.clear();
    await this.loadFileSummariesRecursive(this.symbolicPath);
    this.buildBM25Index();
  }

  /**
   * Walk `dir` recursively and register every `*.json` file (except
   * `_meta.json`) whose parsed content has a `filepath` property.
   * Best effort: unreadable directories and unreadable or malformed summary
   * files are skipped silently.
   * @param {string} dir
   */
  async loadFileSummariesRecursive(dir) {
    try {
      const entries = await fs3.readdir(dir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = path8.join(dir, entry.name);
        if (entry.isDirectory()) {
          await this.loadFileSummariesRecursive(fullPath);
        } else if (entry.name.endsWith(".json") && entry.name !== "_meta.json") {
          try {
            const content = await fs3.readFile(fullPath, "utf-8");
            const summary = JSON.parse(content);
            if (summary.filepath) {
              this.fileSummaries.set(summary.filepath, summary);
            }
          } catch {}
        }
      }
    } catch {}
  }

  /**
   * Map a source file path to the path of its summary file.
   *
   * Only the extension of the last path segment is swapped for `.json`.
   * A dot inside a directory name can no longer swallow later segments, and
   * extension-less files still get a `.json` suffix so that
   * loadFileSummariesRecursive can find them again.
   *
   * @param {string} filepath
   * @returns {string}
   */
  getFileSummaryPath(filepath) {
    const ext = path8.extname(filepath);
    const withoutExt = ext ? filepath.slice(0, -ext.length) : filepath;
    return path8.join(this.symbolicPath, `${withoutExt}.json`);
  }

  /**
   * Delete a file's summary from disk (ignoring any unlink error, e.g. the
   * file not existing) and from memory.
   * @param {string} filepath
   */
  async deleteFileSummary(filepath) {
    try {
      await fs3.unlink(this.getFileSummaryPath(filepath));
    } catch {}
    this.fileSummaries.delete(filepath);
  }

  /** @returns {Promise<boolean>} Whether `_meta.json` is accessible on disk. */
  async exists() {
    try {
      const metaPath = path8.join(this.symbolicPath, "_meta.json");
      await fs3.access(metaPath);
      return true;
    } catch {
      return false;
    }
  }

  /** Number of summaries currently held in memory. */
  get size() {
    return this.fileSummaries.size;
  }

  /** Reset the in-memory state: summaries, metadata counters and BM25 index. */
  clear() {
    this.fileSummaries.clear();
    if (this.meta) {
      this.meta.fileCount = 0;
      this.meta.bm25Data = {
        avgDocLength: 0,
        documentFrequencies: {},
        totalDocs: 0
      };
    }
    this.bm25Index = new BM25Index;
  }
}
|
|
2875
|
+
// Initializer for the symbolicIndex module. The callback passed to __esm
// calls init_keywords() before the module is used.
var init_symbolicIndex = __esm(() => {
  init_keywords();
});
|
|
2878
|
+
|
|
2879
|
+
// src/infrastructure/storage/index.ts
|
|
2880
|
+
// Initializer for the storage barrel module: runs the initializers of both
// storage implementations, fileIndexStorage first, then symbolicIndex.
var init_storage = __esm(() => {
  init_fileIndexStorage();
  init_symbolicIndex();
});
|
|
2884
|
+
|
|
2885
|
+
// src/modules/language/typescript/index.ts
|
|
2886
|
+
// Export namespace object for the TypeScript module. Each entry is registered
// as a getter function so the value is looked up when accessed; the public
// names DEFAULT_TOP_K / DEFAULT_MIN_SCORE map to this module's suffixed locals.
var exports_typescript = {};
__export(exports_typescript, {
  isTypeScriptFile: () => isTypeScriptFile,
  TypeScriptModule: () => TypeScriptModule,
  TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
  DEFAULT_TOP_K: () => DEFAULT_TOP_K2,
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
});
|
|
2894
|
+
import * as path9 from "path";
|
|
2895
|
+
/**
 * Tell whether a path's extension (compared case-insensitively) is listed in
 * TYPESCRIPT_EXTENSIONS.
 *
 * @param {string} filepath
 * @returns {boolean}
 */
function isTypeScriptFile(filepath) {
  const ext = path9.extname(filepath).toLowerCase();
  return TYPESCRIPT_EXTENSIONS.includes(ext);
}
|
|
2899
|
+
/**
 * Score bonus based on the kind of code a chunk holds.
 *
 * function: 0.05, class/interface: 0.04, type/enum: 0.03, variable: 0.02,
 * anything else (including "file" and "block"): 0.
 *
 * @param {{ type: string }} chunk
 * @returns {number}
 */
function calculateChunkTypeBoost(chunk) {
  switch (chunk.type) {
    case "function":
      return 0.05;
    case "class":
    case "interface":
      return 0.04;
    case "type":
    case "enum":
      return 0.03;
    case "variable":
      return 0.02;
    case "file":
    case "block":
    default:
      return 0;
  }
}
|
|
2917
|
+
/**
 * Score bonus for exported chunks.
 *
 * @param {{ isExported?: boolean }} chunk
 * @returns {number} 0.03 when `chunk.isExported` is truthy, otherwise 0.
 */
function calculateExportBoost(chunk) {
  return chunk.isExported ? 0.03 : 0;
}
|
|
2920
|
+
|
|
2921
|
+
/**
 * Index/search module for TypeScript and JavaScript sources.
 *
 * Indexing parses each file into chunks, embeds them, and records a file
 * summary; `finalize` persists the summaries through a SymbolicIndex.
 * Searching ranks chunks by a weighted mix of embedding similarity and BM25
 * plus small additive boosts (path context, file type, chunk type, export).
 */
class TypeScriptModule {
  id = "language/typescript";
  name = "TypeScript Search";
  description = "TypeScript-aware code search with AST parsing and semantic embeddings";
  version = "1.0.0";
  embeddingConfig = null;
  symbolicIndex = null;
  // Summaries collected by indexFile() until finalize() writes them out.
  pendingSummaries = new Map();
  rootDir = "";
  logger = undefined;

  /**
   * Configure embeddings from the module config and reset pending summaries.
   * When `config.options.logger` is set it is also attached to the embedding
   * configuration.
   *
   * @param {object} config - Module configuration.
   */
  async initialize(config) {
    this.embeddingConfig = getEmbeddingConfigFromModule(config);
    this.logger = config.options?.logger;
    if (this.logger) {
      this.embeddingConfig = {
        ...this.embeddingConfig,
        logger: this.logger
      };
    }
    configureEmbeddings(this.embeddingConfig);
    this.pendingSummaries.clear();
  }

  /**
   * Parse, embed and summarize one file.
   *
   * @param {string} filepath - Path of the file being indexed.
   * @param {string} content - Full text of the file.
   * @param {object} ctx - Index context; this method reads `ctx.rootDir` and
   *   calls `ctx.getFileStats(filepath)`.
   * @returns {Promise<object|null>} `{ filepath, lastModified, chunks,
   *   moduleData, references }`, or null when the file is not handled by this
   *   module or the parser produced no chunks.
   */
  async indexFile(filepath, content, ctx) {
    if (!isTypeScriptFile(filepath)) {
      return null;
    }
    this.rootDir = ctx.rootDir;
    const parsedChunks = parseTypeScriptCode(content, filepath);
    if (parsedChunks.length === 0) {
      return null;
    }
    const pathContext = parsePathContext(filepath);
    const pathPrefix = formatPathContextForEmbedding(pathContext);
    // Text that gets embedded: path context, then the chunk name (if any), then the code.
    const chunkContents = parsedChunks.map((c) => {
      const namePrefix = c.name ? `${c.name}: ` : "";
      return `${pathPrefix} ${namePrefix}${c.content}`;
    });
    const embeddings = await getEmbeddings(chunkContents);
    const chunks = parsedChunks.map((pc) => ({
      id: generateChunkId2(filepath, pc.startLine, pc.endLine),
      content: pc.content,
      startLine: pc.startLine,
      endLine: pc.endLine,
      type: pc.type,
      name: pc.name,
      isExported: pc.isExported,
      jsDoc: pc.jsDoc
    }));
    const references = this.extractReferences(content, filepath);
    const stats = await ctx.getFileStats(filepath);
    const currentConfig = getEmbeddingConfig();
    // embeddings[i] belongs to chunks[i]; search() relies on this alignment.
    const moduleData = {
      embeddings,
      embeddingModel: currentConfig.model
    };
    const chunkTypes = [
      ...new Set(parsedChunks.map((pc) => pc.type))
    ];
    const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
    const allKeywords = new Set();
    for (const pc of parsedChunks) {
      const keywords = extractKeywords(pc.content, pc.name);
      keywords.forEach((k) => allKeywords.add(k));
    }
    pathContext.keywords.forEach((k) => allKeywords.add(k));
    const fileSummary = {
      filepath,
      chunkCount: chunks.length,
      chunkTypes,
      keywords: Array.from(allKeywords),
      exports,
      lastModified: stats.lastModified,
      pathContext: {
        segments: pathContext.segments,
        layer: pathContext.layer,
        domain: pathContext.domain,
        depth: pathContext.depth
      }
    };
    this.pendingSummaries.set(filepath, fileSummary);
    return {
      filepath,
      lastModified: stats.lastModified,
      chunks,
      moduleData,
      references
    };
  }

  /**
   * Merge the pending summaries into the on-disk symbolic index, rebuild its
   * BM25 index, save it, and clear the pending set.
   *
   * @param {object} ctx - Index context (`rootDir` and `config` are read).
   */
  async finalize(ctx) {
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
    await this.symbolicIndex.initialize();
    for (const summary of this.pendingSummaries.values()) {
      this.symbolicIndex.addFile(summary);
    }
    this.symbolicIndex.buildBM25Index();
    await this.symbolicIndex.save();
    this.pendingSummaries.clear();
  }

  /**
   * Hybrid search over all indexed chunks of this module.
   *
   * score = SEMANTIC_WEIGHT * cosine + BM25_WEIGHT * normalized BM25 + boosts.
   * A chunk is returned when its score reaches `minScore` or its normalized
   * BM25 score exceeds 0.3. Results are sorted by descending score and cut to
   * `topK`.
   *
   * @param {string} query
   * @param {object} ctx - Search context; `rootDir`, `config`,
   *   `loadFileIndex` and (as fallback) `listIndexedFiles` are used.
   * @param {{ topK?: number, minScore?: number, filePatterns?: string[] }} [options]
   *   `filePatterns` entries of the form `*.ext` match by suffix; any other
   *   entry matches as a substring of the file path.
   * @returns {Promise<object[]>}
   */
  async search(query, ctx, options = {}) {
    const {
      topK = DEFAULT_TOP_K2,
      minScore = DEFAULT_MIN_SCORE2,
      filePatterns
    } = options;
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    const symbolicIndex = new SymbolicIndex(indexDir, this.id);
    let allFiles;
    try {
      await symbolicIndex.initialize();
      allFiles = symbolicIndex.getAllFiles();
    } catch {
      // NOTE(review): SymbolicIndex.initialize() in this file catches its own
      // load errors, so this fallback looks unreachable in practice — confirm
      // whether an empty/missing symbolic index should fall back here too.
      allFiles = await ctx.listIndexedFiles();
    }
    let filesToSearch = allFiles;
    if (filePatterns && filePatterns.length > 0) {
      filesToSearch = allFiles.filter((filepath) => {
        return filePatterns.some((pattern) => {
          if (pattern.startsWith("*.")) {
            // "*.ts" -> ".ts"
            const ext = pattern.slice(1);
            return filepath.endsWith(ext);
          }
          return filepath.includes(pattern);
        });
      });
    }
    const queryEmbedding = await getEmbedding(query);
    const bm25Index = new BM25Index;
    const allChunksData = [];
    for (const filepath of filesToSearch) {
      const fileIndex = await ctx.loadFileIndex(filepath);
      if (!fileIndex) {
        continue;
      }
      const moduleData = fileIndex.moduleData;
      if (!moduleData?.embeddings) {
        continue;
      }
      for (let i = 0; i < fileIndex.chunks.length; i++) {
        const chunk = fileIndex.chunks[i];
        const embedding = moduleData.embeddings[i];
        if (!embedding) {
          continue;
        }
        allChunksData.push({
          filepath: fileIndex.filepath,
          chunk,
          embedding
        });
        bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
      }
    }
    // Over-fetch BM25 hits (3x topK) so the keyword signal covers more chunks than are returned.
    const bm25Results = bm25Index.search(query, topK * 3);
    const bm25Scores = new Map();
    for (const result of bm25Results) {
      bm25Scores.set(result.id, normalizeScore(result.score, 3));
    }
    const queryTerms = extractQueryTerms(query);
    // Per-file boost from stored path context: domain +0.1, layer +0.05, any segment +0.05.
    const pathBoosts = new Map();
    for (const filepath of filesToSearch) {
      const summary = symbolicIndex.getFileSummary(filepath);
      if (summary?.pathContext) {
        let boost = 0;
        const ctx2 = summary.pathContext;
        if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
          boost += 0.1;
        }
        if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
          boost += 0.05;
        }
        const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
        if (segmentMatch) {
          boost += 0.05;
        }
        pathBoosts.set(filepath, boost);
      }
    }
    const results = [];
    for (const { filepath, chunk, embedding } of allChunksData) {
      const semanticScore = cosineSimilarity(queryEmbedding, embedding);
      const bm25Score = bm25Scores.get(chunk.id) || 0;
      const pathBoost = pathBoosts.get(filepath) || 0;
      const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
      const chunkTypeBoost = calculateChunkTypeBoost(chunk);
      const exportBoost = calculateExportBoost(chunk);
      const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
      const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
      if (hybridScore >= minScore || bm25Score > 0.3) {
        results.push({
          filepath,
          chunk,
          score: hybridScore,
          moduleId: this.id,
          context: {
            semanticScore,
            bm25Score,
            pathBoost,
            fileTypeBoost,
            chunkTypeBoost,
            exportBoost
          }
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }

  /**
   * Collect relative module specifiers referenced by a file.
   *
   * Scans for `import ... from "x"` statements first and `require("x")` calls
   * second; only specifiers starting with "." are kept, resolved against the
   * directory of `filepath` and normalized. The import pattern is single-line
   * only (`.` does not cross newlines) and does not cover bare
   * `import "x"` statements.
   *
   * @param {string} content - Source text.
   * @param {string} filepath - Path of the file the text came from.
   * @returns {string[]} Resolved paths, imports before requires, duplicates kept.
   */
  extractReferences(content, filepath) {
    const references = [];
    const dir = path9.dirname(filepath);
    const patterns = [
      /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g,
      /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g
    ];
    for (const pattern of patterns) {
      let match;
      while ((match = pattern.exec(content)) !== null) {
        const importPath = match[1];
        if (importPath.startsWith(".")) {
          references.push(path9.normalize(path9.join(dir, importPath)));
        }
      }
    }
    return references;
  }
}
|
|
3149
|
+
// Search defaults and score weights for the TypeScript module. These stay
// `var` so the functions declared above can reference them regardless of
// declaration order in the bundle.
var DEFAULT_MIN_SCORE2 = 0.15;
var DEFAULT_TOP_K2 = 10;
// Weights of the embedding similarity and the normalized BM25 score in the hybrid score.
var SEMANTIC_WEIGHT = 0.7;
var BM25_WEIGHT = 0.3;
// Assigned by init_typescript below.
var TYPESCRIPT_EXTENSIONS;

// Initializer for the TypeScript module: runs the initializers of its
// dependencies, then fills in the list of handled file extensions.
var init_typescript = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_parseCode();
  init_storage();
  TYPESCRIPT_EXTENSIONS = [
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",
    ".mts",
    ".cts"
  ];
});
|
|
3167
|
+
|
|
3168
|
+
// src/modules/data/json/index.ts
|
|
3169
|
+
// Export namespace object for the JSON module. Each entry is registered as a
// getter function; the public names DEFAULT_TOP_K / DEFAULT_MIN_SCORE map to
// this module's suffixed locals.
var exports_json = {};
__export(exports_json, {
  isJsonFile: () => isJsonFile,
  JsonModule: () => JsonModule,
  JSON_EXTENSIONS: () => JSON_EXTENSIONS,
  DEFAULT_TOP_K: () => DEFAULT_TOP_K3,
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE3
});
|
|
3177
|
+
import * as path10 from "path";
|
|
3178
|
+
/**
 * Tell whether a path's extension (compared case-insensitively) is listed in
 * JSON_EXTENSIONS.
 *
 * @param {string} filepath
 * @returns {boolean}
 */
function isJsonFile(filepath) {
  const ext = path10.extname(filepath).toLowerCase();
  return JSON_EXTENSIONS.includes(ext);
}
|
|
3182
|
+
/**
 * Collect every object key found anywhere inside a parsed JSON value, in
 * depth-first document order. Duplicates are kept; array indices and
 * primitive values contribute nothing.
 *
 * Keys are pushed into one shared array instead of spreading each recursive
 * result into `push(...)`, which can exceed the engine's argument limit on
 * very large documents.
 *
 * @param {*} obj - Parsed JSON value (object, array, primitive or null).
 * @param {string} [prefix=""] - Accepted for backward compatibility. The
 *   returned keys are bare key names, so this value does not affect the result.
 * @returns {string[]}
 */
function extractJsonKeys(obj, prefix = "") {
  const keys = [];
  const walk = (value) => {
    if (value === null || value === undefined) {
      return;
    }
    if (Array.isArray(value)) {
      for (const item of value) {
        walk(item);
      }
    } else if (typeof value === "object") {
      for (const [key, child] of Object.entries(value)) {
        keys.push(key);
        walk(child);
      }
    }
  };
  walk(obj);
  return keys;
}
|
|
3200
|
+
/**
 * Derive search keywords from JSON text.
 *
 * The result holds, de-duplicated and in this order: every object key
 * (via extractJsonKeys), then the words of every string value. String values
 * are split at camelCase boundaries and whitespace, lower-cased, and words of
 * two characters or fewer are dropped.
 *
 * @param {string} content - JSON text.
 * @returns {string[]} Keywords, or an empty array when `content` cannot be
 *   parsed (or key extraction throws).
 */
function extractJsonKeywords(content) {
  try {
    const parsed = JSON.parse(content);
    // A Set keeps first-insertion order, so keys stay ahead of value words.
    const keywords = new Set(extractJsonKeys(parsed));
    const collectStrings = (value) => {
      if (typeof value === "string") {
        const words = value.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/);
        for (const word of words) {
          if (word.length > 2) {
            keywords.add(word);
          }
        }
      } else if (Array.isArray(value)) {
        value.forEach((item) => collectStrings(item));
      } else if (value && typeof value === "object") {
        Object.values(value).forEach((item) => collectStrings(item));
      }
    };
    collectStrings(parsed);
    return [...keywords];
  } catch {
    return [];
  }
}
|
|
3221
|
+
|
|
3222
|
+
/**
 * Index/search module for JSON files.
 *
 * Indexing splits the file into line-based chunks, embeds them and records a
 * file summary with keywords taken from the JSON keys and string values;
 * `finalize` persists the summaries through a SymbolicIndex. Searching ranks
 * chunks by a weighted mix of embedding similarity and BM25.
 */
class JsonModule {
  id = "data/json";
  name = "JSON Search";
  description = "JSON file search with structure-aware indexing";
  version = "1.0.0";
  embeddingConfig = null;
  symbolicIndex = null;
  // Summaries collected by indexFile() until finalize() writes them out.
  pendingSummaries = new Map();
  rootDir = "";
  logger = undefined;

  /**
   * Configure embeddings from the module config and reset pending summaries.
   * When `config.options.logger` is set it is also attached to the embedding
   * configuration.
   *
   * @param {object} config - Module configuration.
   */
  async initialize(config) {
    this.embeddingConfig = getEmbeddingConfigFromModule(config);
    this.logger = config.options?.logger;
    if (this.logger) {
      this.embeddingConfig = {
        ...this.embeddingConfig,
        logger: this.logger
      };
    }
    configureEmbeddings(this.embeddingConfig);
    this.pendingSummaries.clear();
  }

  /**
   * Chunk, embed and summarize one JSON file.
   *
   * @param {string} filepath - Path of the file being indexed.
   * @param {string} content - Full text of the file.
   * @param {object} ctx - Index context; this method reads `ctx.rootDir` and
   *   calls `ctx.getFileStats(filepath)`.
   * @returns {Promise<object|null>} `{ filepath, lastModified, chunks,
   *   moduleData }`, or null when the file is not a JSON file or chunking
   *   produced nothing.
   */
  async indexFile(filepath, content, ctx) {
    if (!isJsonFile(filepath)) {
      return null;
    }
    this.rootDir = ctx.rootDir;
    const textChunks = createLineBasedChunks(content, {
      chunkSize: 50,
      overlap: 10
    });
    if (textChunks.length === 0) {
      return null;
    }
    // Each embedded text is prefixed with the file name.
    const filename = path10.basename(filepath);
    const chunkContents = textChunks.map((c) => `${filename}: ${c.content}`);
    const embeddings = await getEmbeddings(chunkContents);
    const chunks = textChunks.map((tc) => ({
      id: generateChunkId(filepath, tc.startLine, tc.endLine),
      content: tc.content,
      startLine: tc.startLine,
      endLine: tc.endLine,
      type: tc.type
    }));
    // Unparseable JSON is still indexed as text; it just contributes no keys.
    let parsedJson;
    try {
      parsedJson = JSON.parse(content);
    } catch {
      parsedJson = {};
    }
    const jsonKeys = extractJsonKeys(parsedJson);
    const stats = await ctx.getFileStats(filepath);
    const currentConfig = getEmbeddingConfig();
    // embeddings[i] belongs to chunks[i]; search() relies on this alignment.
    const moduleData = {
      embeddings,
      embeddingModel: currentConfig.model,
      jsonKeys
    };
    const keywords = extractJsonKeywords(content);
    const fileSummary = {
      filepath,
      chunkCount: chunks.length,
      chunkTypes: ["file"],
      keywords,
      exports: [],
      lastModified: stats.lastModified
    };
    this.pendingSummaries.set(filepath, fileSummary);
    return {
      filepath,
      lastModified: stats.lastModified,
      chunks,
      moduleData
    };
  }

  /**
   * Merge the pending summaries into the on-disk symbolic index, rebuild its
   * BM25 index, save it, and clear the pending set.
   *
   * @param {object} ctx - Index context (`rootDir` and `config` are read).
   */
  async finalize(ctx) {
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    this.symbolicIndex = new SymbolicIndex(indexDir, this.id);
    await this.symbolicIndex.initialize();
    for (const summary of this.pendingSummaries.values()) {
      this.symbolicIndex.addFile(summary);
    }
    this.symbolicIndex.buildBM25Index();
    await this.symbolicIndex.save();
    this.pendingSummaries.clear();
  }

  /**
   * Hybrid search over the indexed JSON chunks.
   *
   * score = SEMANTIC_WEIGHT2 * cosine + BM25_WEIGHT2 * normalized BM25.
   * A chunk is returned when its score reaches `minScore` or its normalized
   * BM25 score exceeds 0.3. Results are sorted by descending score and cut to
   * `topK`.
   *
   * @param {string} query
   * @param {object} ctx - Search context; `rootDir`, `config`,
   *   `loadFileIndex` and (as fallback) `listIndexedFiles` are used.
   * @param {{ topK?: number, minScore?: number, filePatterns?: string[] }} [options]
   *   `filePatterns` entries of the form `*.ext` match by suffix; any other
   *   entry matches as a substring of the file path.
   * @returns {Promise<object[]>}
   */
  async search(query, ctx, options = {}) {
    const {
      topK = DEFAULT_TOP_K3,
      minScore = DEFAULT_MIN_SCORE3,
      filePatterns
    } = options;
    const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
    const symbolicIndex = new SymbolicIndex(indexDir, this.id);
    let allFiles;
    try {
      await symbolicIndex.initialize();
      allFiles = symbolicIndex.getAllFiles();
    } catch {
      // NOTE(review): SymbolicIndex.initialize() in this file catches its own
      // load errors, so this fallback looks unreachable in practice — confirm
      // whether an empty/missing symbolic index should fall back here too.
      allFiles = await ctx.listIndexedFiles();
    }
    let filesToSearch = allFiles.filter((f) => isJsonFile(f));
    if (filePatterns && filePatterns.length > 0) {
      filesToSearch = filesToSearch.filter((filepath) => {
        return filePatterns.some((pattern) => {
          if (pattern.startsWith("*.")) {
            // "*.json" -> ".json"
            const ext = pattern.slice(1);
            return filepath.endsWith(ext);
          }
          return filepath.includes(pattern);
        });
      });
    }
    const queryEmbedding = await getEmbedding(query);
    const bm25Index = new BM25Index;
    const allChunksData = [];
    for (const filepath of filesToSearch) {
      const fileIndex = await ctx.loadFileIndex(filepath);
      if (!fileIndex) {
        continue;
      }
      const moduleData = fileIndex.moduleData;
      if (!moduleData?.embeddings) {
        continue;
      }
      for (let i = 0; i < fileIndex.chunks.length; i++) {
        const chunk = fileIndex.chunks[i];
        const embedding = moduleData.embeddings[i];
        if (!embedding) {
          continue;
        }
        allChunksData.push({
          filepath: fileIndex.filepath,
          chunk,
          embedding
        });
        bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
      }
    }
    // Over-fetch BM25 hits (3x topK) so the keyword signal covers more chunks than are returned.
    const bm25Results = bm25Index.search(query, topK * 3);
    const bm25Scores = new Map();
    for (const result of bm25Results) {
      bm25Scores.set(result.id, normalizeScore(result.score, 3));
    }
    const results = [];
    for (const { filepath, chunk, embedding } of allChunksData) {
      const semanticScore = cosineSimilarity(queryEmbedding, embedding);
      const bm25Score = bm25Scores.get(chunk.id) || 0;
      const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
      if (hybridScore >= minScore || bm25Score > 0.3) {
        results.push({
          filepath,
          chunk,
          score: hybridScore,
          moduleId: this.id,
          context: {
            semanticScore,
            bm25Score
          }
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }
}
|
|
3388
|
+
// Search defaults and score weights for the JSON module. These stay `var` so
// the functions declared above can reference them regardless of declaration
// order in the bundle.
var DEFAULT_MIN_SCORE3 = 0.15;
var DEFAULT_TOP_K3 = 10;
// Weights of the embedding similarity and the normalized BM25 score in the hybrid score.
var SEMANTIC_WEIGHT2 = 0.7;
var BM25_WEIGHT2 = 0.3;
// Assigned by init_json below.
var JSON_EXTENSIONS;

// Initializer for the JSON module: runs the initializers of its dependencies,
// then fills in the list of handled file extensions.
var init_json = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  JSON_EXTENSIONS = [".json"];
});
|
|
3396
|
+
|
|
3397
|
+
// src/modules/docs/markdown/index.ts
|
|
3398
|
+
// Export namespace object for the Markdown module. Each entry is registered
// as a getter function; the public names DEFAULT_TOP_K / DEFAULT_MIN_SCORE
// map to this module's suffixed locals.
var exports_markdown = {};
__export(exports_markdown, {
  isMarkdownFile: () => isMarkdownFile,
  MarkdownModule: () => MarkdownModule,
  MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
  DEFAULT_TOP_K: () => DEFAULT_TOP_K4,
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE4
});
|
|
3406
|
+
import * as path11 from "path";
|
|
3407
|
+
/**
 * Tell whether a path's extension (compared case-insensitively) is listed in
 * MARKDOWN_EXTENSIONS.
 *
 * @param {string} filepath
 * @returns {boolean}
 */
function isMarkdownFile(filepath) {
  const ext = path11.extname(filepath).toLowerCase();
  return MARKDOWN_EXTENSIONS.includes(ext);
}
|
|
3411
|
+
/**
 * Split Markdown text into sections, one per ATX heading (`#` to `######`).
 *
 * Text before the first heading becomes a section with an empty heading and
 * level 0. A heading section's `content` is the trimmed text between its
 * heading line and the next heading (or the end of the text); the heading
 * line itself is not part of `content`. Line numbers are 1-based and
 * inclusive.
 *
 * Both LF and CRLF line endings are accepted: a trailing `\r` would otherwise
 * stop the heading pattern from matching. A preamble, or a whole document,
 * that is only whitespace produces no section.
 *
 * NOTE(review): lines starting with `#` inside fenced code blocks are also
 * treated as headings; the parser does not track fences.
 *
 * @param {string} content - Markdown text.
 * @returns {{ heading: string, level: number, content: string,
 *   startLine: number, endLine: number }[]}
 */
function parseMarkdownSections(content) {
  const lines = content.split(/\r?\n/);
  const sections = [];
  let currentSection = null;
  let currentContent = [];

  // Close whatever is being accumulated; `endLine` is its last 1-based line.
  const flush = (endLine) => {
    const body = currentContent.join("\n").trim();
    if (currentSection) {
      currentSection.content = body;
      currentSection.endLine = endLine;
      // A heading with no body is still kept.
      if (currentSection.content || currentSection.heading) {
        sections.push(currentSection);
      }
    } else if (body) {
      sections.push({
        heading: "",
        level: 0,
        content: body,
        startLine: 1,
        endLine
      });
    }
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      // The previous section ends on the line before this heading (0-based i === 1-based i - 1 + 1).
      flush(i);
      currentSection = {
        heading: headingMatch[2],
        level: headingMatch[1].length,
        content: "",
        startLine: i + 1,
        endLine: lines.length
      };
      currentContent = [];
    } else {
      currentContent.push(line);
    }
  }
  flush(lines.length);
  return sections;
}
|
|
2755
|
-
function
|
|
2756
|
-
|
|
3470
|
+
/**
 * Derive search keywords from Markdown text.
 *
 * Collected in this order, lower-cased and de-duplicated:
 *   1. words of heading lines,
 *   2. words inside `**bold**` / `*italic*` spans,
 *   3. whole inline-code spans between 3 and 49 characters long,
 *   4. words inside square brackets (link text).
 * "Words" are whitespace-separated tokens longer than two characters.
 *
 * @param {string} content - Markdown text.
 * @returns {string[]}
 */
function extractMarkdownKeywords(content) {
  const keywords = new Set();
  const addWords = (text) => {
    for (const word of text.toLowerCase().split(/\s+/)) {
      if (word.length > 2) {
        keywords.add(word);
      }
    }
  };

  for (const match of content.matchAll(/^#{1,6}\s+(.+)$/gm)) {
    addWords(match[1]);
  }
  for (const match of content.matchAll(/\*\*(.+?)\*\*|\*(.+?)\*/g)) {
    addWords(match[1] || match[2] || "");
  }
  for (const match of content.matchAll(/`([^`]+)`/g)) {
    // Inline code is kept whole rather than split into words.
    const code = match[1].toLowerCase();
    if (code.length > 2 && code.length < 50) {
      keywords.add(code);
    }
  }
  for (const match of content.matchAll(/\[([^\]]+)\]/g)) {
    addWords(match[1]);
  }
  return [...keywords];
}
|
|
2758
3499
|
|
|
2759
|
-
class
|
|
2760
|
-
id = "
|
|
2761
|
-
name = "
|
|
2762
|
-
description = "
|
|
3500
|
+
class MarkdownModule {
|
|
3501
|
+
id = "docs/markdown";
|
|
3502
|
+
name = "Markdown Search";
|
|
3503
|
+
description = "Markdown documentation search with section-aware indexing";
|
|
2763
3504
|
version = "1.0.0";
|
|
2764
3505
|
embeddingConfig = null;
|
|
2765
3506
|
symbolicIndex = null;
|
|
2766
3507
|
pendingSummaries = new Map;
|
|
2767
3508
|
rootDir = "";
|
|
3509
|
+
logger = undefined;
|
|
2768
3510
|
async initialize(config) {
|
|
2769
3511
|
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3512
|
+
this.logger = config.options?.logger;
|
|
3513
|
+
if (this.logger) {
|
|
3514
|
+
this.embeddingConfig = {
|
|
3515
|
+
...this.embeddingConfig,
|
|
3516
|
+
logger: this.logger
|
|
3517
|
+
};
|
|
3518
|
+
}
|
|
2770
3519
|
configureEmbeddings(this.embeddingConfig);
|
|
2771
3520
|
this.pendingSummaries.clear();
|
|
2772
3521
|
}
|
|
2773
3522
|
async indexFile(filepath, content, ctx) {
|
|
3523
|
+
if (!isMarkdownFile(filepath)) {
|
|
3524
|
+
return null;
|
|
3525
|
+
}
|
|
2774
3526
|
this.rootDir = ctx.rootDir;
|
|
2775
|
-
const
|
|
2776
|
-
if (
|
|
3527
|
+
const sections = parseMarkdownSections(content);
|
|
3528
|
+
if (sections.length === 0) {
|
|
2777
3529
|
return null;
|
|
2778
3530
|
}
|
|
2779
|
-
const
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
return `${pathPrefix} ${namePrefix}${c.content}`;
|
|
3531
|
+
const chunkContents = sections.map((s) => {
|
|
3532
|
+
const filename = path11.basename(filepath);
|
|
3533
|
+
const headingContext = s.heading ? `${s.heading}: ` : "";
|
|
3534
|
+
return `${filename} ${headingContext}${s.content}`;
|
|
2784
3535
|
});
|
|
2785
3536
|
const embeddings = await getEmbeddings(chunkContents);
|
|
2786
|
-
const chunks =
|
|
2787
|
-
id: generateChunkId(filepath,
|
|
2788
|
-
content:
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
3537
|
+
const chunks = sections.map((section, i) => ({
|
|
3538
|
+
id: generateChunkId(filepath, section.startLine, section.endLine),
|
|
3539
|
+
content: section.heading ? `## ${section.heading}
|
|
3540
|
+
|
|
3541
|
+
${section.content}` : section.content,
|
|
3542
|
+
startLine: section.startLine,
|
|
3543
|
+
endLine: section.endLine,
|
|
3544
|
+
type: "block",
|
|
3545
|
+
name: section.heading || undefined
|
|
2795
3546
|
}));
|
|
2796
|
-
const
|
|
3547
|
+
const headings = sections.filter((s) => s.heading).map((s) => s.heading);
|
|
2797
3548
|
const stats = await ctx.getFileStats(filepath);
|
|
2798
3549
|
const currentConfig = getEmbeddingConfig();
|
|
2799
3550
|
const moduleData = {
|
|
2800
3551
|
embeddings,
|
|
2801
|
-
embeddingModel: currentConfig.model
|
|
3552
|
+
embeddingModel: currentConfig.model,
|
|
3553
|
+
headings
|
|
2802
3554
|
};
|
|
2803
|
-
const
|
|
2804
|
-
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2805
|
-
];
|
|
2806
|
-
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2807
|
-
const allKeywords = new Set;
|
|
2808
|
-
for (const pc of parsedChunks) {
|
|
2809
|
-
const keywords = extractKeywords(pc.content, pc.name);
|
|
2810
|
-
keywords.forEach((k) => allKeywords.add(k));
|
|
2811
|
-
}
|
|
2812
|
-
pathContext.keywords.forEach((k) => allKeywords.add(k));
|
|
3555
|
+
const keywords = extractMarkdownKeywords(content);
|
|
2813
3556
|
const fileSummary = {
|
|
2814
3557
|
filepath,
|
|
2815
3558
|
chunkCount: chunks.length,
|
|
2816
|
-
chunkTypes,
|
|
2817
|
-
keywords
|
|
2818
|
-
exports,
|
|
2819
|
-
lastModified: stats.lastModified
|
|
2820
|
-
pathContext: {
|
|
2821
|
-
segments: pathContext.segments,
|
|
2822
|
-
layer: pathContext.layer,
|
|
2823
|
-
domain: pathContext.domain,
|
|
2824
|
-
depth: pathContext.depth
|
|
2825
|
-
}
|
|
3559
|
+
chunkTypes: ["block"],
|
|
3560
|
+
keywords,
|
|
3561
|
+
exports: headings,
|
|
3562
|
+
lastModified: stats.lastModified
|
|
2826
3563
|
};
|
|
2827
3564
|
this.pendingSummaries.set(filepath, fileSummary);
|
|
2828
3565
|
return {
|
|
2829
3566
|
filepath,
|
|
2830
3567
|
lastModified: stats.lastModified,
|
|
2831
3568
|
chunks,
|
|
2832
|
-
moduleData
|
|
2833
|
-
references
|
|
3569
|
+
moduleData
|
|
2834
3570
|
};
|
|
2835
3571
|
}
|
|
2836
3572
|
async finalize(ctx) {
|
|
@@ -2846,8 +3582,8 @@ class TypeScriptModule {
|
|
|
2846
3582
|
}
|
|
2847
3583
|
async search(query, ctx, options = {}) {
|
|
2848
3584
|
const {
|
|
2849
|
-
topK =
|
|
2850
|
-
minScore =
|
|
3585
|
+
topK = DEFAULT_TOP_K4,
|
|
3586
|
+
minScore = DEFAULT_MIN_SCORE4,
|
|
2851
3587
|
filePatterns
|
|
2852
3588
|
} = options;
|
|
2853
3589
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
@@ -2859,9 +3595,9 @@ class TypeScriptModule {
|
|
|
2859
3595
|
} catch {
|
|
2860
3596
|
allFiles = await ctx.listIndexedFiles();
|
|
2861
3597
|
}
|
|
2862
|
-
let filesToSearch = allFiles;
|
|
3598
|
+
let filesToSearch = allFiles.filter((f) => isMarkdownFile(f));
|
|
2863
3599
|
if (filePatterns && filePatterns.length > 0) {
|
|
2864
|
-
filesToSearch =
|
|
3600
|
+
filesToSearch = filesToSearch.filter((filepath) => {
|
|
2865
3601
|
return filePatterns.some((pattern) => {
|
|
2866
3602
|
if (pattern.startsWith("*.")) {
|
|
2867
3603
|
const ext = pattern.slice(1);
|
|
@@ -2899,36 +3635,24 @@ class TypeScriptModule {
|
|
|
2899
3635
|
for (const result of bm25Results) {
|
|
2900
3636
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
2901
3637
|
}
|
|
2902
|
-
const queryTerms = query
|
|
2903
|
-
const pathBoosts = new Map;
|
|
2904
|
-
for (const filepath of filesToSearch) {
|
|
2905
|
-
const summary = symbolicIndex.getFileSummary(filepath);
|
|
2906
|
-
if (summary?.pathContext) {
|
|
2907
|
-
let boost = 0;
|
|
2908
|
-
const ctx2 = summary.pathContext;
|
|
2909
|
-
if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
|
|
2910
|
-
boost += 0.1;
|
|
2911
|
-
}
|
|
2912
|
-
if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
|
|
2913
|
-
boost += 0.05;
|
|
2914
|
-
}
|
|
2915
|
-
const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
|
|
2916
|
-
if (segmentMatch) {
|
|
2917
|
-
boost += 0.05;
|
|
2918
|
-
}
|
|
2919
|
-
pathBoosts.set(filepath, boost);
|
|
2920
|
-
}
|
|
2921
|
-
}
|
|
3638
|
+
const queryTerms = extractQueryTerms(query);
|
|
2922
3639
|
const results = [];
|
|
2923
3640
|
for (const { filepath, chunk, embedding } of allChunksData) {
|
|
2924
3641
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
2925
3642
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
|
|
2931
|
-
|
|
3643
|
+
let docBoost = 0;
|
|
3644
|
+
if (queryTerms.some((t) => [
|
|
3645
|
+
"docs",
|
|
3646
|
+
"documentation",
|
|
3647
|
+
"readme",
|
|
3648
|
+
"guide",
|
|
3649
|
+
"how",
|
|
3650
|
+
"what",
|
|
3651
|
+
"explain"
|
|
3652
|
+
].includes(t))) {
|
|
3653
|
+
docBoost = 0.05;
|
|
3654
|
+
}
|
|
3655
|
+
const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
2932
3656
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
2933
3657
|
results.push({
|
|
2934
3658
|
filepath,
|
|
@@ -2938,10 +3662,7 @@ class TypeScriptModule {
|
|
|
2938
3662
|
context: {
|
|
2939
3663
|
semanticScore,
|
|
2940
3664
|
bm25Score,
|
|
2941
|
-
|
|
2942
|
-
fileTypeBoost,
|
|
2943
|
-
chunkTypeBoost,
|
|
2944
|
-
exportBoost
|
|
3665
|
+
docBoost
|
|
2945
3666
|
}
|
|
2946
3667
|
});
|
|
2947
3668
|
}
|
|
@@ -2949,91 +3670,21 @@ class TypeScriptModule {
|
|
|
2949
3670
|
results.sort((a, b) => b.score - a.score);
|
|
2950
3671
|
return results.slice(0, topK);
|
|
2951
3672
|
}
|
|
2952
|
-
extractReferences(content, filepath) {
|
|
2953
|
-
const references = [];
|
|
2954
|
-
const importRegex = /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g;
|
|
2955
|
-
const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
|
|
2956
|
-
let match;
|
|
2957
|
-
while ((match = importRegex.exec(content)) !== null) {
|
|
2958
|
-
const importPath = match[1];
|
|
2959
|
-
if (importPath.startsWith(".")) {
|
|
2960
|
-
const dir = path8.dirname(filepath);
|
|
2961
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
2962
|
-
references.push(resolved);
|
|
2963
|
-
}
|
|
2964
|
-
}
|
|
2965
|
-
while ((match = requireRegex.exec(content)) !== null) {
|
|
2966
|
-
const importPath = match[1];
|
|
2967
|
-
if (importPath.startsWith(".")) {
|
|
2968
|
-
const dir = path8.dirname(filepath);
|
|
2969
|
-
const resolved = path8.normalize(path8.join(dir, importPath));
|
|
2970
|
-
references.push(resolved);
|
|
2971
|
-
}
|
|
2972
|
-
}
|
|
2973
|
-
return references;
|
|
2974
|
-
}
|
|
2975
3673
|
}
|
|
2976
|
-
var
|
|
2977
|
-
var
|
|
3674
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
|
|
3675
|
+
var init_markdown = __esm(() => {
|
|
2978
3676
|
init_embeddings();
|
|
3677
|
+
init_services();
|
|
2979
3678
|
init_config2();
|
|
2980
|
-
init_parseCode();
|
|
2981
3679
|
init_storage();
|
|
2982
|
-
|
|
2983
|
-
init_keywords();
|
|
2984
|
-
IMPLEMENTATION_TERMS = [
|
|
2985
|
-
"function",
|
|
2986
|
-
"method",
|
|
2987
|
-
"class",
|
|
2988
|
-
"interface",
|
|
2989
|
-
"implement",
|
|
2990
|
-
"implementation",
|
|
2991
|
-
"endpoint",
|
|
2992
|
-
"route",
|
|
2993
|
-
"handler",
|
|
2994
|
-
"controller",
|
|
2995
|
-
"module",
|
|
2996
|
-
"code"
|
|
2997
|
-
];
|
|
2998
|
-
DOCUMENTATION_TERMS = [
|
|
2999
|
-
"documentation",
|
|
3000
|
-
"docs",
|
|
3001
|
-
"guide",
|
|
3002
|
-
"tutorial",
|
|
3003
|
-
"readme",
|
|
3004
|
-
"how",
|
|
3005
|
-
"what",
|
|
3006
|
-
"why",
|
|
3007
|
-
"explain",
|
|
3008
|
-
"overview",
|
|
3009
|
-
"getting",
|
|
3010
|
-
"started",
|
|
3011
|
-
"requirements",
|
|
3012
|
-
"setup",
|
|
3013
|
-
"install",
|
|
3014
|
-
"configure",
|
|
3015
|
-
"configuration"
|
|
3016
|
-
];
|
|
3017
|
-
SOURCE_CODE_EXTENSIONS = [
|
|
3018
|
-
".ts",
|
|
3019
|
-
".tsx",
|
|
3020
|
-
".js",
|
|
3021
|
-
".jsx",
|
|
3022
|
-
".mjs",
|
|
3023
|
-
".cjs",
|
|
3024
|
-
".py",
|
|
3025
|
-
".go",
|
|
3026
|
-
".rs",
|
|
3027
|
-
".java"
|
|
3028
|
-
];
|
|
3029
|
-
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
3680
|
+
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
3030
3681
|
});
|
|
3031
3682
|
|
|
3032
3683
|
// src/app/indexer/index.ts
|
|
3033
3684
|
init_config2();
|
|
3034
3685
|
import { glob } from "glob";
|
|
3035
3686
|
import * as fs6 from "fs/promises";
|
|
3036
|
-
import * as
|
|
3687
|
+
import * as path14 from "path";
|
|
3037
3688
|
|
|
3038
3689
|
// src/modules/registry.ts
|
|
3039
3690
|
class ModuleRegistryImpl {
|
|
@@ -3058,16 +3709,20 @@ var registry = new ModuleRegistryImpl;
|
|
|
3058
3709
|
/**
 * Loads the four built-in modules (core, TypeScript, JSON, Markdown) and
 * registers one new instance of each with `registry`.
 *
 * Every module is initialized before anything is registered, so a failure
 * while loading leaves the registry untouched.
 */
async function registerBuiltInModules() {
  // Each loader runs the bundle initializer, then picks the module class
  // from the corresponding exports object.
  const loaders = [
    () => (init_core(), exports_core).CoreModule,
    () => (init_typescript(), exports_typescript).TypeScriptModule,
    () => (init_json(), exports_json).JsonModule,
    () => (init_markdown(), exports_markdown).MarkdownModule,
  ];

  const moduleClasses = [];
  for (const load of loaders) {
    moduleClasses.push(await Promise.resolve().then(load));
  }

  for (const ModuleClass of moduleClasses) {
    registry.register(new ModuleClass);
  }
}
|
|
3064
3719
|
|
|
3065
3720
|
// src/infrastructure/introspection/IntrospectionIndex.ts
|
|
3066
|
-
import * as
|
|
3721
|
+
import * as path13 from "path";
|
|
3067
3722
|
import * as fs5 from "fs/promises";
|
|
3068
3723
|
|
|
3069
3724
|
// src/infrastructure/introspection/projectDetector.ts
|
|
3070
|
-
import * as
|
|
3725
|
+
import * as path12 from "path";
|
|
3071
3726
|
import * as fs4 from "fs/promises";
|
|
3072
3727
|
var MAX_SCAN_DEPTH = 4;
|
|
3073
3728
|
var SKIP_DIRS = new Set([
|
|
@@ -3084,7 +3739,7 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3084
3739
|
if (depth > MAX_SCAN_DEPTH)
|
|
3085
3740
|
return [];
|
|
3086
3741
|
const results = [];
|
|
3087
|
-
const fullDir = currentDir ?
|
|
3742
|
+
const fullDir = currentDir ? path12.join(rootDir, currentDir) : rootDir;
|
|
3088
3743
|
try {
|
|
3089
3744
|
const entries = await fs4.readdir(fullDir, { withFileTypes: true });
|
|
3090
3745
|
const hasPackageJson = entries.some((e) => e.isFile() && e.name === "package.json");
|
|
@@ -3107,10 +3762,10 @@ async function scanForPackageJsons(rootDir, currentDir = "", depth = 0) {
|
|
|
3107
3762
|
}
|
|
3108
3763
|
async function parsePackageJson(rootDir, relativePath) {
|
|
3109
3764
|
try {
|
|
3110
|
-
const packageJsonPath =
|
|
3765
|
+
const packageJsonPath = path12.join(rootDir, relativePath, "package.json");
|
|
3111
3766
|
const content = await fs4.readFile(packageJsonPath, "utf-8");
|
|
3112
3767
|
const pkg = JSON.parse(content);
|
|
3113
|
-
const name = pkg.name ||
|
|
3768
|
+
const name = pkg.name || path12.basename(relativePath);
|
|
3114
3769
|
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
3115
3770
|
let type = "unknown";
|
|
3116
3771
|
if (deps["next"] || deps["react"] || deps["vue"] || deps["svelte"]) {
|
|
@@ -3155,7 +3810,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3155
3810
|
for (const pattern of monorepoPatterns) {
|
|
3156
3811
|
if (!dirNames.includes(pattern))
|
|
3157
3812
|
continue;
|
|
3158
|
-
const patternDir =
|
|
3813
|
+
const patternDir = path12.join(rootDir, pattern);
|
|
3159
3814
|
try {
|
|
3160
3815
|
const subDirs = await fs4.readdir(patternDir, { withFileTypes: true });
|
|
3161
3816
|
for (const subDir of subDirs) {
|
|
@@ -3186,7 +3841,7 @@ async function detectProjectStructure(rootDir) {
|
|
|
3186
3841
|
}
|
|
3187
3842
|
let rootType = "unknown";
|
|
3188
3843
|
try {
|
|
3189
|
-
const rootPkgPath =
|
|
3844
|
+
const rootPkgPath = path12.join(rootDir, "package.json");
|
|
3190
3845
|
const rootPkg = JSON.parse(await fs4.readFile(rootPkgPath, "utf-8"));
|
|
3191
3846
|
if (rootPkg.workspaces)
|
|
3192
3847
|
isMonorepo = true;
|
|
@@ -3227,7 +3882,7 @@ class IntrospectionIndex {
|
|
|
3227
3882
|
async initialize() {
|
|
3228
3883
|
this.structure = await detectProjectStructure(this.rootDir);
|
|
3229
3884
|
try {
|
|
3230
|
-
const configPath =
|
|
3885
|
+
const configPath = path13.join(this.rootDir, ".raggrep", "config.json");
|
|
3231
3886
|
const configContent = await fs5.readFile(configPath, "utf-8");
|
|
3232
3887
|
const config = JSON.parse(configContent);
|
|
3233
3888
|
this.config = config.introspection || {};
|
|
@@ -3267,28 +3922,28 @@ class IntrospectionIndex {
|
|
|
3267
3922
|
}
|
|
3268
3923
|
}
|
|
3269
3924
|
async save(config) {
|
|
3270
|
-
const introDir =
|
|
3925
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3271
3926
|
await fs5.mkdir(introDir, { recursive: true });
|
|
3272
|
-
const projectPath =
|
|
3927
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3273
3928
|
await fs5.writeFile(projectPath, JSON.stringify({
|
|
3274
3929
|
version: "1.0.0",
|
|
3275
3930
|
lastUpdated: new Date().toISOString(),
|
|
3276
3931
|
structure: this.structure
|
|
3277
3932
|
}, null, 2));
|
|
3278
3933
|
for (const [filepath, intro] of this.files) {
|
|
3279
|
-
const introFilePath =
|
|
3280
|
-
await fs5.mkdir(
|
|
3934
|
+
const introFilePath = path13.join(introDir, "files", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3935
|
+
await fs5.mkdir(path13.dirname(introFilePath), { recursive: true });
|
|
3281
3936
|
await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
|
|
3282
3937
|
}
|
|
3283
3938
|
}
|
|
3284
3939
|
async load(config) {
|
|
3285
|
-
const introDir =
|
|
3940
|
+
const introDir = path13.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
3286
3941
|
try {
|
|
3287
|
-
const projectPath =
|
|
3942
|
+
const projectPath = path13.join(introDir, "_project.json");
|
|
3288
3943
|
const projectContent = await fs5.readFile(projectPath, "utf-8");
|
|
3289
3944
|
const projectData = JSON.parse(projectContent);
|
|
3290
3945
|
this.structure = projectData.structure;
|
|
3291
|
-
await this.loadFilesRecursive(
|
|
3946
|
+
await this.loadFilesRecursive(path13.join(introDir, "files"), "");
|
|
3292
3947
|
} catch {
|
|
3293
3948
|
this.structure = null;
|
|
3294
3949
|
this.files.clear();
|
|
@@ -3298,7 +3953,7 @@ class IntrospectionIndex {
|
|
|
3298
3953
|
try {
|
|
3299
3954
|
const entries = await fs5.readdir(basePath, { withFileTypes: true });
|
|
3300
3955
|
for (const entry of entries) {
|
|
3301
|
-
const entryPath =
|
|
3956
|
+
const entryPath = path13.join(basePath, entry.name);
|
|
3302
3957
|
const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name;
|
|
3303
3958
|
if (entry.isDirectory()) {
|
|
3304
3959
|
await this.loadFilesRecursive(entryPath, relativePath);
|
|
@@ -3315,51 +3970,148 @@ class IntrospectionIndex {
|
|
|
3315
3970
|
this.structure = null;
|
|
3316
3971
|
}
|
|
3317
3972
|
}
|
|
3973
|
+
// src/infrastructure/logger/loggers.ts
|
|
3974
|
+
/**
 * Logger that forwards every message straight to the console.
 *
 * `info` and `progress` use `console.log`, `warn` uses `console.warn`,
 * `error` uses `console.error`. `debug` prints through `console.log` only
 * when the logger was created with `verbose` enabled.
 */
class ConsoleLogger {
  verbose;

  /**
   * @param {{ verbose?: boolean }} [options] - `verbose` defaults to false.
   */
  constructor(options) {
    const requested = options?.verbose;
    this.verbose = requested ?? false;
  }

  info(message) {
    console.log(message);
  }

  warn(message) {
    console.warn(message);
  }

  error(message) {
    console.error(message);
  }

  debug(message) {
    if (!this.verbose) {
      return;
    }
    console.log(message);
  }

  progress(message) {
    console.log(message);
  }

  // Each progress message is printed as a normal line, so nothing needs erasing.
  clearProgress() {}
}
|
|
3998
|
+
|
|
3999
|
+
/**
 * Logger that keeps progress updates on a single line.
 *
 * `progress` rewrites the current stdout line using a carriage return.
 * Every other message first erases that line, so regular output never ends
 * up appended to a half-written progress message.
 */
class InlineProgressLogger {
  verbose;
  // Length of the progress text currently on screen.
  lastProgressLength = 0;
  // True while a progress line has been written and not yet cleared.
  hasProgress = false;

  /**
   * @param {{ verbose?: boolean }} [options] - `verbose` defaults to false.
   */
  constructor(options) {
    const requested = options?.verbose;
    this.verbose = requested ?? false;
  }

  info(message) {
    this.clearProgress();
    console.log(message);
  }

  warn(message) {
    this.clearProgress();
    console.warn(message);
  }

  error(message) {
    this.clearProgress();
    console.error(message);
  }

  debug(message) {
    if (!this.verbose) {
      return;
    }
    this.clearProgress();
    console.log(message);
  }

  progress(message) {
    process.stdout.write(`\r${message}`);
    // Overwrite whatever is left of a longer previous message with spaces.
    const staleChars = this.lastProgressLength - message.length;
    if (staleChars > 0) {
      process.stdout.write(" ".repeat(staleChars));
    }
    this.lastProgressLength = message.length;
    this.hasProgress = true;
  }

  clearProgress() {
    const nothingToErase = !this.hasProgress || !(this.lastProgressLength > 0);
    if (nothingToErase) {
      return;
    }
    const blank = " ".repeat(this.lastProgressLength);
    process.stdout.write(`\r${blank}\r`);
    this.lastProgressLength = 0;
    this.hasProgress = false;
  }
}
|
|
4041
|
+
|
|
4042
|
+
/**
 * Logger that discards everything: each method is an intentional no-op.
 */
class SilentLogger {
  info() {
    // intentionally empty
  }

  warn() {
    // intentionally empty
  }

  error() {
    // intentionally empty
  }

  debug() {
    // intentionally empty
  }

  progress() {
    // intentionally empty
  }

  clearProgress() {
    // intentionally empty
  }
}
|
|
4050
|
+
/**
 * Builds a `ConsoleLogger`.
 *
 * @param {{ verbose?: boolean }} [options] - Passed through to the constructor.
 * @returns {ConsoleLogger} A new logger instance.
 */
function createLogger(options) {
  const consoleLogger = new ConsoleLogger(options);
  return consoleLogger;
}
|
|
4053
|
+
/**
 * Builds an `InlineProgressLogger`.
 *
 * @param {{ verbose?: boolean }} [options] - Passed through to the constructor.
 * @returns {InlineProgressLogger} A new logger instance.
 */
function createInlineLogger(options) {
  const inlineLogger = new InlineProgressLogger(options);
  return inlineLogger;
}
|
|
4056
|
+
/**
 * Builds a `SilentLogger`.
 *
 * @returns {SilentLogger} A new logger instance.
 */
function createSilentLogger() {
  const silentLogger = new SilentLogger;
  return silentLogger;
}
|
|
3318
4059
|
// src/app/indexer/watcher.ts
|
|
3319
4060
|
import { watch } from "chokidar";
|
|
3320
4061
|
init_config2();
|
|
3321
4062
|
|
|
3322
4063
|
// src/app/indexer/index.ts
|
|
4064
|
+
/**
 * Runs `processor` over every item with at most `concurrency` calls in flight.
 *
 * Failures never reject the returned promise. Each slot of the result holds
 * either `{ success: true, value }` or `{ success: false, error }`, at the
 * same index as the input item.
 *
 * @param {Array} items - Items to process.
 * @param {(item: any, index: number) => any} processor - Called once per
 *   item; may return a promise.
 * @param {number} concurrency - Maximum number of simultaneous calls.
 *   Fractions are rounded down. Anything that is not a number of at least 1
 *   (0, negatives, NaN, undefined) falls back to 1, i.e. sequential
 *   processing, rather than silently processing nothing or throwing a
 *   RangeError while building the worker list.
 * @returns {Promise<Array<{success: true, value: any} | {success: false, error: any}>>}
 */
async function parallelMap(items, processor, concurrency) {
  const results = new Array(items.length);

  // Normalize the requested pool size to a whole number >= 1. Infinity stays
  // valid and is capped by the item count below.
  const requested = Math.floor(Number(concurrency));
  const workerCount = Math.min(items.length, requested >= 1 ? requested : 1);

  let nextIndex = 0;
  // Workers pull from a shared cursor. The read-and-increment happens
  // synchronously, so no two workers can claim the same index.
  const worker = async () => {
    while (nextIndex < items.length) {
      const index = nextIndex++;
      try {
        const value = await processor(items[index], index);
        results[index] = { success: true, value };
      } catch (error) {
        results[index] = { success: false, error };
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
3323
4083
|
var INDEX_SCHEMA_VERSION = "1.0.0";
|
|
4084
|
+
var DEFAULT_CONCURRENCY = 4;
|
|
3324
4085
|
async function indexDirectory(rootDir, options = {}) {
|
|
3325
4086
|
const verbose = options.verbose ?? false;
|
|
3326
4087
|
const quiet = options.quiet ?? false;
|
|
3327
|
-
|
|
4088
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY;
|
|
4089
|
+
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
4090
|
+
rootDir = path14.resolve(rootDir);
|
|
3328
4091
|
const location = getIndexLocation(rootDir);
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
}
|
|
4092
|
+
logger.info(`Indexing directory: ${rootDir}`);
|
|
4093
|
+
logger.info(`Index location: ${location.indexDir}`);
|
|
4094
|
+
logger.debug(`Concurrency: ${concurrency}`);
|
|
3333
4095
|
const config = await loadConfig(rootDir);
|
|
3334
4096
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3335
4097
|
await introspection.initialize();
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
console.log(`Detected monorepo with ${structure.projects.length} projects`);
|
|
3340
|
-
}
|
|
4098
|
+
const structure = introspection.getStructure();
|
|
4099
|
+
if (structure?.isMonorepo) {
|
|
4100
|
+
logger.debug(`Detected monorepo with ${structure.projects.length} projects`);
|
|
3341
4101
|
}
|
|
3342
4102
|
await registerBuiltInModules();
|
|
3343
4103
|
const enabledModules = registry.getEnabled(config);
|
|
3344
4104
|
if (enabledModules.length === 0) {
|
|
3345
|
-
|
|
3346
|
-
console.log("No modules enabled. Check your configuration.");
|
|
3347
|
-
}
|
|
4105
|
+
logger.info("No modules enabled. Check your configuration.");
|
|
3348
4106
|
return [];
|
|
3349
4107
|
}
|
|
3350
|
-
|
|
3351
|
-
console.log(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
|
|
3352
|
-
}
|
|
4108
|
+
logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
|
|
3353
4109
|
const files = await findFiles(rootDir, config);
|
|
3354
|
-
|
|
3355
|
-
console.log(`Found ${files.length} files to index`);
|
|
3356
|
-
}
|
|
4110
|
+
logger.info(`Found ${files.length} files to index`);
|
|
3357
4111
|
const results = [];
|
|
3358
4112
|
for (const module of enabledModules) {
|
|
3359
|
-
|
|
3360
|
-
console.log(`
|
|
4113
|
+
logger.info(`
|
|
3361
4114
|
[${module.name}] Starting indexing...`);
|
|
3362
|
-
}
|
|
3363
4115
|
const moduleConfig = getModuleConfig(config, module.id);
|
|
3364
4116
|
if (module.initialize && moduleConfig) {
|
|
3365
4117
|
const configWithOverrides = { ...moduleConfig };
|
|
@@ -3369,32 +4121,32 @@ async function indexDirectory(rootDir, options = {}) {
|
|
|
3369
4121
|
embeddingModel: options.model
|
|
3370
4122
|
};
|
|
3371
4123
|
}
|
|
4124
|
+
configWithOverrides.options = {
|
|
4125
|
+
...configWithOverrides.options,
|
|
4126
|
+
logger
|
|
4127
|
+
};
|
|
3372
4128
|
await module.initialize(configWithOverrides);
|
|
3373
4129
|
}
|
|
3374
|
-
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection);
|
|
4130
|
+
const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
|
|
3375
4131
|
results.push(result);
|
|
3376
4132
|
if (module.finalize) {
|
|
3377
|
-
|
|
3378
|
-
console.log(`[${module.name}] Building secondary indexes...`);
|
|
3379
|
-
}
|
|
4133
|
+
logger.info(`[${module.name}] Building secondary indexes...`);
|
|
3380
4134
|
const ctx = {
|
|
3381
4135
|
rootDir,
|
|
3382
4136
|
config,
|
|
3383
4137
|
readFile: async (filepath) => {
|
|
3384
|
-
const fullPath =
|
|
4138
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3385
4139
|
return fs6.readFile(fullPath, "utf-8");
|
|
3386
4140
|
},
|
|
3387
4141
|
getFileStats: async (filepath) => {
|
|
3388
|
-
const fullPath =
|
|
4142
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3389
4143
|
const stats = await fs6.stat(fullPath);
|
|
3390
4144
|
return { lastModified: stats.mtime.toISOString() };
|
|
3391
4145
|
}
|
|
3392
4146
|
};
|
|
3393
4147
|
await module.finalize(ctx);
|
|
3394
4148
|
}
|
|
3395
|
-
|
|
3396
|
-
console.log(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
3397
|
-
}
|
|
4149
|
+
logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
|
|
3398
4150
|
}
|
|
3399
4151
|
await introspection.save(config);
|
|
3400
4152
|
await updateGlobalManifest(rootDir, enabledModules, config);
|
|
@@ -3417,28 +4169,37 @@ async function deleteIndex(rootDir) {
|
|
|
3417
4169
|
await fs6.rm(indexDir, { recursive: true, force: true });
|
|
3418
4170
|
} catch {}
|
|
3419
4171
|
}
|
|
4172
|
+
/**
 * Deletes the index belonging to a project directory.
 *
 * @param {string} rootDir - Project directory; resolved to an absolute path
 *   before use.
 * @returns {Promise<{success: true, indexDir: string}>} The index directory
 *   reported by `getIndexStatus` for the index that was deleted.
 * @throws {Error} When `getIndexStatus` reports that no index exists.
 */
async function resetIndex(rootDir) {
  const absoluteRoot = path14.resolve(rootDir);
  const status = await getIndexStatus(absoluteRoot);

  const indexMissing = !status.exists;
  if (indexMissing) {
    throw new Error(`No index found for ${absoluteRoot}`);
  }

  await deleteIndex(absoluteRoot);

  const outcome = { success: true, indexDir: status.indexDir };
  return outcome;
}
|
|
3420
4184
|
async function ensureIndexFresh(rootDir, options = {}) {
|
|
3421
4185
|
const verbose = options.verbose ?? false;
|
|
3422
4186
|
const quiet = options.quiet ?? false;
|
|
3423
|
-
|
|
4187
|
+
const logger = options.logger ? options.logger : quiet ? createSilentLogger() : createLogger({ verbose });
|
|
4188
|
+
rootDir = path14.resolve(rootDir);
|
|
3424
4189
|
const status = await getIndexStatus(rootDir);
|
|
3425
4190
|
if (!status.exists) {
|
|
3426
|
-
|
|
3427
|
-
console.log(`No index found. Creating index...
|
|
4191
|
+
logger.info(`No index found. Creating index...
|
|
3428
4192
|
`);
|
|
3429
|
-
}
|
|
3430
|
-
const results = await indexDirectory(rootDir, { ...options, quiet });
|
|
4193
|
+
const results = await indexDirectory(rootDir, { ...options, logger });
|
|
3431
4194
|
const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
|
|
3432
4195
|
return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
|
|
3433
4196
|
}
|
|
3434
4197
|
const versionCompatible = await isIndexVersionCompatible(rootDir);
|
|
3435
4198
|
if (!versionCompatible) {
|
|
3436
|
-
|
|
3437
|
-
console.log(`Index version incompatible. Rebuilding...
|
|
4199
|
+
logger.info(`Index version incompatible. Rebuilding...
|
|
3438
4200
|
`);
|
|
3439
|
-
}
|
|
3440
4201
|
await deleteIndex(rootDir);
|
|
3441
|
-
const results = await indexDirectory(rootDir, { ...options,
|
|
4202
|
+
const results = await indexDirectory(rootDir, { ...options, logger });
|
|
3442
4203
|
const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
|
|
3443
4204
|
return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
|
|
3444
4205
|
}
|
|
@@ -3451,7 +4212,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3451
4212
|
const introspection = new IntrospectionIndex(rootDir);
|
|
3452
4213
|
await introspection.initialize();
|
|
3453
4214
|
const currentFiles = await findFiles(rootDir, config);
|
|
3454
|
-
const currentFileSet = new Set(currentFiles.map((f) =>
|
|
4215
|
+
const currentFileSet = new Set(currentFiles.map((f) => path14.relative(rootDir, f)));
|
|
3455
4216
|
let totalIndexed = 0;
|
|
3456
4217
|
let totalRemoved = 0;
|
|
3457
4218
|
let totalUnchanged = 0;
|
|
@@ -3465,6 +4226,10 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3465
4226
|
embeddingModel: options.model
|
|
3466
4227
|
};
|
|
3467
4228
|
}
|
|
4229
|
+
configWithOverrides.options = {
|
|
4230
|
+
...configWithOverrides.options,
|
|
4231
|
+
logger
|
|
4232
|
+
};
|
|
3468
4233
|
await module.initialize(configWithOverrides);
|
|
3469
4234
|
}
|
|
3470
4235
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
@@ -3476,14 +4241,12 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3476
4241
|
}
|
|
3477
4242
|
}
|
|
3478
4243
|
for (const filepath of filesToRemove) {
|
|
3479
|
-
|
|
3480
|
-
|
|
3481
|
-
}
|
|
3482
|
-
const indexFilePath = path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4244
|
+
logger.debug(` Removing stale: ${filepath}`);
|
|
4245
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3483
4246
|
try {
|
|
3484
4247
|
await fs6.unlink(indexFilePath);
|
|
3485
4248
|
} catch {}
|
|
3486
|
-
const symbolicFilePath =
|
|
4249
|
+
const symbolicFilePath = path14.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
3487
4250
|
try {
|
|
3488
4251
|
await fs6.unlink(symbolicFilePath);
|
|
3489
4252
|
} catch {}
|
|
@@ -3494,18 +4257,21 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3494
4257
|
rootDir,
|
|
3495
4258
|
config,
|
|
3496
4259
|
readFile: async (filepath) => {
|
|
3497
|
-
const fullPath =
|
|
4260
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3498
4261
|
return fs6.readFile(fullPath, "utf-8");
|
|
3499
4262
|
},
|
|
3500
4263
|
getFileStats: async (filepath) => {
|
|
3501
|
-
const fullPath =
|
|
4264
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3502
4265
|
const stats = await fs6.stat(fullPath);
|
|
3503
4266
|
return { lastModified: stats.mtime.toISOString() };
|
|
3504
4267
|
},
|
|
3505
4268
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3506
4269
|
};
|
|
3507
|
-
|
|
3508
|
-
|
|
4270
|
+
const totalFiles = currentFiles.length;
|
|
4271
|
+
for (let i = 0;i < currentFiles.length; i++) {
|
|
4272
|
+
const filepath = currentFiles[i];
|
|
4273
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
4274
|
+
const progress = `[${i + 1}/${totalFiles}]`;
|
|
3509
4275
|
try {
|
|
3510
4276
|
const stats = await fs6.stat(filepath);
|
|
3511
4277
|
const lastModified = stats.mtime.toISOString();
|
|
@@ -3514,9 +4280,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3514
4280
|
totalUnchanged++;
|
|
3515
4281
|
continue;
|
|
3516
4282
|
}
|
|
3517
|
-
|
|
3518
|
-
console.log(` Indexing: ${relativePath}`);
|
|
3519
|
-
}
|
|
4283
|
+
logger.progress(` ${progress} Indexing: ${relativePath}`);
|
|
3520
4284
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3521
4285
|
introspection.addFile(relativePath, content);
|
|
3522
4286
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
@@ -3529,11 +4293,11 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3529
4293
|
totalIndexed++;
|
|
3530
4294
|
}
|
|
3531
4295
|
} catch (error) {
|
|
3532
|
-
|
|
3533
|
-
|
|
3534
|
-
}
|
|
4296
|
+
logger.clearProgress();
|
|
4297
|
+
logger.error(` ${progress} Error indexing ${relativePath}: ${error}`);
|
|
3535
4298
|
}
|
|
3536
4299
|
}
|
|
4300
|
+
logger.clearProgress();
|
|
3537
4301
|
if (totalIndexed > 0 || totalRemoved > 0) {
|
|
3538
4302
|
manifest.lastUpdated = new Date().toISOString();
|
|
3539
4303
|
await writeModuleManifest(rootDir, module.id, manifest, config);
|
|
@@ -3557,7 +4321,7 @@ async function ensureIndexFresh(rootDir, options = {}) {
|
|
|
3557
4321
|
unchanged: totalUnchanged
|
|
3558
4322
|
};
|
|
3559
4323
|
}
|
|
3560
|
-
async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
|
|
4324
|
+
async function indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency = DEFAULT_CONCURRENCY) {
|
|
3561
4325
|
const result = {
|
|
3562
4326
|
moduleId: module.id,
|
|
3563
4327
|
indexed: 0,
|
|
@@ -3565,55 +4329,102 @@ async function indexWithModule(rootDir, files, module, config, verbose, introspe
|
|
|
3565
4329
|
errors: 0
|
|
3566
4330
|
};
|
|
3567
4331
|
const manifest = await loadModuleManifest(rootDir, module.id, config);
|
|
4332
|
+
const indexPath = getModuleIndexPath(rootDir, module.id, config);
|
|
4333
|
+
const currentFileSet = new Set(files.map((f) => path14.relative(rootDir, f)));
|
|
4334
|
+
const filesToRemove = [];
|
|
4335
|
+
for (const filepath of Object.keys(manifest.files)) {
|
|
4336
|
+
if (!currentFileSet.has(filepath)) {
|
|
4337
|
+
filesToRemove.push(filepath);
|
|
4338
|
+
}
|
|
4339
|
+
}
|
|
4340
|
+
if (filesToRemove.length > 0) {
|
|
4341
|
+
logger.info(` Removing ${filesToRemove.length} stale entries...`);
|
|
4342
|
+
for (const filepath of filesToRemove) {
|
|
4343
|
+
logger.debug(` Removing: ${filepath}`);
|
|
4344
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4345
|
+
try {
|
|
4346
|
+
await fs6.unlink(indexFilePath);
|
|
4347
|
+
} catch {}
|
|
4348
|
+
const symbolicFilePath = path14.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
|
|
4349
|
+
try {
|
|
4350
|
+
await fs6.unlink(symbolicFilePath);
|
|
4351
|
+
} catch {}
|
|
4352
|
+
delete manifest.files[filepath];
|
|
4353
|
+
}
|
|
4354
|
+
await cleanupEmptyDirectories(indexPath);
|
|
4355
|
+
}
|
|
3568
4356
|
const ctx = {
|
|
3569
4357
|
rootDir,
|
|
3570
4358
|
config,
|
|
3571
4359
|
readFile: async (filepath) => {
|
|
3572
|
-
const fullPath =
|
|
4360
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3573
4361
|
return fs6.readFile(fullPath, "utf-8");
|
|
3574
4362
|
},
|
|
3575
4363
|
getFileStats: async (filepath) => {
|
|
3576
|
-
const fullPath =
|
|
4364
|
+
const fullPath = path14.isAbsolute(filepath) ? filepath : path14.join(rootDir, filepath);
|
|
3577
4365
|
const stats = await fs6.stat(fullPath);
|
|
3578
4366
|
return { lastModified: stats.mtime.toISOString() };
|
|
3579
4367
|
},
|
|
3580
4368
|
getIntrospection: (filepath) => introspection.getFile(filepath)
|
|
3581
4369
|
};
|
|
3582
|
-
|
|
3583
|
-
|
|
4370
|
+
const totalFiles = files.length;
|
|
4371
|
+
let completedCount = 0;
|
|
4372
|
+
const processFile = async (filepath, _index) => {
|
|
4373
|
+
const relativePath = path14.relative(rootDir, filepath);
|
|
3584
4374
|
try {
|
|
3585
4375
|
const stats = await fs6.stat(filepath);
|
|
3586
4376
|
const lastModified = stats.mtime.toISOString();
|
|
3587
4377
|
const existingEntry = manifest.files[relativePath];
|
|
3588
4378
|
if (existingEntry && existingEntry.lastModified === lastModified) {
|
|
3589
|
-
|
|
3590
|
-
|
|
3591
|
-
}
|
|
3592
|
-
result.skipped++;
|
|
3593
|
-
continue;
|
|
4379
|
+
completedCount++;
|
|
4380
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (unchanged)`);
|
|
4381
|
+
return { relativePath, status: "skipped" };
|
|
3594
4382
|
}
|
|
3595
4383
|
const content = await fs6.readFile(filepath, "utf-8");
|
|
3596
4384
|
introspection.addFile(relativePath, content);
|
|
3597
|
-
|
|
3598
|
-
|
|
3599
|
-
}
|
|
4385
|
+
completedCount++;
|
|
4386
|
+
logger.progress(` [${completedCount}/${totalFiles}] Processing: ${relativePath}`);
|
|
3600
4387
|
const fileIndex = await module.indexFile(relativePath, content, ctx);
|
|
3601
4388
|
if (!fileIndex) {
|
|
3602
|
-
|
|
3603
|
-
|
|
3604
|
-
}
|
|
3605
|
-
result.skipped++;
|
|
3606
|
-
continue;
|
|
4389
|
+
logger.debug(` [${completedCount}/${totalFiles}] Skipped ${relativePath} (no chunks)`);
|
|
4390
|
+
return { relativePath, status: "skipped" };
|
|
3607
4391
|
}
|
|
3608
4392
|
await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
|
|
3609
|
-
|
|
4393
|
+
return {
|
|
4394
|
+
relativePath,
|
|
4395
|
+
status: "indexed",
|
|
3610
4396
|
lastModified,
|
|
3611
4397
|
chunkCount: fileIndex.chunks.length
|
|
3612
4398
|
};
|
|
3613
|
-
result.indexed++;
|
|
3614
4399
|
} catch (error) {
|
|
3615
|
-
|
|
4400
|
+
completedCount++;
|
|
4401
|
+
return { relativePath, status: "error", error };
|
|
4402
|
+
}
|
|
4403
|
+
};
|
|
4404
|
+
logger.debug(` Using concurrency: ${concurrency}`);
|
|
4405
|
+
const results = await parallelMap(files, processFile, concurrency);
|
|
4406
|
+
logger.clearProgress();
|
|
4407
|
+
for (const item of results) {
|
|
4408
|
+
if (!item.success) {
|
|
3616
4409
|
result.errors++;
|
|
4410
|
+
continue;
|
|
4411
|
+
}
|
|
4412
|
+
const fileResult = item.value;
|
|
4413
|
+
switch (fileResult.status) {
|
|
4414
|
+
case "indexed":
|
|
4415
|
+
manifest.files[fileResult.relativePath] = {
|
|
4416
|
+
lastModified: fileResult.lastModified,
|
|
4417
|
+
chunkCount: fileResult.chunkCount
|
|
4418
|
+
};
|
|
4419
|
+
result.indexed++;
|
|
4420
|
+
break;
|
|
4421
|
+
case "skipped":
|
|
4422
|
+
result.skipped++;
|
|
4423
|
+
break;
|
|
4424
|
+
case "error":
|
|
4425
|
+
logger.error(` Error indexing ${fileResult.relativePath}: ${fileResult.error}`);
|
|
4426
|
+
result.errors++;
|
|
4427
|
+
break;
|
|
3617
4428
|
}
|
|
3618
4429
|
}
|
|
3619
4430
|
manifest.lastUpdated = new Date().toISOString();
|
|
@@ -3650,13 +4461,13 @@ async function loadModuleManifest(rootDir, moduleId, config) {
|
|
|
3650
4461
|
}
|
|
3651
4462
|
async function writeModuleManifest(rootDir, moduleId, manifest, config) {
|
|
3652
4463
|
const manifestPath = getModuleManifestPath(rootDir, moduleId, config);
|
|
3653
|
-
await fs6.mkdir(
|
|
4464
|
+
await fs6.mkdir(path14.dirname(manifestPath), { recursive: true });
|
|
3654
4465
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
3655
4466
|
}
|
|
3656
4467
|
async function writeFileIndex(rootDir, moduleId, filepath, fileIndex, config) {
|
|
3657
4468
|
const indexPath = getModuleIndexPath(rootDir, moduleId, config);
|
|
3658
|
-
const indexFilePath =
|
|
3659
|
-
await fs6.mkdir(
|
|
4469
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
4470
|
+
await fs6.mkdir(path14.dirname(indexFilePath), { recursive: true });
|
|
3660
4471
|
await fs6.writeFile(indexFilePath, JSON.stringify(fileIndex, null, 2));
|
|
3661
4472
|
}
|
|
3662
4473
|
async function updateGlobalManifest(rootDir, modules, config) {
|
|
@@ -3666,31 +4477,32 @@ async function updateGlobalManifest(rootDir, modules, config) {
|
|
|
3666
4477
|
lastUpdated: new Date().toISOString(),
|
|
3667
4478
|
modules: modules.map((m) => m.id)
|
|
3668
4479
|
};
|
|
3669
|
-
await fs6.mkdir(
|
|
4480
|
+
await fs6.mkdir(path14.dirname(manifestPath), { recursive: true });
|
|
3670
4481
|
await fs6.writeFile(manifestPath, JSON.stringify(manifest, null, 2));
|
|
3671
4482
|
}
|
|
3672
4483
|
async function cleanupIndex(rootDir, options = {}) {
|
|
3673
4484
|
const verbose = options.verbose ?? false;
|
|
3674
|
-
|
|
3675
|
-
|
|
4485
|
+
const logger = options.logger ?? createLogger({ verbose });
|
|
4486
|
+
rootDir = path14.resolve(rootDir);
|
|
4487
|
+
logger.info(`Cleaning up index in: ${rootDir}`);
|
|
3676
4488
|
const config = await loadConfig(rootDir);
|
|
3677
4489
|
await registerBuiltInModules();
|
|
3678
4490
|
const enabledModules = registry.getEnabled(config);
|
|
3679
4491
|
if (enabledModules.length === 0) {
|
|
3680
|
-
|
|
4492
|
+
logger.info("No modules enabled.");
|
|
3681
4493
|
return [];
|
|
3682
4494
|
}
|
|
3683
4495
|
const results = [];
|
|
3684
4496
|
for (const module of enabledModules) {
|
|
3685
|
-
|
|
4497
|
+
logger.info(`
|
|
3686
4498
|
[${module.name}] Checking for stale entries...`);
|
|
3687
|
-
const result = await cleanupModuleIndex(rootDir, module.id, config,
|
|
4499
|
+
const result = await cleanupModuleIndex(rootDir, module.id, config, logger);
|
|
3688
4500
|
results.push(result);
|
|
3689
|
-
|
|
4501
|
+
logger.info(`[${module.name}] Removed ${result.removed} stale entries, kept ${result.kept} valid entries`);
|
|
3690
4502
|
}
|
|
3691
4503
|
return results;
|
|
3692
4504
|
}
|
|
3693
|
-
async function cleanupModuleIndex(rootDir, moduleId, config,
|
|
4505
|
+
async function cleanupModuleIndex(rootDir, moduleId, config, logger) {
|
|
3694
4506
|
const result = {
|
|
3695
4507
|
moduleId,
|
|
3696
4508
|
removed: 0,
|
|
@@ -3701,7 +4513,7 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
3701
4513
|
const filesToRemove = [];
|
|
3702
4514
|
const updatedFiles = {};
|
|
3703
4515
|
for (const [filepath, entry] of Object.entries(manifest.files)) {
|
|
3704
|
-
const fullPath =
|
|
4516
|
+
const fullPath = path14.join(rootDir, filepath);
|
|
3705
4517
|
try {
|
|
3706
4518
|
await fs6.access(fullPath);
|
|
3707
4519
|
updatedFiles[filepath] = entry;
|
|
@@ -3709,13 +4521,11 @@ async function cleanupModuleIndex(rootDir, moduleId, config, verbose) {
|
|
|
3709
4521
|
} catch {
|
|
3710
4522
|
filesToRemove.push(filepath);
|
|
3711
4523
|
result.removed++;
|
|
3712
|
-
|
|
3713
|
-
console.log(` Removing stale entry: ${filepath}`);
|
|
3714
|
-
}
|
|
4524
|
+
logger.debug(` Removing stale entry: ${filepath}`);
|
|
3715
4525
|
}
|
|
3716
4526
|
}
|
|
3717
4527
|
for (const filepath of filesToRemove) {
|
|
3718
|
-
const indexFilePath =
|
|
4528
|
+
const indexFilePath = path14.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
|
|
3719
4529
|
try {
|
|
3720
4530
|
await fs6.unlink(indexFilePath);
|
|
3721
4531
|
} catch {}
|
|
@@ -3731,7 +4541,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
3731
4541
|
const entries = await fs6.readdir(dir, { withFileTypes: true });
|
|
3732
4542
|
for (const entry of entries) {
|
|
3733
4543
|
if (entry.isDirectory()) {
|
|
3734
|
-
const subDir =
|
|
4544
|
+
const subDir = path14.join(dir, entry.name);
|
|
3735
4545
|
await cleanupEmptyDirectories(subDir);
|
|
3736
4546
|
}
|
|
3737
4547
|
}
|
|
@@ -3746,7 +4556,7 @@ async function cleanupEmptyDirectories(dir) {
|
|
|
3746
4556
|
}
|
|
3747
4557
|
}
|
|
3748
4558
|
async function getIndexStatus(rootDir) {
|
|
3749
|
-
rootDir =
|
|
4559
|
+
rootDir = path14.resolve(rootDir);
|
|
3750
4560
|
const config = await loadConfig(rootDir);
|
|
3751
4561
|
const location = getIndexLocation(rootDir);
|
|
3752
4562
|
const indexDir = location.indexDir;
|
|
@@ -3782,7 +4592,7 @@ async function getIndexStatus(rootDir) {
|
|
|
3782
4592
|
}
|
|
3783
4593
|
} catch {
|
|
3784
4594
|
try {
|
|
3785
|
-
const entries = await fs6.readdir(
|
|
4595
|
+
const entries = await fs6.readdir(path14.join(indexDir, "index"));
|
|
3786
4596
|
if (entries.length > 0) {
|
|
3787
4597
|
status.exists = true;
|
|
3788
4598
|
for (const entry of entries) {
|
|
@@ -3805,7 +4615,7 @@ async function getIndexStatus(rootDir) {
|
|
|
3805
4615
|
|
|
3806
4616
|
// src/app/search/index.ts
|
|
3807
4617
|
import * as fs7 from "fs/promises";
|
|
3808
|
-
import * as
|
|
4618
|
+
import * as path15 from "path";
|
|
3809
4619
|
|
|
3810
4620
|
// src/types.ts
|
|
3811
4621
|
init_entities();
|
|
@@ -3813,7 +4623,7 @@ init_entities();
|
|
|
3813
4623
|
// src/app/search/index.ts
|
|
3814
4624
|
init_config2();
|
|
3815
4625
|
async function search(rootDir, query, options = {}) {
|
|
3816
|
-
rootDir =
|
|
4626
|
+
rootDir = path15.resolve(rootDir);
|
|
3817
4627
|
const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
|
|
3818
4628
|
if (ensureFresh) {
|
|
3819
4629
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
@@ -3858,7 +4668,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
3858
4668
|
config,
|
|
3859
4669
|
loadFileIndex: async (filepath) => {
|
|
3860
4670
|
const hasExtension = /\.[^./]+$/.test(filepath);
|
|
3861
|
-
const indexFilePath = hasExtension ?
|
|
4671
|
+
const indexFilePath = hasExtension ? path15.join(indexPath, filepath.replace(/\.[^.]+$/, ".json")) : path15.join(indexPath, filepath + ".json");
|
|
3862
4672
|
try {
|
|
3863
4673
|
const content = await fs7.readFile(indexFilePath, "utf-8");
|
|
3864
4674
|
return JSON.parse(content);
|
|
@@ -3870,7 +4680,7 @@ function createSearchContext(rootDir, moduleId, config) {
|
|
|
3870
4680
|
const files = [];
|
|
3871
4681
|
await traverseDirectory(indexPath, files, indexPath);
|
|
3872
4682
|
return files.filter((f) => f.endsWith(".json") && !f.endsWith("manifest.json")).map((f) => {
|
|
3873
|
-
const relative3 =
|
|
4683
|
+
const relative3 = path15.relative(indexPath, f);
|
|
3874
4684
|
return relative3.replace(/\.json$/, "");
|
|
3875
4685
|
});
|
|
3876
4686
|
}
|
|
@@ -3880,7 +4690,7 @@ async function traverseDirectory(dir, files, basePath) {
|
|
|
3880
4690
|
try {
|
|
3881
4691
|
const entries = await fs7.readdir(dir, { withFileTypes: true });
|
|
3882
4692
|
for (const entry of entries) {
|
|
3883
|
-
const fullPath =
|
|
4693
|
+
const fullPath = path15.join(dir, entry.name);
|
|
3884
4694
|
if (entry.isDirectory()) {
|
|
3885
4695
|
await traverseDirectory(fullPath, files, basePath);
|
|
3886
4696
|
} else if (entry.isFile()) {
|
|
@@ -3956,19 +4766,30 @@ async function search2(directory, query, options = {}) {
|
|
|
3956
4766
|
async function cleanup(directory, options = {}) {
|
|
3957
4767
|
return cleanupIndex(directory, options);
|
|
3958
4768
|
}
|
|
4769
|
+
async function reset(directory) {
|
|
4770
|
+
return resetIndex(directory);
|
|
4771
|
+
}
|
|
3959
4772
|
var raggrep = {
|
|
3960
4773
|
index,
|
|
3961
4774
|
search: search2,
|
|
3962
4775
|
cleanup,
|
|
4776
|
+
reset,
|
|
3963
4777
|
formatSearchResults
|
|
3964
4778
|
};
|
|
3965
4779
|
var src_default = raggrep;
|
|
3966
4780
|
export {
|
|
3967
4781
|
search2 as search,
|
|
4782
|
+
reset,
|
|
3968
4783
|
index,
|
|
3969
4784
|
formatSearchResults,
|
|
3970
4785
|
src_default as default,
|
|
3971
|
-
|
|
4786
|
+
createSilentLogger,
|
|
4787
|
+
createLogger,
|
|
4788
|
+
createInlineLogger,
|
|
4789
|
+
cleanup,
|
|
4790
|
+
SilentLogger,
|
|
4791
|
+
InlineProgressLogger,
|
|
4792
|
+
ConsoleLogger
|
|
3972
4793
|
};
|
|
3973
4794
|
|
|
3974
|
-
//# debugId=
|
|
4795
|
+
//# debugId=984F0AA3FD08D5A664756E2164756E21
|