@ulpi/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +200 -0
- package/dist/auth-PN7TMQHV-2W4ICG64.js +15 -0
- package/dist/chunk-247GVVKK.js +2259 -0
- package/dist/chunk-2CLNOKPA.js +793 -0
- package/dist/chunk-2HEE5OKX.js +79 -0
- package/dist/chunk-2MZER6ND.js +415 -0
- package/dist/chunk-3SBPZRB5.js +772 -0
- package/dist/chunk-4VNS5WPM.js +42 -0
- package/dist/chunk-6JCMYYBT.js +1546 -0
- package/dist/chunk-6OCEY7JY.js +422 -0
- package/dist/chunk-74WVVWJ4.js +375 -0
- package/dist/chunk-7AL4DOEJ.js +131 -0
- package/dist/chunk-7LXY5UVC.js +330 -0
- package/dist/chunk-DBMUNBNB.js +3048 -0
- package/dist/chunk-JWUUVXIV.js +13694 -0
- package/dist/chunk-KIKPIH6N.js +4048 -0
- package/dist/chunk-KLEASXUR.js +70 -0
- package/dist/chunk-MIAQVCFW.js +39 -0
- package/dist/chunk-NNUWU6CV.js +1610 -0
- package/dist/chunk-PKD4ASEM.js +115 -0
- package/dist/chunk-Q4HIY43N.js +4230 -0
- package/dist/chunk-QJ5GSMEC.js +146 -0
- package/dist/chunk-SIAQVRKG.js +2163 -0
- package/dist/chunk-SPOI23SB.js +197 -0
- package/dist/chunk-YM2HV4IA.js +505 -0
- package/dist/codemap-RRJIDBQ5.js +636 -0
- package/dist/config-EGAXXCGL.js +127 -0
- package/dist/dist-6G7JC2RA.js +90 -0
- package/dist/dist-7LHZ65GC.js +418 -0
- package/dist/dist-LZKZFPVX.js +140 -0
- package/dist/dist-R5F4MX3I.js +107 -0
- package/dist/dist-R5ZJ4LX5.js +56 -0
- package/dist/dist-RJGCUS3L.js +87 -0
- package/dist/dist-RKOGLK7R.js +151 -0
- package/dist/dist-W7K4WPAF.js +597 -0
- package/dist/export-import-4A5MWLIA.js +53 -0
- package/dist/history-ATTUKOHO.js +934 -0
- package/dist/index.js +2120 -0
- package/dist/init-AY5C2ZAS.js +393 -0
- package/dist/launchd-LF2QMSKZ.js +148 -0
- package/dist/log-TVTUXAYD.js +75 -0
- package/dist/mcp-installer-NQCGKQ23.js +124 -0
- package/dist/memory-J3G24QHS.js +406 -0
- package/dist/ollama-3XCUZMZT-FYKHW4TZ.js +7 -0
- package/dist/openai-E7G2YAHU-UYY4ZWON.js +8 -0
- package/dist/projects-ATHDD3D6.js +271 -0
- package/dist/review-ADUPV3PN.js +152 -0
- package/dist/rules-E427DKYJ.js +134 -0
- package/dist/server-MOYPE4SM-N7SE2AN7.js +18 -0
- package/dist/server-X5P6WH2M-7K2RY34N.js +11 -0
- package/dist/skills/ulpi-generate-guardian/SKILL.md +511 -0
- package/dist/skills/ulpi-generate-guardian/references/framework-rules.md +692 -0
- package/dist/skills/ulpi-generate-guardian/references/language-rules.md +596 -0
- package/dist/skills-CX73O3IV.js +76 -0
- package/dist/status-4DFHDJMN.js +66 -0
- package/dist/templates/biome.yml +24 -0
- package/dist/templates/conventional-commits.yml +18 -0
- package/dist/templates/django.yml +30 -0
- package/dist/templates/docker.yml +30 -0
- package/dist/templates/eslint.yml +13 -0
- package/dist/templates/express.yml +20 -0
- package/dist/templates/fastapi.yml +23 -0
- package/dist/templates/git-flow.yml +26 -0
- package/dist/templates/github-flow.yml +27 -0
- package/dist/templates/go.yml +33 -0
- package/dist/templates/jest.yml +24 -0
- package/dist/templates/laravel.yml +30 -0
- package/dist/templates/monorepo.yml +26 -0
- package/dist/templates/nestjs.yml +21 -0
- package/dist/templates/nextjs.yml +31 -0
- package/dist/templates/nodejs.yml +33 -0
- package/dist/templates/npm.yml +15 -0
- package/dist/templates/php.yml +25 -0
- package/dist/templates/pnpm.yml +15 -0
- package/dist/templates/prettier.yml +23 -0
- package/dist/templates/prisma.yml +21 -0
- package/dist/templates/python.yml +33 -0
- package/dist/templates/quality-of-life.yml +111 -0
- package/dist/templates/ruby.yml +25 -0
- package/dist/templates/rust.yml +34 -0
- package/dist/templates/typescript.yml +14 -0
- package/dist/templates/vitest.yml +24 -0
- package/dist/templates/yarn.yml +15 -0
- package/dist/templates-U7T6MARD.js +156 -0
- package/dist/ui-L7UAWXDY.js +167 -0
- package/dist/ui.html +698 -0
- package/dist/ulpi-RMMCUAGP-JCJ273T6.js +161 -0
- package/dist/uninstall-6SW35IK4.js +25 -0
- package/dist/update-M2B4RLGH.js +61 -0
- package/dist/version-checker-ANCS3IHR.js +10 -0
- package/package.json +92 -0
|
@@ -0,0 +1,3048 @@
|
|
|
1
|
+
import {
|
|
2
|
+
commitInWorktree,
|
|
3
|
+
copyAndStage,
|
|
4
|
+
historyBranchExists,
|
|
5
|
+
withWorktree,
|
|
6
|
+
writeAndStage
|
|
7
|
+
} from "./chunk-NNUWU6CV.js";
|
|
8
|
+
import {
|
|
9
|
+
CodemapConfigSchema
|
|
10
|
+
} from "./chunk-74WVVWJ4.js";
|
|
11
|
+
import {
|
|
12
|
+
LOGS_DIR,
|
|
13
|
+
codemapBranchDir,
|
|
14
|
+
codemapConfigFile,
|
|
15
|
+
codemapLanceDir,
|
|
16
|
+
codemapManifestFile,
|
|
17
|
+
codemapMetadataDir,
|
|
18
|
+
codemapSchemaFile,
|
|
19
|
+
codemapStatsFile,
|
|
20
|
+
getCodemapBranch,
|
|
21
|
+
getCurrentBranch,
|
|
22
|
+
projectCodemapDir
|
|
23
|
+
} from "./chunk-7LXY5UVC.js";
|
|
24
|
+
|
|
25
|
+
// ../../packages/codemap-engine/dist/index.js
|
|
26
|
+
import * as fs from "fs";
|
|
27
|
+
import * as path from "path";
|
|
28
|
+
import * as fs2 from "fs";
|
|
29
|
+
import * as path2 from "path";
|
|
30
|
+
import { createHash } from "crypto";
|
|
31
|
+
import { execFileSync } from "child_process";
|
|
32
|
+
import * as fs3 from "fs";
|
|
33
|
+
import * as path5 from "path";
|
|
34
|
+
import { createHash as createHash3 } from "crypto";
|
|
35
|
+
import { createHash as createHash2 } from "crypto";
|
|
36
|
+
import * as path3 from "path";
|
|
37
|
+
import * as path4 from "path";
|
|
38
|
+
import { createRequire } from "module";
|
|
39
|
+
import * as fs4 from "fs";
|
|
40
|
+
import * as path6 from "path";
|
|
41
|
+
import { createHash as createHash4 } from "crypto";
|
|
42
|
+
import * as fs5 from "fs";
|
|
43
|
+
import * as path7 from "path";
|
|
44
|
+
import { connect } from "@lancedb/lancedb";
|
|
45
|
+
import * as fs9 from "fs";
|
|
46
|
+
import * as path11 from "path";
|
|
47
|
+
import * as fs6 from "fs";
|
|
48
|
+
import * as path8 from "path";
|
|
49
|
+
import * as fs7 from "fs";
|
|
50
|
+
import * as path9 from "path";
|
|
51
|
+
import * as fs8 from "fs";
|
|
52
|
+
import * as os from "os";
|
|
53
|
+
import * as path10 from "path";
|
|
54
|
+
import { createHash as createHash5 } from "crypto";
|
|
55
|
+
import * as fs10 from "fs";
|
|
56
|
+
import * as path12 from "path";
|
|
57
|
+
import * as fs11 from "fs";
|
|
58
|
+
import * as path13 from "path";
|
|
59
|
+
import { createHash as createHash6 } from "crypto";
|
|
60
|
+
import * as fs12 from "fs";
|
|
61
|
+
import * as path14 from "path";
|
|
62
|
+
import * as fs13 from "fs";
|
|
63
|
+
import * as path15 from "path";
|
|
64
|
+
import { execFileSync as execFileSync2 } from "child_process";
|
|
65
|
+
import * as fs14 from "fs";
|
|
66
|
+
import * as path16 from "path";
|
|
67
|
+
import * as fs15 from "fs";
|
|
68
|
+
import * as path17 from "path";
|
|
69
|
+
import * as fs16 from "fs";
|
|
70
|
+
// Baseline codemap configuration: the result of validating an empty object,
// i.e. every default declared by CodemapConfigSchema. Returned whenever the
// on-disk config is missing or fails to parse/validate.
var DEFAULT_CODEMAP_CONFIG = CodemapConfigSchema.parse({});
|
|
71
|
+
/**
 * Load the codemap configuration for a project from its config file.
 *
 * Falls back to DEFAULT_CODEMAP_CONFIG when the file does not exist,
 * cannot be read, contains invalid JSON, or fails schema validation.
 *
 * @param {string} projectDir - Absolute path to the project root.
 * @returns {object} A schema-validated codemap configuration.
 */
function loadCodemapConfig(projectDir) {
  const configPath = codemapConfigFile(projectDir);
  if (!fs.existsSync(configPath)) return DEFAULT_CODEMAP_CONFIG;
  try {
    return CodemapConfigSchema.parse(JSON.parse(fs.readFileSync(configPath, "utf-8")));
  } catch {
    // Unreadable or invalid config: fall back to defaults rather than crash.
    return DEFAULT_CODEMAP_CONFIG;
  }
}
|
|
84
|
+
/**
 * Persist a codemap configuration to disk.
 *
 * Writes to a `.tmp` sibling first and then renames over the target so a
 * crash mid-write cannot leave a truncated config file.
 *
 * @param {string} projectDir - Absolute path to the project root.
 * @param {object} config - Configuration object to serialize as JSON.
 */
function saveCodemapConfig(projectDir, config) {
  const target = codemapConfigFile(projectDir);
  fs.mkdirSync(path.dirname(target), { recursive: true });
  const serialized = JSON.stringify(config, null, 2) + "\n";
  const staging = target + ".tmp";
  fs.writeFileSync(staging, serialized, "utf-8");
  fs.renameSync(staging, target);
}
|
|
92
|
+
function minimatch(filePath, pattern) {
|
|
93
|
+
const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*\*/g, "{{GLOBSTAR}}").replace(/\*/g, "[^/]*").replace(/\?/g, "[^/]").replace(/\{\{GLOBSTAR\}\}/g, ".*");
|
|
94
|
+
return new RegExp(`^${regex}$`).test(filePath);
|
|
95
|
+
}
|
|
96
|
+
// True when filePath matches at least one of the given glob patterns.
function matchesDenyPattern(filePath, patterns) {
  return patterns.some((pattern) => minimatch(filePath, pattern));
}
|
|
102
|
+
/**
 * Read `.codemapignore` from the project root and return its patterns.
 *
 * Blank lines and `#` comment lines are dropped; each remaining line is
 * trimmed. Returns an empty list when the file is absent or unreadable.
 *
 * @param {string} projectDir - Absolute path to the project root.
 * @returns {string[]} Glob patterns, in file order.
 */
function loadCodemapIgnore(projectDir) {
  const ignorePath = path2.join(projectDir, ".codemapignore");
  if (!fs2.existsSync(ignorePath)) return [];
  let text;
  try {
    text = fs2.readFileSync(ignorePath, "utf-8");
  } catch {
    return [];
  }
  const patterns = [];
  for (const rawLine of text.split("\n")) {
    const trimmed = rawLine.trim();
    if (trimmed !== "" && !trimmed.startsWith("#")) {
      patterns.push(trimmed);
    }
  }
  return patterns;
}
|
|
112
|
+
// SHA-256 hex digest of a file's raw bytes. Throws if the file is unreadable
// (callers handle that).
function hashFileContent(filePath) {
  const hasher = createHash("sha256");
  hasher.update(fs2.readFileSync(filePath));
  return hasher.digest("hex");
}
|
|
116
|
+
// File extensions excluded from scanning: images, fonts, audio/video,
// archives, office documents, compiled artifacts, and lock files.
// Matching is done on the lowercased extension (see scanRepository).
var BINARY_EXTENSIONS = /* @__PURE__ */ new Set([
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".bmp",
  ".ico",
  ".svg",
  ".woff",
  ".woff2",
  ".ttf",
  ".eot",
  ".otf",
  ".mp3",
  ".mp4",
  ".wav",
  ".avi",
  ".mov",
  ".zip",
  ".gz",
  ".tar",
  ".rar",
  ".7z",
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".exe",
  ".dll",
  ".so",
  ".dylib",
  ".pyc",
  ".pyo",
  ".class",
  ".lock"
  // package lock files can be huge
]);
|
|
154
|
+
/**
 * Enumerate indexable files in a git repository.
 *
 * Uses `git ls-files --cached --others --exclude-standard` (tracked files
 * plus untracked files not covered by .gitignore), then filters out binary
 * extensions, deny/ignore patterns, non-regular files, empty files, and
 * files over config.maxFileSize. Each surviving file is content-hashed.
 *
 * @param {string} projectDir - Repository root (used as git cwd).
 * @param {object} config - Must provide `deny` (glob list) and `maxFileSize`.
 * @returns {Array<{filePath: string, contentHash: string, mtime: number, sizeBytes: number}>}
 *   Entries sorted by filePath; empty array when git fails (not a repo,
 *   git missing, timeout, or output over the buffer cap).
 */
function scanRepository(projectDir, config) {
  let gitFiles;
  try {
    const output = execFileSync("git", ["ls-files", "--cached", "--others", "--exclude-standard"], {
      cwd: projectDir,
      encoding: "utf-8",
      timeout: 3e4,
      maxBuffer: 10 * 1024 * 1024
      // 10MB buffer for large repos
    });
    gitFiles = output.trim().split("\n").filter(Boolean);
  } catch {
    // Any git failure (no repo, no git binary, timeout) yields "no files".
    return [];
  }
  // Deny list = config patterns + the project's .codemapignore entries.
  const ignorePatterns = loadCodemapIgnore(projectDir);
  const denyPatterns = [...config.deny, ...ignorePatterns];
  const results = [];
  for (const relPath of gitFiles) {
    const ext = path2.extname(relPath).toLowerCase();
    if (BINARY_EXTENSIONS.has(ext)) continue;
    if (matchesDenyPattern(relPath, denyPatterns)) continue;
    const absPath = path2.join(projectDir, relPath);
    let stat;
    try {
      stat = fs2.statSync(absPath);
    } catch {
      // Listed by git but gone from disk (e.g. deleted, broken symlink).
      continue;
    }
    if (!stat.isFile()) continue;
    if (stat.size > config.maxFileSize) continue;
    if (stat.size === 0) continue;
    try {
      const contentHash = hashFileContent(absPath);
      results.push({
        filePath: relPath,
        contentHash,
        mtime: stat.mtimeMs,
        sizeBytes: stat.size
      });
    } catch {
      // File became unreadable between stat and read; skip it.
      continue;
    }
  }
  // Deterministic ordering so downstream diffs/manifests are stable.
  results.sort((a, b) => a.filePath.localeCompare(b.filePath));
  return results;
}
|
|
200
|
+
// ---------------------------------------------------------------------------
// Tree-sitter node-type tables.
//
// *_DECLARATIONS maps a grammar's declaration node type to a display kind
// ("function", "class", ...). *_TOP_LEVEL lists node types treated as
// chunkable top-level units by the AST chunker.
// ---------------------------------------------------------------------------

// JavaScript declaration node types -> display kind.
var JS_DECLARATIONS = /* @__PURE__ */ new Map([
  ["function_declaration", "function"],
  ["generator_function_declaration", "function"],
  ["class_declaration", "class"],
  ["method_definition", "method"],
  ["variable_declarator", "const"]
]);
// TypeScript = JavaScript plus TS-only declaration forms.
var TS_DECLARATIONS = new Map([
  ...JS_DECLARATIONS,
  ["interface_declaration", "interface"],
  ["type_alias_declaration", "type"],
  ["enum_declaration", "enum"]
]);
// Python declaration node types.
var PY_DECLARATIONS = /* @__PURE__ */ new Map([
  ["function_definition", "function"],
  ["class_definition", "class"]
]);
// Go declaration node types.
var GO_DECLARATIONS = /* @__PURE__ */ new Map([
  ["function_declaration", "function"],
  ["method_declaration", "method"],
  ["type_spec", "type"]
]);
// PHP declaration node types (traits are reported as "class").
var PHP_DECLARATIONS = /* @__PURE__ */ new Map([
  ["function_definition", "function"],
  ["class_declaration", "class"],
  ["method_declaration", "method"],
  ["interface_declaration", "interface"],
  ["trait_declaration", "class"],
  ["enum_declaration", "enum"]
]);
// JavaScript node types considered top-level chunk boundaries.
var JS_TOP_LEVEL = /* @__PURE__ */ new Set([
  "function_declaration",
  "generator_function_declaration",
  "class_declaration",
  "export_statement",
  "variable_declaration",
  "lexical_declaration",
  "expression_statement",
  "import_statement",
  "if_statement",
  "for_statement",
  "for_in_statement",
  "while_statement",
  "try_statement",
  "switch_statement"
]);
// TypeScript top-level = JavaScript plus TS-only constructs.
var TS_TOP_LEVEL = /* @__PURE__ */ new Set([
  ...JS_TOP_LEVEL,
  "interface_declaration",
  "type_alias_declaration",
  "enum_declaration",
  "module",
  "ambient_declaration"
]);
// HTML top-level node types.
var HTML_TOP_LEVEL = /* @__PURE__ */ new Set([
  "element",
  "script_element",
  "style_element",
  "doctype",
  "comment"
]);
// CSS top-level node types.
var CSS_TOP_LEVEL = /* @__PURE__ */ new Set([
  "rule_set",
  "media_statement",
  "keyframes_statement",
  "import_statement",
  "charset_statement",
  "namespace_statement",
  "supports_statement",
  "at_rule"
]);
// PHP top-level node types.
var PHP_TOP_LEVEL = /* @__PURE__ */ new Set([
  "function_definition",
  "class_declaration",
  "namespace_definition",
  "trait_declaration",
  "interface_declaration",
  "enum_declaration",
  "expression_statement",
  "if_statement",
  "foreach_statement",
  "for_statement",
  "while_statement",
  "try_statement",
  "switch_statement"
]);
// Python top-level node types.
var PY_TOP_LEVEL = /* @__PURE__ */ new Set([
  "function_definition",
  "class_definition",
  "decorated_definition",
  "import_statement",
  "import_from_statement",
  "if_statement",
  "for_statement",
  "while_statement",
  "try_statement",
  "with_statement",
  "expression_statement",
  "assignment"
]);
// Go top-level node types.
var GO_TOP_LEVEL = /* @__PURE__ */ new Set([
  "function_declaration",
  "method_declaration",
  "type_declaration",
  "var_declaration",
  "const_declaration",
  "import_declaration"
]);
|
|
308
|
+
// Per-language tree-sitter configuration, keyed by grammar id.
// Languages with full AST-chunking support get explicit topLevelTypes /
// declarationTypes tables; tag-query-only languages share one shape (empty
// tables -> the chunker falls back to a sliding window) built by a helper,
// since all of them follow the `tree-sitter-<id>` package/wasm naming.
var CONFIGS = (() => {
  const tagQueryOnly = (grammarId) => ({
    grammarId,
    packageName: `tree-sitter-${grammarId}`,
    wasmFile: `tree-sitter-${grammarId}.wasm`,
    topLevelTypes: new Set(),
    declarationTypes: new Map(),
    nameField: "name",
    hasTagQuery: true
  });
  return {
    javascript: {
      grammarId: "javascript",
      packageName: "tree-sitter-javascript",
      wasmFile: "tree-sitter-javascript.wasm",
      topLevelTypes: JS_TOP_LEVEL,
      declarationTypes: JS_DECLARATIONS,
      nameField: "name",
      hasTagQuery: true
    },
    tsx: {
      grammarId: "tsx",
      packageName: "tree-sitter-typescript",
      wasmFile: "tree-sitter-tsx.wasm",
      topLevelTypes: TS_TOP_LEVEL,
      declarationTypes: TS_DECLARATIONS,
      nameField: "name",
      hasTagQuery: true
    },
    typescript: {
      grammarId: "typescript",
      packageName: "tree-sitter-typescript",
      wasmFile: "tree-sitter-typescript.wasm",
      topLevelTypes: TS_TOP_LEVEL,
      declarationTypes: TS_DECLARATIONS,
      nameField: "name",
      hasTagQuery: true
    },
    // HTML and CSS have no declaration table and no tag query.
    html: {
      grammarId: "html",
      packageName: "tree-sitter-html",
      wasmFile: "tree-sitter-html.wasm",
      topLevelTypes: HTML_TOP_LEVEL,
      declarationTypes: new Map(),
      nameField: "tag_name"
    },
    css: {
      grammarId: "css",
      packageName: "tree-sitter-css",
      wasmFile: "tree-sitter-css.wasm",
      topLevelTypes: CSS_TOP_LEVEL,
      declarationTypes: new Map(),
      nameField: "name"
    },
    php: {
      grammarId: "php",
      packageName: "tree-sitter-php",
      wasmFile: "tree-sitter-php.wasm",
      topLevelTypes: PHP_TOP_LEVEL,
      declarationTypes: PHP_DECLARATIONS,
      nameField: "name",
      hasTagQuery: true
    },
    python: {
      grammarId: "python",
      packageName: "tree-sitter-python",
      wasmFile: "tree-sitter-python.wasm",
      topLevelTypes: PY_TOP_LEVEL,
      declarationTypes: PY_DECLARATIONS,
      nameField: "name",
      hasTagQuery: true
    },
    go: {
      grammarId: "go",
      packageName: "tree-sitter-go",
      wasmFile: "tree-sitter-go.wasm",
      topLevelTypes: GO_TOP_LEVEL,
      declarationTypes: GO_DECLARATIONS,
      nameField: "name",
      hasTagQuery: true
    },
    // --- Tag-query-only languages (no AST chunking support yet — fall back to sliding window) ---
    c: tagQueryOnly("c"),
    cpp: tagQueryOnly("cpp"),
    java: tagQueryOnly("java"),
    ruby: tagQueryOnly("ruby"),
    rust: tagQueryOnly("rust"),
    kotlin: tagQueryOnly("kotlin"),
    swift: tagQueryOnly("swift"),
    dart: tagQueryOnly("dart"),
    lua: tagQueryOnly("lua"),
    elixir: tagQueryOnly("elixir"),
    scala: tagQueryOnly("scala")
  };
})();
|
|
480
|
+
// Maps a lowercase file extension to its grammar id in CONFIGS.
// Built from grouped (extensions, language) pairs to keep each language's
// extension list in one place.
var EXTENSION_MAP = Object.fromEntries(
  [
    [[".js", ".jsx", ".mjs", ".cjs"], "javascript"],
    [[".ts", ".mts", ".cts"], "typescript"],
    [[".tsx"], "tsx"],
    [[".html", ".htm"], "html"],
    [[".css"], "css"],
    [[".php"], "php"],
    [[".py"], "python"],
    [[".go"], "go"],
    // Tag-query-only languages
    [[".c", ".h"], "c"],
    [[".cpp", ".cxx", ".cc", ".hpp", ".hxx", ".hh"], "cpp"],
    [[".java"], "java"],
    [[".rb"], "ruby"],
    [[".rs"], "rust"],
    [[".kt", ".kts"], "kotlin"],
    [[".swift"], "swift"],
    [[".dart"], "dart"],
    [[".lua"], "lua"],
    [[".ex", ".exs"], "elixir"],
    [[".scala", ".sc"], "scala"]
  ].flatMap(([extensions, lang]) => extensions.map((ext) => [ext, lang]))
);
|
|
517
|
+
// All extensions the codemap can parse (the key set of EXTENSION_MAP).
var SUPPORTED_EXTENSIONS = new Set(Object.keys(EXTENSION_MAP));
|
|
518
|
+
// Resolve the tree-sitter language config for a file path, or null when the
// extension is unsupported.
function getLanguageConfig(filePath) {
  const extension = path3.extname(filePath).toLowerCase();
  const langId = EXTENSION_MAP[extension];
  if (langId === undefined) return null;
  return CONFIGS[langId] ?? null;
}
|
|
524
|
+
// Grammar id for a file path based on its extension, or null when unsupported.
function getLanguageId(filePath) {
  return EXTENSION_MAP[path3.extname(filePath).toLowerCase()] ?? null;
}
|
|
528
|
+
// Lazily-initialized web-tree-sitter runtime state, populated once by
// initTreeSitter() and read by loadLanguage()/parseSource().
var ParserCtor = null; // Parser constructor from web-tree-sitter
var LanguageClass = null; // Language class from web-tree-sitter
var initialized = false; // guards the one-time Parser.init() call
// Cache of loaded Language instances keyed by grammarId, so each grammar's
// wasm is loaded at most once per process.
var languageCache = /* @__PURE__ */ new Map();
// CommonJS-style resolver used to locate grammar .wasm files on disk.
var require2 = createRequire(import.meta.url);
|
|
533
|
+
/**
 * One-time initialization of the web-tree-sitter runtime.
 *
 * Dynamically imports web-tree-sitter, captures its Parser/Language
 * constructors in module state, and runs Parser.init() with a locateFile
 * hook pointing at the package's own wasm directory. Subsequent calls are
 * no-ops once `initialized` is set.
 *
 * NOTE(review): concurrent first calls could both run the init sequence
 * (the flag is only set at the end) — presumably callers serialize this;
 * confirm if that ever changes.
 */
async function initTreeSitter() {
  if (initialized) return;
  const mod = await import("web-tree-sitter");
  ParserCtor = mod.Parser;
  LanguageClass = mod.Language;
  // Resolve the runtime wasm shipped inside the web-tree-sitter package so
  // emscripten loads it from node_modules rather than the CWD.
  const wasmPath = require2.resolve("web-tree-sitter/web-tree-sitter.wasm");
  const wasmDir = path4.dirname(wasmPath) + "/";
  await ParserCtor.init({
    locateFile: (scriptName) => path4.join(wasmDir, scriptName)
  });
  initialized = true;
}
|
|
545
|
+
/**
 * Load (and memoize) the compiled grammar for a language config.
 *
 * @param {object} langConfig - Entry from CONFIGS (grammarId, packageName, wasmFile).
 * @returns {Promise<object>} The web-tree-sitter Language instance.
 * @throws {Error} If initTreeSitter() has not run yet.
 */
async function loadLanguage(langConfig) {
  const key = langConfig.grammarId;
  const hit = languageCache.get(key);
  if (hit) return hit;
  if (!LanguageClass) {
    throw new Error("Tree-sitter not initialized. Call initTreeSitter() first.");
  }
  // Grammar wasm ships inside the language's own npm package.
  const grammarWasm = require2.resolve(`${langConfig.packageName}/${langConfig.wasmFile}`);
  const loaded = await LanguageClass.load(grammarWasm);
  languageCache.set(key, loaded);
  return loaded;
}
|
|
559
|
+
/**
 * Parse source text with the grammar described by langConfig.
 *
 * @param {string} content - Source text to parse.
 * @param {object} langConfig - Entry from CONFIGS.
 * @returns {Promise<object>} The parse tree; caller owns it and must call tree.delete().
 * @throws {Error} If initTreeSitter() has not run yet.
 *
 * Fix: the native parser is now released in a `finally` block — previously
 * a throw from setLanguage()/parse() skipped parser.delete() and leaked the
 * wasm-side parser object.
 */
async function parseSource(content, langConfig) {
  if (!ParserCtor) {
    throw new Error("Tree-sitter not initialized. Call initTreeSitter() first.");
  }
  const language = await loadLanguage(langConfig);
  const parser = new ParserCtor();
  try {
    parser.setLanguage(language);
    return parser.parse(content);
  } finally {
    // Always free the native parser, even when parsing throws.
    parser.delete();
  }
}
|
|
570
|
+
/**
 * Parse source text and return both the tree and the Language used, for
 * callers that need to run queries against the grammar afterwards.
 *
 * @param {string} content - Source text to parse.
 * @param {object} langConfig - Entry from CONFIGS.
 * @returns {Promise<{tree: object, language: object}>} Caller owns `tree` and must call tree.delete().
 * @throws {Error} If initTreeSitter() has not run yet.
 *
 * Fix: the native parser is now released in a `finally` block — previously
 * a throw from setLanguage()/parse() skipped parser.delete() and leaked the
 * wasm-side parser object (same fix as parseSource).
 */
async function parseSourceWithLanguage(content, langConfig) {
  if (!ParserCtor) {
    throw new Error("Tree-sitter not initialized. Call initTreeSitter() first.");
  }
  const language = await loadLanguage(langConfig);
  const parser = new ParserCtor();
  try {
    parser.setLanguage(language);
    const tree = parser.parse(content);
    return { tree, language };
  } finally {
    // Always free the native parser, even when parsing throws.
    parser.delete();
  }
}
|
|
581
|
+
// SHA-256 hex digest of a UTF-8 string; used as a stable chunk identity.
function hashContent(content) {
  const digest = createHash2("sha256");
  digest.update(content, "utf-8");
  return digest.digest("hex");
}
|
|
584
|
+
/**
 * Chunk a source file along AST boundaries.
 *
 * Top-level nodes are walked in order: nodes shorter than config.minChunkSize
 * accumulate in a pending buffer that is flushed into one merged chunk once
 * it reaches the minimum; nodes within [min, max] become one chunk each;
 * nodes over config.maxChunkSize are recursively split via splitLargeNode.
 *
 * @param {string} filePath - Used to pick the grammar and stamp chunks.
 * @param {string} content - File contents.
 * @param {object} config - Provides minChunkSize / maxChunkSize (in lines).
 * @returns {Promise<Array<object>>} Chunks in file order; [] for empty
 *   content, unsupported languages, parse failures, or fully-broken parses.
 */
async function chunkFileAST(filePath, content, config) {
  if (!content || content.trim().length === 0) return [];
  const langConfig = getLanguageConfig(filePath);
  if (!langConfig) return [];
  await initTreeSitter();
  let tree;
  try {
    tree = await parseSource(content, langConfig);
  } catch {
    return [];
  }
  try {
    const lines = content.split("\n");
    const rootNode = tree.rootNode;
    // A parse with errors AND no named children produced nothing usable.
    if (rootNode.hasError && rootNode.namedChildCount === 0) {
      return [];
    }
    const chunks = [];
    const topNodes = rootNode.namedChildren;
    // Buffer of consecutive small nodes waiting to be merged into one chunk.
    let pendingNodes = [];
    let pendingLineCount = 0;
    const flushPending = () => {
      if (pendingNodes.length === 0) return;
      const chunk = buildChunkFromNodes(pendingNodes, filePath, lines);
      if (chunk) chunks.push(chunk);
      pendingNodes = [];
      pendingLineCount = 0;
    };
    for (const node of topNodes) {
      const nodeLines = node.endPosition.row - node.startPosition.row + 1;
      if (nodeLines < config.minChunkSize) {
        // Too small on its own: accumulate until the merged run is big enough.
        pendingNodes.push(node);
        pendingLineCount += nodeLines;
        if (pendingLineCount >= config.minChunkSize) {
          flushPending();
        }
        continue;
      }
      // A full-size node ends any pending run so chunks stay contiguous.
      flushPending();
      if (nodeLines <= config.maxChunkSize) {
        const chunk = buildChunkFromNode(node, filePath, lines);
        if (chunk) chunks.push(chunk);
      } else {
        splitLargeNode(node, filePath, lines, config, chunks);
      }
    }
    flushPending();
    // NOTE(review): this branch is a no-op — when chunks.length === 0 it
    // returns [] which is what `return chunks` would return anyway.
    if (chunks.length === 0 && content.trim().length > 0) {
      return [];
    }
    return chunks;
  } finally {
    // The tree is owned here; free the native object on every path.
    tree.delete();
  }
}
|
|
639
|
+
/**
 * Split an oversized AST node into chunks of at most config.maxChunkSize
 * lines, appending them to `chunks`.
 *
 * The node's "signature" (its first line(s), up to where its first named
 * child starts) is prepended to every emitted child-run chunk via
 * buildChunkWithContext so each piece keeps its enclosing context. Children
 * that are themselves oversized are split recursively; leaf nodes with no
 * named children are emitted whole even if over the limit.
 *
 * @param {object} node - Tree-sitter node to split.
 * @param {string} filePath - Stamped onto produced chunks.
 * @param {string[]} lines - The file's lines (0-based).
 * @param {object} config - Provides maxChunkSize.
 * @param {Array<object>} chunks - Output accumulator (mutated).
 */
function splitLargeNode(node, filePath, lines, config, chunks) {
  const children = node.namedChildren;
  if (children.length === 0) {
    // No structure to split along: emit the node as a single chunk.
    const chunk = buildChunkFromNode(node, filePath, lines);
    if (chunk) chunks.push(chunk);
    return;
  }
  const signatureStartRow = node.startPosition.row;
  const firstChildRow = children[0].startPosition.row;
  // Signature ends where the first child begins; at least one line, and
  // never past the node's own end.
  const signatureEndRow = Math.min(
    firstChildRow > signatureStartRow ? firstChildRow : signatureStartRow + 1,
    node.endPosition.row
  );
  const signature = lines.slice(signatureStartRow, signatureEndRow).join("\n");
  // Buffer of consecutive children packed into one chunk up to maxChunkSize.
  let buffer = [];
  let bufferLines = 0;
  const flushBuffer = () => {
    if (buffer.length === 0) return;
    const chunk = buildChunkWithContext(signature, signatureStartRow, buffer, filePath, lines);
    if (chunk) chunks.push(chunk);
    buffer = [];
    bufferLines = 0;
  };
  for (const child of children) {
    const childLines = child.endPosition.row - child.startPosition.row + 1;
    if (childLines > config.maxChunkSize) {
      // Oversized child: flush what we have, then recurse into it.
      flushBuffer();
      splitLargeNode(child, filePath, lines, config, chunks);
      continue;
    }
    if (bufferLines + childLines > config.maxChunkSize) {
      flushBuffer();
    }
    buffer.push(child);
    bufferLines += childLines;
  }
  flushBuffer();
}
|
|
677
|
+
// Build a chunk covering exactly one AST node. Lines are 1-based inclusive;
// returns null for whitespace-only spans. `id` is filled in by a later stage.
function buildChunkFromNode(node, filePath, lines) {
  const firstLine = node.startPosition.row + 1;
  const lastLine = node.endPosition.row + 1;
  const text = lines.slice(node.startPosition.row, lastLine).join("\n");
  if (text.trim().length === 0) return null;
  return {
    id: "",
    filePath,
    startLine: firstLine,
    endLine: lastLine,
    content: text,
    chunkHash: hashContent(text)
  };
}
|
|
691
|
+
// Build one chunk spanning a run of consecutive sibling nodes, from the first
// node's start line through the last node's end line. Returns null for an
// empty run or a whitespace-only span.
function buildChunkFromNodes(nodes, filePath, lines) {
  if (nodes.length === 0) return null;
  const firstLine = nodes[0].startPosition.row + 1;
  const lastLine = nodes.at(-1).endPosition.row + 1;
  const text = lines.slice(firstLine - 1, lastLine).join("\n");
  if (text.trim().length === 0) return null;
  return {
    id: "",
    filePath,
    startLine: firstLine,
    endLine: lastLine,
    content: text,
    chunkHash: hashContent(text)
  };
}
|
|
706
|
+
// Build a chunk consisting of a parent node's signature line(s) followed by a
// run of its children. The reported line range starts at the signature row so
// the chunk maps back to a contiguous region of the file. Returns null for an
// empty run or whitespace-only body.
function buildChunkWithContext(signature, signatureRow, nodes, filePath, lines) {
  if (nodes.length === 0) return null;
  const bodyStart = nodes[0].startPosition.row + 1;
  const bodyEnd = nodes.at(-1).endPosition.row + 1;
  const body = lines.slice(bodyStart - 1, bodyEnd).join("\n");
  if (body.trim().length === 0) return null;
  const combined = signature + "\n" + body;
  return {
    id: "",
    filePath,
    // Range includes the signature rows.
    startLine: signatureRow + 1,
    endLine: bodyEnd,
    content: combined,
    chunkHash: hashContent(combined)
  };
}
|
|
723
|
+
/**
 * Parse a file and chunk it along AST boundaries, returning the live parse
 * tree and language alongside the chunks (unlike chunkFileAST, which frees
 * the tree). The chunking loop itself is identical to chunkFileAST's:
 * small nodes are merged up to config.minChunkSize, mid-size nodes become
 * one chunk each, oversized nodes go through splitLargeNode.
 *
 * @param {string} filePath - Used to pick the grammar and stamp chunks.
 * @param {string} content - File contents.
 * @param {object} config - Provides minChunkSize / maxChunkSize (in lines).
 * @returns {Promise<{chunks: Array<object>, tree: object|null, language: object|null, langId: string|null}>}
 *   When `tree` is non-null the CALLER owns it and must call tree.delete().
 */
async function parseAndChunkFile(filePath, content, config) {
  if (!content || content.trim().length === 0) {
    return { chunks: [], tree: null, language: null, langId: null };
  }
  const langConfig = getLanguageConfig(filePath);
  if (!langConfig) {
    return { chunks: [], tree: null, language: null, langId: null };
  }
  const langId = getLanguageId(filePath);
  await initTreeSitter();
  let tree;
  let language;
  try {
    const result = await parseSourceWithLanguage(content, langConfig);
    tree = result.tree;
    language = result.language;
  } catch {
    // Parse failure: still report the language id, but nothing else.
    return { chunks: [], tree: null, language: null, langId };
  }
  const lines = content.split("\n");
  const rootNode = tree.rootNode;
  // Broken parse with no named children: hand back the tree for the caller
  // to dispose, with no chunks.
  if (rootNode.hasError && rootNode.namedChildCount === 0) {
    return { chunks: [], tree, language, langId };
  }
  const chunks = [];
  const topNodes = rootNode.namedChildren;
  // Buffer of consecutive small nodes waiting to be merged into one chunk.
  let pendingNodes = [];
  let pendingLineCount = 0;
  const flushPending = () => {
    if (pendingNodes.length === 0) return;
    const chunk = buildChunkFromNodes(pendingNodes, filePath, lines);
    if (chunk) chunks.push(chunk);
    pendingNodes = [];
    pendingLineCount = 0;
  };
  for (const node of topNodes) {
    const nodeLines = node.endPosition.row - node.startPosition.row + 1;
    if (nodeLines < config.minChunkSize) {
      // Too small on its own: accumulate until the merged run is big enough.
      pendingNodes.push(node);
      pendingLineCount += nodeLines;
      if (pendingLineCount >= config.minChunkSize) {
        flushPending();
      }
      continue;
    }
    // A full-size node ends any pending run so chunks stay contiguous.
    flushPending();
    if (nodeLines <= config.maxChunkSize) {
      const chunk = buildChunkFromNode(node, filePath, lines);
      if (chunk) chunks.push(chunk);
    } else {
      splitLargeNode(node, filePath, lines, config, chunks);
    }
  }
  flushPending();
  return { chunks, tree, language, langId };
}
|
|
779
|
+
// SHA-256 hex digest of chunk text; used as the chunk's content fingerprint.
function hashContent2(content) {
  const hasher = createHash3("sha256");
  hasher.update(content, "utf-8");
  return hasher.digest("hex");
}
|
|
782
|
+
// Chunk a file using the configured strategy. AST chunking is attempted
// only for recognized languages; anything else (or an AST pass that yields
// nothing) falls back to the sliding-window chunker.
async function chunkFile(filePath, content, config) {
  if (!content || content.trim().length === 0) return [];
  const canUseAst = config.strategy === "ast" && Boolean(getLanguageConfig(filePath));
  if (canUseAst) {
    const astChunks = await chunkFileAST(filePath, content, config);
    if (astChunks.length > 0) return astChunks;
  }
  return chunkFileSlidingWindow(filePath, content, config);
}
|
|
793
|
+
// Fallback chunker: fixed-size overlapping line windows.
// Files that fit in one window become a single chunk; blank windows are
// skipped. The step between windows is windowSize - overlap (min 1).
function chunkFileSlidingWindow(filePath, content, config) {
  const lines = content.split("\n");
  const { windowSize, overlap } = config;
  if (lines.length <= windowSize) {
    return [{
      id: "",
      filePath,
      startLine: 1,
      endLine: lines.length,
      content,
      chunkHash: hashContent2(content)
    }];
  }
  const chunks = [];
  const step = Math.max(1, windowSize - overlap);
  let start = 0;
  while (start < lines.length) {
    const end = Math.min(start + windowSize, lines.length);
    const text = lines.slice(start, end).join("\n");
    if (text.trim().length > 0) {
      chunks.push({
        id: "",
        filePath,
        startLine: start + 1,
        endLine: end,
        content: text,
        chunkHash: hashContent2(text)
      });
      // The final window always reaches the end of the file.
      if (end >= lines.length) break;
    }
    start += step;
  }
  return chunks;
}
|
|
829
|
+
// Chunk a list of repository-relative files. Files that cannot be read or
// chunked are skipped silently so one bad file never aborts the batch.
async function chunkFiles(projectDir, filePaths, config) {
  const collected = [];
  for (const relPath of filePaths) {
    const absPath = path5.join(projectDir, relPath);
    try {
      const content = fs3.readFileSync(absPath, "utf-8");
      const fileChunks = await chunkFile(relPath, content, config);
      collected.push(...fileChunks);
    } catch {
      // Unreadable or unparseable file: best-effort skip.
    }
  }
  return collected;
}
|
|
843
|
+
// Deterministic chunk id: SHA-256 over location + content hash + chunker
// version, so re-chunking with a new version produces fresh ids.
function generateChunkId(filePath, startLine, endLine, chunkHash, chunkerVersion) {
  const key = [filePath, startLine, endLine, chunkHash, chunkerVersion].join(":");
  return createHash4("sha256").update(key, "utf-8").digest("hex");
}
|
|
847
|
+
// Fill in each chunk's `id` field in place using generateChunkId.
function assignChunkIds(chunks, chunkerVersion) {
  chunks.forEach((chunk) => {
    chunk.id = generateChunkId(
      chunk.filePath,
      chunk.startLine,
      chunk.endLine,
      chunk.chunkHash,
      chunkerVersion
    );
  });
}
|
|
858
|
+
// Assemble the on-disk manifest: per-file metadata plus the chunk ids
// derived from that file. Files with no chunks get an empty id list.
function buildManifest(scannedFiles, chunks, chunkerVersion) {
  const idsByFile = new Map();
  for (const chunk of chunks) {
    if (!idsByFile.has(chunk.filePath)) idsByFile.set(chunk.filePath, []);
    idsByFile.get(chunk.filePath).push(chunk.id);
  }
  const files = {};
  for (const { filePath, contentHash, mtime, sizeBytes } of scannedFiles) {
    files[filePath] = {
      contentHash,
      mtime,
      sizeBytes,
      chunkIds: idsByFile.get(filePath) ?? []
    };
  }
  return {
    version: 1,
    chunkerVersion,
    files
  };
}
|
|
880
|
+
// Load the manifest for a branch (defaulting to the current git branch).
// Returns null when the file is missing or unparseable.
function loadManifest(projectDir, branch) {
  const targetBranch = branch ?? getCurrentBranch(projectDir);
  const manifestPath = codemapManifestFile(projectDir, targetBranch);
  if (!fs4.existsSync(manifestPath)) return null;
  try {
    return JSON.parse(fs4.readFileSync(manifestPath, "utf-8"));
  } catch {
    return null;
  }
}
|
|
891
|
+
// Persist the manifest atomically: write to a .tmp sibling first, then
// rename over the real path so readers never see a half-written file.
function saveManifest(projectDir, branch, manifest) {
  const target = codemapManifestFile(projectDir, branch);
  fs4.mkdirSync(path6.dirname(target), { recursive: true });
  const staging = `${target}.tmp`;
  fs4.writeFileSync(staging, JSON.stringify(manifest, null, 2) + "\n", "utf-8");
  fs4.renameSync(staging, target);
}
|
|
899
|
+
// Compare the previous manifest against freshly scanned files.
// added: new paths; updated: same path, different content hash;
// removed: paths present before but not in the new scan.
// With no previous manifest everything counts as added.
function diffManifest(oldManifest, newFiles) {
  if (!oldManifest) {
    return { added: newFiles, updated: [], removed: [] };
  }
  const previous = oldManifest.files;
  const added = [];
  const updated = [];
  for (const file of newFiles) {
    const prior = previous[file.filePath];
    if (!prior) {
      added.push(file);
    } else if (prior.contentHash !== file.contentHash) {
      updated.push(file);
    }
  }
  const currentPaths = new Set(newFiles.map((f) => f.filePath));
  const removed = Object.keys(previous).filter((p) => !currentPaths.has(p));
  return { added, updated, removed };
}
|
|
923
|
+
// Instantiate the embedding backend for the configured provider.
// Each backend is loaded lazily so unused providers are never imported.
// Throws on an unrecognized provider name.
async function createEmbedder(config) {
  const { provider, model, dimensions } = config;
  if (provider === "openai") {
    const { OpenAIEmbedder: OpenAIEmbedder2 } = await import("./openai-E7G2YAHU-UYY4ZWON.js");
    return new OpenAIEmbedder2(model, dimensions);
  }
  if (provider === "ollama") {
    const { OllamaEmbedder: OllamaEmbedder2 } = await import("./ollama-3XCUZMZT-FYKHW4TZ.js");
    return new OllamaEmbedder2(model, dimensions);
  }
  if (provider === "ulpi") {
    const { UlpiEmbedder } = await import("./ulpi-RMMCUAGP-JCJ273T6.js");
    return new UlpiEmbedder(model, dimensions, config.baseUrl);
  }
  throw new Error(`Unknown embedding provider: ${provider}`);
}
|
|
941
|
+
// LanceDB-backed vector store holding embedded code chunks for one
// project/branch. Call initialize() before using any other method.
var CodemapStore = class {
  // LanceDB connection handle; null until initialize() runs.
  db = null;
  // Handle to the "chunks" table; null until opened by initialize() or
  // created by the first upsertItems() call.
  table = null;
  // Filesystem directory containing the LanceDB dataset.
  indexDir;
  // Resolves the branch (defaulting to the repo's current git branch) and
  // derives the on-disk index location. No I/O happens here.
  constructor(projectDir, branch) {
    const resolvedBranch = branch ?? getCurrentBranch(projectDir);
    this.indexDir = codemapLanceDir(projectDir, resolvedBranch);
  }
  /**
   * Initialize the vector store. Creates the directory and opens the LanceDB connection.
   * If a "chunks" table already exists, opens it.
   */
  async initialize() {
    fs5.mkdirSync(this.indexDir, { recursive: true });
    this.db = await connect(this.indexDir);
    const tables = await this.db.tableNames();
    if (tables.includes("chunks")) {
      this.table = await this.db.openTable("chunks");
    }
  }
  /**
   * Drop the existing chunks table so the next upsert creates a fresh one.
   * Used by full reindex when dimensions or schema change.
   */
  async dropTable() {
    if (!this.db) return;
    const tables = await this.db.tableNames();
    if (tables.includes("chunks")) {
      await this.db.dropTable("chunks");
      this.table = null;
    }
  }
  /**
   * Add or update items in the store.
   * Flattens metadata into top-level record columns for LanceDB.
   * Uses merge-insert (upsert) semantics keyed on `id`.
   * NOTE(review): assumes initialize() has been called — this.db is
   * dereferenced without a null check when the table must be created.
   */
  async upsertItems(items) {
    if (items.length === 0) return;
    const records = items.map((item) => ({
      id: item.id,
      vector: item.vector,
      filePath: item.metadata.filePath,
      startLine: item.metadata.startLine,
      endLine: item.metadata.endLine,
      snippet: item.metadata.snippet
    }));
    if (!this.table) {
      // First write: creating the table also fixes its schema from `records`.
      this.table = await this.db.createTable("chunks", records);
    } else {
      await this.table.mergeInsert("id").whenMatchedUpdateAll().whenNotMatchedInsertAll().execute(records);
    }
  }
  /**
   * Remove items by their chunk IDs.
   * Uses SQL IN clause with properly escaped string literals.
   */
  async removeItems(chunkIds) {
    if (!this.table || chunkIds.length === 0) return;
    // Single quotes are doubled to escape them inside SQL string literals.
    const quoted = chunkIds.map((id) => "'" + id.replace(/'/g, "''") + "'").join(", ");
    await this.table.delete("id IN (" + quoted + ")");
  }
  /**
   * Query the store for similar vectors.
   * Returns results sorted by score (highest first, cosine similarity).
   */
  async query(vector, topK = 10) {
    if (!this.table) return [];
    const results = await this.table.vectorSearch(vector).distanceType("cosine").limit(topK).toArray();
    return results.map((r) => ({
      id: r.id,
      filePath: r.filePath,
      startLine: r.startLine,
      endLine: r.endLine,
      snippet: r.snippet,
      // Convert cosine distance to similarity; a missing distance scores 1.
      score: 1 - (r._distance ?? 0)
    }));
  }
  /**
   * Get the approximate number of items in the store.
   */
  async getItemCount() {
    if (!this.table) return 0;
    return this.table.countRows();
  }
  /**
   * Get the index directory size in bytes.
   * Walks recursively since LanceDB stores data in subdirectories with fragment files.
   */
  getIndexSizeBytes() {
    if (!fs5.existsSync(this.indexDir)) return 0;
    return walkDirSize(this.indexDir);
  }
};
|
|
1035
|
+
// Recursively total the size in bytes of every regular file under `dir`.
// Entries that disappear or cannot be stat'ed mid-walk are skipped.
function walkDirSize(dir) {
  let total = 0;
  for (const name of fs5.readdirSync(dir)) {
    const entryPath = path7.join(dir, name);
    let info;
    try {
      info = fs5.statSync(entryPath);
    } catch {
      continue;
    }
    if (info.isDirectory()) {
      total += walkDirSize(entryPath);
    } else if (info.isFile()) {
      total += info.size;
    }
  }
  return total;
}
|
|
1052
|
+
// Atomically persist the index schema descriptor (tmp write + rename).
function saveSchema(projectDir, branch, schema) {
  const target = codemapSchemaFile(projectDir, branch);
  fs5.mkdirSync(path7.dirname(target), { recursive: true });
  const staging = `${target}.tmp`;
  fs5.writeFileSync(staging, JSON.stringify(schema, null, 2) + "\n", "utf-8");
  fs5.renameSync(staging, target);
}
|
|
1060
|
+
// Read the stored schema descriptor; null when absent or corrupt.
function loadSchema(projectDir, branch) {
  const schemaPath = codemapSchemaFile(projectDir, branch);
  if (!fs5.existsSync(schemaPath)) return null;
  let raw;
  try {
    raw = fs5.readFileSync(schemaPath, "utf-8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
1069
|
+
// Coarse language bucket by file extension (case-insensitive).
// All JS/TS variants map to "typescript"; anything unrecognized is "unknown".
function getLanguage(filePath) {
  switch (path8.extname(filePath).toLowerCase()) {
    case ".ts":
    case ".tsx":
    case ".js":
    case ".jsx":
    case ".mts":
    case ".mjs":
    case ".cts":
    case ".cjs":
      return "typescript";
    case ".py":
      return "python";
    case ".go":
      return "go";
    default:
      return "unknown";
  }
}
|
|
1076
|
+
// Regex-based symbol extractors consumed by extractSymbols(). Every
// pattern carries the g+m flags so ^ anchors at each line start and
// matches iterate across the whole chunk; capture group 1 is always the
// symbol name.
// TypeScript/JavaScript: top-level declarations, plus a final
// indented-method heuristic — NOTE(review): that last pattern can also
// match control-flow lines like `if (...) {`; confirm downstream filtering.
var TS_PATTERNS = [
  { pattern: /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/gm, type: "function" },
  { pattern: /^(?:export\s+)?class\s+(\w+)/gm, type: "class" },
  { pattern: /^(?:export\s+)?interface\s+(\w+)/gm, type: "interface" },
  { pattern: /^(?:export\s+)?type\s+(\w+)\s*[=<]/gm, type: "type" },
  { pattern: /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*[=:]/gm, type: "const" },
  { pattern: /^(?:export\s+)?enum\s+(\w+)/gm, type: "enum" },
  { pattern: /^\s+(?:async\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*\w+)?\s*\{/gm, type: "method" }
];
// Python: only column-0 (module-level) defs and classes are captured;
// methods inside classes are intentionally not matched.
var PY_PATTERNS = [
  { pattern: /^(?:async\s+)?def\s+(\w+)/gm, type: "function" },
  { pattern: /^class\s+(\w+)/gm, type: "class" }
];
// Go: functions and methods (optional receiver), plus struct and
// interface type declarations.
var GO_PATTERNS = [
  { pattern: /^func\s+(?:\([^)]+\)\s+)?(\w+)/gm, type: "function" },
  { pattern: /^type\s+(\w+)\s+struct\b/gm, type: "class" },
  { pattern: /^type\s+(\w+)\s+interface\b/gm, type: "interface" }
];
|
|
1094
|
+
// Extract named symbols from one chunk's text using per-language regex
// patterns. Line numbers are reported in file coordinates: `startLine` is
// the file line where the chunk begins, and `endLine` caps how far a
// symbol may extend. Names starting with "_" are treated as private and
// skipped. Returns a flat list of symbol records tied to `chunkId`.
function extractSymbols(filePath, content, startLine, endLine, chunkId) {
  const lang = getLanguage(filePath);
  if (lang === "unknown") return [];
  const patterns = lang === "typescript" ? TS_PATTERNS : lang === "python" ? PY_PATTERNS : GO_PATTERNS;
  const symbols = [];
  const lines = content.split("\n");
  for (const { pattern, type } of patterns) {
    // Fresh RegExp per pattern so the shared /g pattern's lastIndex
    // state never leaks between calls.
    const regex = new RegExp(pattern.source, pattern.flags);
    let match;
    while ((match = regex.exec(content)) !== null) {
      const name = match[1];
      if (!name || name.startsWith("_")) continue;
      // Count newlines before the match to get its 0-based line offset
      // within the chunk.
      const beforeMatch = content.slice(0, match.index);
      const lineOffset = beforeMatch.split("\n").length - 1;
      const symbolLine = startLine + lineOffset;
      let symbolEndLine = symbolLine;
      // Heuristic end line: scan forward until the next line that starts
      // a new top-level declaration. NOTE(review): this checks raw line
      // starts only, so indentation/braces are ignored — end lines are
      // approximate, and the scan runs to the chunk end when no later
      // declaration exists.
      for (let i = lineOffset + 1; i < lines.length; i++) {
        symbolEndLine = startLine + i;
        if (i > lineOffset + 1 && lines[i].match(/^(?:export|function|class|interface|type|const|def |func )/)) {
          symbolEndLine = startLine + i - 1;
          break;
        }
      }
      symbols.push({
        name,
        symbolType: type,
        filePath,
        startLine: symbolLine,
        // Clamp to the chunk's own end so a symbol never claims lines
        // beyond the chunk boundary.
        endLine: Math.min(symbolEndLine, endLine),
        chunkId
      });
    }
  }
  return symbols;
}
|
|
1129
|
+
// Extract symbols from every chunk, deduplicating by name + file + line
// (overlapping chunks can report the same symbol twice). First
// occurrence wins, preserving chunk order.
function extractSymbolsFromChunks(chunks) {
  const seen = new Set();
  const unique = [];
  for (const chunk of chunks) {
    const found = extractSymbols(
      chunk.filePath,
      chunk.content,
      chunk.startLine,
      chunk.endLine,
      chunk.id
    );
    for (const sym of found) {
      const key = `${sym.name}:${sym.filePath}:${sym.startLine}`;
      if (seen.has(key)) continue;
      seen.add(key);
      unique.push(sym);
    }
  }
  return unique;
}
|
|
1149
|
+
// Location of the JSONL symbol index inside the branch metadata dir.
function symbolIndexPath(projectDir, branch) {
  const metadataDir = codemapMetadataDir(projectDir, branch);
  return path8.join(metadataDir, "symbols.jsonl");
}
|
|
1152
|
+
// Write the symbol index as JSONL (one symbol per line), atomically via
// tmp-file + rename.
function saveSymbolIndex(projectDir, branch, symbols) {
  const target = symbolIndexPath(projectDir, branch);
  fs6.mkdirSync(path8.dirname(target), { recursive: true });
  const payload = symbols.map((s) => JSON.stringify(s)).join("\n") + "\n";
  const staging = `${target}.tmp`;
  fs6.writeFileSync(staging, payload, "utf-8");
  fs6.renameSync(staging, target);
}
|
|
1161
|
+
// Read the JSONL symbol index for a branch (defaulting to the current
// git branch). Missing or corrupt files yield an empty list.
function loadSymbolIndex(projectDir, branch) {
  const targetBranch = branch ?? getCurrentBranch(projectDir);
  const indexPath = symbolIndexPath(projectDir, targetBranch);
  if (!fs6.existsSync(indexPath)) return [];
  try {
    const raw = fs6.readFileSync(indexPath, "utf-8");
    return raw.split("\n").filter((line) => line.trim()).map((line) => JSON.parse(line));
  } catch {
    return [];
  }
}
|
|
1172
|
+
// Case-insensitive symbol lookup over the on-disk index.
// Scoring: exact name 1.0, prefix 0.8, substring 0.5; ties broken by
// name. Optional filters: symbolType, pathPrefix, limit (default 20).
function searchSymbols(projectDir, query, options, branch) {
  const limit = options?.limit ?? 20;
  const needle = query.toLowerCase();
  const matches = [];
  for (const symbol of loadSymbolIndex(projectDir, branch)) {
    if (options?.symbolType && symbol.symbolType !== options.symbolType) continue;
    if (options?.pathPrefix && !symbol.filePath.startsWith(options.pathPrefix)) continue;
    const candidate = symbol.name.toLowerCase();
    let score;
    if (candidate === needle) {
      score = 1;
    } else if (candidate.startsWith(needle)) {
      score = 0.8;
    } else if (candidate.includes(needle)) {
      score = 0.5;
    } else {
      continue;
    }
    matches.push({ ...symbol, score });
  }
  matches.sort((a, b) => b.score - a.score || a.name.localeCompare(b.name));
  return matches.slice(0, limit);
}
|
|
1199
|
+
// Okapi BM25 ranking constants (standard defaults).
// k1 controls term-frequency saturation; b controls document-length
// normalization strength.
var BM25_K1 = 1.2;
var BM25_B = 0.75;
// Common English stop words dropped by tokenize(); they carry little
// ranking signal and would otherwise dominate document frequencies.
var STOP_WORDS = /* @__PURE__ */ new Set([
  "the",
  "is",
  "a",
  "an",
  "in",
  "on",
  "of",
  "to",
  "for",
  "it",
  "and",
  "or",
  "not",
  "with",
  "as",
  "by",
  "from",
  "at",
  "be",
  "this",
  "that",
  "are",
  "was",
  "were",
  "has",
  "have",
  "had",
  "do",
  "does",
  "did",
  "will",
  "would",
  "can",
  "could",
  "may",
  "might",
  "shall",
  "should"
]);
|
|
1241
|
+
// Lowercase the text, split on non-alphanumeric runs, and keep tokens of
// length >= 2 that are not stop words.
function tokenize(text) {
  const rawTokens = text.toLowerCase().split(/[^a-z0-9]+/);
  return rawTokens.filter((token) => token.length >= 2 && !STOP_WORDS.has(token));
}
|
|
1244
|
+
// Build a BM25 index from scratch. Tracks per-document term frequencies
// (tf), document frequencies (df), document lengths, file paths, and the
// average document length used for length normalization.
function buildBM25Index(documents) {
  const index = {
    docCount: 0,
    avgDocLen: 0,
    df: {},
    tf: {},
    docLen: {},
    docPaths: {}
  };
  if (documents.length === 0) return index;
  let totalLen = 0;
  for (const { id, filePath, terms } of documents) {
    index.docCount += 1;
    index.docPaths[id] = filePath;
    index.docLen[id] = terms.length;
    totalLen += terms.length;
    const counts = {};
    for (const term of terms) {
      counts[term] = (counts[term] ?? 0) + 1;
    }
    index.tf[id] = counts;
    // Each distinct term in this document bumps its df by exactly one.
    for (const term of Object.keys(counts)) {
      index.df[term] = (index.df[term] ?? 0) + 1;
    }
  }
  index.avgDocLen = totalLen / index.docCount;
  return index;
}
|
|
1273
|
+
// Incrementally add documents to an existing BM25 index, mutating it in
// place. Re-adding an id first removes the stale copy. avgDocLen is
// recomputed once at the end from the running length total.
function addDocuments(index, documents) {
  let totalLen = index.avgDocLen * index.docCount;
  for (const doc of documents) {
    if (index.tf[doc.id]) {
      // Replace semantics: drop the old version, then resync the running
      // total from the (freshly recomputed) average.
      removeDocuments(index, [doc.id]);
      totalLen = index.avgDocLen * index.docCount;
    }
    index.docCount += 1;
    index.docPaths[doc.id] = doc.filePath;
    index.docLen[doc.id] = doc.terms.length;
    totalLen += doc.terms.length;
    const counts = {};
    for (const term of doc.terms) {
      counts[term] = (counts[term] ?? 0) + 1;
    }
    index.tf[doc.id] = counts;
    for (const term of new Set(doc.terms)) {
      index.df[term] = (index.df[term] ?? 0) + 1;
    }
  }
  index.avgDocLen = index.docCount > 0 ? totalLen / index.docCount : 0;
}
|
|
1296
|
+
// Remove documents from a BM25 index in place. Document frequencies are
// decremented per distinct term (and dropped at zero), then avgDocLen is
// recomputed from the surviving document lengths. Unknown ids are ignored.
function removeDocuments(index, docIds) {
  for (const docId of docIds) {
    const counts = index.tf[docId];
    if (!counts) continue;
    for (const term of Object.keys(counts)) {
      const df = index.df[term];
      if (df === void 0) continue;
      if (df <= 1) {
        delete index.df[term];
      } else {
        index.df[term] = df - 1;
      }
    }
    delete index.tf[docId];
    delete index.docLen[docId];
    delete index.docPaths[docId];
    index.docCount -= 1;
  }
  if (index.docCount <= 0) {
    index.avgDocLen = 0;
    return;
  }
  let totalLen = 0;
  for (const len of Object.values(index.docLen)) {
    totalLen += len;
  }
  index.avgDocLen = totalLen / index.docCount;
}
|
|
1323
|
+
// Score every document containing at least one query term with Okapi
// BM25 and return the topK highest scorers. Unknown terms contribute
// nothing; an empty index or query yields no results.
function queryBM25(index, queryTerms, topK) {
  if (index.docCount === 0 || queryTerms.length === 0) return [];
  const scores = {};
  for (const term of queryTerms) {
    const docFreq = index.df[term] ?? 0;
    if (docFreq === 0) continue;
    const idf = Math.log((index.docCount - docFreq + 0.5) / (docFreq + 0.5) + 1);
    for (const docId of Object.keys(index.tf)) {
      const tf = index.tf[docId][term] ?? 0;
      if (tf === 0) continue;
      const dl = index.docLen[docId];
      const lengthNorm = 1 - BM25_B + BM25_B * dl / index.avgDocLen;
      const termScore = idf * (tf * (BM25_K1 + 1)) / (tf + BM25_K1 * lengthNorm);
      scores[docId] = (scores[docId] ?? 0) + termScore;
    }
  }
  const ranked = Object.entries(scores).sort((a, b) => b[1] - a[1]).slice(0, topK);
  return ranked.map(([id, score]) => ({
    id,
    filePath: index.docPaths[id] ?? "",
    score
  }));
}
|
|
1346
|
+
// Location of the serialized BM25 index inside the branch metadata dir.
function bm25IndexPath(projectDir, branch) {
  const metadataDir = codemapMetadataDir(projectDir, branch);
  return path9.join(metadataDir, "bm25.json");
}
|
|
1349
|
+
// Persist the BM25 index atomically (tmp write, then rename).
function saveBM25Index(projectDir, branch, index) {
  const target = bm25IndexPath(projectDir, branch);
  fs7.mkdirSync(path9.dirname(target), { recursive: true });
  const staging = `${target}.tmp`;
  fs7.writeFileSync(staging, JSON.stringify(index) + "\n", "utf-8");
  fs7.renameSync(staging, target);
}
|
|
1357
|
+
// Load a branch's BM25 index (branch defaults to the current git
// branch). Returns null when the file is missing or corrupt.
function loadBM25Index(projectDir, branch) {
  const targetBranch = branch ?? getCurrentBranch(projectDir);
  const indexPath = bm25IndexPath(projectDir, targetBranch);
  if (!fs7.existsSync(indexPath)) return null;
  try {
    return JSON.parse(fs7.readFileSync(indexPath, "utf-8"));
  } catch {
    return null;
  }
}
|
|
1367
|
+
// A held lock whose timestamp is older than this is considered abandoned
// and may be removed (ms).
var STALE_TIMEOUT_MS = 6e4;
// Maximum total time acquireCodemapLock() waits before throwing (ms).
var ACQUIRE_TIMEOUT_MS = 1e4;
// Delay between lock acquisition attempts (ms).
var POLL_INTERVAL_MS = 500;
|
|
1370
|
+
// Build a filesystem-safe slug identifying a project (and optional
// branch): sanitized basename [+ sanitized branch] + 16-hex-char hash of
// the resolved path (and branch) for uniqueness across same-named dirs.
function sanitizeSlug(projectDir, branch) {
  const absDir = path10.resolve(projectDir);
  const hashInput = branch ? `${absDir}:${branch}` : absDir;
  const hash = createHash5("sha256").update(hashInput).digest("hex").slice(0, 16);
  const name = path10.basename(projectDir).replace(/[^a-z0-9-]/gi, "-").toLowerCase();
  if (!branch) return `${name}-${hash}`;
  const branchSlug = branch.replace(/[^a-z0-9-]/gi, "-").toLowerCase();
  return `${name}-${branchSlug}-${hash}`;
}
|
|
1376
|
+
// Lock file location in the OS temp dir, keyed by the project/branch slug.
function lockFilePath(projectDir, branch) {
  const slug = sanitizeSlug(projectDir, branch);
  return path10.join(os.tmpdir(), `ulpi-codemap-${slug}.lock`);
}
|
|
1380
|
+
// Probe whether a pid refers to a live process. Signal 0 performs the
// existence/permission check without delivering a signal.
// NOTE(review): an EPERM (process exists but owned by another user) also
// lands in the catch and reports "not alive" — confirm that is intended.
function isProcessAlive(pid) {
  let alive = true;
  try {
    process.kill(pid, 0);
  } catch {
    alive = false;
  }
  return alive;
}
|
|
1388
|
+
// A lock file is stale when its holder process is gone, its timestamp is
// older than STALE_TIMEOUT_MS, or the file cannot be read/parsed at all.
function isLockStale(lockPath) {
  try {
    const lock = JSON.parse(fs8.readFileSync(lockPath, "utf-8"));
    if (!isProcessAlive(lock.pid)) return true;
    const heldForMs = Date.now() - new Date(lock.timestamp).getTime();
    return heldForMs > STALE_TIMEOUT_MS;
  } catch {
    // Unreadable or malformed lock data: treat as stale.
    return true;
  }
}
|
|
1400
|
+
// Synchronous, CPU-friendly sleep. The original implementation spun in a
// `while (Date.now() < waitUntil) {}` loop, burning 100% CPU for the whole
// poll interval; Atomics.wait blocks the thread instead (Node.js permits
// it on the main thread, unlike browsers).
function sleepSyncMs(ms) {
  const gate = new Int32Array(new SharedArrayBuffer(4));
  Atomics.wait(gate, 0, 0, ms);
}
/**
 * Acquire the per-project/branch codemap lock, waiting up to
 * ACQUIRE_TIMEOUT_MS. Stale locks (dead holder or older than
 * STALE_TIMEOUT_MS) are removed and retried. The actual acquisition is
 * the exclusive "wx" write, which fails atomically if another process
 * created the file first.
 * @throws {Error} when the lock cannot be acquired within the timeout.
 */
function acquireCodemapLock(projectDir, branch) {
  const lockPath = lockFilePath(projectDir, branch);
  const startTime = Date.now();
  while (true) {
    if (fs8.existsSync(lockPath)) {
      if (isLockStale(lockPath)) {
        try {
          fs8.unlinkSync(lockPath);
        } catch {
          // Another process may have removed it first; retry below.
        }
      } else {
        if (Date.now() - startTime > ACQUIRE_TIMEOUT_MS) {
          throw new Error("Timeout acquiring codemap lock");
        }
        sleepSyncMs(POLL_INTERVAL_MS);
        continue;
      }
    }
    const lockData = {
      pid: process.pid,
      timestamp: new Date().toISOString()
    };
    try {
      // "wx" fails if the file already exists, making this step atomic.
      fs8.writeFileSync(lockPath, JSON.stringify(lockData), { flag: "wx" });
      return;
    } catch {
      if (Date.now() - startTime > ACQUIRE_TIMEOUT_MS) {
        throw new Error("Timeout acquiring codemap lock");
      }
      sleepSyncMs(POLL_INTERVAL_MS);
    }
  }
}
|
|
1437
|
+
// Release the codemap lock, but only if this process is the holder —
// another process's live lock is never deleted.
function releaseCodemapLock(projectDir, branch) {
  const lockPath = lockFilePath(projectDir, branch);
  try {
    const holder = JSON.parse(fs8.readFileSync(lockPath, "utf-8"));
    if (holder.pid === process.pid) {
      fs8.unlinkSync(lockPath);
    }
  } catch {
    // Lock already gone or unreadable: nothing to release.
  }
}
|
|
1448
|
+
// True when a live (non-stale) lock file exists for this project/branch.
function isCodemapLocked(projectDir, branch) {
  const lockPath = lockFilePath(projectDir, branch);
  return fs8.existsSync(lockPath) && !isLockStale(lockPath);
}
|
|
1453
|
+
// Append a timestamped line to the codemap log file. All failures are
// swallowed: logging must never break the pipeline.
function log(level, message) {
  try {
    fs9.mkdirSync(LOGS_DIR, { recursive: true });
    const stamp = new Date().toISOString();
    const entry = `[${stamp}] [${level.toUpperCase()}] ${message}\n`;
    fs9.appendFileSync(path11.join(LOGS_DIR, "codemap.log"), entry);
  } catch {
    // best-effort only
  }
}
|
|
1463
|
+
// Compose the text embedded for each chunk: a header with the file path
// and (when present) a "name (type)" list of the chunk's symbols, a blank
// separator line, then the chunk content itself.
function buildEmbeddingTexts(chunks, symbols) {
  const byChunk = new Map();
  for (const sym of symbols) {
    if (!byChunk.has(sym.chunkId)) byChunk.set(sym.chunkId, []);
    byChunk.get(sym.chunkId).push(sym);
  }
  return chunks.map((chunk) => {
    const names = (byChunk.get(chunk.id) ?? [])
      .map((s) => `${s.name} (${s.symbolType})`)
      .join(", ");
    const header = names
      ? `${chunk.filePath}\n${names}\n\n`
      : `${chunk.filePath}\n\n`;
    return header + chunk.content;
  });
}
|
|
1482
|
+
// Run the full index pipeline under the per-branch codemap lock. The
// lock is always released, even when the inner pipeline throws.
async function runInitPipeline(projectDir, onProgress, branch) {
  const targetBranch = branch ?? getCurrentBranch(projectDir);
  acquireCodemapLock(projectDir, targetBranch);
  let result;
  try {
    result = await runPipelineInner(projectDir, onProgress, targetBranch);
  } finally {
    releaseCodemapLock(projectDir, targetBranch);
  }
  return result;
}
|
|
1491
|
+
/**
 * Full (from-scratch) indexing pipeline: scan -> chunk -> symbol/BM25 index ->
 * embed + store vectors -> optional dependency graph/PageRank/metrics.
 * Caller (runInitPipeline) is expected to hold the codemap lock.
 *
 * @param projectDir Project root directory.
 * @param onProgress Optional callback receiving {phase, current, total, message}.
 * @param branch Branch to index; defaults to the current git branch.
 * @returns Summary: {totalFiles, totalChunks, durationMs, embeddingProvider,
 *          embeddingModel, depgraph?}.
 * @throws Rethrows an embedding failure AFTER stats/manifest are persisted,
 *         so a partial index remains usable.
 */
async function runPipelineInner(projectDir, onProgress, branch) {
  const startTime = Date.now();
  const config = loadCodemapConfig(projectDir);
  const resolvedBranch = branch ?? getCurrentBranch(projectDir);
  const codemapDir = codemapBranchDir(projectDir, resolvedBranch);
  fs9.mkdirSync(codemapDir, { recursive: true });
  onProgress?.({
    phase: "scanning",
    current: 0,
    total: 0,
    message: "Scanning repository..."
  });
  const scannedFiles = scanRepository(projectDir, config);
  onProgress?.({
    phase: "scanning",
    current: scannedFiles.length,
    total: scannedFiles.length,
    message: `Found ${scannedFiles.length} files`
  });
  // Empty repository: still persist an empty manifest + stats so status
  // reporting works, then bail out early.
  if (scannedFiles.length === 0) {
    const emptyManifest = buildManifest([], [], config.chunking.version);
    saveManifest(projectDir, resolvedBranch, emptyManifest);
    writeStats(projectDir, resolvedBranch, {
      totalFiles: 0,
      totalChunks: 0,
      staleFiles: 0,
      indexSizeBytes: 0,
      lastFullIndexMs: Date.now() - startTime,
      lastUpdated: (/* @__PURE__ */ new Date()).toISOString()
    });
    return {
      totalFiles: 0,
      totalChunks: 0,
      durationMs: Date.now() - startTime,
      embeddingProvider: config.embedding.provider,
      embeddingModel: config.embedding.model
    };
  }
  onProgress?.({
    phase: "chunking",
    current: 0,
    total: scannedFiles.length,
    message: "Chunking files..."
  });
  // Tag extraction (for the dependency graph) is optional: if the depgraph
  // module fails to load we silently continue without it.
  let extractTagsFromTree = null;
  try {
    const depgraph = await import("./dist-R5ZJ4LX5.js");
    extractTagsFromTree = depgraph.extractTagsFromTree;
  } catch {
  }
  await initTreeSitter();
  const allTags = /* @__PURE__ */ new Map();
  const chunks = [];
  const filePaths = scannedFiles.map((f) => f.filePath);
  for (let i = 0; i < filePaths.length; i++) {
    const filePath = filePaths[i];
    const absPath = path11.join(projectDir, filePath);
    let content;
    try {
      content = fs9.readFileSync(absPath, "utf-8");
    } catch {
      // Unreadable (deleted/permission/binary-as-utf8 error): skip the file.
      continue;
    }
    const langConfig = getLanguageConfig(filePath);
    const langId = getLanguageId(filePath);
    if (langConfig) {
      // Known language: parse with tree-sitter and chunk along the AST.
      const result = await parseAndChunkFile(filePath, content, config.chunking);
      if (result.chunks.length > 0) {
        chunks.push(...result.chunks);
      } else {
        // Parser produced nothing usable: fall back to plain-text chunking.
        const fallbackChunks = await chunkFile(filePath, content, config.chunking);
        chunks.push(...fallbackChunks);
      }
      // Harvest definition/reference tags from the parse tree while we still
      // have it; failures here only cost the depgraph, not the index.
      if (extractTagsFromTree && result.tree && result.language && langId && langConfig.hasTagQuery) {
        try {
          const lang = result.language;
          const createQuery = (source) => lang.query(source);
          const tags = extractTagsFromTree(createQuery, result.tree.rootNode, filePath, langId);
          if (tags.length > 0) {
            allTags.set(filePath, tags);
          }
        } catch {
        }
      }
      // Tree-sitter trees are WASM-backed; free them explicitly.
      if (result.tree) {
        result.tree.delete();
      }
    } else {
      // Unknown language: plain-text chunking only.
      const fileChunks = await chunkFile(filePath, content, config.chunking);
      chunks.push(...fileChunks);
    }
    // Throttle progress updates to every 100 files (plus the last one).
    if ((i + 1) % 100 === 0 || i === filePaths.length - 1) {
      onProgress?.({
        phase: "chunking",
        current: i + 1,
        total: scannedFiles.length,
        message: `Chunked ${i + 1}/${scannedFiles.length} files (${chunks.length} chunks)`
      });
    }
  }
  onProgress?.({
    phase: "chunking",
    current: scannedFiles.length,
    total: scannedFiles.length,
    message: `Created ${chunks.length} chunks`
  });
  assignChunkIds(chunks, config.chunking.version);
  onProgress?.({
    phase: "finalizing",
    current: 0,
    total: 3,
    message: "Extracting symbols..."
  });
  const symbols = extractSymbolsFromChunks(chunks);
  saveSymbolIndex(projectDir, resolvedBranch, symbols);
  onProgress?.({
    phase: "finalizing",
    current: 1,
    total: 3,
    message: "Building BM25 index..."
  });
  // BM25 documents index both the path and the content of each chunk.
  const bm25Docs = chunks.map((c) => ({
    id: c.id,
    filePath: c.filePath,
    terms: tokenize(c.filePath + " " + c.content)
  }));
  const bm25Index = buildBM25Index(bm25Docs);
  saveBM25Index(projectDir, resolvedBranch, bm25Index);
  const store = new CodemapStore(projectDir, resolvedBranch);
  await store.initialize();
  // Full rebuild: drop any previous vector table before re-embedding.
  await store.dropTable();
  const embedder = await createEmbedder(config.embedding);
  const schema = {
    formatVersion: 1,
    vectorStore: { engine: "lancedb", dimensions: embedder.dimensions },
    embedding: { provider: embedder.provider, model: embedder.model },
    createdAt: (/* @__PURE__ */ new Date()).toISOString(),
    updatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
  saveSchema(projectDir, resolvedBranch, schema);
  const manifest = buildManifest(scannedFiles, chunks, config.chunking.version);
  saveManifest(projectDir, resolvedBranch, manifest);
  saveCodemapConfig(projectDir, config);
  const allTexts = buildEmbeddingTexts(chunks, symbols);
  const totalChars = allTexts.reduce((sum, t) => sum + t.length, 0);
  const avgChunkChars = Math.round(totalChars / (allTexts.length || 1));
  const maxChunkChars = allTexts.length > 0 ? Math.max(...allTexts.map((t) => t.length)) : 0;
  log("info", `Embedding ${allTexts.length} chunks: totalChars=${totalChars}, avgChars=${avgChunkChars}, maxChars=${maxChunkChars}, provider=${embedder.provider}, model=${embedder.model}, dims=${embedder.dimensions}`);
  // Embedding may partially fail; remember the error but keep going so the
  // depgraph/stats phases still complete, then rethrow at the end.
  let embeddingError = null;
  try {
    await embedAndStore(embedder, allTexts, chunks, store, config.batch, onProgress);
  } catch (err) {
    embeddingError = err instanceof Error ? err : new Error(String(err));
    log("warn", `Embedding incomplete: ${embeddingError.message}`);
  }
  // Optional dependency-graph phase: only runs when tag extraction yielded
  // anything; any failure is logged and ignored.
  let depgraphResult;
  if (allTags.size > 0) {
    try {
      const depgraph = await import("./dist-R5ZJ4LX5.js");
      onProgress?.({
        phase: "graph",
        current: 0,
        total: 3,
        message: "Building dependency graph..."
      });
      const graph = depgraph.buildReferenceGraph(allTags, filePaths);
      // Annotate graph nodes with on-disk file sizes from the scan.
      for (const sf of scannedFiles) {
        if (graph.nodes[sf.filePath]) {
          graph.nodes[sf.filePath].sizeBytes = sf.sizeBytes;
        }
      }
      depgraph.saveGraph(graph, projectDir, resolvedBranch);
      onProgress?.({
        phase: "graph",
        current: 1,
        total: 3,
        message: `Graph: ${Object.keys(graph.nodes).length} nodes, ${graph.edges.length} edges`
      });
      onProgress?.({
        phase: "ranking",
        current: 1,
        total: 3,
        message: "Computing PageRank..."
      });
      const pageRank = depgraph.computePageRank(graph);
      depgraph.savePageRank(pageRank, projectDir, resolvedBranch);
      onProgress?.({
        phase: "ranking",
        current: 2,
        total: 3,
        message: `PageRank: ${pageRank.iterations} iterations, converged=${pageRank.converged}`
      });
      onProgress?.({
        phase: "metrics",
        current: 2,
        total: 3,
        message: "Computing metrics..."
      });
      const metrics = depgraph.computeMetrics(graph, pageRank);
      depgraph.saveMetrics(metrics, projectDir, resolvedBranch);
      onProgress?.({
        phase: "metrics",
        current: 3,
        total: 3,
        message: `Metrics: ${metrics.cycles.length} cycles, ${metrics.totalDefinitions} defs, ${metrics.totalReferences} refs`
      });
      depgraphResult = {
        nodeCount: metrics.totalFiles,
        edgeCount: metrics.totalEdges,
        definitionCount: metrics.totalDefinitions,
        referenceCount: metrics.totalReferences,
        cycleCount: metrics.cycles.length
      };
      log("info", `DepGraph: ${metrics.totalFiles} nodes, ${metrics.totalEdges} edges, ${metrics.totalDefinitions} defs, ${metrics.totalReferences} refs, ${metrics.cycles.length} cycles`);
    } catch (err) {
      log("warn", `DepGraph failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
  const durationMs = Date.now() - startTime;
  // storedCount may be below chunks.length when embedding failed part-way;
  // the difference is reported as staleFiles.
  const storedCount = await store.getItemCount();
  const stats = {
    totalFiles: scannedFiles.length,
    totalChunks: storedCount,
    staleFiles: embeddingError ? chunks.length - storedCount : 0,
    indexSizeBytes: store.getIndexSizeBytes(),
    lastFullIndexMs: durationMs,
    lastUpdated: (/* @__PURE__ */ new Date()).toISOString()
  };
  writeStats(projectDir, resolvedBranch, stats);
  if (embeddingError) {
    onProgress?.({
      phase: "finalizing",
      current: 3,
      total: 3,
      message: `Partial: ${storedCount}/${chunks.length} chunks stored`
    });
    throw embeddingError;
  }
  onProgress?.({
    phase: "finalizing",
    current: 3,
    total: 3,
    message: "Done"
  });
  return {
    totalFiles: scannedFiles.length,
    totalChunks: chunks.length,
    durationMs,
    embeddingProvider: embedder.provider,
    embeddingModel: embedder.model,
    depgraph: depgraphResult
  };
}
|
|
1744
|
+
// Upper bound on how many vectors are written to the store per upsert call.
var STORE_BATCH_SIZE = 5e3;
/**
 * Embed all texts and persist the resulting vectors.
 *
 * Delegates to the provider's asynchronous batch API when available;
 * otherwise embeds synchronously in batchConfig.size slices, then stores
 * everything in STORE_BATCH_SIZE groups. Progress is reported through
 * onProgress for both the "embedding" and "storing" phases.
 */
async function embedAndStore(embedder, texts, chunks, store, batchConfig, onProgress) {
  if (texts.length === 0) return;
  const total = texts.length;
  // Prefer the provider's async batch endpoint when fully supported.
  if (embedder.supportsBatch && embedder.submitBatch && embedder.pollBatch) {
    await embedAndStoreBatch(embedder, texts, chunks, store, batchConfig, onProgress);
    return;
  }
  onProgress?.({
    phase: "embedding",
    current: 0,
    total,
    message: "Creating embeddings..."
  });
  // Phase 1: embed every slice, accumulating vectors in order.
  const vectors = [];
  for (let offset = 0; offset < total; offset += batchConfig.size) {
    const slice = texts.slice(offset, offset + batchConfig.size);
    const sliceVectors = await embedder.embed(slice);
    vectors.push(...sliceVectors);
    const processed = Math.min(offset + batchConfig.size, total);
    onProgress?.({
      phase: "embedding",
      current: processed,
      total,
      message: `Embedded ${processed}/${total} chunks`
    });
  }
  onProgress?.({
    phase: "storing",
    current: 0,
    total,
    message: "Storing vectors..."
  });
  // Phase 2: upsert vectors into the store in STORE_BATCH_SIZE groups.
  let stored = 0;
  while (stored < vectors.length) {
    const end = Math.min(stored + STORE_BATCH_SIZE, vectors.length);
    const items = buildVectorItems(chunks, vectors, stored, end);
    await store.upsertItems(items);
    stored = end;
    onProgress?.({
      phase: "storing",
      current: stored,
      total,
      message: `Stored ${stored}/${total} vectors`
    });
  }
}
|
|
1789
|
+
/**
 * Embed via the provider's asynchronous batch API: submit every text at
 * once, then hand off to pollAndStoreProgressively, which polls the batch
 * and stores results as they become available.
 */
async function embedAndStoreBatch(embedder, texts, chunks, store, batchConfig, onProgress) {
  const total = texts.length;
  // Both progress events here report current: 0 — nothing is stored yet.
  const report = (message) => onProgress?.({
    phase: "embedding",
    current: 0,
    total,
    message
  });
  report(`Submitting batch of ${total} texts...`);
  const batchId = await embedder.submitBatch(texts);
  log("info", `Batch submitted: id=${batchId}, texts=${total}`);
  report(`Batch ${batchId.slice(0, 8)}... submitted, polling...`);
  await pollAndStoreProgressively(embedder, batchId, chunks, store, batchConfig, total, onProgress);
}
|
|
1807
|
+
/**
 * Poll a submitted embedding batch until it completes, fails, or times out,
 * storing results incrementally as they arrive.
 *
 * `watermark` tracks how many results have already been persisted, so each
 * poll only stores the tail beyond the previous watermark. Assumes the
 * provider returns results as a stable, append-only, in-order array
 * (watermark indexing relies on that).
 *
 * @throws Error when the batch reports "failed", or when timeoutMs elapses.
 *         A timeoutMs of 0 (or negative) disables the deadline entirely.
 */
async function pollAndStoreProgressively(embedder, batchId, chunks, store, batchConfig, total, onProgress) {
  // deadline === 0 means "poll forever".
  const deadline = batchConfig.timeoutMs > 0 ? Date.now() + batchConfig.timeoutMs : 0;
  let pollCount = 0;
  let watermark = 0;
  while (deadline === 0 || Date.now() < deadline) {
    const status = await embedder.pollBatch(batchId);
    pollCount++;
    // Log the first few polls, then only every 10th, to limit log noise.
    if (pollCount <= 3 || pollCount % 10 === 0) {
      log("info", `Poll #${pollCount} batch=${batchId.slice(0, 8)}: status=${status.status}, processed=${status.processedInputs}/${status.totalInputs}, watermark=${watermark}`);
    }
    if (status.results && status.results.length > watermark) {
      const newCount = status.results.length - watermark;
      try {
        // Store only the new tail, in STORE_BATCH_SIZE groups.
        for (let i = watermark; i < status.results.length; i += STORE_BATCH_SIZE) {
          const end = Math.min(i + STORE_BATCH_SIZE, status.results.length);
          const items = buildVectorItems(chunks, status.results, i, end);
          await store.upsertItems(items);
        }
        const prevWatermark = watermark;
        watermark = status.results.length;
        log("info", `Stored ${newCount} new embeddings (${prevWatermark}\u2192${watermark}/${total})`);
      } catch (storeErr) {
        // Store failure is non-fatal: the watermark is NOT advanced, so the
        // same results will be retried on the next poll.
        log("error", `Store failed at watermark=${watermark}: ${storeErr instanceof Error ? storeErr.message : String(storeErr)}`);
      }
    }
    if (status.status === "failed") {
      log("error", `Batch ${batchId.slice(0, 8)} failed (watermark=${watermark}/${total} stored)`);
      throw new Error(`Batch embedding failed (batch ${batchId}). ${watermark} of ${total} chunks were stored before failure.`);
    }
    onProgress?.({
      phase: "embedding",
      current: watermark,
      total,
      message: `Embedded & stored ${watermark}/${total} chunks`
    });
    if (status.status === "completed") {
      log("info", `Batch ${batchId.slice(0, 8)} completed: ${watermark}/${total} stored`);
      onProgress?.({
        phase: "embedding",
        current: total,
        total,
        message: `Embedded & stored ${total}/${total} chunks`
      });
      return;
    }
    // Still in progress: wait before the next poll.
    await new Promise((resolve2) => setTimeout(resolve2, batchConfig.pollIntervalMs));
  }
  log("error", `Batch ${batchId} timed out after ${Math.round(batchConfig.timeoutMs / 1e3)}s (${pollCount} polls, watermark=${watermark}/${total})`);
  throw new Error(
    `Batch embedding timed out after ${Math.round(batchConfig.timeoutMs / 1e3)}s (batch ${batchId}). ${watermark} of ${total} chunks were stored.`
  );
}
|
|
1859
|
+
/**
 * Build vector-store records for chunks[from..to) paired positionally with
 * embeddings[from..to). Each record carries the chunk id, its vector, and
 * lightweight metadata including a 200-character content snippet.
 */
function buildVectorItems(chunks, embeddings, from, to) {
  const records = [];
  let index = from;
  while (index < to) {
    const { id, filePath, startLine, endLine, content } = chunks[index];
    records.push({
      id,
      vector: embeddings[index],
      metadata: {
        id,
        filePath,
        startLine,
        endLine,
        snippet: content.slice(0, 200)
      }
    });
    index++;
  }
  return records;
}
|
|
1877
|
+
/**
 * Persist index stats as pretty-printed JSON, atomically: write to a
 * sibling ".tmp" file first, then rename over the real stats file so
 * readers never observe a half-written file.
 */
function writeStats(projectDir, branch, stats) {
  const target = codemapStatsFile(projectDir, branch);
  fs9.mkdirSync(path11.dirname(target), { recursive: true });
  const scratch = target + ".tmp";
  const payload = JSON.stringify(stats, null, 2) + "\n";
  fs9.writeFileSync(scratch, payload, "utf-8");
  fs9.renameSync(scratch, target);
}
|
|
1885
|
+
// Default fusion weights for hybrid search scoring (vector similarity,
// BM25 lexical match, symbol-name hit, path-term match). graphRank is 0
// by default, i.e. PageRank influence is opt-in via configuration.
var DEFAULT_HYBRID_WEIGHTS = {
  vector: 0.6,
  bm25: 0.25,
  symbolBoost: 0.1,
  pathBoost: 0.05,
  graphRank: 0
};
|
|
1892
|
+
/**
 * Min-max normalize a score list into [0, 1].
 * Empty input yields an empty array; a constant list maps every entry to 1
 * (so identical scores are treated as equally strong, not equally weak).
 */
function normalizeScores(values) {
  if (values.length === 0) return [];
  const lo = Math.min(...values);
  const hi = Math.max(...values);
  if (hi === lo) {
    return values.map(() => 1);
  }
  const span = hi - lo;
  return values.map((v) => (v - lo) / span);
}
|
|
1899
|
+
/**
 * Fraction of query terms that appear (case-insensitively) as substrings of
 * the file path. Returns 0 when there are no query terms.
 */
function computePathBoost(filePath, queryTerms) {
  if (queryTerms.length === 0) return 0;
  const haystack = filePath.toLowerCase();
  const hits = queryTerms.filter((term) => haystack.includes(term.toLowerCase())).length;
  return hits / queryTerms.length;
}
|
|
1908
|
+
/**
 * Fuse vector-search and BM25 result lists into one ranked list.
 *
 * Both score lists are min-max normalized independently, merged by chunk id,
 * then each candidate gets a weighted sum of: normalized vector score,
 * normalized BM25 score, a 0/1 symbol-match boost, a path-term boost, and an
 * optional PageRank score. Results are returned sorted by fused score
 * (descending) with the internal fusedScore field stripped; `score` on each
 * returned item IS the fused score.
 *
 * @param graphRanks Optional Map of filePath -> PageRank score; only used
 *        when weights.graphRank is set.
 */
function fuseResults(vectorResults, bm25Results, queryTerms, symbolChunkIds, weights, graphRanks) {
  const vectorScores = vectorResults.map((r) => r.score);
  const normalizedVector = normalizeScores(vectorScores);
  const bm25Scores = bm25Results.map((r) => r.score);
  const normalizedBm25 = normalizeScores(bm25Scores);
  // candidates: chunk id -> partial scores + the richer vector result (if any).
  const candidates = /* @__PURE__ */ new Map();
  for (let i = 0; i < vectorResults.length; i++) {
    const vr = vectorResults[i];
    candidates.set(vr.id, {
      vectorScore: normalizedVector[i],
      bm25Score: 0,
      result: vr,
      filePath: vr.filePath
    });
  }
  for (let i = 0; i < bm25Results.length; i++) {
    const br = bm25Results[i];
    const existing = candidates.get(br.id);
    if (existing) {
      // Seen by both retrievers: attach the BM25 score to the vector entry.
      existing.bm25Score = normalizedBm25[i];
    } else {
      // BM25-only hit: no vector result to carry line/snippet info.
      candidates.set(br.id, {
        vectorScore: 0,
        bm25Score: normalizedBm25[i],
        result: null,
        filePath: br.filePath
      });
    }
  }
  const scored = [];
  for (const [id, c] of candidates) {
    const symbolScore = symbolChunkIds.has(id) ? 1 : 0;
    const pathBoost = computePathBoost(c.filePath, queryTerms);
    const graphRankScore = graphRanks?.get(c.filePath) ?? 0;
    const fusedScore = weights.vector * c.vectorScore + weights.bm25 * c.bm25Score + weights.symbolBoost * symbolScore + weights.pathBoost * pathBoost + (weights.graphRank ?? 0) * graphRankScore;
    // BM25-only candidates get a placeholder result shape (no line info).
    const base = c.result ?? {
      id,
      filePath: c.filePath,
      startLine: 0,
      endLine: 0,
      snippet: "",
      score: 0
    };
    scored.push({
      ...base,
      score: fusedScore,
      fusedScore
    });
  }
  scored.sort((a, b) => b.fusedScore - a.fusedScore);
  // Drop the internal fusedScore field from the public result objects.
  return scored.map(({ fusedScore: _, ...rest }) => rest);
}
|
|
1960
|
+
/**
 * Semantic (and optionally hybrid) code search over the branch's codemap.
 *
 * Embeds the query, runs a vector search, and — when hybrid search is
 * enabled and a BM25 index exists — fuses vector, BM25, symbol, path, and
 * optional PageRank signals via fuseResults. Post-filters by score
 * threshold, path prefix, and test/doc exclusions, then truncates to limit.
 *
 * Fix: the fallback weights literal previously duplicated
 * DEFAULT_HYBRID_WEIGHTS inline; it now references the shared constant
 * (behavior is identical: `weights.graphRank ?? 0` in fuseResults already
 * treats a missing graphRank as 0).
 *
 * @param projectDir Project root directory.
 * @param query Natural-language or code query string.
 * @param options {limit, threshold, pathPrefix, includeTests, includeDocs, branch}
 * @returns {query, durationMs, results} where results are ranked matches.
 */
async function searchCode(projectDir, query, options = {}) {
  const startTime = Date.now();
  const { limit = 10, threshold = 0, pathPrefix, includeTests = true, includeDocs = true } = options;
  const resolvedBranch = options.branch ?? getCurrentBranch(projectDir);
  const config = loadCodemapConfig(projectDir);
  const embedder = await createEmbedder(config.embedding);
  const [queryVector] = await embedder.embed([query]);
  const store = new CodemapStore(projectDir, resolvedBranch);
  await store.initialize();
  const hybridConfig = config.hybrid;
  const bm25Index = hybridConfig?.enabled ? loadBM25Index(projectDir, resolvedBranch) : null;
  let results;
  if (bm25Index && hybridConfig?.enabled) {
    const vectorK = hybridConfig.vectorK ?? 30;
    const bm25K = hybridConfig.bm25K ?? 30;
    // Fall back to the module-level defaults rather than an inline copy.
    const weights = hybridConfig.weights ?? DEFAULT_HYBRID_WEIGHTS;
    const vectorResults = await store.query(queryVector, vectorK);
    const queryTerms = tokenize(query);
    const bm25Results = queryBM25(bm25Index, queryTerms, bm25K);
    const matchingSymbols = searchSymbols(projectDir, query, { limit: 50 }, resolvedBranch);
    const symbolChunkIds = new Set(matchingSymbols.map((s) => s.chunkId));
    // PageRank is only loaded when it actually influences the fused score.
    let graphRanks;
    if (weights.graphRank && weights.graphRank > 0) {
      try {
        const { loadPageRankMap } = await import("./dist-R5ZJ4LX5.js");
        graphRanks = loadPageRankMap(projectDir, resolvedBranch) ?? void 0;
      } catch {
        // Missing depgraph module/data: search proceeds without PageRank.
      }
    }
    results = fuseResults(vectorResults, bm25Results, queryTerms, symbolChunkIds, weights, graphRanks);
  } else {
    // Pure vector search: over-fetch (3x limit, capped at 80) so the
    // post-filters below still leave enough candidates.
    const candidateK = Math.min(limit * 3, 80);
    results = await store.query(queryVector, candidateK);
  }
  let filtered = results;
  if (threshold > 0) {
    filtered = filtered.filter((r) => r.score >= threshold);
  }
  if (pathPrefix) {
    filtered = filtered.filter((r) => r.filePath.startsWith(pathPrefix));
  }
  if (!includeTests) {
    filtered = filtered.filter((r) => {
      const lower = r.filePath.toLowerCase();
      return !lower.includes("__tests__") && !lower.includes(".test.") && !lower.includes(".spec.") && !lower.includes("/test/") && !lower.includes("/tests/");
    });
  }
  if (!includeDocs) {
    filtered = filtered.filter((r) => {
      const lower = r.filePath.toLowerCase();
      return !lower.endsWith(".md") && !lower.endsWith(".mdx");
    });
  }
  filtered = filtered.slice(0, limit);
  return {
    query,
    durationMs: Date.now() - startTime,
    results: filtered
  };
}
|
|
2020
|
+
/**
 * Report the current state of a branch's codemap index: whether it exists,
 * whether a watcher process is alive, counts from persisted stats, and
 * whether the on-disk schema matches the current embedding configuration.
 *
 * Never throws: missing/corrupt files degrade to defaults.
 */
function getCodemapStatus(projectDir, branch) {
  const resolvedBranch = branch ?? getCurrentBranch(projectDir);
  const codemapDir = codemapBranchDir(projectDir, resolvedBranch);
  const exists = fs10.existsSync(codemapDir);
  if (!exists) {
    // No index directory yet: report an uninitialized-but-available status
    // using the configured (not indexed) embedding settings.
    const config2 = loadCodemapConfig(projectDir);
    return {
      available: true,
      initialized: false,
      mode: "idle",
      branch: resolvedBranch,
      embeddingProvider: config2.embedding.provider,
      embeddingModel: config2.embedding.model,
      dimensions: config2.embedding.dimensions,
      totalFiles: 0,
      totalChunks: 0,
      staleFiles: 0,
      indexSizeBytes: 0
    };
  }
  const config = loadCodemapConfig(projectDir);
  const schema = loadSchema(projectDir, resolvedBranch);
  const stats = loadStats(projectDir, resolvedBranch);
  let mode = "idle";
  // A live watcher advertises itself via a PID file in the codemap dir.
  const watcherPidPath = path12.join(codemapDir, "watcher.pid");
  if (fs10.existsSync(watcherPidPath)) {
    try {
      const pidData = JSON.parse(fs10.readFileSync(watcherPidPath, "utf-8"));
      if (pidData.pid && typeof pidData.pid === "number") {
        try {
          // Signal 0 only probes for process existence; it sends nothing.
          process.kill(pidData.pid, 0);
          mode = "watching";
        } catch {
          // Process is gone: clean up the stale PID file (best effort).
          try {
            fs10.unlinkSync(watcherPidPath);
          } catch {
          }
        }
      }
    } catch {
      // Unreadable/invalid PID file: treat as no watcher.
    }
  }
  // The index was built with a different provider/model/dimensions than the
  // current config — a reindex is needed before search results make sense.
  const configMismatch = schema !== null && (schema.embedding.provider !== config.embedding.provider || schema.embedding.model !== config.embedding.model || schema.vectorStore.dimensions !== config.embedding.dimensions);
  return {
    available: true,
    branch: resolvedBranch,
    initialized: schema !== null,
    mode,
    embeddingProvider: config.embedding.provider,
    embeddingModel: config.embedding.model,
    dimensions: config.embedding.dimensions,
    totalFiles: stats?.totalFiles ?? 0,
    totalChunks: stats?.totalChunks ?? 0,
    staleFiles: stats?.staleFiles ?? 0,
    indexSizeBytes: stats?.indexSizeBytes ?? 0,
    lastUpdated: stats?.lastUpdated,
    configMismatch,
    indexedProvider: schema?.embedding.provider,
    indexedModel: schema?.embedding.model,
    indexedDimensions: schema?.vectorStore.dimensions
  };
}
|
|
2082
|
+
/**
 * Load the persisted stats JSON for a branch.
 * Returns null when the file is absent or cannot be parsed.
 */
function loadStats(projectDir, branch) {
  const file = codemapStatsFile(projectDir, branch);
  if (!fs10.existsSync(file)) {
    return null;
  }
  try {
    const raw = fs10.readFileSync(file, "utf-8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
2091
|
+
/**
 * Debounced event queue for file-watcher events.
 *
 * Events are coalesced by filePath (the most recent event for a path wins).
 * Each push restarts a debounce window; when it elapses, the queue drains
 * and the onFlush callback receives the drained events.
 */
var DebounceQueue = class {
  // Pending events keyed by file path; later events replace earlier ones.
  queue = /* @__PURE__ */ new Map();
  // Active debounce timer handle, or null when none is armed.
  timer = null;
  debounceMs;
  /** Callback invoked when the queue flushes */
  onFlush = null;
  constructor(debounceMs = 300) {
    this.debounceMs = debounceMs;
  }
  /**
   * Push an event into the queue. Resets the debounce timer.
   */
  push(event) {
    this.queue.set(event.filePath, event);
    if (this.timer) clearTimeout(this.timer);
    this.timer = setTimeout(() => this.doFlush(), this.debounceMs);
  }
  /**
   * Immediately flush all queued events, canceling any pending timer.
   * Unlike doFlush, this returns the events instead of invoking onFlush.
   */
  flush() {
    this.#cancelTimer();
    const drained = [...this.queue.values()];
    this.queue.clear();
    return drained;
  }
  /**
   * Current number of queued events (distinct file paths).
   */
  get depth() {
    return this.queue.size;
  }
  /**
   * Stop the queue: clear timer, events, and the onFlush callback.
   */
  destroy() {
    this.#cancelTimer();
    this.queue.clear();
    this.onFlush = null;
  }
  // Timer-driven flush: drain the queue and notify onFlush (if set and
  // there is anything to deliver).
  doFlush() {
    this.timer = null;
    const drained = [...this.queue.values()];
    this.queue.clear();
    if (drained.length > 0 && this.onFlush) {
      this.onFlush(drained);
    }
  }
  // Clear and null the debounce timer if one is armed.
  #cancelTimer() {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }
};
|
|
2148
|
+
/**
 * Apply a batch of watcher events (add/change/unlink) to an existing index
 * incrementally: remove stale chunks, re-chunk and re-embed changed files,
 * and keep the manifest, BM25 index, symbol index, and stats in sync.
 * Holds the per-branch codemap lock for the duration.
 *
 * @returns {added, updated, removed, durationMs} counts of affected files.
 * @throws Error when no manifest exists (a full index must run first).
 */
async function processIncremental(projectDir, events, config, store, embedder, branch) {
  const startTime = Date.now();
  const resolvedBranch = branch ?? getCurrentBranch(projectDir);
  let added = 0;
  let updated = 0;
  let removed = 0;
  acquireCodemapLock(projectDir, resolvedBranch);
  try {
    const manifest = loadManifest(projectDir, resolvedBranch);
    if (!manifest) {
      throw new Error("No manifest found \u2014 run full index first");
    }
    const bm25Index = loadBM25Index(projectDir, resolvedBranch);
    let symbols = loadSymbolIndex(projectDir, resolvedBranch);
    // Process deletions first so a delete+recreate pair nets out correctly.
    const unlinkEvents = events.filter((e) => e.type === "unlink");
    const addChangeEvents = events.filter((e) => e.type === "add" || e.type === "change");
    for (const event of unlinkEvents) {
      const entry = manifest.files[event.filePath];
      if (!entry) continue;
      await store.removeItems(entry.chunkIds);
      if (bm25Index) {
        removeDocuments(bm25Index, entry.chunkIds);
      }
      symbols = symbols.filter((s) => s.filePath !== event.filePath);
      delete manifest.files[event.filePath];
      removed++;
    }
    for (const event of addChangeEvents) {
      const absPath = path13.join(projectDir, event.filePath);
      let content;
      try {
        content = fs11.readFileSync(absPath, "utf-8");
      } catch {
        // File vanished or is unreadable since the event fired: skip it.
        continue;
      }
      const contentHash = createHash6("sha256").update(content).digest("hex");
      const existingEntry = manifest.files[event.filePath];
      // Content unchanged (e.g. touch/metadata-only event): nothing to do.
      if (existingEntry && existingEntry.contentHash === contentHash) continue;
      if (existingEntry) {
        // Changed file: drop its old chunks/symbols before re-indexing.
        await store.removeItems(existingEntry.chunkIds);
        if (bm25Index) {
          removeDocuments(bm25Index, existingEntry.chunkIds);
        }
        symbols = symbols.filter((s) => s.filePath !== event.filePath);
        updated++;
      } else {
        added++;
      }
      const chunks = await chunkFile(event.filePath, content, config.chunking);
      assignChunkIds(chunks, config.chunking.version);
      // NOTE(review): a file that yields zero chunks is left out of the
      // manifest entirely (its old entry, if any, was already removed).
      if (chunks.length === 0) continue;
      const chunkSymbols = [];
      for (const chunk of chunks) {
        const extracted = extractSymbols(
          chunk.filePath,
          chunk.content,
          chunk.startLine,
          chunk.endLine,
          chunk.id
        );
        chunkSymbols.push(...extracted);
      }
      symbols.push(...chunkSymbols);
      const texts = buildEmbeddingTexts(chunks, chunkSymbols);
      const embeddings = await embedder.embed(texts);
      // embeddings are positional: embeddings[idx] belongs to chunks[idx].
      const vectorItems = chunks.map((chunk, idx) => ({
        id: chunk.id,
        vector: embeddings[idx],
        metadata: {
          id: chunk.id,
          filePath: chunk.filePath,
          startLine: chunk.startLine,
          endLine: chunk.endLine,
          snippet: chunk.content.slice(0, 200)
        }
      }));
      await store.upsertItems(vectorItems);
      if (bm25Index) {
        const bm25Docs = chunks.map((c) => ({
          id: c.id,
          filePath: c.filePath,
          terms: tokenize(c.filePath + " " + c.content)
        }));
        addDocuments(bm25Index, bm25Docs);
      }
      const stat = fs11.statSync(absPath);
      manifest.files[event.filePath] = {
        contentHash,
        mtime: stat.mtimeMs,
        sizeBytes: stat.size,
        chunkIds: chunks.map((c) => c.id)
      };
    }
    // Persist all secondary indexes after the event loop.
    saveManifest(projectDir, resolvedBranch, manifest);
    if (bm25Index) saveBM25Index(projectDir, resolvedBranch, bm25Index);
    saveSymbolIndex(projectDir, resolvedBranch, symbols);
    // Best-effort stats refresh (atomic tmp+rename write); failures ignored.
    const statsPath = codemapStatsFile(projectDir, resolvedBranch);
    try {
      const statsContent = fs11.readFileSync(statsPath, "utf-8");
      const stats = JSON.parse(statsContent);
      stats.totalFiles = Object.keys(manifest.files).length;
      stats.totalChunks = Object.values(manifest.files).reduce(
        (sum, f) => sum + f.chunkIds.length,
        0
      );
      stats.staleFiles = 0;
      stats.lastUpdated = (/* @__PURE__ */ new Date()).toISOString();
      const tmpPath = statsPath + ".tmp";
      fs11.writeFileSync(tmpPath, JSON.stringify(stats, null, 2) + "\n", "utf-8");
      fs11.renameSync(tmpPath, statsPath);
    } catch {
    }
  } finally {
    releaseCodemapLock(projectDir, resolvedBranch);
  }
  return {
    added,
    updated,
    removed,
    durationMs: Date.now() - startTime
  };
}
|
|
2270
|
+
// How often the reconciliation pass re-scans the repository to catch
// changes that fs.watch missed (default: once per minute).
var DEFAULT_RECONCILE_INTERVAL_MS = 60 * 1e3;
// File extensions never indexed by the watcher: images, fonts, media,
// archives, office documents, compiled artifacts, and lockfiles.
var BINARY_EXTENSIONS2 = /* @__PURE__ */ new Set([
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".bmp",
  ".ico",
  ".svg",
  ".woff",
  ".woff2",
  ".ttf",
  ".eot",
  ".otf",
  ".mp3",
  ".mp4",
  ".wav",
  ".avi",
  ".mov",
  ".zip",
  ".gz",
  ".tar",
  ".rar",
  ".7z",
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".exe",
  ".dll",
  ".so",
  ".dylib",
  ".pyc",
  ".pyo",
  ".class",
  ".lock"
]);
// Directory names skipped entirely while watching: dependency trees,
// VCS metadata, this tool's own state dir, build output, and tool caches.
var IGNORED_DIRS = /* @__PURE__ */ new Set([
  "node_modules",
  ".git",
  ".ulpi",
  "dist",
  "build",
  ".next",
  ".cache",
  ".turbo",
  ".parcel-cache",
  "__pycache__",
  ".tsbuildinfo"
]);
// Capacity of the watcher's recent-event ring buffer (see CodemapWatcher.trackEvent).
var MAX_RECENT_EVENTS = 20;
|
|
2322
|
+
/**
 * Watches a project directory and keeps its codemap index up to date.
 *
 * Three mechanisms feed the same debounced queue:
 *  - fs.watch (recursive) for live file events,
 *  - a periodic reconciliation scan that diffs the manifest against disk,
 *  - a final flush of any still-queued events on stop().
 * Batches are processed one at a time; events arriving mid-batch are
 * buffered in `pendingEvents` and drained (deduped by path) afterwards.
 */
var CodemapWatcher = class {
  // Absolute path of the watched project.
  projectDir;
  // Git branch whose codemap index this watcher maintains.
  branch;
  // Interval between reconciliation scans, in milliseconds.
  reconcileIntervalMs;
  // Active fs.FSWatcher, or null when not watching.
  fsWatcher = null;
  // Debounced event queue; invokes onFlush with batches of events.
  queue;
  // setInterval handle for the reconciliation loop, or null.
  reconcileTimer = null;
  running = false;
  // True while a batch is being indexed; flushes during that window buffer instead.
  processing = false;
  // Events that arrived while a batch was in flight.
  pendingEvents = [];
  // Ring buffer of the most recent events (capped at MAX_RECENT_EVENTS), for status display.
  _recentEvents = [];
  _totalEventsProcessed = 0;
  // ISO timestamp of the last successful incremental flush, or null.
  _lastFlushAt = null;
  constructor(options) {
    this.projectDir = options.projectDir;
    // Fall back to the repo's current branch when none is given.
    this.branch = options.branch ?? getCurrentBranch(options.projectDir);
    this.reconcileIntervalMs = options.reconcileIntervalMs ?? DEFAULT_RECONCILE_INTERVAL_MS;
    // 300 — presumably the debounce window in ms; confirm against DebounceQueue's constructor.
    this.queue = new DebounceQueue(300);
  }
  get isRunning() {
    return this.running;
  }
  // Number of events currently waiting in the debounce queue.
  get queueDepth() {
    return this.queue.depth;
  }
  get recentEvents() {
    return this._recentEvents;
  }
  get totalEventsProcessed() {
    return this._totalEventsProcessed;
  }
  get lastFlushAt() {
    return this._lastFlushAt;
  }
  // Record an event in the bounded recent-events log (oldest entries dropped).
  trackEvent(type, filePath) {
    this._recentEvents.push({ type, filePath, ts: (/* @__PURE__ */ new Date()).toISOString() });
    if (this._recentEvents.length > MAX_RECENT_EVENTS) {
      this._recentEvents.shift();
    }
  }
  /**
   * Start watching: wire up the debounce queue, fs.watch, the
   * reconciliation timer, the PID file, and signal handlers.
   * Idempotent — a second call while running is a no-op.
   */
  async start() {
    if (this.running) return;
    this.running = true;
    const config = loadCodemapConfig(this.projectDir);
    const embedder = await createEmbedder(config.embedding);
    const store = new CodemapStore(this.projectDir, this.branch);
    await store.initialize();
    const ignorePatterns = loadCodemapIgnore(this.projectDir);
    const denyPatterns = [...config.deny, ...ignorePatterns];
    // Process one batch, then keep draining events that arrived meanwhile,
    // so nothing queued during indexing is lost.
    const processBatch = async (initialEvents) => {
      this.processing = true;
      let events = initialEvents;
      while (events.length > 0) {
        try {
          await processIncremental(this.projectDir, events, config, store, embedder, this.branch);
          this._totalEventsProcessed += events.length;
          this._lastFlushAt = (/* @__PURE__ */ new Date()).toISOString();
        } catch (err) {
          // Indexing failures are logged, not fatal: the watcher keeps running
          // and the reconciliation pass can retry the files later.
          console.error(`[codemap-watcher] Incremental update error: ${err instanceof Error ? err.message : String(err)}`);
        }
        events = this.drainPendingEvents();
      }
      this.processing = false;
    };
    this.queue.onFlush = async (events) => {
      if (events.length === 0) return;
      // A batch is already in flight — buffer; processBatch will drain.
      if (this.processing) {
        this.pendingEvents.push(...events);
        return;
      }
      await processBatch(events);
    };
    // Filter: skip ignored/hidden directories, binary extensions, and deny patterns.
    const shouldWatch = (relPath) => {
      const segments = relPath.split(path14.sep);
      for (const seg of segments) {
        if (IGNORED_DIRS.has(seg) || seg.startsWith(".")) return false;
      }
      const ext = path14.extname(relPath).toLowerCase();
      if (BINARY_EXTENSIONS2.has(ext)) return false;
      if (matchesDenyPattern(relPath, denyPatterns)) return false;
      return true;
    };
    try {
      this.fsWatcher = fs12.watch(this.projectDir, { recursive: true }, (eventType, filename) => {
        if (!filename) return;
        const relPath = filename;
        if (!shouldWatch(relPath)) return;
        const absPath = path14.join(this.projectDir, relPath);
        let type;
        // fs.watch does not distinguish add/delete; probe the path:
        // accessible + "rename" => add, accessible + "change" => change,
        // inaccessible => the file was removed.
        try {
          fs12.accessSync(absPath);
          type = eventType === "rename" ? "add" : "change";
        } catch {
          type = "unlink";
        }
        this.trackEvent(type, relPath);
        this.queue.push({ type, filePath: relPath, timestamp: Date.now() });
      });
      this.fsWatcher.on("error", (err) => {
        console.error(`[codemap-watcher] fs.watch error: ${err.message}`);
      });
    } catch (err) {
      // fs.watch may be unavailable (e.g. recursive unsupported); the
      // reconciliation timer below still keeps the index converging.
      console.error(`[codemap-watcher] Failed to start fs.watch: ${err instanceof Error ? err.message : String(err)}`);
    }
    // Safety net: periodically diff the manifest against a fresh scan and
    // enqueue anything fs.watch missed. Skipped while a batch is in flight.
    this.reconcileTimer = setInterval(async () => {
      if (this.processing) return;
      try {
        const currentConfig = loadCodemapConfig(this.projectDir);
        const scannedFiles = scanRepository(this.projectDir, currentConfig);
        const manifest = loadManifest(this.projectDir, this.branch);
        const diff = diffManifest(manifest, scannedFiles);
        for (const file of diff.added) {
          this.queue.push({ type: "add", filePath: file.filePath, timestamp: Date.now() });
        }
        for (const file of diff.updated) {
          this.queue.push({ type: "change", filePath: file.filePath, timestamp: Date.now() });
        }
        for (const filePath of diff.removed) {
          this.queue.push({ type: "unlink", filePath, timestamp: Date.now() });
        }
      } catch (err) {
        console.error(`[codemap-watcher] Reconciliation error: ${err instanceof Error ? err.message : String(err)}`);
      }
    }, this.reconcileIntervalMs);
    this.writePidFile();
    // Best-effort graceful shutdown on SIGINT/SIGTERM.
    const shutdown = () => {
      this.stop().catch(() => {
      });
    };
    process.on("SIGINT", shutdown);
    process.on("SIGTERM", shutdown);
  }
  /**
   * Stop watching: tear down fs.watch and the timer, then make a final
   * best-effort attempt to index any events still queued or buffered
   * (deduplicated by file path, latest event wins) before cleaning up.
   */
  async stop() {
    if (!this.running) return;
    this.running = false;
    if (this.fsWatcher) {
      this.fsWatcher.close();
      this.fsWatcher = null;
    }
    if (this.reconcileTimer) {
      clearInterval(this.reconcileTimer);
      this.reconcileTimer = null;
    }
    const queued = this.queue.flush();
    const pending = this.drainPendingEvents();
    const deduped = /* @__PURE__ */ new Map();
    for (const e of [...queued, ...pending]) deduped.set(e.filePath, e);
    const remaining = Array.from(deduped.values());
    if (remaining.length > 0) {
      try {
        const config = loadCodemapConfig(this.projectDir);
        const embedder = await createEmbedder(config.embedding);
        const store = new CodemapStore(this.projectDir, this.branch);
        await processIncremental(this.projectDir, remaining, config, store, embedder, this.branch);
      } catch {
        // Deliberately swallowed: we are shutting down; the next
        // reconciliation run (or restart) will pick these files up.
      }
    }
    this.queue.destroy();
    this.removePidFile();
  }
  /**
   * Drain pending events buffer, deduplicating by file path (keep latest).
   */
  drainPendingEvents() {
    if (this.pendingEvents.length === 0) return [];
    const events = this.pendingEvents;
    this.pendingEvents = [];
    const deduped = /* @__PURE__ */ new Map();
    for (const e of events) deduped.set(e.filePath, e);
    return Array.from(deduped.values());
  }
  // Write `<branch dir>/watcher.pid` with this process's pid and start time.
  // Best-effort: failures are ignored.
  writePidFile() {
    const pidPath = path14.join(codemapBranchDir(this.projectDir, this.branch), "watcher.pid");
    try {
      fs12.mkdirSync(path14.dirname(pidPath), { recursive: true });
      const data = { pid: process.pid, startedAt: (/* @__PURE__ */ new Date()).toISOString() };
      fs12.writeFileSync(pidPath, JSON.stringify(data), "utf-8");
    } catch {
    }
  }
  // Remove the PID file; ignored if it does not exist.
  removePidFile() {
    const pidPath = path14.join(codemapBranchDir(this.projectDir, this.branch), "watcher.pid");
    try {
      fs12.unlinkSync(pidPath);
    } catch {
    }
  }
};
|
|
2510
|
+
// GitHub rejects individual files of 100 MB or more; exports abort at this size.
var GITHUB_MAX_FILE_BYTES = 100 * 1024 * 1024;
// GitHub warns for files over 50 MB; exports emit a warning at this size.
var GITHUB_WARN_FILE_BYTES = 50 * 1024 * 1024;
|
|
2512
|
+
/**
 * Ensure the codemap index branch exists, creating it from scratch when
 * missing: an empty-blob `.gitkeep` -> tree -> root commit, then point
 * `refs/heads/<branchName>` at that commit via git plumbing.
 */
function ensureBranch(projectDir, branchName) {
  if (historyBranchExists(projectDir, branchName)) return;
  // Run a git plumbing command in the project dir and return trimmed stdout.
  const git = (args, extra = {}) => execFileSync2("git", args, {
    cwd: projectDir,
    encoding: "utf-8",
    timeout: 5e3,
    ...extra
  }).trim();
  const emptyBlob = git(["hash-object", "-t", "blob", "--stdin"], { input: "" });
  const tree = git(["mktree"], { input: `100644 blob ${emptyBlob} .gitkeep\n` });
  const commit = git(["commit-tree", tree, "-m", "init codemap index branch"]);
  execFileSync2("git", ["update-ref", `refs/heads/${branchName}`, commit], {
    cwd: projectDir,
    timeout: 5e3
  });
}
|
|
2537
|
+
/**
 * Export the codemap index to the dedicated git branch so it can be shared
 * (e.g. pushed to GitHub).
 *
 * Stages the JSON metadata files, the LanceDB directory, and any extra
 * metadata files into a temporary worktree, commits there, then rewrites
 * the branch as a single orphan commit built from the same tree (keeping
 * the index branch history-free).
 *
 * @param {string} projectDir - Project root.
 * @param {string} [branch] - Source branch; defaults to the current git branch.
 * @returns {Promise<{branchName: string, commitSha: string, filesExported: number, totalSizeBytes: number}>}
 * @throws If the index was never initialized, or a file exceeds GitHub's 100 MB limit.
 */
async function exportIndex(projectDir, branch) {
  const resolvedBranch = branch ?? getCurrentBranch(projectDir);
  const schema = loadSchema(projectDir, resolvedBranch);
  if (!schema) {
    throw new Error("Index not initialized. Run 'ulpi codemap init' first.");
  }
  const branchName = getCodemapBranch(resolvedBranch);
  ensureBranch(projectDir, branchName);
  // Fix: dropped an unused `codemapDir` local that was computed here but never read.
  const lanceDir = codemapLanceDir(projectDir, resolvedBranch);
  let filesExported = 0;
  let totalSizeBytes = 0;
  const commitSha = await withWorktree(projectDir, branchName, (worktreeDir) => {
    // Top-level JSON files, exported under fixed names at the branch root.
    const textFiles = [
      ["schema.json", codemapSchemaFile(projectDir, resolvedBranch)],
      ["manifest.json", codemapManifestFile(projectDir, resolvedBranch)],
      ["config.json", codemapConfigFile(projectDir)],
      ["stats.json", codemapStatsFile(projectDir, resolvedBranch)]
    ];
    for (const [name, sourcePath] of textFiles) {
      if (fs13.existsSync(sourcePath)) {
        const content = fs13.readFileSync(sourcePath, "utf-8");
        writeAndStage(worktreeDir, name, content);
        totalSizeBytes += Buffer.byteLength(content, "utf-8");
        filesExported++;
      }
    }
    // Vector store: copied recursively; copyDirRecursive enforces GitHub size limits.
    if (fs13.existsSync(lanceDir)) {
      const result = copyDirRecursive(lanceDir, worktreeDir, "index/lance");
      filesExported += result.fileCount;
      totalSizeBytes += result.totalBytes;
      for (const w of result.warnings) {
        process.stderr.write(`[ulpi codemap] Warning: ${w}\n`);
      }
    }
    // Flat metadata files (BM25 index, symbol index, ...); subdirectories are skipped.
    const metadataDir = codemapMetadataDir(projectDir, resolvedBranch);
    if (fs13.existsSync(metadataDir)) {
      const metadataFiles = fs13.readdirSync(metadataDir);
      for (const file of metadataFiles) {
        const sourcePath = path15.join(metadataDir, file);
        const stat = fs13.statSync(sourcePath);
        if (stat.isFile()) {
          copyAndStage(worktreeDir, `index/metadata/${file}`, sourcePath);
          totalSizeBytes += stat.size;
          filesExported++;
        }
      }
    }
    // Marker file that importIndex validates before accepting an export.
    const exportMeta = {
      exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
      indexVersion: 1,
      vectorStoreFormatVersion: 1,
      engine: "lancedb"
    };
    writeAndStage(worktreeDir, "export-meta.json", JSON.stringify(exportMeta, null, 2) + "\n");
    filesExported++;
    return commitInWorktree(worktreeDir, `codemap: export index (${filesExported} files)`);
  });
  // Rewrite the branch as one parentless commit over the same tree, so the
  // index branch never accumulates history (keeps clones/pushes small).
  const commitMessage = `codemap: export index (${filesExported} files)`;
  const tree = execFileSync2("git", ["rev-parse", `${commitSha}^{tree}`], {
    cwd: projectDir,
    encoding: "utf-8",
    timeout: 5e3
  }).trim();
  const orphanSha = execFileSync2("git", ["commit-tree", tree, "-m", commitMessage], {
    cwd: projectDir,
    encoding: "utf-8",
    timeout: 5e3
  }).trim();
  execFileSync2("git", ["update-ref", `refs/heads/${branchName}`, orphanSha], {
    cwd: projectDir,
    timeout: 5e3
  });
  return {
    branchName,
    commitSha: orphanSha,
    filesExported,
    totalSizeBytes
  };
}
|
|
2618
|
+
/**
 * Recursively copy `srcDir` into the worktree under `prefix`, staging each
 * file and enforcing GitHub file-size limits (>=100 MB throws, >=50 MB warns).
 * Returns counts plus any size warnings collected along the way.
 */
function copyDirRecursive(srcDir, worktreeDir, prefix) {
  const warnings = [];
  let fileCount = 0;
  let totalBytes = 0;
  for (const name of fs13.readdirSync(srcDir)) {
    const absolute = path15.join(srcDir, name);
    const relative = `${prefix}/${name}`;
    const info = fs13.statSync(absolute);
    if (info.isDirectory()) {
      const nested = copyDirRecursive(absolute, worktreeDir, relative);
      fileCount += nested.fileCount;
      totalBytes += nested.totalBytes;
      warnings.push(...nested.warnings);
      continue;
    }
    // Anything that is neither directory nor regular file (sockets, etc.) is skipped.
    if (!info.isFile()) continue;
    if (info.size >= GITHUB_MAX_FILE_BYTES) {
      throw new Error(
        `File "${relative}" is ${Math.round(info.size / 1024 / 1024)} MB, exceeding GitHub's 100 MB limit. Run a full reindex to regenerate the index with smaller fragment files.`
      );
    }
    if (info.size >= GITHUB_WARN_FILE_BYTES) {
      warnings.push(
        `File "${relative}" is ${Math.round(info.size / 1024 / 1024)} MB (GitHub warns at 50 MB)`
      );
    }
    copyAndStage(worktreeDir, relative, absolute);
    totalBytes += info.size;
    fileCount++;
  }
  return { fileCount, totalBytes, warnings };
}
|
|
2650
|
+
/**
 * Import a previously exported codemap index from the codemap git branch
 * into the local `.ulpi` codemap directory for the given branch.
 *
 * Validates export-meta.json (version/engine), copies everything into a
 * temporary directory first, and only then atomically swaps it into place;
 * on any failure the temp directory is removed and the existing index is
 * left untouched. Finally compares the imported schema's embedding
 * provider/model against the local config to decide whether a rebuild
 * should be recommended.
 *
 * @param {string} projectDir - Project root.
 * @param {string} [branch] - Target branch; defaults to the current git branch.
 * @returns {Promise<{success: boolean, filesImported: number, totalSizeBytes: number, rebuildRecommended: boolean, message: string}>}
 * @throws If the branch is missing, the export is invalid, or copying fails.
 */
async function importIndex(projectDir, branch) {
  const resolvedBranch = branch ?? getCurrentBranch(projectDir);
  const branchName = getCodemapBranch(resolvedBranch);
  if (!historyBranchExists(projectDir, branchName)) {
    throw new Error(`Branch '${branchName}' does not exist. Nothing to import.`);
  }
  const codemapDir = codemapBranchDir(projectDir, resolvedBranch);
  let filesImported = 0;
  let totalSizeBytes = 0;
  let rebuildRecommended = false;
  await withWorktree(projectDir, branchName, (worktreeDir) => {
    // --- Validate the export before touching anything locally ---
    const metaPath = path16.join(worktreeDir, "export-meta.json");
    if (!fs14.existsSync(metaPath)) {
      throw new Error("No export-meta.json found on branch. Invalid export.");
    }
    const meta = JSON.parse(fs14.readFileSync(metaPath, "utf-8"));
    if (meta.indexVersion !== 1) {
      throw new Error(`Unsupported index version: ${meta.indexVersion}`);
    }
    if (meta.engine !== "lancedb") {
      throw new Error(`Unsupported engine: ${meta.engine}. Expected 'lancedb'.`);
    }
    const schemaPath = path16.join(worktreeDir, "schema.json");
    if (!fs14.existsSync(schemaPath)) {
      throw new Error("No schema.json found on branch.");
    }
    const schema = JSON.parse(fs14.readFileSync(schemaPath, "utf-8"));
    // --- Stage everything into a sibling temp dir, then swap atomically ---
    const tmpDir = codemapDir + ".import-tmp";
    fs14.mkdirSync(tmpDir, { recursive: true });
    try {
      const textFiles = ["schema.json", "manifest.json", "config.json", "stats.json"];
      for (const file of textFiles) {
        const src = path16.join(worktreeDir, file);
        if (fs14.existsSync(src)) {
          const content = fs14.readFileSync(src);
          const dest = path16.join(tmpDir, file);
          fs14.writeFileSync(dest, content);
          totalSizeBytes += content.length;
          filesImported++;
        }
      }
      // Vector store directory (recursive copy).
      const lanceSrcDir = path16.join(worktreeDir, "index", "lance");
      if (fs14.existsSync(lanceSrcDir)) {
        const lanceDestDir = path16.join(tmpDir, "index", "lance");
        const result = copyDirRecursiveImport(lanceSrcDir, lanceDestDir);
        filesImported += result.fileCount;
        totalSizeBytes += result.totalBytes;
      }
      // Flat metadata files; subdirectories are ignored (exports write none).
      const metadataSrcDir = path16.join(worktreeDir, "index", "metadata");
      if (fs14.existsSync(metadataSrcDir)) {
        const metadataDestDir = path16.join(tmpDir, "index", "metadata");
        fs14.mkdirSync(metadataDestDir, { recursive: true });
        const files = fs14.readdirSync(metadataSrcDir);
        for (const file of files) {
          const src = path16.join(metadataSrcDir, file);
          const stat = fs14.statSync(src);
          if (stat.isFile()) {
            fs14.copyFileSync(src, path16.join(metadataDestDir, file));
            totalSizeBytes += stat.size;
            filesImported++;
          }
        }
      }
      // Replace the existing index only after the temp dir is fully populated.
      if (fs14.existsSync(codemapDir)) {
        fs14.rmSync(codemapDir, { recursive: true });
      }
      fs14.renameSync(tmpDir, codemapDir);
    } catch (err) {
      // Roll back: drop the partial temp dir, then rethrow the original error.
      try {
        fs14.rmSync(tmpDir, { recursive: true });
      } catch {
      }
      throw err;
    }
    // Best-effort check: flag a rebuild when the imported index was built
    // with a different embedding provider or model than the local config.
    try {
      const config = loadCodemapConfig(projectDir);
      if (schema.embedding?.provider && schema.embedding.provider !== config.embedding.provider) {
        rebuildRecommended = true;
      }
      if (schema.embedding?.model && schema.embedding.model !== config.embedding.model) {
        rebuildRecommended = true;
      }
    } catch {
    }
  });
  return {
    success: true,
    filesImported,
    totalSizeBytes,
    rebuildRecommended,
    message: rebuildRecommended ? "Import complete. Rebuild recommended: embedding provider/model mismatch." : "Import complete."
  };
}
|
|
2743
|
+
/**
 * Recursively copy `srcDir` into `destDir` (creating destination
 * directories as needed), returning the file count and total byte size.
 */
function copyDirRecursiveImport(srcDir, destDir) {
  let copied = 0;
  let bytes = 0;
  fs14.mkdirSync(destDir, { recursive: true });
  for (const name of fs14.readdirSync(srcDir)) {
    const from = path16.join(srcDir, name);
    const to = path16.join(destDir, name);
    const info = fs14.statSync(from);
    if (info.isDirectory()) {
      const nested = copyDirRecursiveImport(from, to);
      copied += nested.fileCount;
      bytes += nested.totalBytes;
    } else if (info.isFile()) {
      fs14.copyFileSync(from, to);
      bytes += info.size;
      copied++;
    }
  }
  return { fileCount: copied, totalBytes: bytes };
}
|
|
2764
|
+
/**
 * Recursively copy `src` into `dest`, returning `{ files, bytes }` totals.
 * Directory entries are classified via dirents; non-file, non-directory
 * entries are skipped.
 */
function copyDirRecursive2(src, dest) {
  let files = 0;
  let bytes = 0;
  fs15.mkdirSync(dest, { recursive: true });
  for (const dirent of fs15.readdirSync(src, { withFileTypes: true })) {
    const from = path17.join(src, dirent.name);
    const to = path17.join(dest, dirent.name);
    if (dirent.isDirectory()) {
      const nested = copyDirRecursive2(from, to);
      files += nested.files;
      bytes += nested.bytes;
    } else if (dirent.isFile()) {
      fs15.copyFileSync(from, to);
      bytes += fs15.statSync(from).size;
      files++;
    }
  }
  return { files, bytes };
}
|
|
2785
|
+
/**
 * Migrate a legacy `.codemap/` index into the `.ulpi` codemap layout.
 * Copies the legacy tree verbatim (the original is preserved) and bails
 * out early if there is nothing to migrate, the target already has an
 * index, or the legacy schema is missing/unsupported.
 */
async function migrateFromLegacy(projectDir) {
  const legacyDir = path17.join(projectDir, ".codemap");
  const targetDir = projectCodemapDir(projectDir);
  // Every early exit shares this shape; only the message varies.
  const notMigrated = (message) => ({
    migrated: false,
    filesCount: 0,
    totalSizeBytes: 0,
    message
  });
  if (!fs15.existsSync(legacyDir)) {
    return notMigrated("No legacy .codemap/ directory found.");
  }
  const targetSchema = codemapSchemaFile(projectDir, "main");
  if (fs15.existsSync(targetSchema)) {
    return notMigrated("Target .ulpi/codemap/ already has an index. Skipping migration.");
  }
  const legacySchema = path17.join(legacyDir, "schema.json");
  if (fs15.existsSync(legacySchema)) {
    let schema;
    try {
      schema = JSON.parse(fs15.readFileSync(legacySchema, "utf-8"));
    } catch {
      return notMigrated("Invalid schema.json in legacy directory.");
    }
    if (schema.formatVersion !== 1) {
      return notMigrated(`Unsupported legacy schema version: ${schema.formatVersion}`);
    }
  }
  const { files, bytes } = copyDirRecursive2(legacyDir, targetDir);
  return {
    migrated: true,
    filesCount: files,
    totalSizeBytes: bytes,
    message: `Migration complete. Copied ${files} files (${(bytes / 1024).toFixed(1)} KB). Legacy .codemap/ preserved.`
  };
}
|
|
2834
|
+
/**
 * Move a flat codemap layout (files directly under the codemap dir) into
 * the per-branch layout for the repository's current branch. No-ops when
 * there is no flat layout or the per-branch layout already exists.
 * Data files and the index directory are moved with rename, not copied.
 */
async function migrateToPerBranch(projectDir) {
  const codemapDir = projectCodemapDir(projectDir);
  const flatSchema = path17.join(codemapDir, "schema.json");
  if (!fs15.existsSync(flatSchema)) {
    return {
      migrated: false,
      filesCount: 0,
      totalSizeBytes: 0,
      message: "No flat codemap layout detected. Nothing to migrate."
    };
  }
  const branch = getCurrentBranch(projectDir);
  const branchDir = codemapBranchDir(projectDir, branch);
  if (fs15.existsSync(path17.join(branchDir, "schema.json"))) {
    return {
      migrated: false,
      filesCount: 0,
      totalSizeBytes: 0,
      message: `Per-branch layout already exists for branch "${branch}".`
    };
  }
  let moved = 0;
  let movedBytes = 0;
  fs15.mkdirSync(branchDir, { recursive: true });
  // Individual data files: rename into the branch dir, summing sizes best-effort.
  for (const name of ["schema.json", "manifest.json", "stats.json"]) {
    const src = path17.join(codemapDir, name);
    if (!fs15.existsSync(src)) continue;
    const dest = path17.join(branchDir, name);
    fs15.renameSync(src, dest);
    try {
      movedBytes += fs15.statSync(dest).size;
    } catch {
    }
    moved++;
  }
  // Whole directories: each counts as one moved item; bytes are not summed.
  for (const dir of ["index"]) {
    const src = path17.join(codemapDir, dir);
    if (!fs15.existsSync(src)) continue;
    fs15.renameSync(src, path17.join(branchDir, dir));
    moved++;
  }
  return {
    migrated: true,
    filesCount: moved,
    totalSizeBytes: movedBytes,
    message: `Migrated flat layout to per-branch layout for "${branch}". ${moved} items moved.`
  };
}
|
|
2888
|
+
/**
 * Load and validate an eval dataset JSON file.
 * Requires version 1 and a non-empty `queries` array where each entry has
 * a non-empty `query` string and a non-empty `expectedFiles` array.
 * Returns the parsed dataset unchanged; throws on any validation failure.
 */
function loadEvalDataset(datasetPath) {
  if (!fs16.existsSync(datasetPath)) {
    throw new Error(`Dataset file not found: ${datasetPath}`);
  }
  const raw = JSON.parse(fs16.readFileSync(datasetPath, "utf-8"));
  if (raw.version !== 1) {
    throw new Error(`Unsupported dataset version: ${raw.version}. Expected 1.`);
  }
  if (!Array.isArray(raw.queries) || raw.queries.length === 0) {
    throw new Error("Dataset must have a non-empty 'queries' array.");
  }
  raw.queries.forEach((q, i) => {
    if (typeof q.query !== "string" || !q.query.trim()) {
      throw new Error(`Query at index ${i} must have a non-empty 'query' string.`);
    }
    if (!Array.isArray(q.expectedFiles) || q.expectedFiles.length === 0) {
      throw new Error(`Query at index ${i} must have a non-empty 'expectedFiles' array.`);
    }
  });
  return raw;
}
|
|
2910
|
+
/**
 * Recall@k: fraction of `expected` items that appear within the first `k`
 * entries of `returned`. Returns 0 when `expected` is empty.
 */
function recallAtK(expected, returned, k) {
  if (expected.length === 0) return 0;
  const hits = new Set(returned.slice(0, k));
  const found = expected.filter((item) => hits.has(item)).length;
  return found / expected.length;
}
|
|
2919
|
+
/**
 * MRR@k: reciprocal rank (1-based) of the first entry in the top `k` of
 * `returned` that is in `expected`; 0 when none match.
 */
function mrrAtK(expected, returned, k) {
  const relevant = new Set(expected);
  const rank = returned.slice(0, k).findIndex((item) => relevant.has(item));
  return rank === -1 ? 0 : 1 / (rank + 1);
}
|
|
2929
|
+
/**
 * nDCG@k with binary relevance: DCG over the top `k` of `returned`
 * (gain 1/log2(rank+1), 1-based rank) normalized by the ideal DCG for
 * min(|expected|, k) relevant items. Returns 0 for empty `expected`
 * or when the ideal DCG is 0 (k <= 0).
 */
function ndcgAtK(expected, returned, k) {
  if (expected.length === 0) return 0;
  const relevant = new Set(expected);
  let dcg = 0;
  returned.slice(0, k).forEach((item, position) => {
    if (relevant.has(item)) dcg += 1 / Math.log2(position + 2);
  });
  let idcg = 0;
  const idealCount = Math.min(expected.length, k);
  for (let position = 0; position < idealCount; position++) {
    idcg += 1 / Math.log2(position + 2);
  }
  return idcg === 0 ? 0 : dcg / idcg;
}
|
|
2947
|
+
/**
 * Run the retrieval eval harness: execute every dataset query through
 * searchCode, score each against its expected files (recall@5, MRR@10,
 * nDCG@10 over deduplicated file paths), and report per-query plus
 * aggregate (mean) metrics with timing.
 */
async function runEvalHarness(projectDir, datasetPath) {
  const dataset = loadEvalDataset(datasetPath);
  const startedAt = Date.now();
  const perQuery = [];
  // Queries run sequentially on purpose so each durationMs measures one search.
  for (const { query, expectedFiles } of dataset.queries) {
    const queryStartedAt = Date.now();
    const result = await searchCode(projectDir, query, { limit: 10 });
    // Collapse chunk-level hits to unique file paths, preserving rank order.
    const returnedFiles = [...new Set(result.results.map((r) => r.filePath))];
    perQuery.push({
      query,
      returnedFiles,
      expectedFiles,
      recallAt5: recallAtK(expectedFiles, returnedFiles, 5),
      mrrAt10: mrrAtK(expectedFiles, returnedFiles, 10),
      ndcgAt10: ndcgAtK(expectedFiles, returnedFiles, 10),
      durationMs: Date.now() - queryStartedAt
    });
  }
  // Simple mean across queries (perQuery is non-empty: loadEvalDataset rejects empty datasets).
  const mean = (pick) => perQuery.reduce((sum, q) => sum + pick(q), 0) / perQuery.length;
  const totalMs = Date.now() - startedAt;
  return {
    aggregate: {
      recallAt5: mean((q) => q.recallAt5),
      mrrAt10: mean((q) => q.mrrAt10),
      ndcgAt10: mean((q) => q.ndcgAt10)
    },
    perQuery,
    timing: {
      totalMs,
      avgPerQueryMs: totalMs / perQuery.length
    },
    datasetVersion: dataset.version,
    queriesCount: dataset.queries.length
  };
}
|
|
2985
|
+
|
|
2986
|
+
// Public surface of the codemap chunk: config, scanning/chunking, manifest,
// embedding/store, symbol + BM25 indexes, locking, search, watcher,
// export/import, migrations, and the eval harness.
export {
  DEFAULT_CODEMAP_CONFIG,
  loadCodemapConfig,
  saveCodemapConfig,
  matchesDenyPattern,
  loadCodemapIgnore,
  scanRepository,
  SUPPORTED_EXTENSIONS,
  getLanguageConfig,
  getLanguageId,
  initTreeSitter,
  loadLanguage,
  parseSourceWithLanguage,
  chunkFileAST,
  parseAndChunkFile,
  chunkFile,
  chunkFiles,
  generateChunkId,
  assignChunkIds,
  buildManifest,
  loadManifest,
  saveManifest,
  diffManifest,
  createEmbedder,
  CodemapStore,
  saveSchema,
  loadSchema,
  extractSymbols,
  extractSymbolsFromChunks,
  saveSymbolIndex,
  loadSymbolIndex,
  searchSymbols,
  tokenize,
  buildBM25Index,
  addDocuments,
  removeDocuments,
  queryBM25,
  saveBM25Index,
  loadBM25Index,
  acquireCodemapLock,
  releaseCodemapLock,
  isCodemapLocked,
  buildEmbeddingTexts,
  runInitPipeline,
  DEFAULT_HYBRID_WEIGHTS,
  normalizeScores,
  computePathBoost,
  fuseResults,
  searchCode,
  getCodemapStatus,
  DebounceQueue,
  processIncremental,
  CodemapWatcher,
  exportIndex,
  importIndex,
  migrateFromLegacy,
  migrateToPerBranch,
  loadEvalDataset,
  recallAtK,
  mrrAtK,
  ndcgAtK,
  runEvalHarness
};
|