cto-ai-cli 6.1.0 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +195 -62
- package/dist/cli/index.js +5752 -1733
- package/dist/engine/index.d.ts +548 -12
- package/dist/engine/index.js +1974 -298
- package/dist/mcp/index.js +1822 -446
- package/package.json +1 -1
package/dist/mcp/index.js
CHANGED
|
@@ -1,15 +1,518 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __esm = (fn, res) => function __init() {
|
|
5
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
6
|
+
};
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
// src/engine/tfidf.ts
|
|
13
|
+
/**
 * Build a BM25-style term index over a set of in-memory files.
 *
 * @param {Array<{relativePath: string, content: string}>} files - files to index
 * @returns {{documents: Map, idf: Map, avgDocLength: number, totalDocs: number}}
 *   per-document term-count maps, inverse document frequencies, and corpus stats
 */
function buildIndex(files) {
  const docTable = new Map();
  const docFreq = new Map();
  for (const file of files) {
    const terms = tokenize(file.content);
    const counts = new Map();
    for (const term of terms) {
      const prev = counts.get(term) ?? 0;
      counts.set(term, prev + 1);
    }
    docTable.set(file.relativePath, { terms: counts, length: terms.length });
    // Document frequency counts each distinct term once per document.
    for (const term of counts.keys()) {
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    }
  }
  const totalDocs = files.length;
  const idf = new Map();
  // BM25 idf with +1 inside the log so the value never goes negative.
  docFreq.forEach((df, term) => {
    idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
  });
  let totalLength = 0;
  for (const entry of docTable.values()) {
    totalLength += entry.length;
  }
  const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
  return { documents: docTable, idf, avgDocLength, totalDocs };
}
|
|
37
|
+
/**
 * Score every indexed document against a task description using BM25.
 * Scores are normalized into [0, 1] relative to the best match.
 *
 * @param {object} index - index produced by buildIndex()
 * @param {string} taskDescription - free-text query
 * @param {number} [maxResults=50] - maximum number of matches returned
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   matches sorted by descending score
 */
function query(index, taskDescription, maxResults = 50) {
  const queryTerms = tokenize(taskDescription);
  if (queryTerms.length === 0) return [];
  // Collapse the query into term -> occurrence count.
  const queryCounts = new Map();
  for (const term of queryTerms) {
    queryCounts.set(term, (queryCounts.get(term) ?? 0) + 1);
  }
  const K1 = 1.5; // BM25 term-frequency saturation
  const B = 0.75; // BM25 document-length normalization
  const results = [];
  for (const [filePath, doc] of index.documents) {
    let score = 0;
    const matchedTerms = [];
    for (const [term, count] of queryCounts) {
      const tf = doc.terms.get(term) ?? 0;
      if (tf === 0) continue;
      const termIdf = index.idf.get(term) ?? 0;
      if (termIdf <= 0) continue;
      const tfNorm = tf * (K1 + 1) / (tf + K1 * (1 - B + B * doc.length / index.avgDocLength));
      score += termIdf * tfNorm * count;
      matchedTerms.push(term);
    }
    if (score > 0) {
      results.push({ filePath, score, matchedTerms });
    }
  }
  // Normalize against the best score so callers always see [0, 1].
  let maxScore = 0;
  for (const r of results) maxScore = Math.max(maxScore, r.score);
  if (maxScore > 0) {
    for (const r of results) r.score = r.score / maxScore;
  }
  return results.sort((x, y) => y.score - x.score).slice(0, maxResults);
}
|
|
69
|
+
/**
 * Split free text into stemmed, lowercased search terms.
 * camelCase / ACRONYMWord identifiers are broken at case boundaries;
 * stop words and fragments shorter than 2 characters are discarded.
 *
 * @param {string} text - raw source or query text
 * @returns {string[]} stemmed terms in document order
 */
function tokenize(text) {
  const rawTokens = text.match(/[a-zA-Z][a-zA-Z0-9]*|[0-9]+/g) ?? [];
  const tokens = [];
  for (const raw of rawTokens) {
    // Insert spaces at case boundaries, then split into parts.
    const parts = raw
      .replace(/([a-z])([A-Z])/g, "$1 $2")
      .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
      .toLowerCase()
      .split(/\s+/);
    for (const part of parts) {
      if (part.length < 2) continue;
      const stemmed = stem(part);
      if (stemmed.length < 2 || STOP_WORDS.has(stemmed)) continue;
      tokens.push(stemmed);
    }
  }
  return tokens;
}
|
|
84
|
+
/**
 * Light, rule-based stemmer for search terms.
 * Domain term families are collapsed first (e.g. "authenticat..." -> "auth"),
 * then common English suffixes are stripped; finally a trailing plural "s"
 * (but not "ss") is removed.
 *
 * @param {string} word
 * @returns {string} stemmed lowercase form
 */
function stem(word) {
  const w = word.toLowerCase();
  for (const [prefix, root] of TERM_FAMILIES) {
    if (w.startsWith(prefix) || w === root) return root;
  }
  // [suffix, minimum word length (exclusive), replacement].
  // Order matters: longer suffixes are tried before shorter ones.
  const suffixRules = [
    ["ication", 9, ""],
    ["ation", 7, ""],
    ["tion", 6, ""],
    ["sion", 6, ""],
    ["ment", 6, ""],
    ["ness", 6, ""],
    ["able", 6, ""],
    ["ible", 6, ""],
    ["ator", 6, ""],
    ["izer", 6, ""],
    ["ing", 5, ""],
    ["ies", 4, "y"],
    ["ous", 5, ""],
    ["ful", 5, ""],
    ["ity", 5, ""],
    ["ive", 5, ""],
    ["ion", 5, ""],
    ["ed", 4, ""],
    ["er", 4, ""],
    ["ly", 4, ""],
    ["al", 4, ""]
  ];
  for (const [suffix, minLen, replacement] of suffixRules) {
    if (w.endsWith(suffix) && w.length > minLen) {
      return w.slice(0, -suffix.length) + replacement;
    }
  }
  // Plural "s", but keep words like "class" / "pass".
  if (w.endsWith("s") && !w.endsWith("ss") && w.length > 3) return w.slice(0, -1);
  return w;
}
|
|
113
|
+
/**
 * Raise the score of matches whose file path itself contains query terms,
 * and add path-only matches that content scoring missed entirely.
 * Scores are clamped to 1. Input match objects are never mutated.
 *
 * @param {Array<{filePath, score, matchedTerms}>} matches - content-based matches
 * @param {string[]} allFiles - every candidate relative path
 * @param {string} taskDescription - original query text
 * @returns {Array} boosted matches, sorted by descending score
 */
function boostByPath(matches, allFiles, taskDescription) {
  const queryTerms = new Set(tokenize(taskDescription));
  // Deep-enough copies so callers' objects stay untouched.
  const boosted = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );
  for (const filePath of allFiles) {
    // Treat path separators and dots as word breaks before tokenizing.
    const pathTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
    const hits = pathTerms.filter((t) => queryTerms.has(t));
    if (hits.length === 0) continue;
    const pathBoost = hits.length * 0.3; // 0.3 per matching path term
    const existing = boosted.get(filePath);
    if (existing) {
      existing.score = Math.min(1, existing.score + pathBoost);
      for (const t of hits) {
        if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
      }
    } else {
      boosted.set(filePath, {
        filePath,
        score: Math.min(1, pathBoost),
        matchedTerms: hits
      });
    }
  }
  return [...boosted.values()].sort((x, y) => y.score - x.score);
}
|
|
141
|
+
var STOP_WORDS, TERM_FAMILIES;
|
|
142
|
+
var init_tfidf = __esm({
|
|
143
|
+
"src/engine/tfidf.ts"() {
|
|
144
|
+
"use strict";
|
|
145
|
+
STOP_WORDS = /* @__PURE__ */ new Set([
|
|
146
|
+
// Language keywords
|
|
147
|
+
"import",
|
|
148
|
+
"export",
|
|
149
|
+
"from",
|
|
150
|
+
"const",
|
|
151
|
+
"let",
|
|
152
|
+
"var",
|
|
153
|
+
"function",
|
|
154
|
+
"class",
|
|
155
|
+
"interface",
|
|
156
|
+
"type",
|
|
157
|
+
"return",
|
|
158
|
+
"async",
|
|
159
|
+
"await",
|
|
160
|
+
"new",
|
|
161
|
+
"this",
|
|
162
|
+
"that",
|
|
163
|
+
"true",
|
|
164
|
+
"false",
|
|
165
|
+
"null",
|
|
166
|
+
"undefined",
|
|
167
|
+
"void",
|
|
168
|
+
"string",
|
|
169
|
+
"number",
|
|
170
|
+
"boolean",
|
|
171
|
+
"any",
|
|
172
|
+
"unknown",
|
|
173
|
+
"never",
|
|
174
|
+
"object",
|
|
175
|
+
"array",
|
|
176
|
+
"promise",
|
|
177
|
+
"if",
|
|
178
|
+
"else",
|
|
179
|
+
"for",
|
|
180
|
+
"while",
|
|
181
|
+
"do",
|
|
182
|
+
"switch",
|
|
183
|
+
"case",
|
|
184
|
+
"break",
|
|
185
|
+
"continue",
|
|
186
|
+
"try",
|
|
187
|
+
"catch",
|
|
188
|
+
"throw",
|
|
189
|
+
"finally",
|
|
190
|
+
"default",
|
|
191
|
+
"extends",
|
|
192
|
+
"implements",
|
|
193
|
+
"static",
|
|
194
|
+
"private",
|
|
195
|
+
"public",
|
|
196
|
+
"protected",
|
|
197
|
+
"readonly",
|
|
198
|
+
"abstract",
|
|
199
|
+
"override",
|
|
200
|
+
"super",
|
|
201
|
+
"typeof",
|
|
202
|
+
"instanceof",
|
|
203
|
+
"in",
|
|
204
|
+
"of",
|
|
205
|
+
"as",
|
|
206
|
+
"is",
|
|
207
|
+
"keyof",
|
|
208
|
+
"enum",
|
|
209
|
+
"namespace",
|
|
210
|
+
"module",
|
|
211
|
+
"declare",
|
|
212
|
+
// Python
|
|
213
|
+
"def",
|
|
214
|
+
"self",
|
|
215
|
+
"cls",
|
|
216
|
+
"none",
|
|
217
|
+
"pass",
|
|
218
|
+
"yield",
|
|
219
|
+
"lambda",
|
|
220
|
+
"with",
|
|
221
|
+
"elif",
|
|
222
|
+
"except",
|
|
223
|
+
"raise",
|
|
224
|
+
"assert",
|
|
225
|
+
"global",
|
|
226
|
+
"nonlocal",
|
|
227
|
+
// Natural language stop words only — NOT domain terms that carry signal
|
|
228
|
+
"the",
|
|
229
|
+
"and",
|
|
230
|
+
"for",
|
|
231
|
+
"with",
|
|
232
|
+
"not",
|
|
233
|
+
"but",
|
|
234
|
+
"are",
|
|
235
|
+
"was",
|
|
236
|
+
"were",
|
|
237
|
+
"has",
|
|
238
|
+
"have",
|
|
239
|
+
"had",
|
|
240
|
+
"will",
|
|
241
|
+
"would",
|
|
242
|
+
"could",
|
|
243
|
+
"should",
|
|
244
|
+
"may",
|
|
245
|
+
"can",
|
|
246
|
+
"its",
|
|
247
|
+
"also",
|
|
248
|
+
"than",
|
|
249
|
+
"then",
|
|
250
|
+
"into",
|
|
251
|
+
"only",
|
|
252
|
+
"very",
|
|
253
|
+
"just",
|
|
254
|
+
"about",
|
|
255
|
+
"being",
|
|
256
|
+
"been",
|
|
257
|
+
"does",
|
|
258
|
+
"did",
|
|
259
|
+
"doing",
|
|
260
|
+
"todo",
|
|
261
|
+
"fixme",
|
|
262
|
+
"hack",
|
|
263
|
+
"note",
|
|
264
|
+
"xxx"
|
|
265
|
+
]);
|
|
266
|
+
TERM_FAMILIES = [
|
|
267
|
+
["authenticat", "auth"],
|
|
268
|
+
["authori", "auth"],
|
|
269
|
+
["configur", "config"],
|
|
270
|
+
["connect", "connect"],
|
|
271
|
+
["request", "request"],
|
|
272
|
+
["response", "respons"],
|
|
273
|
+
["middlewar", "middlewar"],
|
|
274
|
+
["validat", "valid"],
|
|
275
|
+
["initiali", "init"],
|
|
276
|
+
["subscri", "subscrib"],
|
|
277
|
+
["transform", "transform"],
|
|
278
|
+
["seriali", "serial"],
|
|
279
|
+
["deseriali", "serial"],
|
|
280
|
+
["dependen", "depend"],
|
|
281
|
+
["environ", "environ"],
|
|
282
|
+
["permiss", "permiss"],
|
|
283
|
+
["migrat", "migrat"],
|
|
284
|
+
["transact", "transact"],
|
|
285
|
+
["encryp", "encrypt"],
|
|
286
|
+
["decryp", "encrypt"]
|
|
287
|
+
];
|
|
288
|
+
}
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
// src/engine/multi-repo.ts
|
|
292
|
+
var multi_repo_exports = {};
|
|
293
|
+
__export(multi_repo_exports, {
|
|
294
|
+
discoverSiblingRepos: () => discoverSiblingRepos,
|
|
295
|
+
parseSiblingPaths: () => parseSiblingPaths,
|
|
296
|
+
querySiblingRepos: () => querySiblingRepos,
|
|
297
|
+
renderMultiRepoSummary: () => renderMultiRepoSummary
|
|
298
|
+
});
|
|
299
|
+
import { readdirSync, readFileSync as readFileSync5, statSync as statSync2, existsSync as existsSync5 } from "fs";
|
|
300
|
+
import { join as join7, basename as basename3, resolve as resolve5, relative as relative5 } from "path";
|
|
301
|
+
/**
 * Find sibling directories of the project that look like repositories,
 * i.e. contain one of the known marker files (package.json, go.mod, ...).
 * Hidden entries, known build/vendor dirs, and the project itself are skipped.
 *
 * @param {string} projectPath - path of the current project
 * @returns {Array<{path: string, name: string, stack: string[], fileCount: number}>}
 */
function discoverSiblingRepos(projectPath) {
  const absProject = resolve5(projectPath);
  const parentDir = join7(absProject, "..");
  const projectName = basename3(absProject);
  let entries;
  try {
    entries = readdirSync(parentDir);
  } catch {
    // Parent unreadable — behave as if there are no siblings.
    return [];
  }
  const siblings = [];
  for (const entry of entries) {
    if (entry === projectName || entry.startsWith(".") || SKIP_DIRS.has(entry)) continue;
    const candidatePath = join7(parentDir, entry);
    let isDir = false;
    try {
      isDir = statSync2(candidatePath).isDirectory();
    } catch {
      isDir = false;
    }
    if (!isDir) continue;
    const hasMarker = REPO_MARKERS.some((marker) => {
      try {
        return existsSync5(join7(candidatePath, marker));
      } catch {
        return false;
      }
    });
    if (!hasMarker) continue;
    siblings.push({
      path: candidatePath,
      name: entry,
      stack: detectStack2(candidatePath),
      fileCount: 0
      // filled during indexing
    });
  }
  return siblings;
}
|
|
341
|
+
/**
 * Infer a repo's tech stack from well-known marker files at its root.
 * Filesystem errors abort detection, returning whatever was found so far.
 *
 * @param {string} repoPath - repository root
 * @returns {string[]} detected stack labels (possibly empty)
 */
function detectStack2(repoPath) {
  // [marker file, stack label] — checked in this order.
  const markers = [
    ["tsconfig.json", "TypeScript"],
    ["package.json", "Node.js"],
    ["Cargo.toml", "Rust"],
    ["go.mod", "Go"],
    ["pyproject.toml", "Python"],
    ["pom.xml", "Java"]
  ];
  const stack = [];
  try {
    for (const [file, label] of markers) {
      if (existsSync5(join7(repoPath, file))) stack.push(label);
    }
  } catch {
    // Best effort — partial detection is acceptable.
  }
  return stack;
}
|
|
354
|
+
/**
 * Recursively collect source-file paths (relative to repoPath).
 * Traversal depth is capped at 8 and the result at maxFiles entries;
 * hidden entries, skip-listed directories, and files over MAX_FILE_SIZE
 * bytes are ignored. Unreadable directories/entries are skipped silently.
 *
 * @param {string} repoPath - repository root to walk
 * @param {number} [maxFiles=MAX_FILES_PER_REPO] - hard cap on results
 * @returns {string[]} relative paths of matching source files
 */
function listSourceFiles(repoPath, maxFiles = MAX_FILES_PER_REPO) {
  const found = [];
  const visit = (dir, depth) => {
    if (depth > 8 || found.length >= maxFiles) return;
    let names;
    try {
      names = readdirSync(dir);
    } catch {
      return; // unreadable directory
    }
    for (const name of names) {
      if (found.length >= maxFiles) return;
      if (name.startsWith(".") || SKIP_DIRS.has(name)) continue;
      const fullPath = join7(dir, name);
      try {
        const info = statSync2(fullPath);
        if (info.isDirectory()) {
          visit(fullPath, depth + 1);
        } else if (info.isFile() && info.size <= MAX_FILE_SIZE) {
          const ext = name.split(".").pop()?.toLowerCase() ?? "";
          if (SOURCE_EXTENSIONS.has(ext)) {
            found.push(relative5(repoPath, fullPath));
          }
        }
      } catch {
        // stat failed — ignore this entry
      }
    }
  };
  visit(repoPath, 0);
  return found;
}
|
|
386
|
+
/**
 * Read every source file of a sibling repo into memory.
 * Side effect: updates repo.fileCount with the number of files found.
 * Files that fail to read are kept with empty content so the list stays
 * aligned with the discovered paths; only readable files enter contentMap.
 *
 * @param {{path: string, fileCount: number}} repo - sibling repo descriptor
 * @returns {{contents: Array<{relativePath: string, content: string}>, contentMap: Map<string, string>}}
 */
function indexSiblingRepo(repo) {
  const filePaths = listSourceFiles(repo.path);
  repo.fileCount = filePaths.length;
  const contents = [];
  const contentMap = new Map();
  for (const relPath of filePaths) {
    let content = "";
    try {
      content = readFileSync5(join7(repo.path, relPath), "utf-8");
      contentMap.set(relPath, content);
    } catch {
      content = "";
    }
    contents.push({ relativePath: relPath, content });
  }
  return { contents, contentMap };
}
|
|
402
|
+
/**
 * Search sibling repos for files relevant to a task description.
 * Each repo is indexed on the fly (BM25 + path boosting); per-repo
 * matches are merged and sorted globally by score.
 *
 * @param {Array} siblings - repos from discoverSiblingRepos/parseSiblingPaths
 * @param {string} task - free-text task description
 * @param {number} [maxPerRepo=5] - max matches kept per repo
 * @param {number} [minScore=0.3] - minimum normalized score to keep
 * @returns {{siblings: Array, matches: Array, timeMs: number}}
 */
function querySiblingRepos(siblings, task, maxPerRepo = 5, minScore = 0.3) {
  const startTime = performance.now();
  const allMatches = [];
  for (const repo of siblings) {
    const { contents, contentMap } = indexSiblingRepo(repo);
    if (contents.length === 0) continue;
    // Over-fetch (2x) before boosting so path boosts can reorder candidates.
    const ranked = boostByPath(
      query(buildIndex(contents), task, maxPerRepo * 2),
      contents.map((c) => c.relativePath),
      task
    );
    for (const match of ranked.slice(0, maxPerRepo)) {
      if (match.score < minScore) continue;
      const content = contentMap.get(match.filePath) ?? "";
      allMatches.push({
        repoName: repo.name,
        repoPath: repo.path,
        relativePath: match.filePath,
        absolutePath: join7(repo.path, match.filePath),
        score: match.score,
        content,
        tokens: Math.ceil(content.length / 4) // rough chars/4 token estimate
      });
    }
  }
  allMatches.sort((x, y) => y.score - x.score);
  return {
    siblings,
    matches: allMatches,
    timeMs: Math.round(performance.now() - startTime)
  };
}
|
|
437
|
+
/**
 * Parse a comma-separated list of sibling repo paths into repo descriptors.
 * Relative entries are resolved against the project's parent directory;
 * blank entries and non-existent paths are dropped.
 *
 * @param {string} pathsStr - e.g. "api,shared-lib"
 * @param {string} projectPath - current project path
 * @returns {Array<{path: string, name: string, stack: string[], fileCount: number}>}
 */
function parseSiblingPaths(pathsStr, projectPath) {
  const parentDir = join7(resolve5(projectPath), "..");
  const repos = [];
  for (const piece of pathsStr.split(",")) {
    const trimmed = piece.trim();
    if (trimmed.length === 0) continue;
    const absPath = resolve5(parentDir, trimmed);
    repos.push({
      path: absPath,
      name: basename3(absPath),
      stack: detectStack2(absPath),
      fileCount: 0
    });
  }
  return repos.filter((repo) => existsSync5(repo.path));
}
|
|
449
|
+
/**
 * Render a human-readable, multi-line summary of a cross-repo query result:
 * scanned repos (with stack labels), then up to 10 top matches.
 *
 * @param {{siblings: Array, matches: Array, timeMs: number}} result
 * @returns {string} summary text joined with newlines
 */
function renderMultiRepoSummary(result) {
  if (result.siblings.length === 0) {
    return " No sibling repos found.";
  }
  const lines = [];
  lines.push(` Sibling repos scanned: ${result.siblings.length} (${result.timeMs}ms)`);
  for (const repo of result.siblings) {
    lines.push(` ${repo.name}/ \u2014 ${repo.fileCount} files [${repo.stack.join(", ") || "unknown"}]`);
  }
  if (result.matches.length === 0) {
    lines.push(" No relevant files found in sibling repos.");
  } else {
    lines.push(` Cross-repo matches: ${result.matches.length}`);
    // Show at most the 10 best matches with a percentage score and a
    // thousands-rounded token estimate.
    for (const m of result.matches.slice(0, 10)) {
      const pct = Math.round(m.score * 100);
      lines.push(` ${m.repoName}/${m.relativePath} sem: ${pct}% (~${Math.round(m.tokens / 1e3)}K tok)`);
    }
  }
  return lines.join("\n");
}
|
|
470
|
+
// Module-level configuration for sibling-repo discovery and indexing.
var REPO_MARKERS, SKIP_DIRS, SOURCE_EXTENSIONS, MAX_FILES_PER_REPO, MAX_FILE_SIZE;
var init_multi_repo = __esm({
  "src/engine/multi-repo.ts"() {
    "use strict";
    init_tfidf();
    // Files whose presence at a directory root marks it as a repository.
    REPO_MARKERS = ["package.json", "tsconfig.json", "Cargo.toml", "go.mod", "pyproject.toml", "pom.xml"];
    // Directories never descended into during traversal.
    SKIP_DIRS = /* @__PURE__ */ new Set(["node_modules", ".git", "dist", "build", ".next", "__pycache__", "target", "vendor"]);
    // File extensions considered indexable source/text files.
    SOURCE_EXTENSIONS = /* @__PURE__ */ new Set([
      "ts",
      "tsx",
      "js",
      "jsx",
      "mjs",
      "cjs",
      "py",
      "rs",
      "go",
      "java",
      "kt",
      "rb",
      "c",
      "cpp",
      "h",
      "hpp",
      "cs",
      "json",
      "yaml",
      "yml",
      "toml",
      "md",
      "txt"
    ]);
    // Hard caps: at most 500 files per repo, 100 KB (1e5 bytes) per file.
    MAX_FILES_PER_REPO = 500;
    MAX_FILE_SIZE = 1e5;
  }
});
|
|
2
506
|
|
|
3
507
|
// src/mcp/index.ts
|
|
4
508
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
5
509
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
510
|
import { z } from "zod";
|
|
7
|
-
import { resolve as
|
|
8
|
-
import { readFileSync as readFileSync2 } from "fs";
|
|
511
|
+
import { resolve as resolve6 } from "path";
|
|
9
512
|
|
|
10
513
|
// src/engine/analyzer.ts
|
|
11
514
|
import { readFile as readFile2, readdir, stat as stat2 } from "fs/promises";
|
|
12
|
-
import { join as
|
|
515
|
+
import { join as join3, extname, relative as relative3, resolve as resolve3, basename as basename2 } from "path";
|
|
13
516
|
import { createHash } from "crypto";
|
|
14
517
|
|
|
15
518
|
// src/types/engine.ts
|
|
@@ -93,12 +596,727 @@ function estimateTokens(content, sizeInBytes, method = "chars4") {
|
|
|
93
596
|
|
|
94
597
|
// src/engine/graph.ts
|
|
95
598
|
import { Project, SyntaxKind } from "ts-morph";
|
|
96
|
-
import { resolve, relative, dirname, join } from "path";
|
|
97
|
-
import { existsSync } from "fs";
|
|
599
|
+
import { resolve as resolve2, relative as relative2, dirname as dirname2, join as join2 } from "path";
|
|
600
|
+
import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
|
|
601
|
+
|
|
602
|
+
// src/engine/polyglot-graph.ts
|
|
603
|
+
import { readFileSync } from "fs";
|
|
604
|
+
import { join, dirname } from "path";
|
|
605
|
+
// Maps a file extension to the language whose import parser handles it.
// All JS/TS variants map to "typescript"; per parseImports, TypeScript
// files are skipped by this module's regex parsers (handled elsewhere).
var LANG_EXTENSIONS = {
  "py": "python",
  "pyw": "python",
  "go": "go",
  "java": "java",
  "rs": "rust",
  "ts": "typescript",
  "tsx": "typescript",
  "js": "typescript",
  "jsx": "typescript",
  "mts": "typescript",
  "mjs": "typescript",
  "cts": "typescript",
  "cjs": "typescript"
};
|
|
620
|
+
/**
 * Map a file path to its import-parser language via its extension.
 *
 * @param {string} filePath
 * @returns {string|null} language key from LANG_EXTENSIONS, or null
 */
function detectLanguage(filePath) {
  const segments = filePath.split(".");
  const ext = segments[segments.length - 1].toLowerCase();
  return LANG_EXTENSIONS[ext] ?? null;
}
|
|
624
|
+
/**
 * Extract intra-project import edges from one non-TypeScript source file.
 * TypeScript/JavaScript files return no edges here (parsed elsewhere).
 *
 * @param {string} filePath - absolute path, read from disk if content omitted
 * @param {string} relativePath - project-relative path (edge source)
 * @param {string} projectPath - project root
 * @param {Set<string>} allRelativePaths - all known project-relative paths
 * @param {string} [content] - file contents, if already in memory
 * @returns {Array<{from: string, to: string, type: string}>} resolved edges
 */
function parseImports(filePath, relativePath, projectPath, allRelativePaths, content) {
  const lang = detectLanguage(relativePath);
  if (!lang || lang === "typescript") return [];
  const src = content ?? safeReadFile(filePath);
  if (!src) return [];
  let specs;
  if (lang === "python") {
    specs = parsePythonImports(src);
  } else if (lang === "go") {
    specs = parseGoImports(src);
  } else if (lang === "java") {
    specs = parseJavaImports(src);
  } else if (lang === "rust") {
    specs = parseRustImports(src);
  } else {
    return [];
  }
  const edges = [];
  for (const spec of specs) {
    const resolved = resolveImportSpec(spec, relativePath, projectPath, allRelativePaths, lang);
    if (resolved) {
      edges.push({ from: relativePath, to: resolved, type: "import" });
    }
  }
  return edges;
}
|
|
655
|
+
/**
 * Collect import edges across all non-TypeScript files of the project.
 *
 * @param {Array<{relativePath, absolutePath, content}>} files - project files
 * @param {string} projectPath - project root
 * @returns {Array<{from, to, type}>} every resolved import edge
 */
function parseAllPolyglotImports(files, projectPath) {
  const allPaths = new Set(files.map((f) => f.relativePath));
  return files.flatMap((file) => {
    const lang = detectLanguage(file.relativePath);
    // TypeScript/JS is handled by the ts-morph graph, not here.
    if (!lang || lang === "typescript") return [];
    return parseImports(
      file.absolutePath,
      file.relativePath,
      projectPath,
      allPaths,
      file.content
    );
  });
}
|
|
672
|
+
/**
 * Rough cyclomatic-complexity estimate for non-TypeScript source.
 * Starts at 1 and adds 1 for each line that contains a branching
 * construct for the given language (at most one increment per line).
 *
 * @param {string} content - source text
 * @param {"python"|"go"|"java"|"rust"|"typescript"} lang
 * @returns {number} estimated complexity (always >= 1)
 */
function estimateComplexity(content, lang) {
  const patterns = {
    python: [
      /^\s*if\s/,
      /^\s*elif\s/,
      /^\s*for\s/,
      /^\s*while\s/,
      /^\s*except\s/,
      /\sif\s.*\selse\s/,
      // ternary
      /\sand\s/,
      /\sor\s/
    ],
    go: [
      /^\s*if\s/,
      /^\s*for\s/,
      /^\s*case\s/,
      /^\s*select\s*{/,
      /&&/,
      /\|\|/
    ],
    java: [
      /^\s*if\s*\(/,
      /^\s*for\s*\(/,
      /^\s*while\s*\(/,
      /^\s*case\s/,
      /^\s*catch\s*\(/,
      /\?\s/,
      // ternary
      /&&/,
      /\|\|/
    ],
    rust: [
      /^\s*if\s/,
      /^\s*for\s/,
      /^\s*while\s/,
      /^\s*match\s/,
      /=>\s/,
      // match arms
      /&&/,
      /\|\|/
    ],
    typescript: []
    // handled by ts-morph
  };
  const langPatterns = patterns[lang];
  let complexity = 1;
  for (const line of content.split("\n")) {
    // some() stops at the first matching pattern: one increment per line.
    if (langPatterns.some((pattern) => pattern.test(line))) {
      complexity++;
    }
  }
  return complexity;
}
|
|
730
|
+
var PYTHON_STDLIB = /* @__PURE__ */ new Set([
|
|
731
|
+
"__future__",
|
|
732
|
+
"abc",
|
|
733
|
+
"aifc",
|
|
734
|
+
"argparse",
|
|
735
|
+
"array",
|
|
736
|
+
"ast",
|
|
737
|
+
"asynchat",
|
|
738
|
+
"asyncio",
|
|
739
|
+
"asyncore",
|
|
740
|
+
"atexit",
|
|
741
|
+
"audioop",
|
|
742
|
+
"base64",
|
|
743
|
+
"bdb",
|
|
744
|
+
"binascii",
|
|
745
|
+
"binhex",
|
|
746
|
+
"bisect",
|
|
747
|
+
"builtins",
|
|
748
|
+
"bz2",
|
|
749
|
+
"calendar",
|
|
750
|
+
"cgi",
|
|
751
|
+
"cgitb",
|
|
752
|
+
"chunk",
|
|
753
|
+
"cmath",
|
|
754
|
+
"cmd",
|
|
755
|
+
"code",
|
|
756
|
+
"codecs",
|
|
757
|
+
"codeop",
|
|
758
|
+
"collections",
|
|
759
|
+
"colorsys",
|
|
760
|
+
"compileall",
|
|
761
|
+
"concurrent",
|
|
762
|
+
"configparser",
|
|
763
|
+
"contextlib",
|
|
764
|
+
"contextvars",
|
|
765
|
+
"copy",
|
|
766
|
+
"copyreg",
|
|
767
|
+
"cProfile",
|
|
768
|
+
"crypt",
|
|
769
|
+
"csv",
|
|
770
|
+
"ctypes",
|
|
771
|
+
"curses",
|
|
772
|
+
"dataclasses",
|
|
773
|
+
"datetime",
|
|
774
|
+
"dbm",
|
|
775
|
+
"decimal",
|
|
776
|
+
"difflib",
|
|
777
|
+
"dis",
|
|
778
|
+
"distutils",
|
|
779
|
+
"doctest",
|
|
780
|
+
"email",
|
|
781
|
+
"encodings",
|
|
782
|
+
"enum",
|
|
783
|
+
"errno",
|
|
784
|
+
"faulthandler",
|
|
785
|
+
"fcntl",
|
|
786
|
+
"filecmp",
|
|
787
|
+
"fileinput",
|
|
788
|
+
"fnmatch",
|
|
789
|
+
"fractions",
|
|
790
|
+
"ftplib",
|
|
791
|
+
"functools",
|
|
792
|
+
"gc",
|
|
793
|
+
"getopt",
|
|
794
|
+
"getpass",
|
|
795
|
+
"gettext",
|
|
796
|
+
"glob",
|
|
797
|
+
"grp",
|
|
798
|
+
"gzip",
|
|
799
|
+
"hashlib",
|
|
800
|
+
"heapq",
|
|
801
|
+
"hmac",
|
|
802
|
+
"html",
|
|
803
|
+
"http",
|
|
804
|
+
"idlelib",
|
|
805
|
+
"imaplib",
|
|
806
|
+
"imghdr",
|
|
807
|
+
"imp",
|
|
808
|
+
"importlib",
|
|
809
|
+
"inspect",
|
|
810
|
+
"io",
|
|
811
|
+
"ipaddress",
|
|
812
|
+
"itertools",
|
|
813
|
+
"json",
|
|
814
|
+
"keyword",
|
|
815
|
+
"lib2to3",
|
|
816
|
+
"linecache",
|
|
817
|
+
"locale",
|
|
818
|
+
"logging",
|
|
819
|
+
"lzma",
|
|
820
|
+
"mailbox",
|
|
821
|
+
"mailcap",
|
|
822
|
+
"marshal",
|
|
823
|
+
"math",
|
|
824
|
+
"mimetypes",
|
|
825
|
+
"mmap",
|
|
826
|
+
"modulefinder",
|
|
827
|
+
"multiprocessing",
|
|
828
|
+
"netrc",
|
|
829
|
+
"nis",
|
|
830
|
+
"nntplib",
|
|
831
|
+
"numbers",
|
|
832
|
+
"operator",
|
|
833
|
+
"optparse",
|
|
834
|
+
"os",
|
|
835
|
+
"ossaudiodev",
|
|
836
|
+
"pathlib",
|
|
837
|
+
"pdb",
|
|
838
|
+
"pickle",
|
|
839
|
+
"pickletools",
|
|
840
|
+
"pipes",
|
|
841
|
+
"pkgutil",
|
|
842
|
+
"platform",
|
|
843
|
+
"plistlib",
|
|
844
|
+
"poplib",
|
|
845
|
+
"posix",
|
|
846
|
+
"posixpath",
|
|
847
|
+
"pprint",
|
|
848
|
+
"profile",
|
|
849
|
+
"pstats",
|
|
850
|
+
"pty",
|
|
851
|
+
"pwd",
|
|
852
|
+
"py_compile",
|
|
853
|
+
"pyclbr",
|
|
854
|
+
"pydoc",
|
|
855
|
+
"queue",
|
|
856
|
+
"quopri",
|
|
857
|
+
"random",
|
|
858
|
+
"re",
|
|
859
|
+
"readline",
|
|
860
|
+
"reprlib",
|
|
861
|
+
"resource",
|
|
862
|
+
"rlcompleter",
|
|
863
|
+
"runpy",
|
|
864
|
+
"sched",
|
|
865
|
+
"secrets",
|
|
866
|
+
"select",
|
|
867
|
+
"selectors",
|
|
868
|
+
"shelve",
|
|
869
|
+
"shlex",
|
|
870
|
+
"shutil",
|
|
871
|
+
"signal",
|
|
872
|
+
"site",
|
|
873
|
+
"smtpd",
|
|
874
|
+
"smtplib",
|
|
875
|
+
"sndhdr",
|
|
876
|
+
"socket",
|
|
877
|
+
"socketserver",
|
|
878
|
+
"spwd",
|
|
879
|
+
"sqlite3",
|
|
880
|
+
"ssl",
|
|
881
|
+
"stat",
|
|
882
|
+
"statistics",
|
|
883
|
+
"string",
|
|
884
|
+
"stringprep",
|
|
885
|
+
"struct",
|
|
886
|
+
"subprocess",
|
|
887
|
+
"sunau",
|
|
888
|
+
"symtable",
|
|
889
|
+
"sys",
|
|
890
|
+
"sysconfig",
|
|
891
|
+
"syslog",
|
|
892
|
+
"tabnanny",
|
|
893
|
+
"tarfile",
|
|
894
|
+
"telnetlib",
|
|
895
|
+
"tempfile",
|
|
896
|
+
"termios",
|
|
897
|
+
"test",
|
|
898
|
+
"textwrap",
|
|
899
|
+
"threading",
|
|
900
|
+
"time",
|
|
901
|
+
"timeit",
|
|
902
|
+
"tkinter",
|
|
903
|
+
"token",
|
|
904
|
+
"tokenize",
|
|
905
|
+
"tomllib",
|
|
906
|
+
"trace",
|
|
907
|
+
"traceback",
|
|
908
|
+
"tracemalloc",
|
|
909
|
+
"tty",
|
|
910
|
+
"turtle",
|
|
911
|
+
"turtledemo",
|
|
912
|
+
"types",
|
|
913
|
+
"typing",
|
|
914
|
+
"unicodedata",
|
|
915
|
+
"unittest",
|
|
916
|
+
"urllib",
|
|
917
|
+
"uu",
|
|
918
|
+
"uuid",
|
|
919
|
+
"venv",
|
|
920
|
+
"warnings",
|
|
921
|
+
"wave",
|
|
922
|
+
"weakref",
|
|
923
|
+
"webbrowser",
|
|
924
|
+
"winreg",
|
|
925
|
+
"winsound",
|
|
926
|
+
"wsgiref",
|
|
927
|
+
"xdrlib",
|
|
928
|
+
"xml",
|
|
929
|
+
"xmlrpc",
|
|
930
|
+
"zipapp",
|
|
931
|
+
"zipfile",
|
|
932
|
+
"zipimport",
|
|
933
|
+
"zlib",
|
|
934
|
+
"_thread"
|
|
935
|
+
]);
|
|
936
|
+
/**
 * Whether a Python module path refers to the standard library.
 * Only the top-level package matters (e.g. "os.path" -> "os").
 *
 * @param {string} module - dotted module path
 * @returns {boolean}
 */
function isPythonStdlib(module) {
  const [topLevel] = module.split(".");
  return PYTHON_STDLIB.has(topLevel);
}
|
|
940
|
+
/**
 * Extract non-stdlib import specs from Python source.
 * Parenthesized multi-line imports are first collapsed onto one line;
 * both `from X import ...` and `import a, b as c` forms are handled.
 * Comment lines are ignored; stdlib modules are filtered out.
 *
 * @param {string} content - Python source text
 * @returns {Array<{raw: string, isRelative: boolean}>} import specs
 */
function parsePythonImports(content) {
  // Collapse `from x import (\n a,\n b\n)` onto a single logical line.
  const joined = content.replace(/\(\s*\n([^)]*?)\)/gs, (_, inner) => {
    return "(" + inner.replace(/\n/g, " ").replace(/\s+/g, " ") + ")";
  });
  const specs = [];
  for (const line of joined.split("\n")) {
    const trimmed = line.trimStart();
    if (trimmed.startsWith("#")) continue; // comment line
    const fromMatch = trimmed.match(/^from\s+(\.{0,10}[\w.]*)\s+import\s+(.+)/);
    if (fromMatch) {
      const raw = fromMatch[1];
      const isRelative = raw.startsWith(".");
      // Keep relative imports and any absolute import outside the stdlib.
      if (isRelative || !isPythonStdlib(raw)) {
        specs.push({ raw, isRelative });
      }
      continue;
    }
    const importMatch = trimmed.match(/^import\s+(.+)/);
    if (!importMatch) continue;
    // `import a, b as c` — strip aliases, validate, filter stdlib.
    for (const entry of importMatch[1].split(",")) {
      const mod = entry.trim().split(/\s+as\s+/)[0].trim();
      if (!mod || !mod.match(/^[\w.]+$/) || isPythonStdlib(mod)) continue;
      specs.push({ raw: mod, isRelative: false });
    }
  }
  return specs;
}
|
|
969
|
+
var GO_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
|
|
970
|
+
"archive",
|
|
971
|
+
"bufio",
|
|
972
|
+
"bytes",
|
|
973
|
+
"cmp",
|
|
974
|
+
"compress",
|
|
975
|
+
"container",
|
|
976
|
+
"context",
|
|
977
|
+
"crypto",
|
|
978
|
+
"database",
|
|
979
|
+
"debug",
|
|
980
|
+
"embed",
|
|
981
|
+
"encoding",
|
|
982
|
+
"errors",
|
|
983
|
+
"expvar",
|
|
984
|
+
"flag",
|
|
985
|
+
"fmt",
|
|
986
|
+
"go",
|
|
987
|
+
"hash",
|
|
988
|
+
"html",
|
|
989
|
+
"image",
|
|
990
|
+
"index",
|
|
991
|
+
"internal",
|
|
992
|
+
"io",
|
|
993
|
+
"iter",
|
|
994
|
+
"log",
|
|
995
|
+
"maps",
|
|
996
|
+
"math",
|
|
997
|
+
"mime",
|
|
998
|
+
"net",
|
|
999
|
+
"os",
|
|
1000
|
+
"path",
|
|
1001
|
+
"plugin",
|
|
1002
|
+
"reflect",
|
|
1003
|
+
"regexp",
|
|
1004
|
+
"runtime",
|
|
1005
|
+
"slices",
|
|
1006
|
+
"sort",
|
|
1007
|
+
"strconv",
|
|
1008
|
+
"strings",
|
|
1009
|
+
"structs",
|
|
1010
|
+
"sync",
|
|
1011
|
+
"syscall",
|
|
1012
|
+
"testing",
|
|
1013
|
+
"text",
|
|
1014
|
+
"time",
|
|
1015
|
+
"unicode",
|
|
1016
|
+
"unsafe"
|
|
1017
|
+
]);
|
|
1018
|
+
/**
 * Whether a Go import path refers to the standard library.
 * Module paths have a dotted first segment (e.g. "github.com/...");
 * stdlib packages never do.
 *
 * @param {string} importPath - Go import path
 * @returns {boolean}
 */
function isGoStdlib(importPath) {
  const [firstSegment] = importPath.split("/");
  return !firstSegment.includes(".") && GO_STDLIB_PREFIXES.has(firstSegment);
}
|
|
1023
|
+
/**
 * Extracts third-party import specs from Go source text.
 * Handles single-line imports (`import alias "pkg"`) as well as grouped
 * blocks (`import ( ... )`); standard-library packages are filtered out.
 * Go imports are never path-relative, so isRelative is always false.
 */
function parseGoImports(content) {
  const specs = [];
  const record = (pkg) => {
    if (!isGoStdlib(pkg)) specs.push({ raw: pkg, isRelative: false });
  };
  const singleRe = /^\s*import\s+(?:[\w_.]+\s+)?"([^"]+)"/gm;
  for (let m = singleRe.exec(content); m !== null; m = singleRe.exec(content)) {
    record(m[1]);
  }
  const groupRe = /import\s*\(([\s\S]*?)\)/g;
  for (let g = groupRe.exec(content); g !== null; g = groupRe.exec(content)) {
    // Each entry inside the parenthesized block: optional alias + quoted path.
    const entryRe = /(?:[\w_.]+\s+)?"([^"]+)"/g;
    for (let e = entryRe.exec(g[1]); e !== null; e = entryRe.exec(g[1])) {
      record(e[1]);
    }
  }
  return specs;
}
|
|
1045
|
+
// Package roots bundled with the JDK (plus reserved JDK-internal namespaces).
var JAVA_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
  "java",
  "javax",
  "jdk",
  "sun",
  "com.sun",
  "org.w3c",
  "org.xml",
  "org.ietf"
]);
/**
 * Reports whether a Java import path is part of the JDK: true when the
 * path equals a known prefix exactly or is nested underneath one.
 */
function isJavaStdlib(importPath) {
  for (const prefix of JAVA_STDLIB_PREFIXES) {
    const nested = importPath.startsWith(`${prefix}.`);
    if (nested || importPath === prefix) return true;
  }
  return false;
}
|
|
1061
|
+
/**
 * Extracts non-JDK import specs from Java source text.
 * Matches `import foo.Bar;` and `import static foo.Bar;` declarations;
 * wildcard imports (`foo.*`) are kept verbatim in the spec. Java imports
 * are never path-relative.
 */
function parseJavaImports(content) {
  const specs = [];
  const importRe = /^\s*import\s+(?:static\s+)?([\w.*]+)\s*;/gm;
  for (let m = importRe.exec(content); m !== null; m = importRe.exec(content)) {
    const raw = m[1];
    if (isJavaStdlib(raw)) continue;
    specs.push({ raw, isRelative: false });
  }
  return specs;
}
|
|
1072
|
+
/**
 * Extracts intra-crate import specs from Rust source text.
 * `use crate::...` paths are absolute within the crate (isRelative: false);
 * `use super::...` and `use self::...` are relative to the current module.
 * `mod name;` declarations are recorded as "mod::name" so the resolver can
 * probe name.rs / name/mod.rs next to the declaring file.
 */
function parseRustImports(content) {
  const specs = [];
  const useRe = /^\s*(?:pub\s+)?use\s+((?:crate|super|self)(?:::\w+)*)/gm;
  for (let m = useRe.exec(content); m !== null; m = useRe.exec(content)) {
    const raw = m[1];
    const relative = /^(?:super|self)/.test(raw);
    specs.push({ raw, isRelative: relative });
  }
  const modRe = /^\s*(?:pub\s+)?mod\s+(\w+)\s*;/gm;
  for (let m = modRe.exec(content); m !== null; m = modRe.exec(content)) {
    specs.push({ raw: `mod::${m[1]}`, isRelative: true });
  }
  return specs;
}
|
|
1087
|
+
/**
 * Dispatches an import spec to the matching language-specific resolver.
 * Returns the repo-relative path of the resolved file, or null when the
 * language is unsupported or the import does not map to a local file.
 */
function resolveImportSpec(spec, fromRelativePath, projectPath, allPaths, lang) {
  if (lang === "python") return resolvePythonImport(spec, fromRelativePath, allPaths);
  if (lang === "go") return resolveGoImport(spec, fromRelativePath, projectPath, allPaths);
  if (lang === "java") return resolveJavaImport(spec, allPaths);
  if (lang === "rust") return resolveRustImport(spec, fromRelativePath, allPaths);
  return null;
}
|
|
1101
|
+
/**
 * Resolves a Python import spec to a repo-relative file path, or null.
 * Relative imports: one leading dot targets the importing file's package;
 * each additional dot walks one directory up. Absolute imports resolve
 * from the repo root via tryResolvePython's fallback locations.
 */
function resolvePythonImport(spec, fromRelativePath, allPaths) {
  if (!spec.isRelative) {
    return tryResolvePython("", spec.raw.replace(/\./g, "/"), allPaths);
  }
  const dotCount = spec.raw.match(/^\.+/)?.[0].length ?? 0;
  const moduleName = spec.raw.slice(dotCount);
  let base = dirname(fromRelativePath);
  // First dot = current package; every extra dot climbs one level.
  for (let level = 1; level < dotCount; level++) {
    base = dirname(base);
  }
  const modulePath = moduleName ? moduleName.replace(/\./g, "/") : "";
  return tryResolvePython(base, modulePath, allPaths);
}
|
|
1118
|
+
/**
 * Probes candidate on-disk locations for a Python module and returns the
 * first repo-relative path present in allPaths, or null.
 * An empty modulePath targets the package itself (its __init__.py).
 * Besides the base directory, repo-root resolution and common source
 * roots (src/, lib/, app/) are tried to cope with varied layouts.
 */
function tryResolvePython(baseDir, modulePath, allPaths) {
  const candidates = [];
  if (!modulePath) {
    candidates.push(join(baseDir, "__init__.py"));
  } else {
    candidates.push(join(baseDir, `${modulePath}.py`));
    candidates.push(join(baseDir, modulePath, "__init__.py"));
    if (baseDir) {
      // Repo-root fallback for imports written relative to the project root.
      candidates.push(`${modulePath}.py`, join(modulePath, "__init__.py"));
    }
    for (const root of ["src", "lib", "app"]) {
      candidates.push(join(root, `${modulePath}.py`));
      candidates.push(join(root, modulePath, "__init__.py"));
    }
  }
  for (const raw of candidates) {
    // Strip a leading "./" produced by join on a "." base directory.
    const candidate = raw.replace(/^\.[\\/]/, "");
    if (allPaths.has(candidate)) return candidate;
  }
  return null;
}
|
|
1146
|
+
/**
 * Resolves a Go import path to a repo-relative .go file, or null.
 * Strategy, in order:
 *   1. If a go.mod declares a module path and the import lives under it,
 *      map the remainder onto a directory (also probing cmd/, pkg/ and
 *      internal/ roots).
 *   2. Match a directory whose basename equals the import's final segment.
 *   3. Match progressively longer trailing segments of the import path
 *      (depth 2..4) against directory suffixes.
 * When a directory holds several files, the lexicographically first
 * non-test .go file is returned as its representative.
 */
function resolveGoImport(spec, fromRelativePath, projectPath, allPaths) {
  // Group candidate .go files by containing directory, skipping _test files.
  const dirFiles = /* @__PURE__ */ new Map();
  for (const p of allPaths) {
    if (!p.endsWith(".go")) continue;
    if (p.endsWith("_test.go")) continue;
    const dir = dirname(p);
    const existing = dirFiles.get(dir);
    if (existing) existing.push(p);
    else dirFiles.set(dir, [p]);
  }
  const importParts = spec.raw.split("/");
  const pkgName = importParts[importParts.length - 1];
  // Best-effort: read the module path from the first go.mod encountered.
  let goModModule = "";
  for (const p of allPaths) {
    if (p === "go.mod" || p.endsWith("/go.mod")) {
      try {
        const goModContent = safeReadFile(join(projectPath, p));
        if (goModContent) {
          const modMatch = goModContent.match(/^module\s+(\S+)/m);
          if (modMatch) goModModule = modMatch[1];
        }
      } catch {
      }
      break;
    }
  }
  // 1. Module-relative resolution via the go.mod module path.
  if (goModModule && spec.raw.startsWith(goModModule + "/")) {
    const localPath = spec.raw.slice(goModModule.length + 1);
    const files = dirFiles.get(localPath);
    if (files && files.length > 0) return files.sort()[0];
    for (const prefix of ["", "cmd/", "pkg/", "internal/"]) {
      const tryPath = prefix + localPath;
      const tryFiles = dirFiles.get(tryPath);
      if (tryFiles && tryFiles.length > 0) return tryFiles.sort()[0];
    }
  }
  // 2. Directory basename matches the imported package name.
  for (const [dir, files] of dirFiles) {
    const dirName = dir.split("/").pop();
    if (dirName === pkgName) return files.sort()[0];
  }
  // 3. Suffix match on trailing import segments, shortest suffix first.
  for (let depth = 2; depth <= Math.min(importParts.length, 4); depth++) {
    const suffix = importParts.slice(-depth).join("/");
    for (const [dir, files] of dirFiles) {
      if (dir === suffix || dir.endsWith("/" + suffix)) {
        return files.sort()[0];
      }
    }
  }
  return null;
}
|
|
1196
|
+
/**
 * Resolves a Java import to a repo-relative .java file, or null.
 * Wildcard imports (`pkg.*`) resolve to any file inside the named package.
 * For class imports, a trailing lowercase segment (static member import)
 * and an uppercase outer-class segment (nested-class import) are peeled
 * off before probing common Maven/Gradle source roots, then suffix
 * matching anywhere in the tree.
 */
function resolveJavaImport(spec, allPaths) {
  const segments = spec.raw.split(".");
  const last = segments[segments.length - 1];
  if (last === "*") {
    const pkgDir = segments.slice(0, -1).join("/");
    for (const root of ["src/main/java/", "src/", ""]) {
      for (const candidate of allPaths) {
        const inPackage = candidate.startsWith(`${root}${pkgDir}/`);
        if (inPackage && candidate.endsWith(".java")) return candidate;
      }
    }
    // Loose match: the package directory appearing anywhere in the path.
    for (const candidate of allPaths) {
      if (candidate.includes(`${pkgDir}/`) && candidate.endsWith(".java")) return candidate;
    }
    return null;
  }
  const startsLower = (s) => s[0] && s[0] === s[0].toLowerCase();
  const startsUpper = (s) => s[0] && s[0] === s[0].toUpperCase();
  let className = last;
  let pkgSegments = segments.slice(0, -1);
  if (startsLower(className) && pkgSegments.length > 0) {
    // `import pkg.Class.member` — member is lowercase; the class is one up.
    className = pkgSegments[pkgSegments.length - 1];
    pkgSegments = pkgSegments.slice(0, -1);
  }
  if (pkgSegments.length > 0) {
    const outer = pkgSegments[pkgSegments.length - 1];
    if (startsUpper(outer) && startsUpper(className)) {
      // `import pkg.Outer.Inner` — the nested class lives in Outer.java.
      className = outer;
      pkgSegments = pkgSegments.slice(0, -1);
    }
  }
  const pkgDir = pkgSegments.join("/");
  const javaFile = `${className}.java`;
  const relPath = pkgDir ? `${pkgDir}/${javaFile}` : javaFile;
  const roots = ["src/main/java/", "src/main/kotlin/", "src/", "app/src/main/java/", ""];
  for (const root of roots) {
    const exact = root + relPath;
    if (allPaths.has(exact)) return exact;
  }
  for (const candidate of allPaths) {
    if (candidate.endsWith(relPath)) return candidate;
  }
  if (pkgSegments.length >= 2) {
    // Last resort: match the class file under the final two package dirs.
    const tail = pkgSegments.slice(-2).join("/");
    for (const candidate of allPaths) {
      if (candidate.endsWith(javaFile) && candidate.includes(tail)) return candidate;
    }
  }
  return null;
}
|
|
1246
|
+
/**
 * Resolves a Rust use/mod spec to a repo-relative .rs file, or null.
 *   - "mod::name"   → name.rs or name/mod.rs beside the declaring file
 *   - "crate::..."  → resolved from the crate root (with and without src/)
 *   - "super::..."  → resolved from the parent module's directory
 *   - "self::..."   → resolved from the declaring file's directory
 * Path segments are probed longest-first, so `crate::a::b::Item` matches
 * a/b.rs before a.rs.
 */
function resolveRustImport(spec, fromRelativePath, allPaths) {
  const firstHit = (candidates) => candidates.find((c) => allPaths.has(c)) ?? null;
  // Probe <base>/<segments...>.rs and <base>/<segments...>/mod.rs,
  // dropping trailing segments (item names) until something matches.
  const probeDir = (baseDir, segments) => {
    for (let take = segments.length; take >= 1; take--) {
      const rel = segments.slice(0, take).join("/");
      const hit = firstHit([join(baseDir, `${rel}.rs`), join(baseDir, rel, "mod.rs")]);
      if (hit) return hit;
    }
    return null;
  };
  if (spec.raw.startsWith("mod::")) {
    const name = spec.raw.slice(5);
    const dir = dirname(fromRelativePath);
    return firstHit([join(dir, `${name}.rs`), join(dir, name, "mod.rs")]);
  }
  if (spec.raw.startsWith("crate::")) {
    const segments = spec.raw.replace("crate::", "").split("::");
    for (let take = segments.length; take >= 1; take--) {
      const rel = segments.slice(0, take).join("/");
      const hit = firstHit([
        `src/${rel}.rs`,
        `src/${rel}/mod.rs`,
        `${rel}.rs`,
        `${rel}/mod.rs`
      ]);
      if (hit) return hit;
    }
    return null;
  }
  if (spec.raw.startsWith("super::")) {
    const segments = spec.raw.replace("super::", "").split("::");
    return probeDir(dirname(dirname(fromRelativePath)), segments);
  }
  if (spec.raw.startsWith("self::")) {
    const segments = spec.raw.replace("self::", "").split("::");
    return probeDir(dirname(fromRelativePath), segments);
  }
  return null;
}
|
|
1307
|
+
/**
 * Reads a file as UTF-8, returning null instead of throwing when the file
 * is missing or unreadable (best-effort read for optional files).
 */
function safeReadFile(path) {
  let text = null;
  try {
    text = readFileSync(path, "utf-8");
  } catch {
    // Missing or unreadable file — null result, by design.
  }
  return text;
}
|
|
1314
|
+
|
|
1315
|
+
// src/engine/graph.ts
|
|
98
1316
|
var TS_EXTENSIONS = /* @__PURE__ */ new Set(["ts", "tsx", "js", "jsx", "mts", "mjs", "cts", "cjs"]);
|
|
99
1317
|
function createProject(projectPath, filePaths) {
|
|
100
|
-
const tsConfigPath =
|
|
101
|
-
const hasTsConfig =
|
|
1318
|
+
const tsConfigPath = join2(projectPath, "tsconfig.json");
|
|
1319
|
+
const hasTsConfig = existsSync2(tsConfigPath);
|
|
102
1320
|
const project = new Project({
|
|
103
1321
|
tsConfigFilePath: hasTsConfig ? tsConfigPath : void 0,
|
|
104
1322
|
skipAddingFilesFromTsConfig: true,
|
|
@@ -124,9 +1342,11 @@ function createProject(projectPath, filePaths) {
|
|
|
124
1342
|
return project;
|
|
125
1343
|
}
|
|
126
1344
|
function buildProjectGraph(projectPath, files) {
|
|
127
|
-
const absPath =
|
|
1345
|
+
const absPath = resolve2(projectPath);
|
|
128
1346
|
const tsFiles = files.filter((f) => TS_EXTENSIONS.has(f.extension)).map((f) => f.path);
|
|
129
|
-
|
|
1347
|
+
const polyglotFiles = files.filter((f) => !TS_EXTENSIONS.has(f.extension)).map((f) => ({ relativePath: f.relativePath, absolutePath: f.path }));
|
|
1348
|
+
const polyglotEdges = parseAllPolyglotImports(polyglotFiles, absPath);
|
|
1349
|
+
if (tsFiles.length === 0 && polyglotEdges.length === 0) {
|
|
130
1350
|
return emptyGraph(files);
|
|
131
1351
|
}
|
|
132
1352
|
let project;
|
|
@@ -138,7 +1358,7 @@ function buildProjectGraph(projectPath, files) {
|
|
|
138
1358
|
const edges = [];
|
|
139
1359
|
const nodeSet = /* @__PURE__ */ new Set();
|
|
140
1360
|
for (const sourceFile of project.getSourceFiles()) {
|
|
141
|
-
const fromRel =
|
|
1361
|
+
const fromRel = relative2(absPath, sourceFile.getFilePath());
|
|
142
1362
|
if (fromRel.startsWith("..") || fromRel.includes("node_modules")) continue;
|
|
143
1363
|
nodeSet.add(fromRel);
|
|
144
1364
|
for (const imp of sourceFile.getImportDeclarations()) {
|
|
@@ -160,6 +1380,11 @@ function buildProjectGraph(projectPath, files) {
|
|
|
160
1380
|
}
|
|
161
1381
|
}
|
|
162
1382
|
}
|
|
1383
|
+
for (const edge of polyglotEdges) {
|
|
1384
|
+
nodeSet.add(edge.from);
|
|
1385
|
+
nodeSet.add(edge.to);
|
|
1386
|
+
edges.push(edge);
|
|
1387
|
+
}
|
|
163
1388
|
const nodes = Array.from(nodeSet);
|
|
164
1389
|
const importedByCount = /* @__PURE__ */ new Map();
|
|
165
1390
|
const importCount = /* @__PURE__ */ new Map();
|
|
@@ -192,6 +1417,7 @@ function buildProjectGraph(projectPath, files) {
|
|
|
192
1417
|
const orphans = Array.from(allFileNodes).filter((n) => !connectedNodes.has(n));
|
|
193
1418
|
const clusters = detectClusters(nodes, edges, files);
|
|
194
1419
|
enrichComplexity(project, absPath, files);
|
|
1420
|
+
enrichPolyglotComplexity(files);
|
|
195
1421
|
return { nodes, edges, hubs, leaves, orphans, clusters };
|
|
196
1422
|
}
|
|
197
1423
|
var UnionFind = class {
|
|
@@ -284,7 +1510,7 @@ function commonPrefix(paths) {
|
|
|
284
1510
|
function enrichComplexity(project, absPath, files) {
|
|
285
1511
|
const fileMap = new Map(files.map((f) => [f.relativePath, f]));
|
|
286
1512
|
for (const sourceFile of project.getSourceFiles()) {
|
|
287
|
-
const relPath =
|
|
1513
|
+
const relPath = relative2(absPath, sourceFile.getFilePath());
|
|
288
1514
|
if (relPath.startsWith("..") || relPath.includes("node_modules")) continue;
|
|
289
1515
|
const file = fileMap.get(relPath);
|
|
290
1516
|
if (!file) continue;
|
|
@@ -335,22 +1561,34 @@ function calculateCyclomaticComplexity(node) {
|
|
|
335
1561
|
});
|
|
336
1562
|
return complexity;
|
|
337
1563
|
}
|
|
1564
|
+
/**
 * Fills in `complexity` for non-TypeScript files by reading each file and
 * running the language-specific heuristic estimator (floored at 1).
 * Files with an unrecognized language are left untouched; read failures
 * are ignored so one unreadable file cannot abort enrichment.
 */
function enrichPolyglotComplexity(files) {
  const polyglot = files.filter((f) => !TS_EXTENSIONS.has(f.extension));
  for (const file of polyglot) {
    const lang = detectLanguage(file.relativePath);
    if (!lang) continue;
    try {
      const source = readFileSync2(file.path, "utf-8");
      file.complexity = Math.max(1, estimateComplexity(source, lang));
    } catch {
      // Unreadable file: keep whatever complexity value it already has.
    }
  }
}
|
|
338
1576
|
function resolveImport(sourceFile, moduleSpecifier, projectRoot) {
|
|
339
1577
|
if (!moduleSpecifier.startsWith(".")) return null;
|
|
340
|
-
const sourceDir =
|
|
341
|
-
const basePath =
|
|
1578
|
+
const sourceDir = dirname2(sourceFile.getFilePath());
|
|
1579
|
+
const basePath = resolve2(sourceDir, moduleSpecifier);
|
|
342
1580
|
const extensions = [".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js", "/index.jsx"];
|
|
343
1581
|
for (const ext of extensions) {
|
|
344
1582
|
const candidate = basePath.endsWith(ext) ? basePath : basePath + ext;
|
|
345
|
-
if (
|
|
346
|
-
const rel =
|
|
1583
|
+
if (existsSync2(candidate)) {
|
|
1584
|
+
const rel = relative2(projectRoot, candidate);
|
|
347
1585
|
if (!rel.startsWith("..")) return rel;
|
|
348
1586
|
}
|
|
349
1587
|
}
|
|
350
1588
|
if (moduleSpecifier.endsWith(".js")) {
|
|
351
1589
|
const tsPath = basePath.replace(/\.js$/, ".ts");
|
|
352
|
-
if (
|
|
353
|
-
const rel =
|
|
1590
|
+
if (existsSync2(tsPath)) {
|
|
1591
|
+
const rel = relative2(projectRoot, tsPath);
|
|
354
1592
|
if (!rel.startsWith("..")) return rel;
|
|
355
1593
|
}
|
|
356
1594
|
}
|
|
@@ -510,7 +1748,7 @@ async function walkProject(rootPath, options) {
|
|
|
510
1748
|
}
|
|
511
1749
|
const promises = [];
|
|
512
1750
|
for (const entry of entries) {
|
|
513
|
-
const fullPath =
|
|
1751
|
+
const fullPath = join3(dir, entry.name);
|
|
514
1752
|
if (entry.isDirectory()) {
|
|
515
1753
|
if (!ignoreDirSet.has(entry.name) && !entry.name.startsWith(".")) {
|
|
516
1754
|
promises.push(walk(fullPath, depth + 1));
|
|
@@ -531,7 +1769,7 @@ async function walkProject(rootPath, options) {
|
|
|
531
1769
|
}
|
|
532
1770
|
results.push({
|
|
533
1771
|
path: fullPath,
|
|
534
|
-
relativePath:
|
|
1772
|
+
relativePath: relative3(rootPath, fullPath),
|
|
535
1773
|
extension: ext,
|
|
536
1774
|
size: fileStat.size,
|
|
537
1775
|
lastModified: fileStat.mtime,
|
|
@@ -580,7 +1818,7 @@ function detectStack(files) {
|
|
|
580
1818
|
return stack;
|
|
581
1819
|
}
|
|
582
1820
|
async function analyzeProject(projectPath, config) {
|
|
583
|
-
const absPath =
|
|
1821
|
+
const absPath = resolve3(projectPath);
|
|
584
1822
|
const projectName = basename2(absPath);
|
|
585
1823
|
const mergedConfig = mergeConfig(DEFAULT_CONFIG, config);
|
|
586
1824
|
const allExtensions = [
|
|
@@ -714,13 +1952,90 @@ function mergeConfig(base, overrides) {
|
|
|
714
1952
|
};
|
|
715
1953
|
}
|
|
716
1954
|
|
|
717
|
-
// src/engine/
|
|
718
|
-
import {
|
|
1955
|
+
// src/engine/learner.ts
|
|
1956
|
+
import { readFile as readFile3, writeFile, mkdir } from "fs/promises";
|
|
1957
|
+
import { join as join4 } from "path";
|
|
1958
|
+
// Learner persistence: the model lives at <project>/.cto/learner.json.
var MODEL_DIR = ".cto";
var MODEL_FILE = "learner.json";
// Minimum feedback samples before a pattern may influence scoring.
var MIN_OBSERVATIONS = 3;
/**
 * Loads the persisted learner model for a project. Any failure — missing
 * file, invalid JSON, or an unsupported schema version — yields a fresh
 * empty model rather than throwing.
 */
async function loadLearner(projectPath) {
  const modelPath = join4(projectPath, MODEL_DIR, MODEL_FILE);
  try {
    const parsed = JSON.parse(await readFile3(modelPath, "utf-8"));
    if (parsed.version === 2) return parsed;
  } catch {
    // Corrupt or absent model file — fall through to the empty model.
  }
  return createEmptyModel();
}
|
|
1971
|
+
/**
 * Computes per-file score boosts from accumulated selection feedback.
 * Each file maps to a coarse path pattern; task-specific stats are
 * preferred over global ones. The boost comes from the Wilson score
 * lower bound of the pattern's selection rate (95% confidence, z = 1.96),
 * rescaled from [0, 1] to [-1, 1] so sub-50% rates push files down.
 * Patterns with too few observations or a negligible boost (|b| <= 0.05)
 * are skipped. Results are sorted by absolute boost, strongest first.
 */
function getLearnerBoosts(model, taskType, files) {
  if (model.totalSelections < MIN_OBSERVATIONS) return [];
  const perTask = model.taskPatterns[taskType] ?? {};
  const boosts = [];
  for (const filePath of files) {
    const pattern = extractPattern(filePath);
    const taskStats = perTask[pattern];
    const stats = taskStats ?? model.patterns[pattern];
    if (!stats) continue;
    const n = stats.alpha + stats.beta;
    if (n < MIN_OBSERVATIONS) continue;
    const rate = stats.alpha / n;
    // Wilson score interval lower bound at 95% confidence.
    const z = 1.96;
    const center = rate + z * z / (2 * n);
    const spread = z * Math.sqrt((rate * (1 - rate) + z * z / (4 * n)) / n);
    const lowerBound = (center - spread) / (1 + z * z / n);
    const boost = (lowerBound - 0.5) * 2;
    if (Math.abs(boost) <= 0.05) continue;
    boosts.push({
      filePath,
      boost,
      confidence: Math.min(1, n / 20),
      reason: taskStats
        ? `${pattern} selected ${Math.round(rate * 100)}% of the time for ${taskType} tasks`
        : `${pattern} selected ${Math.round(rate * 100)}% of the time globally`
    });
  }
  return boosts.sort((a, b) => Math.abs(b.boost) - Math.abs(a.boost));
}
|
|
2002
|
+
/**
 * Builds a blank v2 learner model: no observed patterns, zero recorded
 * selections, and a fresh updatedAt timestamp.
 */
function createEmptyModel() {
  const model = {
    version: 2,
    updatedAt: new Date().toISOString(),
    patterns: {},
    taskPatterns: {},
    totalSelections: 0
  };
  return model;
}
|
|
2011
|
+
/**
 * Maps a file path to a coarse glob-like pattern used as the learner's
 * feature key, e.g. "components/*.tsx", "tests/**\/*.test.ts", "*.md".
 * Buckets, checked in order: test/spec files, config files, type
 * definitions, then the first meaningful directory (ignoring src/lib/app),
 * and finally extension only.
 */
function extractPattern(filePath) {
  const segments = filePath.split("/");
  const fileName = segments[segments.length - 1];
  const ext = fileName.includes(".") ? `.${fileName.split(".").pop()}` : "";
  const isDotTest = fileName.includes(".test.");
  if (isDotTest || fileName.includes(".spec.")) {
    const suffix = (isDotTest ? ".test" : ".spec") + ext;
    const inTestTree = ["tests", "test", "__tests__"].some((d) => segments.includes(d));
    return inTestTree ? `tests/**/*${suffix}` : `*${suffix}`;
  }
  const lowered = fileName.toLowerCase();
  if (["config", "configuration", "settings"].some((c) => lowered.includes(c))) {
    return `config/*${ext}`;
  }
  if (fileName.endsWith(".d.ts") || segments.includes("types") || segments.includes("typings")) {
    return `types/*${ext}`;
  }
  const meaningful = segments.slice(0, -1).filter((d) => !["src", "lib", "app"].includes(d));
  return meaningful.length > 0 ? `${meaningful[0]}/*${ext}` : `*${ext}`;
}
|
|
719
2034
|
|
|
720
2035
|
// src/govern/secrets.ts
|
|
721
|
-
import { readFile as
|
|
722
|
-
import { readFileSync, existsSync as
|
|
723
|
-
import { resolve as
|
|
2036
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
2037
|
+
import { readFileSync as readFileSync3, existsSync as existsSync3, mkdirSync, writeFileSync } from "fs";
|
|
2038
|
+
import { resolve as resolve4, relative as relative4, join as join5, dirname as dirname4 } from "path";
|
|
724
2039
|
import { createHash as createHash2 } from "crypto";
|
|
725
2040
|
var BUILTIN_PATTERNS = [
|
|
726
2041
|
// API Keys
|
|
@@ -847,8 +2162,8 @@ function scanContentForSecrets(content, filePath, customPatterns = [], extraPiiS
|
|
|
847
2162
|
}
|
|
848
2163
|
async function scanFileForSecrets(filePath, projectPath, customPatterns = []) {
|
|
849
2164
|
try {
|
|
850
|
-
const content = await
|
|
851
|
-
const relPath =
|
|
2165
|
+
const content = await readFile4(filePath, "utf-8");
|
|
2166
|
+
const relPath = relative4(resolve4(projectPath), resolve4(filePath));
|
|
852
2167
|
return scanContentForSecrets(content, relPath, customPatterns);
|
|
853
2168
|
} catch {
|
|
854
2169
|
return [];
|
|
@@ -941,8 +2256,42 @@ function deduplicateFindings(findings) {
|
|
|
941
2256
|
});
|
|
942
2257
|
}
|
|
943
2258
|
|
|
2259
|
+
// src/interact/router.ts
|
|
2260
|
+
// Keyword lists per task type. Entry order matters: when two categories
// score equally, the one declared earlier wins.
var TASK_KEYWORDS = {
  debug: ["debug", "fix", "bug", "error", "issue", "broken", "crash", "failing", "wrong"],
  review: ["review", "check", "assess", "evaluate", "audit", "inspect", "critique"],
  refactor: ["refactor", "restructure", "reorganize", "clean up", "simplify", "extract", "move"],
  test: ["test", "spec", "coverage", "unit test", "integration test", "e2e"],
  docs: ["document", "docs", "readme", "jsdoc", "comment", "explain"],
  feature: ["add", "implement", "create", "build", "new", "feature", "endpoint"],
  architecture: ["architecture", "design", "system", "structure", "migrate", "pattern"],
  "simple-edit": ["rename", "typo", "update", "change", "modify", "tweak", "adjust"]
};
/**
 * Classifies a free-text task description into a task type by counting
 * case-insensitive keyword substring hits per category. Ties keep the
 * earlier category; zero hits default to "simple-edit".
 */
function classifyTask(taskDescription) {
  const text = taskDescription.toLowerCase();
  let winner = "simple-edit";
  let winningScore = 0;
  for (const [type, keywords] of Object.entries(TASK_KEYWORDS)) {
    const score = keywords.filter((kw) => text.includes(kw)).length;
    if (score > winningScore) {
      winningScore = score;
      winner = type;
    }
  }
  return winner;
}
|
|
2286
|
+
|
|
2287
|
+
// src/engine/context-pipeline.ts
|
|
2288
|
+
import { readFileSync as readFileSync6 } from "fs";
|
|
2289
|
+
|
|
2290
|
+
// src/engine/selector.ts
|
|
2291
|
+
import { createHash as createHash3 } from "crypto";
|
|
2292
|
+
|
|
944
2293
|
// src/engine/pruner.ts
|
|
945
|
-
import { readFile as
|
|
2294
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
946
2295
|
var TS_EXTENSIONS2 = /* @__PURE__ */ new Set(["ts", "tsx", "js", "jsx", "mts", "mjs"]);
|
|
947
2296
|
async function pruneFile(file, level) {
|
|
948
2297
|
if (level === "excluded") {
|
|
@@ -961,7 +2310,7 @@ async function pruneFile(file, level) {
|
|
|
961
2310
|
async function pruneTypeScript(file, level) {
|
|
962
2311
|
let content;
|
|
963
2312
|
try {
|
|
964
|
-
content = await
|
|
2313
|
+
content = await readFile5(file.path, "utf-8");
|
|
965
2314
|
} catch {
|
|
966
2315
|
return emptyResult(file, level);
|
|
967
2316
|
}
|
|
@@ -1255,7 +2604,7 @@ function extractClassOutline(lines, start) {
|
|
|
1255
2604
|
async function pruneGeneric(file, level) {
|
|
1256
2605
|
let content;
|
|
1257
2606
|
try {
|
|
1258
|
-
content = await
|
|
2607
|
+
content = await readFile5(file.path, "utf-8");
|
|
1259
2608
|
} catch {
|
|
1260
2609
|
return emptyResult(file, level);
|
|
1261
2610
|
}
|
|
@@ -1290,7 +2639,7 @@ function pruneGenericFromContent(file, content, level) {
|
|
|
1290
2639
|
async function fullContent(file) {
|
|
1291
2640
|
let content = "";
|
|
1292
2641
|
try {
|
|
1293
|
-
content = await
|
|
2642
|
+
content = await readFile5(file.path, "utf-8");
|
|
1294
2643
|
} catch {
|
|
1295
2644
|
}
|
|
1296
2645
|
return {
|
|
@@ -1442,19 +2791,8 @@ async function selectContext(input) {
|
|
|
1442
2791
|
for (const s of input.semanticScores ?? []) semanticMap.set(s.filePath, s.score);
|
|
1443
2792
|
const learnerMap = /* @__PURE__ */ new Map();
|
|
1444
2793
|
for (const b of input.learnerBoosts ?? []) learnerMap.set(b.filePath, b.boost);
|
|
1445
|
-
|
|
1446
|
-
if (targetPaths.length
|
|
1447
|
-
const sorted = [...semanticMap.entries()].sort((a, b) => b[1] - a[1]);
|
|
1448
|
-
const threshold = 0.5;
|
|
1449
|
-
targetPaths = sorted.filter(([, score]) => score >= threshold).slice(0, 10).map(([path]) => path);
|
|
1450
|
-
if (targetPaths.length > 0) {
|
|
1451
|
-
decisions.push({
|
|
1452
|
-
file: targetPaths.join(", "),
|
|
1453
|
-
action: "include-full",
|
|
1454
|
-
reason: `Top ${targetPaths.length} file(s) identified via semantic matching (score \u2265 ${threshold})`
|
|
1455
|
-
});
|
|
1456
|
-
}
|
|
1457
|
-
} else if (targetPaths.length > 0) {
|
|
2794
|
+
const targetPaths = identifyTargetFiles(task, analysis.files);
|
|
2795
|
+
if (targetPaths.length > 0) {
|
|
1458
2796
|
decisions.push({
|
|
1459
2797
|
file: targetPaths.join(", "),
|
|
1460
2798
|
action: "include-full",
|
|
@@ -1486,7 +2824,7 @@ async function selectContext(input) {
|
|
|
1486
2824
|
}
|
|
1487
2825
|
const { mustInclude, mustExclude } = applyPolicies(analysis.files, policies);
|
|
1488
2826
|
const candidateSet = /* @__PURE__ */ new Set([...expandedPaths, ...mustInclude]);
|
|
1489
|
-
if (targetPaths.length === 0) {
|
|
2827
|
+
if (semanticMap.size > 0 || targetPaths.length === 0) {
|
|
1490
2828
|
for (const f of analysis.files) {
|
|
1491
2829
|
candidateSet.add(f.relativePath);
|
|
1492
2830
|
}
|
|
@@ -1525,22 +2863,32 @@ async function selectContext(input) {
|
|
|
1525
2863
|
const riskNorm = file.riskScore / maxRisk;
|
|
1526
2864
|
const semantic = semanticMap.get(file.relativePath) ?? 0;
|
|
1527
2865
|
const learner = ((learnerMap.get(file.relativePath) ?? 0) + 1) / 2;
|
|
1528
|
-
return
|
|
2866
|
+
return semantic * 0.55 + riskNorm * 0.25 + learner * 0.2;
|
|
1529
2867
|
}
|
|
2868
|
+
const targetSet = new Set(targetPaths);
|
|
1530
2869
|
const candidates = Array.from(candidateSet).map((p) => allFileMap.get(p)).filter((f) => f !== void 0).sort((a, b) => {
|
|
1531
|
-
const
|
|
1532
|
-
const
|
|
1533
|
-
|
|
1534
|
-
const aIsMust = mustInclude.has(a.relativePath) ? 0 : 1;
|
|
1535
|
-
const bIsMust = mustInclude.has(b.relativePath) ? 0 : 1;
|
|
1536
|
-
if (aIsMust !== bIsMust) return aIsMust - bIsMust;
|
|
1537
|
-
return compositeScore(b) - compositeScore(a);
|
|
2870
|
+
const aBonus = (targetSet.has(a.relativePath) ? 0.3 : 0) + (mustInclude.has(a.relativePath) ? 0.15 : 0);
|
|
2871
|
+
const bBonus = (targetSet.has(b.relativePath) ? 0.3 : 0) + (mustInclude.has(b.relativePath) ? 0.15 : 0);
|
|
2872
|
+
return compositeScore(b) + bBonus - (compositeScore(a) + aBonus);
|
|
1538
2873
|
});
|
|
1539
2874
|
const selectedFiles = [];
|
|
1540
2875
|
let usedTokens = 0;
|
|
2876
|
+
const hasSemanticSignal = semanticMap.size > 0;
|
|
1541
2877
|
for (const file of candidates) {
|
|
1542
|
-
const isTarget =
|
|
2878
|
+
const isTarget = targetSet.has(file.relativePath);
|
|
1543
2879
|
const isMustInclude = mustInclude.has(file.relativePath);
|
|
2880
|
+
if (hasSemanticSignal && !isTarget && !isMustInclude) {
|
|
2881
|
+
const semScore = semanticMap.get(file.relativePath) ?? 0;
|
|
2882
|
+
const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
|
|
2883
|
+
if (semScore === 0 && lrnBoost === 0) {
|
|
2884
|
+
decisions.push({
|
|
2885
|
+
file: file.relativePath,
|
|
2886
|
+
action: "exclude",
|
|
2887
|
+
reason: "Skipped: no semantic relevance to task"
|
|
2888
|
+
});
|
|
2889
|
+
continue;
|
|
2890
|
+
}
|
|
2891
|
+
}
|
|
1544
2892
|
const defaultLevel = isTarget ? "full" : getPruneLevelForRisk(file.riskScore);
|
|
1545
2893
|
const levels = getCascadeLevels(defaultLevel);
|
|
1546
2894
|
let included = false;
|
|
@@ -1653,381 +3001,455 @@ function getCascadeLevels(startLevel) {
|
|
|
1653
3001
|
const startIdx = all.indexOf(startLevel);
|
|
1654
3002
|
return all.slice(startIdx);
|
|
1655
3003
|
}
|
|
1656
|
-
function buildReason(file, level, isTarget, isMustInclude) {
|
|
1657
|
-
if (isTarget) return "Target file";
|
|
1658
|
-
if (isMustInclude) return "Required by policy";
|
|
1659
|
-
const impact = file.exclusionImpact;
|
|
1660
|
-
const levelStr = level === "full" ? "full content" : level;
|
|
1661
|
-
if (impact === "critical") return `Critical dependency (risk ${file.riskScore}) \u2014 ${levelStr}`;
|
|
1662
|
-
if (impact === "high") return `High-risk dependency (risk ${file.riskScore}) \u2014 ${levelStr}`;
|
|
1663
|
-
if (impact === "medium") return `Medium relevance (risk ${file.riskScore}) \u2014 ${levelStr}`;
|
|
1664
|
-
return `Low relevance (risk ${file.riskScore}) \u2014 ${levelStr}`;
|
|
3004
|
+
/**
 * Produces a human-readable explanation for why a file was included at a
 * given prune level. Target files and policy-required files get fixed
 * labels; otherwise the wording is driven by the file's exclusionImpact
 * tier plus its risk score and the chosen level.
 */
function buildReason(file, level, isTarget, isMustInclude) {
  if (isTarget) return "Target file";
  if (isMustInclude) return "Required by policy";
  const detail = level === "full" ? "full content" : level;
  let label;
  switch (file.exclusionImpact) {
    case "critical":
      label = "Critical dependency";
      break;
    case "high":
      label = "High-risk dependency";
      break;
    case "medium":
      label = "Medium relevance";
      break;
    default:
      label = "Low relevance";
  }
  return `${label} (risk ${file.riskScore}) \u2014 ${detail}`;
}
|
|
3014
|
+
|
|
3015
|
+
// src/engine/context-pipeline.ts
|
|
3016
|
+
init_tfidf();
|
|
3017
|
+
|
|
3018
|
+
// src/engine/index-cache.ts
|
|
3019
|
+
init_tfidf();
|
|
3020
|
+
import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, statSync } from "fs";
|
|
3021
|
+
import { join as join6 } from "path";
|
|
3022
|
+
var CACHE_VERSION = 2;
|
|
3023
|
+
var CACHE_DIR = ".cto";
|
|
3024
|
+
var CACHE_FILE = "index-cache.json";
|
|
3025
|
+
function buildIndexCached(projectPath, files) {
|
|
3026
|
+
const startTime = Date.now();
|
|
3027
|
+
const cachePath = join6(projectPath, CACHE_DIR, CACHE_FILE);
|
|
3028
|
+
const existing = loadCache(cachePath);
|
|
3029
|
+
const cacheHit = existing !== null;
|
|
3030
|
+
const cachedFiles = existing?.files ?? {};
|
|
3031
|
+
const newCachedFiles = {};
|
|
3032
|
+
let updatedFiles = 0;
|
|
3033
|
+
let removedFiles = 0;
|
|
3034
|
+
let cachedCount = 0;
|
|
3035
|
+
const currentPaths = new Set(files.map((f) => f.relativePath));
|
|
3036
|
+
if (existing) {
|
|
3037
|
+
for (const path of Object.keys(cachedFiles)) {
|
|
3038
|
+
if (!currentPaths.has(path)) {
|
|
3039
|
+
removedFiles++;
|
|
3040
|
+
}
|
|
3041
|
+
}
|
|
3042
|
+
}
|
|
3043
|
+
for (const file of files) {
|
|
3044
|
+
const cached = cachedFiles[file.relativePath];
|
|
3045
|
+
let currentMtime = 0;
|
|
3046
|
+
try {
|
|
3047
|
+
const st = statSync(file.absolutePath);
|
|
3048
|
+
currentMtime = st.mtimeMs;
|
|
3049
|
+
} catch {
|
|
3050
|
+
continue;
|
|
3051
|
+
}
|
|
3052
|
+
if (cached && cached.mtime === currentMtime) {
|
|
3053
|
+
newCachedFiles[file.relativePath] = cached;
|
|
3054
|
+
cachedCount++;
|
|
3055
|
+
} else {
|
|
3056
|
+
let content = file.content;
|
|
3057
|
+
if (content === void 0) {
|
|
3058
|
+
try {
|
|
3059
|
+
content = readFileSync4(file.absolutePath, "utf-8");
|
|
3060
|
+
} catch {
|
|
3061
|
+
continue;
|
|
3062
|
+
}
|
|
3063
|
+
}
|
|
3064
|
+
const terms = tokenize(content);
|
|
3065
|
+
const termCounts = {};
|
|
3066
|
+
for (const term of terms) {
|
|
3067
|
+
termCounts[term] = (termCounts[term] ?? 0) + 1;
|
|
3068
|
+
}
|
|
3069
|
+
newCachedFiles[file.relativePath] = {
|
|
3070
|
+
mtime: currentMtime,
|
|
3071
|
+
terms: termCounts,
|
|
3072
|
+
length: terms.length
|
|
3073
|
+
};
|
|
3074
|
+
updatedFiles++;
|
|
3075
|
+
}
|
|
3076
|
+
}
|
|
3077
|
+
const index = rebuildIndex(newCachedFiles);
|
|
3078
|
+
saveCache(cachePath, newCachedFiles);
|
|
3079
|
+
const stats = {
|
|
3080
|
+
totalFiles: Object.keys(newCachedFiles).length,
|
|
3081
|
+
updatedFiles,
|
|
3082
|
+
removedFiles,
|
|
3083
|
+
cachedFiles: cachedCount,
|
|
3084
|
+
cacheHit,
|
|
3085
|
+
buildTimeMs: Date.now() - startTime
|
|
3086
|
+
};
|
|
3087
|
+
return { index, stats };
|
|
3088
|
+
}
|
|
3089
|
+
function loadCache(cachePath) {
|
|
3090
|
+
try {
|
|
3091
|
+
if (!existsSync4(cachePath)) return null;
|
|
3092
|
+
const raw = readFileSync4(cachePath, "utf-8");
|
|
3093
|
+
const data = JSON.parse(raw);
|
|
3094
|
+
if (data.version !== CACHE_VERSION) return null;
|
|
3095
|
+
if (!data.files || typeof data.files !== "object") return null;
|
|
3096
|
+
return data;
|
|
3097
|
+
} catch {
|
|
3098
|
+
return null;
|
|
3099
|
+
}
|
|
3100
|
+
}
|
|
3101
|
+
function saveCache(cachePath, files) {
|
|
3102
|
+
try {
|
|
3103
|
+
const dir = cachePath.substring(0, cachePath.lastIndexOf("/"));
|
|
3104
|
+
if (!existsSync4(dir)) {
|
|
3105
|
+
mkdirSync2(dir, { recursive: true });
|
|
3106
|
+
}
|
|
3107
|
+
const data = {
|
|
3108
|
+
version: CACHE_VERSION,
|
|
3109
|
+
builtAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3110
|
+
files
|
|
3111
|
+
};
|
|
3112
|
+
writeFileSync2(cachePath, JSON.stringify(data));
|
|
3113
|
+
} catch {
|
|
3114
|
+
}
|
|
1665
3115
|
}
|
|
1666
|
-
|
|
1667
|
-
// src/engine/tfidf.ts
|
|
1668
|
-
var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
1669
|
-
// Language keywords
|
|
1670
|
-
"import",
|
|
1671
|
-
"export",
|
|
1672
|
-
"from",
|
|
1673
|
-
"const",
|
|
1674
|
-
"let",
|
|
1675
|
-
"var",
|
|
1676
|
-
"function",
|
|
1677
|
-
"class",
|
|
1678
|
-
"interface",
|
|
1679
|
-
"type",
|
|
1680
|
-
"return",
|
|
1681
|
-
"async",
|
|
1682
|
-
"await",
|
|
1683
|
-
"new",
|
|
1684
|
-
"this",
|
|
1685
|
-
"that",
|
|
1686
|
-
"true",
|
|
1687
|
-
"false",
|
|
1688
|
-
"null",
|
|
1689
|
-
"undefined",
|
|
1690
|
-
"void",
|
|
1691
|
-
"string",
|
|
1692
|
-
"number",
|
|
1693
|
-
"boolean",
|
|
1694
|
-
"any",
|
|
1695
|
-
"unknown",
|
|
1696
|
-
"never",
|
|
1697
|
-
"object",
|
|
1698
|
-
"array",
|
|
1699
|
-
"promise",
|
|
1700
|
-
"if",
|
|
1701
|
-
"else",
|
|
1702
|
-
"for",
|
|
1703
|
-
"while",
|
|
1704
|
-
"do",
|
|
1705
|
-
"switch",
|
|
1706
|
-
"case",
|
|
1707
|
-
"break",
|
|
1708
|
-
"continue",
|
|
1709
|
-
"try",
|
|
1710
|
-
"catch",
|
|
1711
|
-
"throw",
|
|
1712
|
-
"finally",
|
|
1713
|
-
"default",
|
|
1714
|
-
"extends",
|
|
1715
|
-
"implements",
|
|
1716
|
-
"static",
|
|
1717
|
-
"private",
|
|
1718
|
-
"public",
|
|
1719
|
-
"protected",
|
|
1720
|
-
"readonly",
|
|
1721
|
-
"abstract",
|
|
1722
|
-
"override",
|
|
1723
|
-
"super",
|
|
1724
|
-
"typeof",
|
|
1725
|
-
"instanceof",
|
|
1726
|
-
"in",
|
|
1727
|
-
"of",
|
|
1728
|
-
"as",
|
|
1729
|
-
"is",
|
|
1730
|
-
"keyof",
|
|
1731
|
-
"enum",
|
|
1732
|
-
"namespace",
|
|
1733
|
-
"module",
|
|
1734
|
-
"declare",
|
|
1735
|
-
// Python
|
|
1736
|
-
"def",
|
|
1737
|
-
"self",
|
|
1738
|
-
"cls",
|
|
1739
|
-
"none",
|
|
1740
|
-
"pass",
|
|
1741
|
-
"yield",
|
|
1742
|
-
"lambda",
|
|
1743
|
-
"with",
|
|
1744
|
-
"elif",
|
|
1745
|
-
"except",
|
|
1746
|
-
"raise",
|
|
1747
|
-
"assert",
|
|
1748
|
-
"global",
|
|
1749
|
-
"nonlocal",
|
|
1750
|
-
// Natural language stop words only — NOT domain terms that carry signal
|
|
1751
|
-
"the",
|
|
1752
|
-
"and",
|
|
1753
|
-
"for",
|
|
1754
|
-
"with",
|
|
1755
|
-
"not",
|
|
1756
|
-
"but",
|
|
1757
|
-
"are",
|
|
1758
|
-
"was",
|
|
1759
|
-
"were",
|
|
1760
|
-
"has",
|
|
1761
|
-
"have",
|
|
1762
|
-
"had",
|
|
1763
|
-
"will",
|
|
1764
|
-
"would",
|
|
1765
|
-
"could",
|
|
1766
|
-
"should",
|
|
1767
|
-
"may",
|
|
1768
|
-
"can",
|
|
1769
|
-
"its",
|
|
1770
|
-
"also",
|
|
1771
|
-
"than",
|
|
1772
|
-
"then",
|
|
1773
|
-
"into",
|
|
1774
|
-
"only",
|
|
1775
|
-
"very",
|
|
1776
|
-
"just",
|
|
1777
|
-
"about",
|
|
1778
|
-
"being",
|
|
1779
|
-
"been",
|
|
1780
|
-
"does",
|
|
1781
|
-
"did",
|
|
1782
|
-
"doing",
|
|
1783
|
-
"todo",
|
|
1784
|
-
"fixme",
|
|
1785
|
-
"hack",
|
|
1786
|
-
"note",
|
|
1787
|
-
"xxx"
|
|
1788
|
-
]);
|
|
1789
|
-
function buildIndex(files) {
|
|
3116
|
+
function rebuildIndex(cachedFiles) {
|
|
1790
3117
|
const documents = /* @__PURE__ */ new Map();
|
|
1791
3118
|
const docFreq = /* @__PURE__ */ new Map();
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
const
|
|
1795
|
-
for (const term of terms) {
|
|
1796
|
-
|
|
1797
|
-
}
|
|
1798
|
-
documents.set(
|
|
1799
|
-
|
|
3119
|
+
let totalLength = 0;
|
|
3120
|
+
for (const [path, cached] of Object.entries(cachedFiles)) {
|
|
3121
|
+
const termMap = /* @__PURE__ */ new Map();
|
|
3122
|
+
for (const [term, count] of Object.entries(cached.terms)) {
|
|
3123
|
+
termMap.set(term, count);
|
|
3124
|
+
}
|
|
3125
|
+
documents.set(path, { terms: termMap, length: cached.length });
|
|
3126
|
+
totalLength += cached.length;
|
|
3127
|
+
for (const term of termMap.keys()) {
|
|
1800
3128
|
docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
|
|
1801
3129
|
}
|
|
1802
3130
|
}
|
|
1803
|
-
const totalDocs =
|
|
3131
|
+
const totalDocs = documents.size;
|
|
3132
|
+
const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
|
|
1804
3133
|
const idf = /* @__PURE__ */ new Map();
|
|
1805
3134
|
for (const [term, df] of docFreq) {
|
|
1806
3135
|
idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
|
|
1807
3136
|
}
|
|
1808
|
-
let totalLength = 0;
|
|
1809
|
-
for (const doc of documents.values()) totalLength += doc.length;
|
|
1810
|
-
const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
|
|
1811
3137
|
return { documents, idf, avgDocLength, totalDocs };
|
|
1812
3138
|
}
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
3139
|
+
|
|
3140
|
+
// src/engine/reranker.ts
|
|
3141
|
+
init_tfidf();
|
|
3142
|
+
var WEIGHTS = {
|
|
3143
|
+
termCoverage: 0.35,
|
|
3144
|
+
// What fraction of query terms does the file match?
|
|
3145
|
+
termSpecificity: 0.25,
|
|
3146
|
+
// Are matched terms rare or common?
|
|
3147
|
+
bigramProximity: 0.15,
|
|
3148
|
+
// Do query terms appear near each other?
|
|
3149
|
+
dependencySignal: 0.1,
|
|
3150
|
+
// Is this file connected to a top match?
|
|
3151
|
+
pathRelevance: 0.15
|
|
3152
|
+
// Does the file path match query terms?
|
|
3153
|
+
};
|
|
3154
|
+
var ABSOLUTE_FLOOR = 0.18;
|
|
3155
|
+
var ELBOW_DROP_RATIO = 0.35;
|
|
3156
|
+
var MIN_TERM_COVERAGE = 0.3;
|
|
3157
|
+
function rerank(input) {
|
|
3158
|
+
const startTime = Date.now();
|
|
3159
|
+
const { task, candidates, index, fileContents, dependencies, allFilePaths } = input;
|
|
3160
|
+
const emptyTelemetry = {
|
|
3161
|
+
candidatesIn: candidates.length,
|
|
3162
|
+
candidatesOut: 0,
|
|
3163
|
+
candidatesFiltered: 0,
|
|
3164
|
+
durationMs: 0,
|
|
3165
|
+
weights: { ...WEIGHTS },
|
|
3166
|
+
gateConfig: { absoluteFloor: ABSOLUTE_FLOOR, elbowDropRatio: ELBOW_DROP_RATIO, minTermCoverage: MIN_TERM_COVERAGE },
|
|
3167
|
+
signalStats: {
|
|
3168
|
+
termCoverage: { min: 0, max: 0, mean: 0, median: 0 },
|
|
3169
|
+
termSpecificity: { min: 0, max: 0, mean: 0, median: 0 },
|
|
3170
|
+
bigramProximity: { min: 0, max: 0, mean: 0, median: 0 },
|
|
3171
|
+
dependencySignal: { min: 0, max: 0, mean: 0, median: 0 },
|
|
3172
|
+
pathRelevance: { min: 0, max: 0, mean: 0, median: 0 }
|
|
3173
|
+
},
|
|
3174
|
+
filterReasons: {},
|
|
3175
|
+
scoreDistribution: [0, 0, 0, 0, 0],
|
|
3176
|
+
queryTermCount: 0,
|
|
3177
|
+
relevanceConeSize: 0
|
|
3178
|
+
};
|
|
3179
|
+
if (candidates.length === 0) {
|
|
3180
|
+
return { files: [], filtered: [], qualityThreshold: 0, telemetry: { ...emptyTelemetry, durationMs: Date.now() - startTime } };
|
|
3181
|
+
}
|
|
3182
|
+
const queryTerms = tokenize(task);
|
|
3183
|
+
const uniqueQueryTerms = [...new Set(queryTerms)];
|
|
3184
|
+
if (uniqueQueryTerms.length === 0) {
|
|
3185
|
+
return { files: [], filtered: [], qualityThreshold: 0, telemetry: { ...emptyTelemetry, durationMs: Date.now() - startTime } };
|
|
3186
|
+
}
|
|
3187
|
+
const queryTermIdfs = /* @__PURE__ */ new Map();
|
|
3188
|
+
for (const term of uniqueQueryTerms) {
|
|
3189
|
+
queryTermIdfs.set(term, index.idf.get(term) ?? 0);
|
|
3190
|
+
}
|
|
3191
|
+
const maxIdf = Math.max(1, ...queryTermIdfs.values());
|
|
3192
|
+
const pathTermsCache = /* @__PURE__ */ new Map();
|
|
3193
|
+
for (const fp of allFilePaths) {
|
|
3194
|
+
pathTermsCache.set(fp, new Set(tokenize(fp.replace(/[/\\.]/g, " "))));
|
|
3195
|
+
}
|
|
3196
|
+
const scored = [];
|
|
3197
|
+
for (const candidate of candidates) {
|
|
3198
|
+
const doc = index.documents.get(candidate.filePath);
|
|
3199
|
+
if (!doc) continue;
|
|
3200
|
+
const matchedQueryTerms = /* @__PURE__ */ new Set();
|
|
3201
|
+
for (const term of uniqueQueryTerms) {
|
|
3202
|
+
if ((doc.terms.get(term) ?? 0) > 0) {
|
|
3203
|
+
matchedQueryTerms.add(term);
|
|
3204
|
+
}
|
|
1837
3205
|
}
|
|
3206
|
+
const termCoverage = matchedQueryTerms.size / uniqueQueryTerms.length;
|
|
3207
|
+
let specificitySum = 0;
|
|
3208
|
+
let specificityMax = 0;
|
|
3209
|
+
for (const term of matchedQueryTerms) {
|
|
3210
|
+
const idf = queryTermIdfs.get(term) ?? 0;
|
|
3211
|
+
specificitySum += idf;
|
|
3212
|
+
specificityMax += maxIdf;
|
|
3213
|
+
}
|
|
3214
|
+
const termSpecificity = specificityMax > 0 ? specificitySum / specificityMax : 0;
|
|
3215
|
+
const content = fileContents.get(candidate.filePath) ?? "";
|
|
3216
|
+
const bigramProximity = computeBigramProximity(content, uniqueQueryTerms);
|
|
3217
|
+
const dependencySignal = 0;
|
|
3218
|
+
const pathTerms = pathTermsCache.get(candidate.filePath) ?? /* @__PURE__ */ new Set();
|
|
3219
|
+
const queryTermSet = new Set(uniqueQueryTerms);
|
|
3220
|
+
let pathHits = 0;
|
|
3221
|
+
for (const pt of pathTerms) {
|
|
3222
|
+
if (queryTermSet.has(pt)) pathHits++;
|
|
3223
|
+
}
|
|
3224
|
+
const pathRelevance = Math.min(1, pathHits / Math.max(1, uniqueQueryTerms.length) * 2);
|
|
3225
|
+
const score = termCoverage * WEIGHTS.termCoverage + termSpecificity * WEIGHTS.termSpecificity + bigramProximity * WEIGHTS.bigramProximity + dependencySignal * WEIGHTS.dependencySignal + pathRelevance * WEIGHTS.pathRelevance;
|
|
3226
|
+
scored.push({
|
|
3227
|
+
filePath: candidate.filePath,
|
|
3228
|
+
score,
|
|
3229
|
+
bm25Score: candidate.score,
|
|
3230
|
+
signals: {
|
|
3231
|
+
termCoverage,
|
|
3232
|
+
termSpecificity,
|
|
3233
|
+
bigramProximity,
|
|
3234
|
+
dependencySignal,
|
|
3235
|
+
pathRelevance
|
|
3236
|
+
}
|
|
3237
|
+
});
|
|
1838
3238
|
}
|
|
1839
|
-
const
|
|
1840
|
-
|
|
1841
|
-
|
|
3239
|
+
const topByScore = [...scored].sort((a, b) => b.score - a.score).slice(0, 5);
|
|
3240
|
+
const relevanceCone = /* @__PURE__ */ new Set();
|
|
3241
|
+
for (const top of topByScore) {
|
|
3242
|
+
relevanceCone.add(top.filePath);
|
|
3243
|
+
const deps = dependencies.get(top.filePath) ?? [];
|
|
3244
|
+
for (const dep of deps) relevanceCone.add(dep);
|
|
3245
|
+
for (const [from, tos] of dependencies) {
|
|
3246
|
+
if (tos.includes(top.filePath)) relevanceCone.add(from);
|
|
3247
|
+
}
|
|
3248
|
+
}
|
|
3249
|
+
for (const item of scored) {
|
|
3250
|
+
const inCone = relevanceCone.has(item.filePath) ? 1 : 0;
|
|
3251
|
+
item.signals.dependencySignal = inCone;
|
|
3252
|
+
item.score = item.signals.termCoverage * WEIGHTS.termCoverage + item.signals.termSpecificity * WEIGHTS.termSpecificity + item.signals.bigramProximity * WEIGHTS.bigramProximity + item.signals.dependencySignal * WEIGHTS.dependencySignal + item.signals.pathRelevance * WEIGHTS.pathRelevance;
|
|
3253
|
+
}
|
|
3254
|
+
scored.sort((a, b) => b.score - a.score);
|
|
3255
|
+
const { passed, filtered, threshold } = applyQualityGate(scored);
|
|
3256
|
+
const filterReasons = {};
|
|
3257
|
+
for (const f of filtered) {
|
|
3258
|
+
const reason = f.reason.replace(/\([^)]+\)/g, "").trim();
|
|
3259
|
+
filterReasons[reason] = (filterReasons[reason] ?? 0) + 1;
|
|
3260
|
+
}
|
|
3261
|
+
const allScores = scored.map((s) => s.score).sort((a, b) => a - b);
|
|
3262
|
+
const signalNames = ["termCoverage", "termSpecificity", "bigramProximity", "dependencySignal", "pathRelevance"];
|
|
3263
|
+
const signalStats = {};
|
|
3264
|
+
for (const name of signalNames) {
|
|
3265
|
+
const vals = scored.map((s) => s.signals[name]).sort((a, b) => a - b);
|
|
3266
|
+
signalStats[name] = {
|
|
3267
|
+
min: vals[0] ?? 0,
|
|
3268
|
+
max: vals[vals.length - 1] ?? 0,
|
|
3269
|
+
mean: vals.length > 0 ? vals.reduce((a, b) => a + b, 0) / vals.length : 0,
|
|
3270
|
+
median: vals.length > 0 ? vals[Math.floor(vals.length / 2)] : 0
|
|
3271
|
+
};
|
|
1842
3272
|
}
|
|
1843
|
-
|
|
3273
|
+
const telemetry = {
|
|
3274
|
+
candidatesIn: candidates.length,
|
|
3275
|
+
candidatesOut: passed.length,
|
|
3276
|
+
candidatesFiltered: filtered.length,
|
|
3277
|
+
durationMs: Date.now() - startTime,
|
|
3278
|
+
weights: { ...WEIGHTS },
|
|
3279
|
+
gateConfig: { absoluteFloor: ABSOLUTE_FLOOR, elbowDropRatio: ELBOW_DROP_RATIO, minTermCoverage: MIN_TERM_COVERAGE },
|
|
3280
|
+
signalStats,
|
|
3281
|
+
filterReasons,
|
|
3282
|
+
scoreDistribution: [
|
|
3283
|
+
allScores[0] ?? 0,
|
|
3284
|
+
allScores[Math.floor(allScores.length * 0.25)] ?? 0,
|
|
3285
|
+
allScores[Math.floor(allScores.length * 0.5)] ?? 0,
|
|
3286
|
+
allScores[Math.floor(allScores.length * 0.75)] ?? 0,
|
|
3287
|
+
allScores[allScores.length - 1] ?? 0
|
|
3288
|
+
],
|
|
3289
|
+
queryTermCount: uniqueQueryTerms.length,
|
|
3290
|
+
relevanceConeSize: relevanceCone.size
|
|
3291
|
+
};
|
|
3292
|
+
return {
|
|
3293
|
+
files: passed,
|
|
3294
|
+
filtered,
|
|
3295
|
+
qualityThreshold: threshold,
|
|
3296
|
+
telemetry
|
|
3297
|
+
};
|
|
1844
3298
|
}
|
|
1845
|
-
function
|
|
1846
|
-
|
|
1847
|
-
const
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
const
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
3299
|
+
function computeBigramProximity(content, queryTerms) {
|
|
3300
|
+
if (queryTerms.length < 2 || !content) return 0;
|
|
3301
|
+
const contentTokens = tokenize(content);
|
|
3302
|
+
const termPositions = /* @__PURE__ */ new Map();
|
|
3303
|
+
for (let i = 0; i < contentTokens.length; i++) {
|
|
3304
|
+
const token = contentTokens[i];
|
|
3305
|
+
if (queryTerms.includes(token)) {
|
|
3306
|
+
const positions = termPositions.get(token) ?? [];
|
|
3307
|
+
positions.push(i);
|
|
3308
|
+
termPositions.set(token, positions);
|
|
3309
|
+
}
|
|
3310
|
+
}
|
|
3311
|
+
let totalScore = 0;
|
|
3312
|
+
let pairCount = 0;
|
|
3313
|
+
for (let i = 0; i < queryTerms.length; i++) {
|
|
3314
|
+
for (let j = i + 1; j < queryTerms.length; j++) {
|
|
3315
|
+
const posA = termPositions.get(queryTerms[i]);
|
|
3316
|
+
const posB = termPositions.get(queryTerms[j]);
|
|
3317
|
+
if (!posA || !posB) continue;
|
|
3318
|
+
let minDist = Infinity;
|
|
3319
|
+
for (const a of posA) {
|
|
3320
|
+
for (const b of posB) {
|
|
3321
|
+
minDist = Math.min(minDist, Math.abs(a - b));
|
|
3322
|
+
}
|
|
3323
|
+
}
|
|
3324
|
+
if (minDist < Infinity) {
|
|
3325
|
+
totalScore += Math.max(0, 1 - minDist / 20);
|
|
3326
|
+
pairCount++;
|
|
3327
|
+
}
|
|
1856
3328
|
}
|
|
1857
3329
|
}
|
|
1858
|
-
return
|
|
1859
|
-
}
|
|
1860
|
-
function stem(word) {
|
|
1861
|
-
let w = word.toLowerCase();
|
|
1862
|
-
if (w.endsWith("tion")) return w.slice(0, -4);
|
|
1863
|
-
if (w.endsWith("sion")) return w.slice(0, -4);
|
|
1864
|
-
if (w.endsWith("ment")) return w.slice(0, -4);
|
|
1865
|
-
if (w.endsWith("ness")) return w.slice(0, -4);
|
|
1866
|
-
if (w.endsWith("able")) return w.slice(0, -4);
|
|
1867
|
-
if (w.endsWith("ible")) return w.slice(0, -4);
|
|
1868
|
-
if (w.endsWith("ator")) return w.slice(0, -4);
|
|
1869
|
-
if (w.endsWith("izer")) return w.slice(0, -4);
|
|
1870
|
-
if (w.endsWith("ling")) return w.slice(0, -4);
|
|
1871
|
-
if (w.endsWith("ing") && w.length > 5) return w.slice(0, -3);
|
|
1872
|
-
if (w.endsWith("ies") && w.length > 4) return w.slice(0, -3) + "y";
|
|
1873
|
-
if (w.endsWith("ous") && w.length > 5) return w.slice(0, -3);
|
|
1874
|
-
if (w.endsWith("ful") && w.length > 5) return w.slice(0, -3);
|
|
1875
|
-
if (w.endsWith("ess") && w.length > 5) return w.slice(0, -3);
|
|
1876
|
-
if (w.endsWith("ity") && w.length > 5) return w.slice(0, -3);
|
|
1877
|
-
if (w.endsWith("ive") && w.length > 5) return w.slice(0, -3);
|
|
1878
|
-
if (w.endsWith("ed") && w.length > 4) return w.slice(0, -2);
|
|
1879
|
-
if (w.endsWith("er") && w.length > 4) return w.slice(0, -2);
|
|
1880
|
-
if (w.endsWith("ly") && w.length > 4) return w.slice(0, -2);
|
|
1881
|
-
if (w.endsWith("al") && w.length > 4) return w.slice(0, -2);
|
|
1882
|
-
if (w.endsWith("s") && !w.endsWith("ss") && w.length > 3) return w.slice(0, -1);
|
|
1883
|
-
return w;
|
|
3330
|
+
return pairCount > 0 ? totalScore / pairCount : 0;
|
|
1884
3331
|
}
|
|
1885
|
-
function
|
|
1886
|
-
const
|
|
1887
|
-
const
|
|
1888
|
-
|
|
1889
|
-
|
|
3332
|
+
function applyQualityGate(scored) {
|
|
3333
|
+
const passed = [];
|
|
3334
|
+
const filtered = [];
|
|
3335
|
+
if (scored.length === 0) {
|
|
3336
|
+
return { passed, filtered, threshold: 0 };
|
|
1890
3337
|
}
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
|
|
3338
|
+
let elbowIndex = scored.length;
|
|
3339
|
+
if (scored.length >= 3) {
|
|
3340
|
+
let maxDrop = 0;
|
|
3341
|
+
for (let i = 1; i < scored.length; i++) {
|
|
3342
|
+
if (scored[i - 1].score > 0) {
|
|
3343
|
+
const drop = (scored[i - 1].score - scored[i].score) / scored[i - 1].score;
|
|
3344
|
+
if (drop > maxDrop && drop >= ELBOW_DROP_RATIO) {
|
|
3345
|
+
maxDrop = drop;
|
|
3346
|
+
elbowIndex = i;
|
|
1901
3347
|
}
|
|
1902
|
-
} else {
|
|
1903
|
-
boosted.set(filePath, {
|
|
1904
|
-
filePath,
|
|
1905
|
-
score: Math.min(1, pathBoost),
|
|
1906
|
-
matchedTerms: pathMatches
|
|
1907
|
-
});
|
|
1908
3348
|
}
|
|
1909
3349
|
}
|
|
1910
3350
|
}
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
async function loadLearner(projectPath) {
|
|
1921
|
-
const modelPath = join4(projectPath, MODEL_DIR, MODEL_FILE);
|
|
1922
|
-
try {
|
|
1923
|
-
const raw = await readFile5(modelPath, "utf-8");
|
|
1924
|
-
const parsed = JSON.parse(raw);
|
|
1925
|
-
if (parsed.version === 2) return parsed;
|
|
1926
|
-
} catch {
|
|
1927
|
-
}
|
|
1928
|
-
return createEmptyModel();
|
|
1929
|
-
}
|
|
1930
|
-
function getLearnerBoosts(model, taskType, files) {
|
|
1931
|
-
if (model.totalSelections < MIN_OBSERVATIONS) return [];
|
|
1932
|
-
const boosts = [];
|
|
1933
|
-
const taskPatterns = model.taskPatterns[taskType] ?? {};
|
|
1934
|
-
for (const file of files) {
|
|
1935
|
-
const pattern = extractPattern(file);
|
|
1936
|
-
const taskStats = taskPatterns[pattern];
|
|
1937
|
-
const globalStats = model.patterns[pattern];
|
|
1938
|
-
const stats = taskStats ?? globalStats;
|
|
1939
|
-
if (!stats) continue;
|
|
1940
|
-
const total = stats.alpha + stats.beta;
|
|
1941
|
-
if (total < MIN_OBSERVATIONS) continue;
|
|
1942
|
-
const p = stats.alpha / total;
|
|
1943
|
-
const z2 = 1.96;
|
|
1944
|
-
const denominator = 1 + z2 * z2 / total;
|
|
1945
|
-
const center = p + z2 * z2 / (2 * total);
|
|
1946
|
-
const spread = z2 * Math.sqrt((p * (1 - p) + z2 * z2 / (4 * total)) / total);
|
|
1947
|
-
const lower = (center - spread) / denominator;
|
|
1948
|
-
const boost = (lower - 0.5) * 2;
|
|
1949
|
-
const confidence = Math.min(1, total / 20);
|
|
1950
|
-
if (Math.abs(boost) > 0.05) {
|
|
1951
|
-
boosts.push({
|
|
1952
|
-
filePath: file,
|
|
1953
|
-
boost,
|
|
1954
|
-
confidence,
|
|
1955
|
-
reason: taskStats ? `${pattern} selected ${Math.round(p * 100)}% of the time for ${taskType} tasks` : `${pattern} selected ${Math.round(p * 100)}% of the time globally`
|
|
1956
|
-
});
|
|
3351
|
+
const threshold = Math.max(
|
|
3352
|
+
ABSOLUTE_FLOOR,
|
|
3353
|
+
elbowIndex < scored.length ? scored[elbowIndex].score : 0
|
|
3354
|
+
);
|
|
3355
|
+
for (let i = 0; i < scored.length; i++) {
|
|
3356
|
+
const item = scored[i];
|
|
3357
|
+
if (item.score < ABSOLUTE_FLOOR) {
|
|
3358
|
+
filtered.push({ filePath: item.filePath, score: item.score, reason: `Below absolute floor (${item.score.toFixed(3)} < ${ABSOLUTE_FLOOR})` });
|
|
3359
|
+
continue;
|
|
1957
3360
|
}
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
function createEmptyModel() {
|
|
1962
|
-
return {
|
|
1963
|
-
version: 2,
|
|
1964
|
-
updatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1965
|
-
patterns: {},
|
|
1966
|
-
taskPatterns: {},
|
|
1967
|
-
totalSelections: 0
|
|
1968
|
-
};
|
|
1969
|
-
}
|
|
1970
|
-
function extractPattern(filePath) {
|
|
1971
|
-
const parts = filePath.split("/");
|
|
1972
|
-
const fileName = parts[parts.length - 1];
|
|
1973
|
-
const ext = fileName.includes(".") ? "." + fileName.split(".").pop() : "";
|
|
1974
|
-
if (fileName.includes(".test.") || fileName.includes(".spec.")) {
|
|
1975
|
-
const testExt = fileName.includes(".test.") ? ".test" + ext : ".spec" + ext;
|
|
1976
|
-
if (parts.includes("tests") || parts.includes("test") || parts.includes("__tests__")) {
|
|
1977
|
-
return `tests/**/*${testExt}`;
|
|
3361
|
+
if (item.signals.termCoverage < MIN_TERM_COVERAGE) {
|
|
3362
|
+
filtered.push({ filePath: item.filePath, score: item.score, reason: `Low term coverage (${(item.signals.termCoverage * 100).toFixed(0)}% < ${MIN_TERM_COVERAGE * 100}%)` });
|
|
3363
|
+
continue;
|
|
1978
3364
|
}
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
if (fileName.endsWith(".d.ts") || parts.includes("types") || parts.includes("typings")) {
|
|
1985
|
-
return `types/*${ext}`;
|
|
1986
|
-
}
|
|
1987
|
-
const meaningfulDirs = parts.slice(0, -1).filter((d) => d !== "src" && d !== "lib" && d !== "app");
|
|
1988
|
-
if (meaningfulDirs.length > 0) {
|
|
1989
|
-
return `${meaningfulDirs[0]}/*${ext}`;
|
|
3365
|
+
if (i >= elbowIndex && item.score < scored[0].score * 0.5) {
|
|
3366
|
+
filtered.push({ filePath: item.filePath, score: item.score, reason: `Below elbow cutoff (rank ${i + 1}, score ${item.score.toFixed(3)})` });
|
|
3367
|
+
continue;
|
|
3368
|
+
}
|
|
3369
|
+
passed.push(item);
|
|
1990
3370
|
}
|
|
1991
|
-
return
|
|
3371
|
+
return { passed, filtered, threshold };
|
|
1992
3372
|
}
|
|
1993
3373
|
|
|
1994
|
-
// src/
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
};
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
3374
|
+
// src/engine/context-pipeline.ts
|
|
3375
|
+
async function runContextPipeline(input) {
|
|
3376
|
+
const { projectPath, task, analysis, budget = 5e4 } = input;
|
|
3377
|
+
const taskType = classifyTask(task);
|
|
3378
|
+
const fileContentMap = /* @__PURE__ */ new Map();
|
|
3379
|
+
const fileContents = [];
|
|
3380
|
+
for (const file of analysis.files) {
|
|
3381
|
+
try {
|
|
3382
|
+
const content = readFileSync6(file.path, "utf-8");
|
|
3383
|
+
fileContentMap.set(file.relativePath, content);
|
|
3384
|
+
fileContents.push({ relativePath: file.relativePath, content });
|
|
3385
|
+
} catch {
|
|
3386
|
+
fileContents.push({ relativePath: file.relativePath, content: "" });
|
|
3387
|
+
}
|
|
3388
|
+
}
|
|
3389
|
+
const indexFiles = analysis.files.map((f) => ({
|
|
3390
|
+
relativePath: f.relativePath,
|
|
3391
|
+
absolutePath: f.path,
|
|
3392
|
+
content: fileContentMap.get(f.relativePath)
|
|
3393
|
+
}));
|
|
3394
|
+
const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);
|
|
3395
|
+
const semanticMatches = query(index, task, 50);
|
|
3396
|
+
const boostedMatches = boostByPath(
|
|
3397
|
+
semanticMatches,
|
|
3398
|
+
analysis.files.map((f) => f.relativePath),
|
|
3399
|
+
task
|
|
3400
|
+
);
|
|
3401
|
+
const depMap = /* @__PURE__ */ new Map();
|
|
3402
|
+
for (const file of analysis.files) {
|
|
3403
|
+
depMap.set(file.relativePath, file.imports);
|
|
3404
|
+
}
|
|
3405
|
+
const rerankResult = rerank({
|
|
3406
|
+
task,
|
|
3407
|
+
candidates: boostedMatches,
|
|
3408
|
+
index,
|
|
3409
|
+
fileContents: fileContentMap,
|
|
3410
|
+
dependencies: depMap,
|
|
3411
|
+
allFilePaths: analysis.files.map((f) => f.relativePath)
|
|
3412
|
+
});
|
|
3413
|
+
const rerankedMatches = rerankResult.files.map((rf) => ({
|
|
3414
|
+
filePath: rf.filePath,
|
|
3415
|
+
score: rf.bm25Score,
|
|
3416
|
+
// Keep original BM25 score for composite
|
|
3417
|
+
matchedTerms: boostedMatches.find((m) => m.filePath === rf.filePath)?.matchedTerms ?? []
|
|
3418
|
+
}));
|
|
3419
|
+
const learner = await loadLearner(projectPath);
|
|
3420
|
+
const learnerBoosts = getLearnerBoosts(
|
|
3421
|
+
learner,
|
|
3422
|
+
taskType,
|
|
3423
|
+
analysis.files.map((f) => f.relativePath)
|
|
3424
|
+
);
|
|
3425
|
+
const semanticScores = rerankedMatches.map((m) => ({ filePath: m.filePath, score: m.score }));
|
|
3426
|
+
const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
|
|
3427
|
+
const selection = await selectContext({
|
|
3428
|
+
task,
|
|
3429
|
+
analysis,
|
|
3430
|
+
budget,
|
|
3431
|
+
semanticScores,
|
|
3432
|
+
learnerBoosts: learnerBoostInputs
|
|
3433
|
+
});
|
|
3434
|
+
const semanticMap = new Map(rerankedMatches.map((m) => [m.filePath, m]));
|
|
3435
|
+
const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));
|
|
3436
|
+
let multiRepo;
|
|
3437
|
+
if (input.siblingRepos && input.siblingRepos.length > 0) {
|
|
3438
|
+
const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
|
|
3439
|
+
multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
|
|
2018
3440
|
}
|
|
2019
|
-
return
|
|
3441
|
+
return { selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats };
|
|
2020
3442
|
}
|
|
2021
3443
|
|
|
2022
3444
|
// src/mcp/index.ts
|
|
2023
3445
|
var server = new McpServer({
|
|
2024
3446
|
name: "cto",
|
|
2025
|
-
version: "6.
|
|
3447
|
+
version: "6.1.0"
|
|
2026
3448
|
});
|
|
2027
3449
|
var lastAnalysis = null;
|
|
2028
3450
|
var lastProjectPath = null;
|
|
2029
3451
|
async function getAnalysis(projectPath) {
|
|
2030
|
-
const absPath =
|
|
3452
|
+
const absPath = resolve6(projectPath);
|
|
2031
3453
|
if (lastAnalysis && lastProjectPath === absPath) return lastAnalysis;
|
|
2032
3454
|
lastAnalysis = await analyzeProject(absPath);
|
|
2033
3455
|
lastProjectPath = absPath;
|
|
@@ -2048,35 +3470,10 @@ server.tool(
|
|
|
2048
3470
|
},
|
|
2049
3471
|
async ({ projectPath, task, budget, includeContents }) => {
|
|
2050
3472
|
try {
|
|
2051
|
-
const absPath =
|
|
3473
|
+
const absPath = resolve6(projectPath);
|
|
2052
3474
|
const analysis = await getAnalysis(absPath);
|
|
2053
3475
|
const tokenBudget = budget ?? 5e4;
|
|
2054
|
-
const taskType =
|
|
2055
|
-
const fileContents = [];
|
|
2056
|
-
for (const file of analysis.files) {
|
|
2057
|
-
try {
|
|
2058
|
-
const content = readFileSync2(file.path, "utf-8");
|
|
2059
|
-
fileContents.push({ relativePath: file.relativePath, content });
|
|
2060
|
-
} catch {
|
|
2061
|
-
fileContents.push({ relativePath: file.relativePath, content: "" });
|
|
2062
|
-
}
|
|
2063
|
-
}
|
|
2064
|
-
const index = buildIndex(fileContents);
|
|
2065
|
-
const semanticMatches = query(index, task, 50);
|
|
2066
|
-
const boosted = boostByPath(semanticMatches, analysis.files.map((f) => f.relativePath), task);
|
|
2067
|
-
const semanticMap = new Map(boosted.map((m) => [m.filePath, m]));
|
|
2068
|
-
const learner = await loadLearner(absPath);
|
|
2069
|
-
const learnerBoosts = getLearnerBoosts(learner, taskType, analysis.files.map((f) => f.relativePath));
|
|
2070
|
-
const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));
|
|
2071
|
-
const semanticScores = boosted.map((m) => ({ filePath: m.filePath, score: m.score }));
|
|
2072
|
-
const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
|
|
2073
|
-
const selection = await selectContext({
|
|
2074
|
-
task,
|
|
2075
|
-
analysis,
|
|
2076
|
-
budget: tokenBudget,
|
|
2077
|
-
semanticScores,
|
|
2078
|
-
learnerBoosts: learnerBoostInputs
|
|
2079
|
-
});
|
|
3476
|
+
const { selection, taskType, fileContentMap, semanticMap, learnerMap } = await runContextPipeline({ projectPath: absPath, task, analysis, budget: tokenBudget });
|
|
2080
3477
|
const files = selection.files.map((f) => {
|
|
2081
3478
|
const sem = semanticMap.get(f.relativePath);
|
|
2082
3479
|
const lb = learnerMap.get(f.relativePath);
|
|
@@ -2090,11 +3487,11 @@ server.tool(
|
|
|
2090
3487
|
learnerBoost: lb?.boost ?? 0
|
|
2091
3488
|
};
|
|
2092
3489
|
if (includeContents) {
|
|
2093
|
-
|
|
2094
|
-
entry.content = sanitizeContent(fc?.content ?? "");
|
|
3490
|
+
entry.content = sanitizeContent(fileContentMap.get(f.relativePath) ?? "");
|
|
2095
3491
|
}
|
|
2096
3492
|
return entry;
|
|
2097
3493
|
});
|
|
3494
|
+
const learner = await loadLearner(absPath);
|
|
2098
3495
|
const result = {
|
|
2099
3496
|
task,
|
|
2100
3497
|
taskType,
|
|
@@ -2121,7 +3518,7 @@ server.tool(
|
|
|
2121
3518
|
},
|
|
2122
3519
|
async ({ projectPath, files }) => {
|
|
2123
3520
|
try {
|
|
2124
|
-
const absPath =
|
|
3521
|
+
const absPath = resolve6(projectPath);
|
|
2125
3522
|
const analysis = await getAnalysis(absPath);
|
|
2126
3523
|
const filePaths = files ?? analysis.files.map((f) => f.path);
|
|
2127
3524
|
const findings = await scanProjectForSecrets(absPath, filePaths);
|
|
@@ -2156,42 +3553,21 @@ server.tool(
|
|
|
2156
3553
|
},
|
|
2157
3554
|
async ({ projectPath, task, filePath }) => {
|
|
2158
3555
|
try {
|
|
2159
|
-
const absPath =
|
|
3556
|
+
const absPath = resolve6(projectPath);
|
|
2160
3557
|
const analysis = await getAnalysis(absPath);
|
|
2161
3558
|
const taskType = classifyTask(task);
|
|
2162
3559
|
const file = analysis.files.find((f) => f.relativePath === filePath);
|
|
2163
3560
|
if (!file) {
|
|
2164
3561
|
return { content: [{ type: "text", text: `File not found in project: ${filePath}` }] };
|
|
2165
3562
|
}
|
|
2166
|
-
const
|
|
2167
|
-
for (const f of analysis.files) {
|
|
2168
|
-
try {
|
|
2169
|
-
fileContents.push({ relativePath: f.relativePath, content: readFileSync2(f.path, "utf-8") });
|
|
2170
|
-
} catch {
|
|
2171
|
-
fileContents.push({ relativePath: f.relativePath, content: "" });
|
|
2172
|
-
}
|
|
2173
|
-
}
|
|
2174
|
-
const idx = buildIndex(fileContents);
|
|
2175
|
-
const semMatches = query(idx, task, 50);
|
|
2176
|
-
const boosted = boostByPath(semMatches, analysis.files.map((f) => f.relativePath), task);
|
|
2177
|
-
const semanticScores = boosted.map((m) => ({ filePath: m.filePath, score: m.score }));
|
|
2178
|
-
const learner = await loadLearner(absPath);
|
|
2179
|
-
const allBoosts = getLearnerBoosts(learner, taskType, analysis.files.map((f) => f.relativePath));
|
|
2180
|
-
const learnerBoostInputs = allBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
|
|
2181
|
-
const selection = await selectContext({
|
|
2182
|
-
task,
|
|
2183
|
-
analysis,
|
|
2184
|
-
budget: 5e4,
|
|
2185
|
-
semanticScores,
|
|
2186
|
-
learnerBoosts: learnerBoostInputs
|
|
2187
|
-
});
|
|
3563
|
+
const { selection, semanticMap, learnerMap } = await runContextPipeline({ projectPath: absPath, task, analysis, budget: 5e4 });
|
|
2188
3564
|
const isSelected = selection.files.some((f) => f.relativePath === filePath);
|
|
2189
3565
|
const selectionEntry = selection.files.find((f) => f.relativePath === filePath);
|
|
2190
|
-
const semEntry =
|
|
3566
|
+
const semEntry = semanticMap.get(filePath);
|
|
2191
3567
|
const semanticScore = semEntry?.score ?? 0;
|
|
2192
3568
|
const semanticTerms = semEntry?.matchedTerms ?? [];
|
|
2193
|
-
const
|
|
2194
|
-
const learnerBoost =
|
|
3569
|
+
const lbEntry = learnerMap.get(filePath);
|
|
3570
|
+
const learnerBoost = lbEntry ?? null;
|
|
2195
3571
|
const importedBy = file.importedBy;
|
|
2196
3572
|
const imports = file.imports;
|
|
2197
3573
|
const explanation = {
|