cto-ai-cli 6.1.0 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +195 -62
- package/dist/cli/index.js +5752 -1733
- package/dist/engine/index.d.ts +548 -12
- package/dist/engine/index.js +1974 -298
- package/dist/mcp/index.js +1822 -446
- package/package.json +1 -1
package/dist/engine/index.js
CHANGED
|
@@ -1,6 +1,530 @@
|
|
|
1
|
+
// Bundler runtime helpers shared by the lazily-initialized modules below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Turns a single-entry object such as `{ "path/to/module"() { ... } }` into a
 * lazy initializer. The first call runs the module body and caches its result;
 * every later call returns the cached result without running the body again.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const moduleBody = fn[__getOwnPropNames(fn)[0]];
    // Cleared before the body runs so a re-entrant call does not initialize twice.
    fn = 0;
    res = moduleBody(0);
  }
  return res;
};

/**
 * Defines an enumerable getter on `target` for every enumerable key of `all`,
 * using the function stored under that key as the getter.
 */
var __export = (target, all) => {
  for (var key in all) {
    __defProp(target, key, { get: all[key], enumerable: true });
  }
};
|
|
10
|
+
|
|
11
|
+
// src/engine/tfidf.ts
|
|
12
|
+
/**
 * Builds the search index for a set of files.
 *
 * @param files Iterable-with-length of `{ relativePath, content }` records.
 * @returns `{ documents, idf, avgDocLength, totalDocs }` where `documents` maps a
 *   path to its per-term counts and token count, and `idf` maps a term to its
 *   inverse-document-frequency weight.
 */
function buildIndex(files) {
  const documents = /* @__PURE__ */ new Map();
  const docFreq = /* @__PURE__ */ new Map();
  const bump = (counter, key) => counter.set(key, (counter.get(key) ?? 0) + 1);

  for (const file of files) {
    const tokens = tokenize(file.content);
    const counts = /* @__PURE__ */ new Map();
    tokens.forEach((token) => bump(counts, token));
    documents.set(file.relativePath, { terms: counts, length: tokens.length });
    // Document frequency counts each distinct term once per file.
    for (const distinctTerm of counts.keys()) bump(docFreq, distinctTerm);
  }

  const totalDocs = files.length;
  const idf = /* @__PURE__ */ new Map();
  docFreq.forEach((df, term) => {
    idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
  });

  const totalLength = [...documents.values()].reduce((sum, doc) => sum + doc.length, 0);
  // Fall back to 1 for an empty corpus so later divisions stay finite.
  const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
  return { documents, idf, avgDocLength, totalDocs };
}
|
|
36
|
+
/**
 * Scores every indexed document against a task description.
 *
 * @param index Index produced by `buildIndex`.
 * @param taskDescription Free text; tokenized the same way as file contents.
 * @param maxResults Maximum number of results returned (default 50).
 * @returns Results `{ filePath, score, matchedTerms }` sorted by descending score,
 *   with scores divided by the best score so the top hit is 1.
 */
function query(index, taskDescription, maxResults = 50) {
  const queryTerms = tokenize(taskDescription);
  if (queryTerms.length === 0) return [];

  const queryCounts = /* @__PURE__ */ new Map();
  queryTerms.forEach((term) => queryCounts.set(term, (queryCounts.get(term) ?? 0) + 1));

  // Term-frequency saturation (k1) and length-normalization (b) constants.
  const k1 = 1.5;
  const b = 0.75;
  const results = [];

  index.documents.forEach((doc, filePath) => {
    const matchedTerms = [];
    let score = 0;
    queryCounts.forEach((qCount, qTerm) => {
      const tf = doc.terms.get(qTerm) ?? 0;
      if (tf === 0) return;
      const termIdf = index.idf.get(qTerm) ?? 0;
      if (termIdf <= 0) return;
      const numerator = tf * (k1 + 1);
      const denominator = tf + k1 * (1 - b + b * doc.length / index.avgDocLength);
      const tfNorm = numerator / denominator;
      score += termIdf * tfNorm * qCount;
      matchedTerms.push(qTerm);
    });
    if (score > 0) results.push({ filePath, score, matchedTerms });
  });

  let maxScore = 0;
  for (const result of results) maxScore = Math.max(maxScore, result.score);
  if (maxScore > 0) {
    results.forEach((result) => {
      result.score = result.score / maxScore;
    });
  }

  results.sort((left, right) => right.score - left.score);
  return results.slice(0, maxResults);
}
|
|
68
|
+
/**
 * Cosine similarity between two indexed documents, weighting each term count by
 * the index's idf value for that term.
 *
 * @returns A number; 0 when either path is not in the index or either weighted
 *   vector has zero magnitude.
 */
function similarity(index, pathA, pathB) {
  const first = index.documents.get(pathA);
  const second = index.documents.get(pathB);
  if (!(first && second)) return 0;

  const vocabulary = /* @__PURE__ */ new Set([...first.terms.keys(), ...second.terms.keys()]);
  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  vocabulary.forEach((term) => {
    const weight = index.idf.get(term) ?? 0;
    const a = (first.terms.get(term) ?? 0) * weight;
    const bWeight = (second.terms.get(term) ?? 0) * weight;
    dot += a * bWeight;
    squaredA += a * a;
    squaredB += bWeight * bWeight;
  });

  const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  if (magnitude > 0) return dot / magnitude;
  return 0;
}
|
|
87
|
+
/**
 * Splits text into normalized, stemmed search tokens.
 *
 * Identifiers are split on camelCase / acronym boundaries, lower-cased, stemmed
 * with `stem`, and dropped when shorter than two characters or listed in
 * `STOP_WORDS`.
 *
 * The stop-word list is checked against the unstemmed part as well as the
 * stemmed result. Checking only the stem let listed words through whenever
 * stemming changed them (e.g. "function" stems to "func", "this" to "thi"),
 * which are not in the list.
 *
 * @param text Source text or a task description.
 * @returns Array of tokens in order of appearance (duplicates preserved).
 */
function tokenize(text) {
  const tokens = [];
  const rawTokens = text.match(/[a-zA-Z][a-zA-Z0-9]*|[0-9]+/g) ?? [];
  for (const raw of rawTokens) {
    const parts = raw
      .replace(/([a-z])([A-Z])/g, "$1 $2")
      .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
      .toLowerCase()
      .split(/\s+/);
    for (const part of parts) {
      if (part.length < 2) continue;
      // Filter on the surface form first: the list holds unstemmed words.
      if (STOP_WORDS.has(part)) continue;
      const stemmed = stem(part);
      if (stemmed.length < 2) continue;
      if (STOP_WORDS.has(stemmed)) continue;
      tokens.push(stemmed);
    }
  }
  return tokens;
}
|
|
102
|
+
/**
 * Reduces a word to a crude stem.
 *
 * A word that starts with a `TERM_FAMILIES` prefix (or equals its root) maps to
 * that family's root. Otherwise the first matching suffix rule below is applied;
 * each rule only fires when the word is longer than the rule's length threshold.
 *
 * @param word Word to stem; lower-cased before matching.
 * @returns The stem, or the lower-cased word when no rule applies.
 */
function stem(word) {
  const lowered = word.toLowerCase();

  const family = TERM_FAMILIES.find(([prefix, root]) => lowered.startsWith(prefix) || lowered === root);
  if (family) return family[1];

  // [suffix, word must be longer than this, text appended after stripping]
  const suffixRules = [
    ["ication", 9, ""],
    ["ation", 7, ""],
    ["tion", 6, ""],
    ["sion", 6, ""],
    ["ment", 6, ""],
    ["ness", 6, ""],
    ["able", 6, ""],
    ["ible", 6, ""],
    ["ator", 6, ""],
    ["izer", 6, ""],
    ["ing", 5, ""],
    ["ies", 4, "y"],
    ["ous", 5, ""],
    ["ful", 5, ""],
    ["ity", 5, ""],
    ["ive", 5, ""],
    ["ion", 5, ""],
    ["ed", 4, ""],
    ["er", 4, ""],
    ["ly", 4, ""],
    ["al", 4, ""]
  ];
  for (const [suffix, threshold, appended] of suffixRules) {
    if (lowered.endsWith(suffix) && lowered.length > threshold) {
      return lowered.slice(0, -suffix.length) + appended;
    }
  }

  // Plural "s", but never the second "s" of a double-s ending.
  const isPlural = lowered.endsWith("s") && !lowered.endsWith("ss") && lowered.length > 3;
  return isPlural ? lowered.slice(0, -1) : lowered;
}
|
|
131
|
+
/**
 * Raises the score of files whose path contains query terms.
 *
 * Each path token that is also a query token adds 0.3 to the file's score
 * (capped at 1). Files that had no content match are added with the path boost
 * as their score. Input match objects are copied, not mutated.
 *
 * @param matches Content matches `{ filePath, score, matchedTerms }`.
 * @param allFiles Paths of every candidate file.
 * @param taskDescription Query text.
 * @returns All boosted and newly added matches, sorted by descending score.
 */
function boostByPath(matches, allFiles, taskDescription) {
  const wanted = new Set(tokenize(taskDescription));
  const byPath = /* @__PURE__ */ new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  for (const filePath of allFiles) {
    // Path separators and dots become spaces so each segment is tokenized on its own.
    const hits = tokenize(filePath.replace(/[/\\.]/g, " ")).filter((t) => wanted.has(t));
    if (hits.length === 0) continue;

    const bonus = hits.length * 0.3;
    const entry = byPath.get(filePath);
    if (!entry) {
      byPath.set(filePath, {
        filePath,
        score: Math.min(1, bonus),
        matchedTerms: hits
      });
      continue;
    }
    entry.score = Math.min(1, entry.score + bonus);
    hits.forEach((t) => {
      if (!entry.matchedTerms.includes(t)) entry.matchedTerms.push(t);
    });
  }

  return [...byPath.values()].sort((left, right) => right.score - left.score);
}
|
|
159
|
+
var STOP_WORDS, TERM_FAMILIES;
// Lazy initializer for the tfidf module: fills in STOP_WORDS and TERM_FAMILIES.
var init_tfidf = __esm({
  "src/engine/tfidf.ts"() {
    "use strict";
    // Language keywords
    const languageKeywords = [
      "import", "export", "from", "const", "let", "var", "function", "class",
      "interface", "type", "return", "async", "await", "new", "this", "that",
      "true", "false", "null", "undefined", "void", "string", "number", "boolean",
      "any", "unknown", "never", "object", "array", "promise", "if", "else",
      "for", "while", "do", "switch", "case", "break", "continue", "try",
      "catch", "throw", "finally", "default", "extends", "implements", "static", "private",
      "public", "protected", "readonly", "abstract", "override", "super", "typeof", "instanceof",
      "in", "of", "as", "is", "keyof", "enum", "namespace", "module",
      "declare"
    ];
    // Python
    const pythonKeywords = [
      "def", "self", "cls", "none", "pass", "yield", "lambda", "with",
      "elif", "except", "raise", "assert", "global", "nonlocal"
    ];
    // Natural language stop words only — NOT domain terms that carry signal
    const naturalLanguageWords = [
      "the", "and", "for", "with", "not", "but", "are", "was",
      "were", "has", "have", "had", "will", "would", "could", "should",
      "may", "can", "its", "also", "than", "then", "into", "only",
      "very", "just", "about", "being", "been", "does", "did", "doing",
      "todo", "fixme", "hack", "note", "xxx"
    ];
    STOP_WORDS = /* @__PURE__ */ new Set([
      ...languageKeywords,
      ...pythonKeywords,
      ...naturalLanguageWords
    ]);
    // [prefix, root] pairs consulted in order by the stemmer.
    TERM_FAMILIES = [
      ["authenticat", "auth"], ["authori", "auth"],
      ["configur", "config"], ["connect", "connect"],
      ["request", "request"], ["response", "respons"],
      ["middlewar", "middlewar"], ["validat", "valid"],
      ["initiali", "init"], ["subscri", "subscrib"],
      ["transform", "transform"], ["seriali", "serial"],
      ["deseriali", "serial"], ["dependen", "depend"],
      ["environ", "environ"], ["permiss", "permiss"],
      ["migrat", "migrat"], ["transact", "transact"],
      ["encryp", "encrypt"], ["decryp", "encrypt"]
    ];
  }
});
|
|
308
|
+
|
|
309
|
+
// src/engine/multi-repo.ts
|
|
310
|
+
// Namespace object for src/engine/multi-repo.ts; each export is a getter that
// resolves to the function of the same name defined in this file.
var multi_repo_exports = {};
__export(multi_repo_exports, {
  discoverSiblingRepos: () => {
    return discoverSiblingRepos;
  },
  parseSiblingPaths: () => {
    return parseSiblingPaths;
  },
  querySiblingRepos: () => {
    return querySiblingRepos;
  },
  renderMultiRepoSummary: () => {
    return renderMultiRepoSummary;
  }
});
|
|
317
|
+
import { readdirSync, readFileSync as readFileSync5, statSync as statSync2, existsSync as existsSync5 } from "fs";
|
|
318
|
+
import { join as join7, basename as basename3, resolve as resolve5, relative as relative5 } from "path";
|
|
319
|
+
/**
 * Finds repositories that sit next to the given project in its parent directory.
 *
 * A sibling is a directory (other than the project itself, dot-directories, and
 * names in `SKIP_DIRS`) that contains at least one of the `REPO_MARKERS` files.
 *
 * @param projectPath Path of the current project.
 * @returns Array of `{ path, name, stack, fileCount }`; `fileCount` starts at 0.
 *   Empty when the parent directory cannot be read.
 */
function discoverSiblingRepos(projectPath) {
  const absProject = resolve5(projectPath);
  const parentDir = join7(absProject, "..");
  const ownName = basename3(absProject);

  let names;
  try {
    names = readdirSync(parentDir);
  } catch {
    return [];
  }

  // Unreadable entries are treated as "not a directory" / "no marker".
  const isDirectory = (candidate) => {
    try {
      return statSync2(candidate).isDirectory();
    } catch {
      return false;
    }
  };
  const hasRepoMarker = (candidate) => REPO_MARKERS.some((marker) => {
    try {
      return existsSync5(join7(candidate, marker));
    } catch {
      return false;
    }
  });

  return names.flatMap((name) => {
    if (name === ownName || name.startsWith(".") || SKIP_DIRS.has(name)) return [];
    const candidatePath = join7(parentDir, name);
    if (!isDirectory(candidatePath) || !hasRepoMarker(candidatePath)) return [];
    return [{
      path: candidatePath,
      name,
      stack: detectStack2(candidatePath),
      fileCount: 0
      // filled during indexing
    }];
  });
}
|
|
359
|
+
/**
 * Lists the technology labels whose marker file exists directly inside `repoPath`.
 *
 * @param repoPath Directory to probe.
 * @returns Labels in the fixed order of the table below; empty when none match.
 */
function detectStack2(repoPath) {
  const markerToLabel = [
    ["tsconfig.json", "TypeScript"],
    ["package.json", "Node.js"],
    ["Cargo.toml", "Rust"],
    ["go.mod", "Go"],
    ["pyproject.toml", "Python"],
    ["pom.xml", "Java"]
  ];
  const detected = [];
  try {
    for (const [markerFile, label] of markerToLabel) {
      if (existsSync5(join7(repoPath, markerFile))) detected.push(label);
    }
  } catch {
    // Best effort: keep whatever was detected before the failure.
  }
  return detected;
}
|
|
372
|
+
/**
 * Recursively collects source files under `repoPath`.
 *
 * Skips dot-entries and `SKIP_DIRS`, descends at most 8 directory levels below
 * the root, keeps only files no larger than `MAX_FILE_SIZE` whose extension is
 * in `SOURCE_EXTENSIONS`, and stops once `maxFiles` paths were collected.
 *
 * @param repoPath Root directory to walk.
 * @param maxFiles Upper bound on the number of returned paths.
 * @returns Paths relative to `repoPath`.
 */
function listSourceFiles(repoPath, maxFiles = MAX_FILES_PER_REPO) {
  const collected = [];
  const isFull = () => collected.length >= maxFiles;

  const visit = (dir, depth) => {
    if (depth > 8 || isFull()) return;
    let names;
    try {
      names = readdirSync(dir);
    } catch {
      return;
    }
    for (const name of names) {
      if (isFull()) return;
      if (name.startsWith(".") || SKIP_DIRS.has(name)) continue;
      const fullPath = join7(dir, name);
      try {
        const info = statSync2(fullPath);
        if (info.isDirectory()) {
          visit(fullPath, depth + 1);
          continue;
        }
        if (!info.isFile() || !(info.size <= MAX_FILE_SIZE)) continue;
        const extension = name.split(".").pop()?.toLowerCase() ?? "";
        if (SOURCE_EXTENSIONS.has(extension)) {
          collected.push(relative5(repoPath, fullPath));
        }
      } catch {
        // Entries that cannot be stat'ed are skipped.
      }
    }
  };

  visit(repoPath, 0);
  return collected;
}
|
|
404
|
+
/**
 * Reads every source file of a sibling repo.
 *
 * Side effect: sets `repo.fileCount` to the number of listed files.
 *
 * @param repo Sibling descriptor with a `path` property.
 * @returns `{ contents, contentMap }` — `contents` has one `{ relativePath, content }`
 *   per listed file (empty content when the read failed); `contentMap` holds only
 *   the files that were read successfully.
 */
function indexSiblingRepo(repo) {
  const relPaths = listSourceFiles(repo.path);
  repo.fileCount = relPaths.length;

  const contentMap = /* @__PURE__ */ new Map();
  const contents = relPaths.map((relativePath) => {
    let content = "";
    try {
      content = readFileSync5(join7(repo.path, relativePath), "utf-8");
      contentMap.set(relativePath, content);
    } catch {
      // Unreadable file: still listed, with empty content, but absent from contentMap.
    }
    return { relativePath, content };
  });

  return { contents, contentMap };
}
|
|
420
|
+
/**
 * Searches each sibling repo for files relevant to a task.
 *
 * For every repo: reads its files, builds an index, runs the query for up to
 * `maxPerRepo * 2` content matches, applies the path boost, then keeps the top
 * `maxPerRepo` results whose score is not below `minScore`.
 *
 * @param siblings Sibling descriptors (their `fileCount` is updated while indexing).
 * @param task Task description used as the query.
 * @param maxPerRepo Maximum matches kept per repo (default 5).
 * @param minScore Minimum score a match needs (default 0.3).
 * @returns `{ siblings, matches, timeMs }` with matches sorted by descending score;
 *   `tokens` on each match is estimated as content length / 4, rounded up.
 */
function querySiblingRepos(siblings, task, maxPerRepo = 5, minScore = 0.3) {
  const startedAt = performance.now();
  const collected = [];

  for (const repo of siblings) {
    const { contents, contentMap } = indexSiblingRepo(repo);
    if (contents.length === 0) continue;

    const contentMatches = query(buildIndex(contents), task, maxPerRepo * 2);
    const candidatePaths = contents.map((entry) => entry.relativePath);
    const ranked = boostByPath(contentMatches, candidatePaths, task);

    ranked
      .slice(0, maxPerRepo)
      .filter((match) => !(match.score < minScore))
      .forEach((match) => {
        const content = contentMap.get(match.filePath) ?? "";
        collected.push({
          repoName: repo.name,
          repoPath: repo.path,
          relativePath: match.filePath,
          absolutePath: join7(repo.path, match.filePath),
          score: match.score,
          content,
          tokens: Math.ceil(content.length / 4)
        });
      });
  }

  collected.sort((left, right) => right.score - left.score);
  const timeMs = Math.round(performance.now() - startedAt);
  return { siblings, matches: collected, timeMs };
}
|
|
455
|
+
/**
 * Parses a comma-separated list of sibling repo paths.
 *
 * Each entry is resolved against the parent directory of `projectPath`; blank
 * entries and paths that do not exist are dropped.
 *
 * @param pathsStr Comma-separated paths.
 * @param projectPath Path of the current project.
 * @returns Array of `{ path, name, stack, fileCount }` with `fileCount` 0.
 */
function parseSiblingPaths(pathsStr, projectPath) {
  const parentDir = join7(resolve5(projectPath), "..");
  const repos = [];
  for (const piece of pathsStr.split(",")) {
    const trimmed = piece.trim();
    if (trimmed.length === 0) continue;
    const absPath = resolve5(parentDir, trimmed);
    const repo = {
      path: absPath,
      name: basename3(absPath),
      stack: detectStack2(absPath),
      fileCount: 0
    };
    if (existsSync5(repo.path)) repos.push(repo);
  }
  return repos;
}
|
|
467
|
+
/**
 * Formats a multi-repo query result as newline-joined text.
 *
 * @param result Object with `siblings`, `matches`, and `timeMs`.
 * @returns One line per scanned repo followed by at most the first 10 matches,
 *   or a single "no siblings" line when nothing was scanned.
 */
function renderMultiRepoSummary(result) {
  if (result.siblings.length === 0) {
    return " No sibling repos found.";
  }

  const header = ` Sibling repos scanned: ${result.siblings.length} (${result.timeMs}ms)`;
  const repoLines = result.siblings.map(
    (repo) => ` ${repo.name}/ \u2014 ${repo.fileCount} files [${repo.stack.join(", ") || "unknown"}]`
  );

  let matchLines;
  if (result.matches.length === 0) {
    matchLines = [" No relevant files found in sibling repos."];
  } else {
    const shown = result.matches.slice(0, 10).map((m) => {
      const pct = Math.round(m.score * 100);
      return ` ${m.repoName}/${m.relativePath} sem: ${pct}% (~${Math.round(m.tokens / 1e3)}K tok)`;
    });
    matchLines = [` Cross-repo matches: ${result.matches.length}`, ...shown];
  }

  return [header, ...repoLines, ...matchLines].join("\n");
}
|
|
488
|
+
var REPO_MARKERS, SKIP_DIRS, SOURCE_EXTENSIONS, MAX_FILES_PER_REPO, MAX_FILE_SIZE;
// Lazy initializer for the multi-repo module: initializes the tfidf module it
// depends on, then fills in the constants declared above.
var init_multi_repo = __esm({
  "src/engine/multi-repo.ts"() {
    "use strict";
    init_tfidf();
    // A directory counts as a repo when it contains any of these files.
    REPO_MARKERS = [
      "package.json",
      "tsconfig.json",
      "Cargo.toml",
      "go.mod",
      "pyproject.toml",
      "pom.xml"
    ];
    // Directory names never descended into.
    SKIP_DIRS = /* @__PURE__ */ new Set([
      "node_modules", ".git", "dist", "build",
      ".next", "__pycache__", "target", "vendor"
    ]);
    // Lower-case file extensions (without the dot) that are indexed.
    SOURCE_EXTENSIONS = /* @__PURE__ */ new Set([
      "ts", "tsx", "js", "jsx", "mjs", "cjs",
      "py", "rs", "go", "java", "kt", "rb",
      "c", "cpp", "h", "hpp", "cs",
      "json", "yaml", "yml", "toml", "md", "txt"
    ]);
    MAX_FILES_PER_REPO = 500;
    MAX_FILE_SIZE = 1e5;
  }
});
|
|
524
|
+
|
|
1
525
|
// src/engine/analyzer.ts
|
|
2
526
|
import { readFile as readFile2, readdir, stat as stat2 } from "fs/promises";
|
|
3
|
-
import { join as
|
|
527
|
+
import { join as join3, extname, relative as relative3, resolve as resolve3, basename as basename2 } from "path";
|
|
4
528
|
import { createHash } from "crypto";
|
|
5
529
|
|
|
6
530
|
// src/types/engine.ts
|
|
@@ -100,12 +624,727 @@ function freeEncoder() {
|
|
|
100
624
|
|
|
101
625
|
// src/engine/graph.ts
|
|
102
626
|
import { Project, SyntaxKind } from "ts-morph";
|
|
103
|
-
import { resolve, relative, dirname, join } from "path";
|
|
104
|
-
import { existsSync } from "fs";
|
|
627
|
+
import { resolve as resolve2, relative as relative2, dirname as dirname2, join as join2 } from "path";
|
|
628
|
+
import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
|
|
629
|
+
|
|
630
|
+
// src/engine/polyglot-graph.ts
|
|
631
|
+
import { readFileSync } from "fs";
|
|
632
|
+
import { join, dirname } from "path";
|
|
633
|
+
// Maps a lower-case file extension (without the dot) to a language id.
// JavaScript extensions are grouped under "typescript".
var LANG_EXTENSIONS = {
  "py": "python",
  "pyw": "python",
  "go": "go",
  "java": "java",
  "rs": "rust",
  "ts": "typescript",
  "tsx": "typescript",
  "js": "typescript",
  "jsx": "typescript",
  "mts": "typescript",
  "mjs": "typescript",
  "cts": "typescript",
  "cjs": "typescript"
};
/**
 * Detects a file's language from the text after the last "." in its path.
 *
 * The lookup is restricted to own properties of `LANG_EXTENSIONS`. A plain
 * property read would return inherited `Object.prototype` members for paths
 * ending in ".constructor" or ".__proto__", handing callers a function or
 * object instead of a language id.
 *
 * @param filePath Path or file name.
 * @returns The language id string, or `null` when the extension is unknown.
 */
function detectLanguage(filePath) {
  const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
  return Object.prototype.hasOwnProperty.call(LANG_EXTENSIONS, ext) ? LANG_EXTENSIONS[ext] : null;
}
|
|
652
|
+
/**
 * Extracts import edges from one non-TypeScript source file.
 *
 * @param filePath Absolute path, read only when `content` is null/undefined.
 * @param relativePath Project-relative path; used for language detection and as
 *   the `from` of every edge.
 * @param projectPath Project root, forwarded to the import resolver.
 * @param allRelativePaths Known project files, forwarded to the import resolver.
 * @param content Optional file text.
 * @returns Array of `{ from, to, type: "import" }` for imports that resolved;
 *   empty for TypeScript/JavaScript, unknown languages, or empty content.
 */
function parseImports(filePath, relativePath, projectPath, allRelativePaths, content) {
  const lang = detectLanguage(relativePath);
  if (!lang || lang === "typescript") return [];

  const source = content ?? safeReadFile(filePath);
  if (!source) return [];

  let specs;
  if (lang === "python") {
    specs = parsePythonImports(source);
  } else if (lang === "go") {
    specs = parseGoImports(source);
  } else if (lang === "java") {
    specs = parseJavaImports(source);
  } else if (lang === "rust") {
    specs = parseRustImports(source);
  } else {
    return [];
  }

  const edges = [];
  for (const spec of specs) {
    const target = resolveImportSpec(spec, relativePath, projectPath, allRelativePaths, lang);
    if (!target) continue;
    edges.push({ from: relativePath, to: target, type: "import" });
  }
  return edges;
}
|
|
683
|
+
/**
 * Collects import edges for every non-TypeScript file in the project.
 *
 * @param files Records with `absolutePath`, `relativePath`, and optional `content`.
 * @param projectPath Project root.
 * @returns All edges from all files, in file order.
 */
function parseAllPolyglotImports(files, projectPath) {
  const knownPaths = new Set(files.map((f) => f.relativePath));
  const edges = [];
  for (const file of files) {
    const lang = detectLanguage(file.relativePath);
    // TypeScript/JavaScript and unrecognized files are not parsed here.
    if (lang && lang !== "typescript") {
      const fileEdges = parseImports(file.absolutePath, file.relativePath, projectPath, knownPaths, file.content);
      edges.push(...fileEdges);
    }
  }
  return edges;
}
|
|
700
|
+
/**
 * Estimates complexity by counting lines that contain a branching construct.
 *
 * Starts at 1 and adds 1 for every line matching at least one of the language's
 * patterns (a line is counted once, however many patterns it matches).
 *
 * A language with no entry in the pattern table yields the baseline value 1;
 * previously it raised a TypeError while iterating `undefined`.
 *
 * @param content Source text.
 * @param lang Language id ("python", "go", "java", "rust", "typescript").
 * @returns The complexity estimate (always >= 1).
 */
function estimateComplexity(content, lang) {
  const patterns = {
    python: [
      /^\s*if\s/,
      /^\s*elif\s/,
      /^\s*for\s/,
      /^\s*while\s/,
      /^\s*except\s/,
      /\sif\s.*\selse\s/,
      // ternary
      /\sand\s/,
      /\sor\s/
    ],
    go: [
      /^\s*if\s/,
      /^\s*for\s/,
      /^\s*case\s/,
      /^\s*select\s*{/,
      /&&/,
      /\|\|/
    ],
    java: [
      /^\s*if\s*\(/,
      /^\s*for\s*\(/,
      /^\s*while\s*\(/,
      /^\s*case\s/,
      /^\s*catch\s*\(/,
      /\?\s/,
      // ternary
      /&&/,
      /\|\|/
    ],
    rust: [
      /^\s*if\s/,
      /^\s*for\s/,
      /^\s*while\s/,
      /^\s*match\s/,
      /=>\s/,
      // match arms
      /&&/,
      /\|\|/
    ],
    typescript: []
    // handled by ts-morph
  };
  // Own-property check so unknown ids (and names like "constructor") get no patterns.
  const langPatterns = Object.prototype.hasOwnProperty.call(patterns, lang) ? patterns[lang] : [];
  let complexity = 1;
  for (const line of content.split("\n")) {
    if (langPatterns.some((pattern) => pattern.test(line))) {
      complexity++;
    }
  }
  return complexity;
}
|
|
758
|
+
// Top-level module names treated as part of the Python standard library.
var PYTHON_STDLIB = /* @__PURE__ */ new Set([
  "__future__", "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
  "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex", "bisect",
  "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk", "cmath", "cmd",
  "code", "codecs", "codeop", "collections", "colorsys", "compileall", "concurrent", "configparser",
  "contextlib", "contextvars", "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes",
  "curses", "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis", "distutils",
  "doctest", "email", "encodings", "enum", "errno", "faulthandler", "fcntl", "filecmp",
  "fileinput", "fnmatch", "fractions", "ftplib", "functools", "gc", "getopt", "getpass",
  "gettext", "glob", "grp", "gzip", "hashlib", "heapq", "hmac", "html",
  "http", "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
  "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache", "locale", "logging",
  "lzma", "mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap", "modulefinder",
  "multiprocessing", "netrc", "nis", "nntplib", "numbers", "operator", "optparse", "os",
  "ossaudiodev", "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform",
  "plistlib", "poplib", "posix", "posixpath", "pprint", "profile", "pstats", "pty",
  "pwd", "py_compile", "pyclbr", "pydoc", "queue", "quopri", "random", "re",
  "readline", "reprlib", "resource", "rlcompleter", "runpy", "sched", "secrets", "select",
  "selectors", "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
  "sndhdr", "socket", "socketserver", "spwd", "sqlite3", "ssl", "stat", "statistics",
  "string", "stringprep", "struct", "subprocess", "sunau", "symtable", "sys", "sysconfig",
  "syslog", "tabnanny", "tarfile", "telnetlib", "tempfile", "termios", "test", "textwrap",
  "threading", "time", "timeit", "tkinter", "token", "tokenize", "tomllib", "trace",
  "traceback", "tracemalloc", "tty", "turtle", "turtledemo", "types", "typing", "unicodedata",
  "unittest", "urllib", "uu", "uuid", "venv", "warnings", "wave", "weakref",
  "webbrowser", "winreg", "winsound", "wsgiref", "xdrlib", "xml", "xmlrpc", "zipapp",
  "zipfile", "zipimport", "zlib", "_thread"
]);
/**
 * Reports whether a dotted Python module path belongs to the standard library,
 * judged by its first dotted segment only.
 *
 * @param module Dotted module path, e.g. "os.path".
 * @returns `true` when the first segment is listed in `PYTHON_STDLIB`.
 */
function isPythonStdlib(module) {
  const [topLevel] = module.split(".");
  return PYTHON_STDLIB.has(topLevel);
}
|
|
968
|
+
/**
 * Extracts non-stdlib import specs from Python source.
 *
 * Handles `from X import ...` (including relative `from .x import ...`) and
 * `import a, b as c`. Parenthesized groups that open with "(" followed by a line
 * break are first collapsed onto one line. Lines starting with "#" are ignored.
 *
 * A trailing comment on an `import` line is removed before the module names
 * are validated; otherwise `import pkg  # noqa` failed the name check and the
 * import was dropped.
 *
 * @param content Python source text.
 * @returns Array of `{ raw, isRelative }`, one per import that is not stdlib.
 */
function parsePythonImports(content) {
  const specs = [];
  const joined = content.replace(/\(\s*\n([^)]*?)\)/gs, (_, inner) => {
    return "(" + inner.replace(/\n/g, " ").replace(/\s+/g, " ") + ")";
  });
  for (const line of joined.split("\n")) {
    const trimmed = line.trimStart();
    if (trimmed.startsWith("#")) continue;

    const fromMatch = trimmed.match(/^from\s+(\.{0,10}[\w.]*)\s+import\s+(.+)/);
    if (fromMatch) {
      const raw = fromMatch[1];
      const isRelative = raw.startsWith(".");
      // Relative imports are always project-local, so skip the stdlib check for them.
      if (!isRelative && isPythonStdlib(raw)) continue;
      specs.push({ raw, isRelative });
      continue;
    }

    const importMatch = trimmed.match(/^import\s+(.+)/);
    if (!importMatch) continue;
    // Drop any trailing "# comment" so it cannot invalidate the last module name.
    const clause = importMatch[1].split("#")[0];
    for (const piece of clause.split(",")) {
      const mod = piece.trim().split(/\s+as\s+/)[0].trim();
      if (!mod || !mod.match(/^[\w.]+$/)) continue;
      if (isPythonStdlib(mod)) continue;
      specs.push({ raw: mod, isRelative: false });
    }
  }
  return specs;
}
|
|
997
|
+
var GO_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
|
|
998
|
+
"archive",
|
|
999
|
+
"bufio",
|
|
1000
|
+
"bytes",
|
|
1001
|
+
"cmp",
|
|
1002
|
+
"compress",
|
|
1003
|
+
"container",
|
|
1004
|
+
"context",
|
|
1005
|
+
"crypto",
|
|
1006
|
+
"database",
|
|
1007
|
+
"debug",
|
|
1008
|
+
"embed",
|
|
1009
|
+
"encoding",
|
|
1010
|
+
"errors",
|
|
1011
|
+
"expvar",
|
|
1012
|
+
"flag",
|
|
1013
|
+
"fmt",
|
|
1014
|
+
"go",
|
|
1015
|
+
"hash",
|
|
1016
|
+
"html",
|
|
1017
|
+
"image",
|
|
1018
|
+
"index",
|
|
1019
|
+
"internal",
|
|
1020
|
+
"io",
|
|
1021
|
+
"iter",
|
|
1022
|
+
"log",
|
|
1023
|
+
"maps",
|
|
1024
|
+
"math",
|
|
1025
|
+
"mime",
|
|
1026
|
+
"net",
|
|
1027
|
+
"os",
|
|
1028
|
+
"path",
|
|
1029
|
+
"plugin",
|
|
1030
|
+
"reflect",
|
|
1031
|
+
"regexp",
|
|
1032
|
+
"runtime",
|
|
1033
|
+
"slices",
|
|
1034
|
+
"sort",
|
|
1035
|
+
"strconv",
|
|
1036
|
+
"strings",
|
|
1037
|
+
"structs",
|
|
1038
|
+
"sync",
|
|
1039
|
+
"syscall",
|
|
1040
|
+
"testing",
|
|
1041
|
+
"text",
|
|
1042
|
+
"time",
|
|
1043
|
+
"unicode",
|
|
1044
|
+
"unsafe"
|
|
1045
|
+
]);
|
|
1046
|
+
/**
 * Tells whether a Go import path points at the standard library.
 *
 * Only the first "/"-separated segment is inspected. A segment containing a
 * dot (a host name such as "github.com") is never treated as stdlib.
 *
 * @param {string} importPath - Import path exactly as written in the source.
 * @returns {boolean} True when the first segment is in GO_STDLIB_PREFIXES.
 */
function isGoStdlib(importPath) {
  const [head] = importPath.split("/");
  return !head.includes(".") && GO_STDLIB_PREFIXES.has(head);
}
|
|
1051
|
+
/**
 * Collects non-stdlib import paths from Go source text.
 *
 * Two passes are made over the content: single-line `import "path"`
 * statements first, then every quoted string inside `import ( ... )` groups.
 * An optional alias before the quoted path is ignored. Results keep the order
 * in which they were found and are not de-duplicated.
 *
 * @param {string} content - Go source text.
 * @returns {{raw: string, isRelative: boolean}[]} One spec per import; isRelative is always false.
 */
function parseGoImports(content) {
  const found = [];
  const record = (importPath) => {
    if (!isGoStdlib(importPath)) found.push({ raw: importPath, isRelative: false });
  };

  const singleImport = /^\s*import\s+(?:[\w_.]+\s+)?"([^"]+)"/gm;
  for (let hit = singleImport.exec(content); hit !== null; hit = singleImport.exec(content)) {
    record(hit[1]);
  }

  const groupedImport = /import\s*\(([\s\S]*?)\)/g;
  for (let group = groupedImport.exec(content); group !== null; group = groupedImport.exec(content)) {
    const body = group[1];
    const quotedPath = /(?:[\w_.]+\s+)?"([^"]+)"/g;
    for (let entry = quotedPath.exec(body); entry !== null; entry = quotedPath.exec(body)) {
      record(entry[1]);
    }
  }
  return found;
}
|
|
1073
|
+
/**
 * Package prefixes that isJavaStdlib treats as part of the Java platform.
 * An import matches when it equals a prefix or starts with the prefix
 * followed by a dot.
 */
var JAVA_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
  // Single-segment roots
  "java", "javax", "jdk", "sun",
  // Two-segment roots
  "com.sun", "org.w3c", "org.xml", "org.ietf"
]);
|
|
1083
|
+
/**
 * Tells whether a Java import belongs to one of the JAVA_STDLIB_PREFIXES packages.
 *
 * @param {string} importPath - Dotted import path, possibly ending in ".*".
 * @returns {boolean} True when the path equals a prefix or continues it after a dot.
 */
function isJavaStdlib(importPath) {
  return [...JAVA_STDLIB_PREFIXES].some(
    (root) => importPath === root || importPath.startsWith(`${root}.`)
  );
}
|
|
1089
|
+
/**
 * Collects non-stdlib `import` / `import static` targets from Java source text.
 *
 * @param {string} content - Java source text.
 * @returns {{raw: string, isRelative: boolean}[]} One spec per import statement, in source order; isRelative is always false.
 */
function parseJavaImports(content) {
  const found = [];
  const importStatement = /^\s*import\s+(?:static\s+)?([\w.*]+)\s*;/gm;
  for (let hit = importStatement.exec(content); hit !== null; hit = importStatement.exec(content)) {
    const target = hit[1];
    if (!isJavaStdlib(target)) {
      found.push({ raw: target, isRelative: false });
    }
  }
  return found;
}
|
|
1100
|
+
/**
 * Collects crate-internal dependencies from Rust source text.
 *
 * Two kinds of items are recognised:
 *  - `use` paths rooted at `crate`, `super` or `self` (external crates are
 *    ignored); the captured path stops before any `{...}` group or `*`;
 *  - out-of-line module declarations `mod name;`, reported as `mod::name`.
 *
 * Any visibility qualifier is accepted in front of either item: plain `pub`
 * as well as the restricted forms `pub(crate)`, `pub(super)` and
 * `pub(in some::path)`.
 *
 * @param {string} content - Rust source text.
 * @returns {{raw: string, isRelative: boolean}[]} All `use` specs in source
 *   order followed by all `mod` specs. isRelative is true for `super`/`self`
 *   paths and for `mod` declarations, false for `crate` paths.
 */
function parseRustImports(content) {
  const specs = [];
  const usePattern = /^\s*(?:pub(?:\s*\([^)]*\))?\s+)?use\s+((?:crate|super|self)(?:::\w+)*)/gm;
  let match;
  while ((match = usePattern.exec(content)) !== null) {
    const raw = match[1];
    const isRelative = raw.startsWith("super") || raw.startsWith("self");
    specs.push({ raw, isRelative });
  }
  const modPattern = /^\s*(?:pub(?:\s*\([^)]*\))?\s+)?mod\s+(\w+)\s*;/gm;
  while ((match = modPattern.exec(content)) !== null) {
    specs.push({ raw: `mod::${match[1]}`, isRelative: true });
  }
  return specs;
}
|
|
1115
|
+
/**
 * Routes an import spec to the resolver for its language.
 *
 * @param {{raw: string, isRelative: boolean}} spec - Parsed import.
 * @param {string} fromRelativePath - Project-relative path of the importing file.
 * @param {string} projectPath - Project root; only the Go resolver receives it.
 * @param {Set<string>} allPaths - Project-relative paths of all known files.
 * @param {string} lang - One of "python", "go", "java", "rust".
 * @returns {string|null} The resolver's result, or null for any other language.
 */
function resolveImportSpec(spec, fromRelativePath, projectPath, allPaths, lang) {
  if (lang === "python") {
    return resolvePythonImport(spec, fromRelativePath, allPaths);
  }
  if (lang === "go") {
    return resolveGoImport(spec, fromRelativePath, projectPath, allPaths);
  }
  if (lang === "java") {
    return resolveJavaImport(spec, allPaths);
  }
  if (lang === "rust") {
    return resolveRustImport(spec, fromRelativePath, allPaths);
  }
  return null;
}
|
|
1129
|
+
/**
 * Resolves a Python import spec to a project file.
 *
 * Absolute imports are looked up from the project root. Relative imports
 * start in the importing file's directory and climb one directory for every
 * leading dot beyond the first; a spec made only of dots targets the
 * directory's `__init__.py`.
 *
 * @param {{raw: string, isRelative: boolean}} spec - Parsed import.
 * @param {string} fromRelativePath - Project-relative path of the importing file.
 * @param {Set<string>} allPaths - Project-relative paths of all known files.
 * @returns {string|null} Matching path from allPaths, or null.
 */
function resolvePythonImport(spec, fromRelativePath, allPaths) {
  const dottedToPath = (dotted) => dotted.replace(/\./g, "/");
  if (!spec.isRelative) {
    return tryResolvePython("", dottedToPath(spec.raw), allPaths);
  }
  const leadingDots = spec.raw.match(/^\.+/)?.[0].length ?? 0;
  const remainder = spec.raw.slice(leadingDots);
  let anchorDir = dirname(fromRelativePath);
  let hops = leadingDots - 1;
  while (hops > 0) {
    anchorDir = dirname(anchorDir);
    hops -= 1;
  }
  const modulePath = remainder ? dottedToPath(remainder) : "";
  return tryResolvePython(anchorDir, modulePath, allPaths);
}
|
|
1146
|
+
/**
 * Probes the usual on-disk locations of a Python module and returns the first
 * one present in allPaths.
 *
 * With an empty modulePath only `<baseDir>/__init__.py` is tried. Otherwise
 * the module is tried as `<name>.py` and `<name>/__init__.py` under baseDir,
 * then (when baseDir is non-empty) at the project root, then under the
 * `src`, `lib` and `app` directories. A leading "./" or ".\" is stripped from
 * every candidate before the lookup.
 *
 * @param {string} baseDir - Directory to start from; "" means project root.
 * @param {string} modulePath - Slash-separated module path, or "" for the package itself.
 * @param {Set<string>} allPaths - Project-relative paths of all known files.
 * @returns {string|null} First candidate found in allPaths, or null.
 */
function tryResolvePython(baseDir, modulePath, allPaths) {
  const moduleForms = (root) => [
    join(root, `${modulePath}.py`),
    join(root, modulePath, "__init__.py")
  ];
  let probes;
  if (modulePath) {
    probes = moduleForms(baseDir);
    if (baseDir) {
      probes.push(`${modulePath}.py`, join(modulePath, "__init__.py"));
    }
    for (const root of ["src", "lib", "app"]) {
      probes.push(...moduleForms(root));
    }
  } else {
    probes = [join(baseDir, "__init__.py")];
  }
  const hit = probes
    .map((probe) => probe.replace(/^\.[\\/]/, ""))
    .find((probe) => allPaths.has(probe));
  return hit ?? null;
}
|
|
1174
|
+
/**
 * Resolves a Go import path to one representative file of the target package.
 *
 * Lookup order:
 *  1. If the first go.mod found in allPaths declares a module and the import
 *     starts with that module path, the remainder is looked up as a directory,
 *     directly and then under `cmd/`, `pkg/` and `internal/`.
 *  2. Any directory whose last segment equals the import's last segment.
 *  3. Any directory that equals or ends with the last 2, 3 or 4 segments of
 *     the import path.
 * `_test.go` files are never candidates. The chosen file is the first one of
 * the matched directory in default sort order.
 *
 * @param {{raw: string}} spec - Parsed import.
 * @param {string} fromRelativePath - Importing file (unused by this resolver).
 * @param {string} projectPath - Project root, used to read go.mod.
 * @param {Set<string>} allPaths - Project-relative paths of all known files.
 * @returns {string|null} A `.go` file of the resolved package, or null.
 */
function resolveGoImport(spec, fromRelativePath, projectPath, allPaths) {
  // Group non-test Go files by their directory (one directory == one package).
  const packagesByDir = /* @__PURE__ */ new Map();
  for (const filePath of allPaths) {
    if (!filePath.endsWith(".go") || filePath.endsWith("_test.go")) continue;
    const dir = dirname(filePath);
    const bucket = packagesByDir.get(dir);
    if (bucket) {
      bucket.push(filePath);
    } else {
      packagesByDir.set(dir, [filePath]);
    }
  }
  const representative = (goFiles) => goFiles.sort()[0];
  const segments = spec.raw.split("/");
  const leafName = segments[segments.length - 1];

  // Only the first go.mod encountered is consulted.
  let moduleName = "";
  for (const filePath of allPaths) {
    const isGoMod = filePath === "go.mod" || filePath.endsWith("/go.mod");
    if (!isGoMod) continue;
    try {
      const text = safeReadFile(join(projectPath, filePath));
      if (text) {
        const declared = text.match(/^module\s+(\S+)/m);
        if (declared) moduleName = declared[1];
      }
    } catch {
      // Best effort: a go.mod that cannot be used leaves moduleName empty.
    }
    break;
  }

  if (moduleName && spec.raw.startsWith(moduleName + "/")) {
    const localDir = spec.raw.slice(moduleName.length + 1);
    for (const prefix of ["", "cmd/", "pkg/", "internal/"]) {
      const goFiles = packagesByDir.get(prefix + localDir);
      if (goFiles && goFiles.length > 0) return representative(goFiles);
    }
  }

  for (const [dir, goFiles] of packagesByDir) {
    if (dir.split("/").pop() === leafName) return representative(goFiles);
  }

  const deepest = Math.min(segments.length, 4);
  for (let depth = 2; depth <= deepest; depth++) {
    const tail = segments.slice(-depth).join("/");
    for (const [dir, goFiles] of packagesByDir) {
      if (dir === tail || dir.endsWith("/" + tail)) {
        return representative(goFiles);
      }
    }
  }
  return null;
}
|
|
1224
|
+
/**
 * Resolves a Java import to a `.java` file of the project.
 *
 * Wildcard imports (`a.b.*`) return the first `.java` file found under the
 * package directory, trying the `src/main/java/`, `src/` and root layouts
 * before accepting the package directory anywhere in a path.
 *
 * For single-type imports the file name is derived heuristically:
 *  - a lower-case last segment is taken to be a static member, so the
 *    segment before it is the class;
 *  - when both of the last two remaining segments start with an upper-case
 *    character the import is taken to be a nested class and the outer class
 *    names the file.
 * The file is then searched under the common source roots, then as a path
 * suffix anywhere, and finally by file name combined with the last two
 * package segments.
 *
 * All suffix and substring matches respect path-segment boundaries, so
 * `com.foo.Bar` does not resolve to `telecom/foo/Bar.java` and `Bar` does not
 * resolve to `FooBar.java`.
 *
 * @param {{raw: string}} spec - Parsed import (dotted path, optionally ending in `*`).
 * @param {Set<string>} allPaths - Project-relative, "/"-separated paths of all known files.
 * @returns {string|null} Matching path from allPaths, or null.
 */
function resolveJavaImport(spec, allPaths) {
  const parts = spec.raw.split(".");
  if (parts[parts.length - 1] === "*") {
    const packagePath2 = parts.slice(0, -1).join("/");
    for (const prefix of ["src/main/java/", "src/", ""]) {
      const wanted = `${prefix}${packagePath2}/`;
      for (const path of allPaths) {
        if (path.startsWith(wanted) && path.endsWith(".java")) {
          return path;
        }
      }
    }
    // Package directory nested anywhere, matched on whole segments only.
    const nested = `/${packagePath2}/`;
    for (const path of allPaths) {
      if (path.includes(nested) && path.endsWith(".java")) {
        return path;
      }
    }
    return null;
  }
  let className = parts[parts.length - 1];
  let packageParts = parts.slice(0, -1);
  // Static member import: step back from the member to its class.
  if (className[0] && className[0] === className[0].toLowerCase() && packageParts.length > 0) {
    className = packageParts[packageParts.length - 1];
    packageParts = packageParts.slice(0, -1);
  }
  // Nested class import: the outer class determines the file name.
  if (packageParts.length > 0) {
    const maybeOuter = packageParts[packageParts.length - 1];
    if (maybeOuter[0] && maybeOuter[0] === maybeOuter[0].toUpperCase() && className[0] && className[0] === className[0].toUpperCase()) {
      className = maybeOuter;
      packageParts = packageParts.slice(0, -1);
    }
  }
  const packagePath = packageParts.join("/");
  const javaFile = `${className}.java`;
  const fullPath = packagePath ? `${packagePath}/${javaFile}` : javaFile;
  const prefixes = ["src/main/java/", "src/main/kotlin/", "src/", "app/src/main/java/", ""];
  for (const prefix of prefixes) {
    const candidate = prefix + fullPath;
    if (allPaths.has(candidate)) return candidate;
  }
  // The exact path was covered by the "" prefix above; here the package path
  // must start at a directory boundary.
  const fullPathSuffix = `/${fullPath}`;
  for (const path of allPaths) {
    if (path.endsWith(fullPathSuffix)) return path;
  }
  if (packageParts.length >= 2) {
    const lastTwoPkg = `/${packageParts.slice(-2).join("/")}/`;
    const fileSuffix = `/${javaFile}`;
    for (const path of allPaths) {
      if (path.endsWith(fileSuffix) && `/${path}`.includes(lastTwoPkg)) return path;
    }
  }
  return null;
}
|
|
1274
|
+
/**
 * Resolves a Rust import spec (as produced by parseRustImports) to a file.
 *
 * Supported forms:
 *  - `mod::name`  - an out-of-line `mod name;` declaration;
 *  - `crate::a::b` - looked up under `src/` and then the project root;
 *  - `super::a::b` - looked up next to the parent module's children;
 *  - `self::a::b`  - looked up among the current module's children.
 * For path forms the longest prefix that maps to `<path>.rs` or
 * `<path>/mod.rs` wins, since trailing segments may name items, not modules.
 *
 * Where a module's children live depends on the file that defines it:
 * `mod.rs`, `lib.rs` and `main.rs` keep children in their own directory,
 * while any other file `foo.rs` keeps them in a sibling directory `foo/`.
 * That location is tried first. The own-directory lookup is kept as a
 * fallback because crate roots with other names (e.g. `src/bin/tool.rs`)
 * also keep children beside themselves.
 *
 * @param {{raw: string}} spec - Parsed import.
 * @param {string} fromRelativePath - Project-relative path of the importing file.
 * @param {Set<string>} allPaths - Project-relative paths of all known files.
 * @returns {string|null} Matching path from allPaths, or null.
 */
function resolveRustImport(spec, fromRelativePath, allPaths) {
  const fileDir = dirname(fromRelativePath);
  const fileName = fromRelativePath.split(/[\\/]/).pop() ?? "";
  const ownsItsDirectory = fileName === "mod.rs" || fileName === "lib.rs" || fileName === "main.rs";
  // Directory holding this module's child modules.
  const childDir = ownsItsDirectory ? fileDir : join(fileDir, fileName.replace(/\.rs$/, ""));
  // Directory holding this module's siblings (the parent module's children).
  const siblingDir = ownsItsDirectory ? dirname(fileDir) : fileDir;

  const firstExisting = (candidates) => {
    for (const c of candidates) {
      if (allPaths.has(c)) return c;
    }
    return null;
  };
  // Tries ever shorter prefixes of `parts` as a module path under each base directory.
  const resolveUnder = (baseDirs, parts) => {
    for (let i = parts.length; i >= 1; i--) {
      const modulePath = parts.slice(0, i).join("/");
      const hit = firstExisting(
        baseDirs.flatMap((base) => [join(base, `${modulePath}.rs`), join(base, modulePath, "mod.rs")])
      );
      if (hit !== null) return hit;
    }
    return null;
  };

  if (spec.raw.startsWith("mod::")) {
    return resolveUnder([childDir, fileDir], [spec.raw.slice(5)]);
  }
  if (spec.raw.startsWith("crate::")) {
    const parts = spec.raw.replace("crate::", "").split("::");
    for (let i = parts.length; i >= 1; i--) {
      const modulePath = parts.slice(0, i).join("/");
      const hit = firstExisting([
        `src/${modulePath}.rs`,
        `src/${modulePath}/mod.rs`,
        `${modulePath}.rs`,
        `${modulePath}/mod.rs`
      ]);
      if (hit !== null) return hit;
    }
    return null;
  }
  if (spec.raw.startsWith("super::")) {
    const parts = spec.raw.replace("super::", "").split("::");
    return resolveUnder([siblingDir, dirname(fileDir)], parts);
  }
  if (spec.raw.startsWith("self::")) {
    const parts = spec.raw.replace("self::", "").split("::");
    return resolveUnder([childDir, fileDir], parts);
  }
  return null;
}
|
|
1335
|
+
/**
 * Reads a file as UTF-8 text without ever throwing.
 *
 * @param {string} path - File to read.
 * @returns {string|null} The file contents, or null when the read fails for any reason.
 */
function safeReadFile(path) {
  let text = null;
  try {
    text = readFileSync(path, "utf-8");
  } catch {
    text = null;
  }
  return text;
}
|
|
1342
|
+
|
|
1343
|
+
// src/engine/graph.ts
|
|
105
1344
|
var TS_EXTENSIONS = /* @__PURE__ */ new Set(["ts", "tsx", "js", "jsx", "mts", "mjs", "cts", "cjs"]);
|
|
106
1345
|
function createProject(projectPath, filePaths) {
|
|
107
|
-
const tsConfigPath =
|
|
108
|
-
const hasTsConfig =
|
|
1346
|
+
const tsConfigPath = join2(projectPath, "tsconfig.json");
|
|
1347
|
+
const hasTsConfig = existsSync2(tsConfigPath);
|
|
109
1348
|
const project = new Project({
|
|
110
1349
|
tsConfigFilePath: hasTsConfig ? tsConfigPath : void 0,
|
|
111
1350
|
skipAddingFilesFromTsConfig: true,
|
|
@@ -131,9 +1370,11 @@ function createProject(projectPath, filePaths) {
|
|
|
131
1370
|
return project;
|
|
132
1371
|
}
|
|
133
1372
|
function buildProjectGraph(projectPath, files) {
|
|
134
|
-
const absPath =
|
|
1373
|
+
const absPath = resolve2(projectPath);
|
|
135
1374
|
const tsFiles = files.filter((f) => TS_EXTENSIONS.has(f.extension)).map((f) => f.path);
|
|
136
|
-
|
|
1375
|
+
const polyglotFiles = files.filter((f) => !TS_EXTENSIONS.has(f.extension)).map((f) => ({ relativePath: f.relativePath, absolutePath: f.path }));
|
|
1376
|
+
const polyglotEdges = parseAllPolyglotImports(polyglotFiles, absPath);
|
|
1377
|
+
if (tsFiles.length === 0 && polyglotEdges.length === 0) {
|
|
137
1378
|
return emptyGraph(files);
|
|
138
1379
|
}
|
|
139
1380
|
let project;
|
|
@@ -145,7 +1386,7 @@ function buildProjectGraph(projectPath, files) {
|
|
|
145
1386
|
const edges = [];
|
|
146
1387
|
const nodeSet = /* @__PURE__ */ new Set();
|
|
147
1388
|
for (const sourceFile of project.getSourceFiles()) {
|
|
148
|
-
const fromRel =
|
|
1389
|
+
const fromRel = relative2(absPath, sourceFile.getFilePath());
|
|
149
1390
|
if (fromRel.startsWith("..") || fromRel.includes("node_modules")) continue;
|
|
150
1391
|
nodeSet.add(fromRel);
|
|
151
1392
|
for (const imp of sourceFile.getImportDeclarations()) {
|
|
@@ -167,6 +1408,11 @@ function buildProjectGraph(projectPath, files) {
|
|
|
167
1408
|
}
|
|
168
1409
|
}
|
|
169
1410
|
}
|
|
1411
|
+
for (const edge of polyglotEdges) {
|
|
1412
|
+
nodeSet.add(edge.from);
|
|
1413
|
+
nodeSet.add(edge.to);
|
|
1414
|
+
edges.push(edge);
|
|
1415
|
+
}
|
|
170
1416
|
const nodes = Array.from(nodeSet);
|
|
171
1417
|
const importedByCount = /* @__PURE__ */ new Map();
|
|
172
1418
|
const importCount = /* @__PURE__ */ new Map();
|
|
@@ -199,6 +1445,7 @@ function buildProjectGraph(projectPath, files) {
|
|
|
199
1445
|
const orphans = Array.from(allFileNodes).filter((n) => !connectedNodes.has(n));
|
|
200
1446
|
const clusters = detectClusters(nodes, edges, files);
|
|
201
1447
|
enrichComplexity(project, absPath, files);
|
|
1448
|
+
enrichPolyglotComplexity(files);
|
|
202
1449
|
return { nodes, edges, hubs, leaves, orphans, clusters };
|
|
203
1450
|
}
|
|
204
1451
|
var UnionFind = class {
|
|
@@ -291,7 +1538,7 @@ function commonPrefix(paths) {
|
|
|
291
1538
|
function enrichComplexity(project, absPath, files) {
|
|
292
1539
|
const fileMap = new Map(files.map((f) => [f.relativePath, f]));
|
|
293
1540
|
for (const sourceFile of project.getSourceFiles()) {
|
|
294
|
-
const relPath =
|
|
1541
|
+
const relPath = relative2(absPath, sourceFile.getFilePath());
|
|
295
1542
|
if (relPath.startsWith("..") || relPath.includes("node_modules")) continue;
|
|
296
1543
|
const file = fileMap.get(relPath);
|
|
297
1544
|
if (!file) continue;
|
|
@@ -342,22 +1589,34 @@ function calculateCyclomaticComplexity(node) {
|
|
|
342
1589
|
});
|
|
343
1590
|
return complexity;
|
|
344
1591
|
}
|
|
1592
|
+
/**
 * Assigns a complexity estimate to every file that is not covered by
 * TS_EXTENSIONS and whose language detectLanguage recognises.
 *
 * Each file's `complexity` is set in place to at least 1. Files that cannot
 * be read or estimated are left untouched.
 *
 * @param {Array<{extension: string, relativePath: string, path: string, complexity?: number}>} files
 *   File records to update in place.
 * @returns {void}
 */
function enrichPolyglotComplexity(files) {
  for (const entry of files) {
    const language = TS_EXTENSIONS.has(entry.extension) ? null : detectLanguage(entry.relativePath);
    if (language) {
      try {
        const source = readFileSync2(entry.path, "utf-8");
        entry.complexity = Math.max(1, estimateComplexity(source, language));
      } catch {
        // Best effort: keep whatever complexity the file already has.
      }
    }
  }
}
|
|
345
1604
|
function resolveImport(sourceFile, moduleSpecifier, projectRoot) {
|
|
346
1605
|
if (!moduleSpecifier.startsWith(".")) return null;
|
|
347
|
-
const sourceDir =
|
|
348
|
-
const basePath =
|
|
1606
|
+
const sourceDir = dirname2(sourceFile.getFilePath());
|
|
1607
|
+
const basePath = resolve2(sourceDir, moduleSpecifier);
|
|
349
1608
|
const extensions = [".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js", "/index.jsx"];
|
|
350
1609
|
for (const ext of extensions) {
|
|
351
1610
|
const candidate = basePath.endsWith(ext) ? basePath : basePath + ext;
|
|
352
|
-
if (
|
|
353
|
-
const rel =
|
|
1611
|
+
if (existsSync2(candidate)) {
|
|
1612
|
+
const rel = relative2(projectRoot, candidate);
|
|
354
1613
|
if (!rel.startsWith("..")) return rel;
|
|
355
1614
|
}
|
|
356
1615
|
}
|
|
357
1616
|
if (moduleSpecifier.endsWith(".js")) {
|
|
358
1617
|
const tsPath = basePath.replace(/\.js$/, ".ts");
|
|
359
|
-
if (
|
|
360
|
-
const rel =
|
|
1618
|
+
if (existsSync2(tsPath)) {
|
|
1619
|
+
const rel = relative2(projectRoot, tsPath);
|
|
361
1620
|
if (!rel.startsWith("..")) return rel;
|
|
362
1621
|
}
|
|
363
1622
|
}
|
|
@@ -525,7 +1784,7 @@ async function walkProject(rootPath, options) {
|
|
|
525
1784
|
}
|
|
526
1785
|
const promises = [];
|
|
527
1786
|
for (const entry of entries) {
|
|
528
|
-
const fullPath =
|
|
1787
|
+
const fullPath = join3(dir, entry.name);
|
|
529
1788
|
if (entry.isDirectory()) {
|
|
530
1789
|
if (!ignoreDirSet.has(entry.name) && !entry.name.startsWith(".")) {
|
|
531
1790
|
promises.push(walk(fullPath, depth + 1));
|
|
@@ -546,7 +1805,7 @@ async function walkProject(rootPath, options) {
|
|
|
546
1805
|
}
|
|
547
1806
|
results.push({
|
|
548
1807
|
path: fullPath,
|
|
549
|
-
relativePath:
|
|
1808
|
+
relativePath: relative3(rootPath, fullPath),
|
|
550
1809
|
extension: ext,
|
|
551
1810
|
size: fileStat.size,
|
|
552
1811
|
lastModified: fileStat.mtime,
|
|
@@ -595,7 +1854,7 @@ function detectStack(files) {
|
|
|
595
1854
|
return stack;
|
|
596
1855
|
}
|
|
597
1856
|
async function analyzeProject(projectPath, config) {
|
|
598
|
-
const absPath =
|
|
1857
|
+
const absPath = resolve3(projectPath);
|
|
599
1858
|
const projectName = basename2(absPath);
|
|
600
1859
|
const mergedConfig = mergeConfig(DEFAULT_CONFIG, config);
|
|
601
1860
|
const allExtensions = [
|
|
@@ -787,8 +2046,8 @@ import { createHash as createHash3 } from "crypto";
|
|
|
787
2046
|
|
|
788
2047
|
// src/govern/secrets.ts
|
|
789
2048
|
import { readFile as readFile3 } from "fs/promises";
|
|
790
|
-
import { readFileSync, existsSync as
|
|
791
|
-
import { resolve as
|
|
2049
|
+
import { readFileSync as readFileSync3, existsSync as existsSync3, mkdirSync, writeFileSync } from "fs";
|
|
2050
|
+
import { resolve as resolve4, relative as relative4, join as join4, dirname as dirname3 } from "path";
|
|
792
2051
|
import { createHash as createHash2 } from "crypto";
|
|
793
2052
|
var BUILTIN_PATTERNS = [
|
|
794
2053
|
// API Keys
|
|
@@ -916,7 +2175,7 @@ function scanContentForSecrets(content, filePath, customPatterns = [], extraPiiS
|
|
|
916
2175
|
async function scanFileForSecrets(filePath, projectPath, customPatterns = []) {
|
|
917
2176
|
try {
|
|
918
2177
|
const content = await readFile3(filePath, "utf-8");
|
|
919
|
-
const relPath =
|
|
2178
|
+
const relPath = relative4(resolve4(projectPath), resolve4(filePath));
|
|
920
2179
|
return scanContentForSecrets(content, relPath, customPatterns);
|
|
921
2180
|
} catch {
|
|
922
2181
|
return [];
|
|
@@ -1012,13 +2271,13 @@ function fingerprintFinding(f) {
|
|
|
1012
2271
|
return createHash2("sha256").update(`${f.file}:${f.type}:${f.match}`).digest("hex").slice(0, 32);
|
|
1013
2272
|
}
|
|
1014
2273
|
/**
 * Builds the location of the audit allowlist file inside a project.
 *
 * @param {string} projectPath - Project root.
 * @returns {string} `<projectPath>/.cto/audit/allowlist.json`.
 */
function getAllowlistPath(projectPath) {
  const segments = [".cto", "audit", "allowlist.json"];
  return join4(projectPath, ...segments);
}
|
|
1017
2276
|
function loadAllowlist(projectPath) {
|
|
1018
2277
|
const filePath = getAllowlistPath(projectPath);
|
|
1019
|
-
if (!
|
|
2278
|
+
if (!existsSync3(filePath)) return [];
|
|
1020
2279
|
try {
|
|
1021
|
-
return JSON.parse(
|
|
2280
|
+
return JSON.parse(readFileSync3(filePath, "utf-8"));
|
|
1022
2281
|
} catch {
|
|
1023
2282
|
return [];
|
|
1024
2283
|
}
|
|
@@ -1039,20 +2298,20 @@ function filterByAllowlist(findings, projectPath) {
|
|
|
1039
2298
|
return { filtered, allowed };
|
|
1040
2299
|
}
|
|
1041
2300
|
/**
 * Builds the location of the audit content-hash cache inside a project.
 *
 * @param {string} projectPath - Project root.
 * @returns {string} `<projectPath>/.cto/audit/.hashcache.json`.
 */
function getHashCachePath(projectPath) {
  const segments = [".cto", "audit", ".hashcache.json"];
  return join4(projectPath, ...segments);
}
|
|
1044
2303
|
/**
 * Loads the audit content-hash cache of a project.
 *
 * @param {string} projectPath - Project root.
 * @returns {*} The parsed JSON content of the cache file, or an empty object
 *   when the file is missing, unreadable or not valid JSON.
 */
function loadHashCache(projectPath) {
  const cacheFile = getHashCachePath(projectPath);
  if (existsSync3(cacheFile)) {
    try {
      return JSON.parse(readFileSync3(cacheFile, "utf-8"));
    } catch {
      // Fall through to the empty cache below.
    }
  }
  return {};
}
|
|
1053
2312
|
/**
 * Writes the audit content-hash cache of a project, creating the containing
 * directory first when necessary.
 *
 * @param {string} projectPath - Project root.
 * @param {*} cache - Value to serialise as JSON into the cache file.
 * @returns {void}
 */
function saveHashCache(projectPath, cache) {
  const target = getHashCachePath(projectPath);
  const parentDir = dirname3(target);
  mkdirSync(parentDir, { recursive: true });
  writeFileSync(target, JSON.stringify(cache));
}
|
|
1058
2317
|
function hashContent(content) {
|
|
@@ -1065,8 +2324,8 @@ function getChangedFiles(projectPath, filePaths) {
|
|
|
1065
2324
|
const unchanged = [];
|
|
1066
2325
|
for (const fp of filePaths) {
|
|
1067
2326
|
try {
|
|
1068
|
-
const content =
|
|
1069
|
-
const relPath =
|
|
2327
|
+
const content = readFileSync3(fp, "utf-8");
|
|
2328
|
+
const relPath = relative4(resolve4(projectPath), resolve4(fp));
|
|
1070
2329
|
const hash = hashContent(content);
|
|
1071
2330
|
newCache[relPath] = hash;
|
|
1072
2331
|
if (oldCache[relPath] === hash) {
|
|
@@ -1089,13 +2348,13 @@ var DEFAULT_AUDIT_CONFIG = {
|
|
|
1089
2348
|
incrementalScan: true
|
|
1090
2349
|
};
|
|
1091
2350
|
/**
 * Builds the location of the audit configuration file inside a project.
 *
 * @param {string} projectPath - Project root.
 * @returns {string} `<projectPath>/.cto/audit/config.json`.
 */
function getAuditConfigPath(projectPath) {
  const segments = [".cto", "audit", "config.json"];
  return join4(projectPath, ...segments);
}
|
|
1094
2353
|
function loadAuditConfig(projectPath) {
|
|
1095
2354
|
const filePath = getAuditConfigPath(projectPath);
|
|
1096
|
-
if (!
|
|
2355
|
+
if (!existsSync3(filePath)) return { ...DEFAULT_AUDIT_CONFIG };
|
|
1097
2356
|
try {
|
|
1098
|
-
const loaded = JSON.parse(
|
|
2357
|
+
const loaded = JSON.parse(readFileSync3(filePath, "utf-8"));
|
|
1099
2358
|
return { ...DEFAULT_AUDIT_CONFIG, ...loaded };
|
|
1100
2359
|
} catch {
|
|
1101
2360
|
return { ...DEFAULT_AUDIT_CONFIG };
|
|
@@ -1193,7 +2452,7 @@ async function auditProject(projectPath, filePaths, options = {}) {
|
|
|
1193
2452
|
for (const fp of filesToScan) {
|
|
1194
2453
|
try {
|
|
1195
2454
|
const content = await readFile3(fp, "utf-8");
|
|
1196
|
-
const relPath =
|
|
2455
|
+
const relPath = relative4(resolve4(projectPath), resolve4(fp));
|
|
1197
2456
|
const isTestFile = /\.(test|spec|mock)\.[jt]sx?$/.test(relPath) || relPath.includes("__tests__");
|
|
1198
2457
|
const isDtsFile = relPath.endsWith(".d.ts");
|
|
1199
2458
|
let findings = scanContentForSecrets(content, relPath, customPatterns, extraPiiDomains);
|
|
@@ -1800,19 +3059,8 @@ async function selectContext(input) {
|
|
|
1800
3059
|
for (const s of input.semanticScores ?? []) semanticMap.set(s.filePath, s.score);
|
|
1801
3060
|
const learnerMap = /* @__PURE__ */ new Map();
|
|
1802
3061
|
for (const b of input.learnerBoosts ?? []) learnerMap.set(b.filePath, b.boost);
|
|
1803
|
-
|
|
1804
|
-
if (targetPaths.length
|
|
1805
|
-
const sorted = [...semanticMap.entries()].sort((a, b) => b[1] - a[1]);
|
|
1806
|
-
const threshold = 0.5;
|
|
1807
|
-
targetPaths = sorted.filter(([, score]) => score >= threshold).slice(0, 10).map(([path]) => path);
|
|
1808
|
-
if (targetPaths.length > 0) {
|
|
1809
|
-
decisions.push({
|
|
1810
|
-
file: targetPaths.join(", "),
|
|
1811
|
-
action: "include-full",
|
|
1812
|
-
reason: `Top ${targetPaths.length} file(s) identified via semantic matching (score \u2265 ${threshold})`
|
|
1813
|
-
});
|
|
1814
|
-
}
|
|
1815
|
-
} else if (targetPaths.length > 0) {
|
|
3062
|
+
const targetPaths = identifyTargetFiles(task, analysis.files);
|
|
3063
|
+
if (targetPaths.length > 0) {
|
|
1816
3064
|
decisions.push({
|
|
1817
3065
|
file: targetPaths.join(", "),
|
|
1818
3066
|
action: "include-full",
|
|
@@ -1844,7 +3092,7 @@ async function selectContext(input) {
|
|
|
1844
3092
|
}
|
|
1845
3093
|
const { mustInclude, mustExclude } = applyPolicies(analysis.files, policies);
|
|
1846
3094
|
const candidateSet = /* @__PURE__ */ new Set([...expandedPaths, ...mustInclude]);
|
|
1847
|
-
if (targetPaths.length === 0) {
|
|
3095
|
+
if (semanticMap.size > 0 || targetPaths.length === 0) {
|
|
1848
3096
|
for (const f of analysis.files) {
|
|
1849
3097
|
candidateSet.add(f.relativePath);
|
|
1850
3098
|
}
|
|
@@ -1883,22 +3131,32 @@ async function selectContext(input) {
|
|
|
1883
3131
|
const riskNorm = file.riskScore / maxRisk;
|
|
1884
3132
|
const semantic = semanticMap.get(file.relativePath) ?? 0;
|
|
1885
3133
|
const learner = ((learnerMap.get(file.relativePath) ?? 0) + 1) / 2;
|
|
1886
|
-
return
|
|
3134
|
+
return semantic * 0.55 + riskNorm * 0.25 + learner * 0.2;
|
|
1887
3135
|
}
|
|
3136
|
+
const targetSet = new Set(targetPaths);
|
|
1888
3137
|
const candidates = Array.from(candidateSet).map((p) => allFileMap.get(p)).filter((f) => f !== void 0).sort((a, b) => {
|
|
1889
|
-
const
|
|
1890
|
-
const
|
|
1891
|
-
|
|
1892
|
-
const aIsMust = mustInclude.has(a.relativePath) ? 0 : 1;
|
|
1893
|
-
const bIsMust = mustInclude.has(b.relativePath) ? 0 : 1;
|
|
1894
|
-
if (aIsMust !== bIsMust) return aIsMust - bIsMust;
|
|
1895
|
-
return compositeScore(b) - compositeScore(a);
|
|
3138
|
+
const aBonus = (targetSet.has(a.relativePath) ? 0.3 : 0) + (mustInclude.has(a.relativePath) ? 0.15 : 0);
|
|
3139
|
+
const bBonus = (targetSet.has(b.relativePath) ? 0.3 : 0) + (mustInclude.has(b.relativePath) ? 0.15 : 0);
|
|
3140
|
+
return compositeScore(b) + bBonus - (compositeScore(a) + aBonus);
|
|
1896
3141
|
});
|
|
1897
3142
|
const selectedFiles = [];
|
|
1898
3143
|
let usedTokens = 0;
|
|
3144
|
+
const hasSemanticSignal = semanticMap.size > 0;
|
|
1899
3145
|
for (const file of candidates) {
|
|
1900
|
-
const isTarget =
|
|
3146
|
+
const isTarget = targetSet.has(file.relativePath);
|
|
1901
3147
|
const isMustInclude = mustInclude.has(file.relativePath);
|
|
3148
|
+
if (hasSemanticSignal && !isTarget && !isMustInclude) {
|
|
3149
|
+
const semScore = semanticMap.get(file.relativePath) ?? 0;
|
|
3150
|
+
const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
|
|
3151
|
+
if (semScore === 0 && lrnBoost === 0) {
|
|
3152
|
+
decisions.push({
|
|
3153
|
+
file: file.relativePath,
|
|
3154
|
+
action: "exclude",
|
|
3155
|
+
reason: "Skipped: no semantic relevance to task"
|
|
3156
|
+
});
|
|
3157
|
+
continue;
|
|
3158
|
+
}
|
|
3159
|
+
}
|
|
1902
3160
|
const defaultLevel = isTarget ? "full" : getPruneLevelForRisk(file.riskScore);
|
|
1903
3161
|
const levels = getCascadeLevels2(defaultLevel);
|
|
1904
3162
|
let included = false;
|
|
@@ -2022,281 +3280,394 @@ function buildReason(file, level, isTarget, isMustInclude) {
|
|
|
2022
3280
|
return `Low relevance (risk ${file.riskScore}) \u2014 ${levelStr}`;
|
|
2023
3281
|
}
|
|
2024
3282
|
|
|
2025
|
-
// src/engine/
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
function buildIndex(files) {
|
|
3283
|
+
// src/engine/context-pipeline.ts
|
|
3284
|
+
import { readFileSync as readFileSync6 } from "fs";
|
|
3285
|
+
init_tfidf();
|
|
3286
|
+
|
|
3287
|
+
// src/engine/index-cache.ts
|
|
3288
|
+
init_tfidf();
|
|
3289
|
+
import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, statSync } from "fs";
|
|
3290
|
+
import { join as join5 } from "path";
|
|
3291
|
+
var CACHE_VERSION = 2;
|
|
3292
|
+
var CACHE_DIR = ".cto";
|
|
3293
|
+
var CACHE_FILE = "index-cache.json";
|
|
3294
|
+
/**
 * Builds the search index for a project, reusing per-file term counts from
 * the on-disk cache whenever a file's modification time is unchanged.
 *
 * Files that cannot be stat'ed or read are left out of the index. The
 * refreshed per-file data is written back to the cache before returning.
 *
 * Term counting goes through a Map so that tokens which coincide with
 * Object.prototype members (e.g. "constructor") are counted like any other
 * term instead of picking up the inherited property as their initial value.
 * For the same reason the cache is only consulted for own properties.
 *
 * @param {string} projectPath - Project root; the cache lives in `<root>/.cto/index-cache.json`.
 * @param {Array<{relativePath: string, absolutePath: string, content?: string}>} files
 *   Files to index. `content` is used when present, otherwise the file is read from disk.
 * @returns {{index: *, stats: {totalFiles: number, updatedFiles: number, removedFiles: number,
 *   cachedFiles: number, cacheHit: boolean, buildTimeMs: number}}}
 *   The rebuilt index together with cache statistics.
 */
function buildIndexCached(projectPath, files) {
  const startTime = Date.now();
  const cachePath = join5(projectPath, CACHE_DIR, CACHE_FILE);
  const existing = loadCache(cachePath);
  const cacheHit = existing !== null;
  const cachedFiles = existing?.files ?? {};
  const isOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const newCachedFiles = {};
  let updatedFiles = 0;
  let removedFiles = 0;
  let cachedCount = 0;
  const currentPaths = new Set(files.map((f) => f.relativePath));
  if (existing) {
    // Cached entries whose file no longer exists are simply not carried over.
    for (const path of Object.keys(cachedFiles)) {
      if (!currentPaths.has(path)) {
        removedFiles++;
      }
    }
  }
  for (const file of files) {
    const cached = isOwn(cachedFiles, file.relativePath) ? cachedFiles[file.relativePath] : void 0;
    let currentMtime = 0;
    try {
      currentMtime = statSync(file.absolutePath).mtimeMs;
    } catch {
      continue;
    }
    if (cached && cached.mtime === currentMtime) {
      newCachedFiles[file.relativePath] = cached;
      cachedCount++;
      continue;
    }
    let content = file.content;
    if (content === void 0) {
      try {
        content = readFileSync4(file.absolutePath, "utf-8");
      } catch {
        continue;
      }
    }
    const terms = tokenize(content);
    const counts = /* @__PURE__ */ new Map();
    for (const term of terms) {
      counts.set(term, (counts.get(term) ?? 0) + 1);
    }
    newCachedFiles[file.relativePath] = {
      mtime: currentMtime,
      // Plain object so the entry serialises to the same JSON shape as before.
      terms: Object.fromEntries(counts),
      length: terms.length
    };
    updatedFiles++;
  }
  const index = rebuildIndex(newCachedFiles);
  saveCache(cachePath, newCachedFiles);
  const stats = {
    totalFiles: Object.keys(newCachedFiles).length,
    updatedFiles,
    removedFiles,
    cachedFiles: cachedCount,
    cacheHit,
    buildTimeMs: Date.now() - startTime
  };
  return { index, stats };
}
|
|
3358
|
+
/**
 * Invalidate the on-disk index cache of a project by overwriting it with an
 * empty JSON object. Nothing is created when no cache file exists, and any
 * filesystem error is ignored (best effort).
 *
 * @param {string} projectPath - Project root containing CACHE_DIR/CACHE_FILE.
 * @returns {void}
 */
function invalidateCache(projectPath) {
  const target = join5(projectPath, CACHE_DIR, CACHE_FILE);
  try {
    if (!existsSync4(target)) return;
    // "{}" has no `version` field, so loadCache() will reject it.
    writeFileSync2(target, "{}");
  } catch {
    // Best effort: a failed invalidation is deliberately not reported.
  }
}
|
|
3367
|
+
/**
 * Describe the current index cache of a project.
 *
 * @param {string} projectPath - Project root containing CACHE_DIR/CACHE_FILE.
 * @returns {{exists: boolean, fileCount: number, builtAt: (string|null)}}
 *   `exists` is false when loadCache() yields nothing (missing, unreadable or
 *   rejected cache); otherwise the number of cached files and the stored
 *   build timestamp.
 */
function getCacheInfo(projectPath) {
  const snapshot = loadCache(join5(projectPath, CACHE_DIR, CACHE_FILE));
  if (snapshot) {
    return {
      exists: true,
      fileCount: Object.keys(snapshot.files).length,
      builtAt: snapshot.builtAt
    };
  }
  return { exists: false, fileCount: 0, builtAt: null };
}
|
|
3377
|
+
/**
 * Read and validate the cache file.
 *
 * @param {string} cachePath - Absolute path of the cache JSON file.
 * @returns {object|null} The parsed cache when it exists, parses as JSON,
 *   carries the current CACHE_VERSION and has an object-typed `files` field;
 *   null in every other case (including any read or parse error).
 */
function loadCache(cachePath) {
  try {
    if (!existsSync4(cachePath)) {
      return null;
    }
    const parsed = JSON.parse(readFileSync4(cachePath, "utf-8"));
    const versionMatches = parsed.version === CACHE_VERSION;
    const hasFiles = Boolean(parsed.files) && typeof parsed.files === "object";
    return versionMatches && hasFiles ? parsed : null;
  } catch {
    // Unreadable or malformed cache is treated the same as "no cache".
    return null;
  }
}
|
|
3389
|
+
/**
 * Persist the per-file term cache as JSON, creating the parent directory when
 * needed. Failures are ignored on purpose: the cache is an optimization and
 * must never break indexing.
 *
 * @param {string} cachePath - Destination file path.
 * @param {object} files - Map of relative path -> { mtime, terms, length }.
 * @returns {void}
 */
function saveCache(cachePath, files) {
  try {
    // The path is produced by path.join, which emits "\" on Windows. Looking
    // only for "/" yielded an empty directory there, mkdirSync("") threw, and
    // the cache was silently never written. Accept either separator.
    const lastSeparator = Math.max(cachePath.lastIndexOf("/"), cachePath.lastIndexOf("\\"));
    const dir = lastSeparator > 0 ? cachePath.substring(0, lastSeparator) : "";
    if (dir !== "" && !existsSync4(dir)) {
      mkdirSync2(dir, { recursive: true });
    }
    const data = {
      version: CACHE_VERSION,
      builtAt: (/* @__PURE__ */ new Date()).toISOString(),
      files
    };
    writeFileSync2(cachePath, JSON.stringify(data));
  } catch {
    // Best effort: an unwritable cache only costs a rebuild next time.
  }
}
|
|
3404
|
+
/**
 * Turn the serializable per-file cache into the in-memory index structure.
 *
 * @param {object} cachedFiles - Map of path -> { terms: {term: count}, length }.
 * @returns {{documents: Map, idf: Map, avgDocLength: number, totalDocs: number}}
 *   `documents` maps each path to its term-count Map and token length; `idf`
 *   maps each term to log((N - df + 0.5) / (df + 0.5) + 1); `avgDocLength` is
 *   1 when there are no documents.
 */
function rebuildIndex(cachedFiles) {
  const documents = new Map();
  const docFreq = new Map();
  let totalLength = 0;
  Object.keys(cachedFiles).forEach((filePath) => {
    const entry = cachedFiles[filePath];
    const termMap = new Map(Object.entries(entry.terms));
    documents.set(filePath, { terms: termMap, length: entry.length });
    totalLength += entry.length;
    // Each distinct term in this file contributes one to its document frequency.
    termMap.forEach((_count, term) => {
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    });
  });
  const totalDocs = documents.size;
  const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
  const idf = new Map();
  docFreq.forEach((df, term) => {
    idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
  });
  return { documents, idf, avgDocLength, totalDocs };
}
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
3427
|
+
|
|
3428
|
+
// src/engine/reranker.ts
|
|
3429
|
+
init_tfidf();
|
|
3430
|
+
// Relative weight of each rerank signal. rerank() computes a candidate's score
// as the weighted sum of the five signals; the weights add up to 1.
var WEIGHTS = {
  // What fraction of query terms does the file match?
  termCoverage: 0.35,
  // Are matched terms rare or common?
  termSpecificity: 0.25,
  // Do query terms appear near each other?
  bigramProximity: 0.15,
  // Is this file connected to a top match?
  dependencySignal: 0.1,
  // Does the file path match query terms?
  pathRelevance: 0.15
};
// Quality-gate settings used by applyQualityGate():
// candidates scoring below this value are always filtered out.
var ABSOLUTE_FLOOR = 0.18;
// Minimum relative drop between two consecutive ranked scores for that
// position to count as the "elbow" of the score curve.
var ELBOW_DROP_RATIO = 0.35;
// Candidates matching a smaller fraction of the query terms are filtered out.
var MIN_TERM_COVERAGE = 0.3;
|
|
3445
|
+
/**
 * Re-score retrieval candidates with five signals (term coverage, term
 * specificity, bigram proximity, dependency proximity to the top matches and
 * path relevance), sort them, and run them through the quality gate.
 *
 * @param {{task: string, candidates: Array<{filePath: string, score: number}>,
 *   index: object, fileContents: Map<string, string>,
 *   dependencies: Map<string, string[]>, allFilePaths: string[]}} input
 * @returns {{files: Array, filtered: Array, qualityThreshold: number, telemetry: object}}
 *   Candidates that passed the gate (best first), the rejected ones with a
 *   reason, the gate threshold, and telemetry about the run. Candidates that
 *   are not present in `index.documents` are dropped without being reported.
 */
function rerank(input) {
  const startedAt = Date.now();
  const { task, candidates, index, fileContents, dependencies, allFilePaths } = input;
  const signalNames = ["termCoverage", "termSpecificity", "bigramProximity", "dependencySignal", "pathRelevance"];
  const gateConfig = () => ({
    absoluteFloor: ABSOLUTE_FLOOR,
    elbowDropRatio: ELBOW_DROP_RATIO,
    minTermCoverage: MIN_TERM_COVERAGE
  });
  // Weighted sum of the five signals, always added in the same order.
  const combine = (signals) => signals.termCoverage * WEIGHTS.termCoverage + signals.termSpecificity * WEIGHTS.termSpecificity + signals.bigramProximity * WEIGHTS.bigramProximity + signals.dependencySignal * WEIGHTS.dependencySignal + signals.pathRelevance * WEIGHTS.pathRelevance;
  // Result returned when there is nothing to rank.
  const emptyResult = () => ({
    files: [],
    filtered: [],
    qualityThreshold: 0,
    telemetry: {
      candidatesIn: candidates.length,
      candidatesOut: 0,
      candidatesFiltered: 0,
      durationMs: Date.now() - startedAt,
      weights: { ...WEIGHTS },
      gateConfig: gateConfig(),
      signalStats: {
        termCoverage: { min: 0, max: 0, mean: 0, median: 0 },
        termSpecificity: { min: 0, max: 0, mean: 0, median: 0 },
        bigramProximity: { min: 0, max: 0, mean: 0, median: 0 },
        dependencySignal: { min: 0, max: 0, mean: 0, median: 0 },
        pathRelevance: { min: 0, max: 0, mean: 0, median: 0 }
      },
      filterReasons: {},
      scoreDistribution: [0, 0, 0, 0, 0],
      queryTermCount: 0,
      relevanceConeSize: 0
    }
  });
  if (candidates.length === 0) {
    return emptyResult();
  }
  const uniqueQueryTerms = [...new Set(tokenize(task))];
  if (uniqueQueryTerms.length === 0) {
    return emptyResult();
  }
  const queryTermSet = new Set(uniqueQueryTerms);
  const queryTermIdfs = new Map(uniqueQueryTerms.map((term) => [term, index.idf.get(term) ?? 0]));
  const maxIdf = Math.max(1, ...queryTermIdfs.values());
  // Tokenized path segments for every known file ("/", "\" and "." split words).
  const pathTermsCache = new Map();
  for (const fp of allFilePaths) {
    pathTermsCache.set(fp, new Set(tokenize(fp.replace(/[/\\.]/g, " "))));
  }

  // Pass 1: content/path signals; dependencySignal is filled in by pass 2.
  const scored = [];
  for (const candidate of candidates) {
    const doc = index.documents.get(candidate.filePath);
    if (!doc) continue;
    const matched = uniqueQueryTerms.filter((term) => (doc.terms.get(term) ?? 0) > 0);
    const termCoverage = matched.length / uniqueQueryTerms.length;
    let specificitySum = 0;
    let specificityMax = 0;
    for (const term of matched) {
      specificitySum += queryTermIdfs.get(term) ?? 0;
      specificityMax += maxIdf;
    }
    const termSpecificity = specificityMax > 0 ? specificitySum / specificityMax : 0;
    const bigramProximity = computeBigramProximity(fileContents.get(candidate.filePath) ?? "", uniqueQueryTerms);
    let pathHits = 0;
    for (const pathTerm of pathTermsCache.get(candidate.filePath) ?? new Set()) {
      if (queryTermSet.has(pathTerm)) pathHits++;
    }
    const pathRelevance = Math.min(1, pathHits / Math.max(1, uniqueQueryTerms.length) * 2);
    const signals = {
      termCoverage,
      termSpecificity,
      bigramProximity,
      dependencySignal: 0,
      pathRelevance
    };
    scored.push({
      filePath: candidate.filePath,
      score: combine(signals),
      bm25Score: candidate.score,
      signals
    });
  }

  // The "relevance cone": the five best candidates so far plus everything
  // they import and everything that imports them.
  const relevanceCone = new Set();
  const leaders = [...scored].sort((a, b) => b.score - a.score).slice(0, 5);
  for (const leader of leaders) {
    relevanceCone.add(leader.filePath);
    for (const dep of dependencies.get(leader.filePath) ?? []) {
      relevanceCone.add(dep);
    }
    for (const [from, tos] of dependencies) {
      if (tos.includes(leader.filePath)) relevanceCone.add(from);
    }
  }

  // Pass 2: add the dependency signal and recompute the final score.
  for (const item of scored) {
    item.signals.dependencySignal = relevanceCone.has(item.filePath) ? 1 : 0;
    item.score = combine(item.signals);
  }
  scored.sort((a, b) => b.score - a.score);
  const { passed, filtered, threshold } = applyQualityGate(scored);

  // Count rejections per reason, with the parenthesized details stripped.
  const filterReasons = {};
  for (const rejected of filtered) {
    const reason = rejected.reason.replace(/\([^)]+\)/g, "").trim();
    filterReasons[reason] = (filterReasons[reason] ?? 0) + 1;
  }

  const signalStats = {};
  for (const name of signalNames) {
    const vals = scored.map((s) => s.signals[name]).sort((a, b) => a - b);
    const count = vals.length;
    signalStats[name] = {
      min: vals[0] ?? 0,
      max: vals[count - 1] ?? 0,
      mean: count > 0 ? vals.reduce((a, b) => a + b, 0) / count : 0,
      median: count > 0 ? vals[Math.floor(count / 2)] : 0
    };
  }

  // Min, quartiles and max of the final scores.
  const ascending = scored.map((s) => s.score).sort((a, b) => a - b);
  const scoreAt = (position) => ascending[position] ?? 0;
  const telemetry = {
    candidatesIn: candidates.length,
    candidatesOut: passed.length,
    candidatesFiltered: filtered.length,
    durationMs: Date.now() - startedAt,
    weights: { ...WEIGHTS },
    gateConfig: gateConfig(),
    signalStats,
    filterReasons,
    scoreDistribution: [
      scoreAt(0),
      scoreAt(Math.floor(ascending.length * 0.25)),
      scoreAt(Math.floor(ascending.length * 0.5)),
      scoreAt(Math.floor(ascending.length * 0.75)),
      scoreAt(ascending.length - 1)
    ],
    queryTermCount: uniqueQueryTerms.length,
    relevanceConeSize: relevanceCone.size
  };
  return {
    files: passed,
    filtered,
    qualityThreshold: threshold,
    telemetry
  };
}
|
|
2222
|
-
function
|
|
2223
|
-
|
|
2224
|
-
const
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
const
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
tokens.push(stemmed);
|
|
3587
|
+
/**
 * Score how close together the query terms occur in a file.
 *
 * For every pair of query terms that both occur in the content, the smallest
 * token distance between an occurrence of each is mapped to
 * max(0, 1 - distance / 20); the result is the mean over those pairs.
 *
 * @param {string} content - File content to inspect.
 * @param {string[]} queryTerms - Tokenized query terms.
 * @returns {number} A value in [0, 1]; 0 when there are fewer than two query
 *   terms, the content is empty, or no pair of terms co-occurs.
 */
function computeBigramProximity(content, queryTerms) {
  if (queryTerms.length < 2 || !content) return 0;
  const contentTokens = tokenize(content);
  // Set lookup instead of Array.includes inside the token loop.
  const wanted = new Set(queryTerms);
  const termPositions = new Map();
  for (let i = 0; i < contentTokens.length; i++) {
    const token = contentTokens[i];
    if (!wanted.has(token)) continue;
    const positions = termPositions.get(token);
    if (positions) {
      positions.push(i);
    } else {
      termPositions.set(token, [i]);
    }
  }
  let totalScore = 0;
  let pairCount = 0;
  for (let i = 0; i < queryTerms.length; i++) {
    const posA = termPositions.get(queryTerms[i]);
    if (!posA) continue;
    for (let j = i + 1; j < queryTerms.length; j++) {
      const posB = termPositions.get(queryTerms[j]);
      if (!posB) continue;
      // Both position lists are ascending (filled in token order), so the
      // minimum gap is found with one linear merge instead of comparing
      // every occurrence of A with every occurrence of B.
      let minDist = Infinity;
      let a = 0;
      let b = 0;
      while (a < posA.length && b < posB.length) {
        const gap = Math.abs(posA[a] - posB[b]);
        if (gap < minDist) minDist = gap;
        if (posA[a] < posB[b]) {
          a++;
        } else {
          b++;
        }
      }
      if (minDist < Infinity) {
        totalScore += Math.max(0, 1 - minDist / 20);
        pairCount++;
      }
    }
  }
  return pairCount > 0 ? totalScore / pairCount : 0;
}
|
|
2262
|
-
function
|
|
2263
|
-
const
|
|
2264
|
-
const
|
|
2265
|
-
|
|
2266
|
-
|
|
3620
|
+
/**
 * Split ranked candidates into those worth keeping and those to drop.
 *
 * A candidate is rejected when (checked in this order) its score is below
 * ABSOLUTE_FLOOR, its term coverage is below MIN_TERM_COVERAGE, or it sits at
 * or after the "elbow" (the largest relative score drop of at least
 * ELBOW_DROP_RATIO, only searched for with three or more candidates) while
 * scoring less than half of the best candidate.
 *
 * @param {Array<{filePath: string, score: number, signals: object}>} scored
 *   Candidates sorted by descending score.
 * @returns {{passed: Array, filtered: Array<{filePath: string, score: number, reason: string}>, threshold: number}}
 *   `threshold` is the larger of ABSOLUTE_FLOOR and the score at the elbow
 *   (0 for an empty input).
 */
function applyQualityGate(scored) {
  const passed = [];
  const filtered = [];
  const total = scored.length;
  if (total === 0) {
    return { passed, filtered, threshold: 0 };
  }

  // Locate the elbow: the position with the largest qualifying relative drop.
  let elbowIndex = total;
  if (total >= 3) {
    let maxDrop = 0;
    for (let i = 1; i < total; i++) {
      const previous = scored[i - 1].score;
      if (!(previous > 0)) continue;
      const drop = (previous - scored[i].score) / previous;
      if (drop > maxDrop && drop >= ELBOW_DROP_RATIO) {
        maxDrop = drop;
        elbowIndex = i;
      }
    }
  }
  const elbowScore = elbowIndex < total ? scored[elbowIndex].score : 0;
  const threshold = Math.max(ABSOLUTE_FLOOR, elbowScore);

  const reject = (item, reason) => {
    filtered.push({ filePath: item.filePath, score: item.score, reason });
  };
  scored.forEach((item, i) => {
    if (item.score < ABSOLUTE_FLOOR) {
      reject(item, `Below absolute floor (${item.score.toFixed(3)} < ${ABSOLUTE_FLOOR})`);
    } else if (item.signals.termCoverage < MIN_TERM_COVERAGE) {
      reject(item, `Low term coverage (${(item.signals.termCoverage * 100).toFixed(0)}% < ${MIN_TERM_COVERAGE * 100}%)`);
    } else if (i >= elbowIndex && item.score < scored[0].score * 0.5) {
      reject(item, `Below elbow cutoff (rank ${i + 1}, score ${item.score.toFixed(3)})`);
    } else {
      passed.push(item);
    }
  });
  return { passed, filtered, threshold };
}
|
|
2290
3661
|
|
|
2291
3662
|
// src/engine/learner.ts
|
|
2292
3663
|
import { readFile as readFile5, writeFile as writeFile2, mkdir } from "fs/promises";
|
|
2293
|
-
import { join as
|
|
3664
|
+
import { join as join6 } from "path";
|
|
2294
3665
|
// Learner tuning constants. Their consumers are outside this excerpt.
// NOTE(review): presumably the per-update decay applied to learned weights — confirm against recordSelection.
var DECAY_FACTOR = 0.95;
// Directory (relative to the project root) that holds the learner model.
var MODEL_DIR = ".cto";
// File name of the persisted learner model inside MODEL_DIR.
var MODEL_FILE = "learner.json";
// NOTE(review): presumably the minimum number of observations before a learned pattern is used — confirm against getLearnerBoosts.
var MIN_OBSERVATIONS = 3;
|
|
2298
3669
|
async function loadLearner(projectPath) {
|
|
2299
|
-
const modelPath =
|
|
3670
|
+
const modelPath = join6(projectPath, MODEL_DIR, MODEL_FILE);
|
|
2300
3671
|
try {
|
|
2301
3672
|
const raw = await readFile5(modelPath, "utf-8");
|
|
2302
3673
|
const parsed = JSON.parse(raw);
|
|
@@ -2306,10 +3677,10 @@ async function loadLearner(projectPath) {
|
|
|
2306
3677
|
return createEmptyModel();
|
|
2307
3678
|
}
|
|
2308
3679
|
/**
 * Persist the learner model as pretty-printed JSON under
 * <projectPath>/MODEL_DIR/MODEL_FILE, creating the directory when needed.
 * The model's `updatedAt` field is overwritten with the current time (the
 * caller's object is mutated).
 *
 * @param {string} projectPath - Project root.
 * @param {object} model - Learner model to store.
 * @returns {Promise<void>} Rejects when the directory or file cannot be written.
 */
async function saveLearner(projectPath, model) {
  const modelDir = join6(projectPath, MODEL_DIR);
  const modelPath = join6(modelDir, MODEL_FILE);
  await mkdir(modelDir, { recursive: true });
  // Stamp only after the directory exists, right before serializing.
  model.updatedAt = new Date().toISOString();
  const payload = JSON.stringify(model, null, 2);
  await writeFile2(modelPath, payload);
}
|
|
2314
3685
|
function recordSelection(model, taskType, selectedFiles, excludedFiles) {
|
|
2315
3686
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -2426,6 +3797,289 @@ function extractPattern(filePath) {
|
|
|
2426
3797
|
return `*${ext}`;
|
|
2427
3798
|
}
|
|
2428
3799
|
|
|
3800
|
+
// src/interact/router.ts
|
|
3801
|
+
// Keywords that hint at each task type. Matching is by plain substring on the
// lower-cased task description; table order decides ties (earlier wins).
var TASK_KEYWORDS = {
  debug: ["debug", "fix", "bug", "error", "issue", "broken", "crash", "failing", "wrong"],
  review: ["review", "check", "assess", "evaluate", "audit", "inspect", "critique"],
  refactor: ["refactor", "restructure", "reorganize", "clean up", "simplify", "extract", "move"],
  test: ["test", "spec", "coverage", "unit test", "integration test", "e2e"],
  docs: ["document", "docs", "readme", "jsdoc", "comment", "explain"],
  feature: ["add", "implement", "create", "build", "new", "feature", "endpoint"],
  architecture: ["architecture", "design", "system", "structure", "migrate", "pattern"],
  "simple-edit": ["rename", "typo", "update", "change", "modify", "tweak", "adjust"]
};
/**
 * Guess the task type of a free-text task description.
 *
 * @param {string} taskDescription - What the user wants done.
 * @returns {string} The TASK_KEYWORDS key with the most keyword hits;
 *   "simple-edit" when nothing matches.
 */
function classifyTask(taskDescription) {
  const haystack = taskDescription.toLowerCase();
  let winner = { type: "simple-edit", hits: 0 };
  for (const type of Object.keys(TASK_KEYWORDS)) {
    const hits = TASK_KEYWORDS[type].filter((keyword) => haystack.includes(keyword)).length;
    // Strictly greater: on a tie the earlier type keeps the lead.
    if (hits > winner.hits) {
      winner = { type, hits };
    }
  }
  return winner.type;
}
|
|
3827
|
+
|
|
3828
|
+
// src/engine/context-pipeline.ts
|
|
3829
|
+
/**
 * Run the full context-selection pipeline for a task: classify the task, read
 * the project files, build (or reuse) the search index, retrieve and rerank
 * candidates, apply learner boosts, and select the final context within the
 * token budget. Sibling repositories are queried as well when provided.
 *
 * @param {{projectPath: string, task: string, analysis: object,
 *   budget?: number, siblingRepos?: Array}} input
 *   `analysis.files` entries are read via their `path`, `relativePath` and
 *   `imports` fields. `budget` defaults to 50000.
 * @returns {Promise<{selection: object, taskType: string,
 *   fileContentMap: Map<string, string>, semanticMap: Map, learnerMap: Map,
 *   multiRepo: (object|undefined), indexCacheStats: object}>}
 */
async function runContextPipeline(input) {
  const { projectPath, task, analysis, budget = 5e4 } = input;
  const taskType = classifyTask(task);

  // Read every analyzed file once; unreadable files simply get no entry
  // (buildIndexCached then retries the read itself and skips them on failure).
  const fileContentMap = /* @__PURE__ */ new Map();
  for (const file of analysis.files) {
    try {
      fileContentMap.set(file.relativePath, readFileSync6(file.path, "utf-8"));
    } catch {
      // Intentionally ignored: a missing entry means "content unavailable".
    }
  }

  const indexFiles = analysis.files.map((f) => ({
    relativePath: f.relativePath,
    absolutePath: f.path,
    content: fileContentMap.get(f.relativePath)
  }));
  const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);

  const semanticMatches = query(index, task, 50);
  const boostedMatches = boostByPath(
    semanticMatches,
    analysis.files.map((f) => f.relativePath),
    task
  );

  const depMap = /* @__PURE__ */ new Map();
  for (const file of analysis.files) {
    depMap.set(file.relativePath, file.imports);
  }
  const rerankResult = rerank({
    task,
    candidates: boostedMatches,
    index,
    fileContents: fileContentMap,
    dependencies: depMap,
    allFilePaths: analysis.files.map((f) => f.relativePath)
  });

  // One lookup table instead of a linear search per reranked file. The first
  // entry per path wins, matching what Array.prototype.find returned.
  const matchedTermsByPath = /* @__PURE__ */ new Map();
  for (const match of boostedMatches) {
    if (!matchedTermsByPath.has(match.filePath)) {
      matchedTermsByPath.set(match.filePath, match.matchedTerms);
    }
  }
  const rerankedMatches = rerankResult.files.map((rf) => ({
    filePath: rf.filePath,
    // Keep original BM25 score for composite
    score: rf.bm25Score,
    matchedTerms: matchedTermsByPath.get(rf.filePath) ?? []
  }));

  const learner = await loadLearner(projectPath);
  const learnerBoosts = getLearnerBoosts(
    learner,
    taskType,
    analysis.files.map((f) => f.relativePath)
  );
  const semanticScores = rerankedMatches.map((m) => ({ filePath: m.filePath, score: m.score }));
  const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
  const selection = await selectContext({
    task,
    analysis,
    budget,
    semanticScores,
    learnerBoosts: learnerBoostInputs
  });

  const semanticMap = new Map(rerankedMatches.map((m) => [m.filePath, m]));
  const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));

  let multiRepo;
  if (input.siblingRepos && input.siblingRepos.length > 0) {
    // Lazily initialize the multi-repo module only when it is needed.
    const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
    multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
  }
  return { selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats };
}
|
|
3897
|
+
|
|
3898
|
+
// src/engine/index.ts
|
|
3899
|
+
init_tfidf();
|
|
3900
|
+
|
|
3901
|
+
// src/engine/ab-testing.ts
|
|
3902
|
+
import { createHash as createHash4 } from "crypto";
|
|
3903
|
+
import { readFileSync as readFileSync7, writeFileSync as writeFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync3 } from "fs";
|
|
3904
|
+
import { join as join8 } from "path";
|
|
3905
|
+
// File name of the persisted experiment list inside <project>/.cto.
var EXPERIMENTS_FILE = "experiments.json";
/**
 * Load the stored A/B experiments of a project.
 *
 * @param {string} projectPath - Project root.
 * @returns {Array<object>} The stored experiments; an empty array when the
 *   file is missing, unreadable, not valid JSON, or does not contain an array.
 */
function loadExperiments(projectPath) {
  const path = join8(projectPath, ".cto", EXPERIMENTS_FILE);
  try {
    if (!existsSync6(path)) return [];
    const parsed = JSON.parse(readFileSync7(path, "utf-8"));
    // Valid JSON that is not an array (e.g. "{}" or "null") would break array
    // callers such as getActiveExperiment(); treat it like a corrupt file.
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}
|
|
3915
|
+
/**
 * Write the experiment list to <projectPath>/.cto/EXPERIMENTS_FILE as
 * pretty-printed JSON, creating the .cto directory when it does not exist.
 * Filesystem errors propagate to the caller.
 *
 * @param {string} projectPath - Project root.
 * @param {Array<object>} experiments - Experiments to persist.
 * @returns {void}
 */
function saveExperiments(projectPath, experiments) {
  const ctoDir = join8(projectPath, ".cto");
  const target = join8(ctoDir, EXPERIMENTS_FILE);
  if (!existsSync6(ctoDir)) {
    mkdirSync3(ctoDir, { recursive: true });
  }
  const serialized = JSON.stringify(experiments, null, 2);
  writeFileSync3(target, serialized);
}
|
|
3920
|
+
/**
 * Create a new, running A/B experiment with empty metrics for both arms.
 *
 * @param {string} id - Experiment identifier.
 * @param {string} name - Human-readable name.
 * @param {string} description - What the experiment tests.
 * @param {object} controlParams - Parameters used by the control arm.
 * @param {object} variantParams - Parameters used by the variant arm.
 * @param {{trafficSplit?: number, minObservations?: number, significanceThreshold?: number}} [options]
 *   Defaults: trafficSplit 0.5, minObservations 30, significanceThreshold 0.05.
 * @returns {object} The experiment record.
 */
function createExperiment(id, name, description, controlParams, variantParams, options = {}) {
  const makeArm = (armName, params) => ({
    name: armName,
    params,
    metrics: emptyMetrics()
  });
  const startedAt = new Date().toISOString();
  const experiment = {
    id,
    name,
    description,
    status: "running",
    startedAt,
    trafficSplit: options.trafficSplit ?? 0.5,
    minObservations: options.minObservations ?? 30,
    significanceThreshold: options.significanceThreshold ?? 0.05,
    control: makeArm("control", controlParams),
    variant: makeArm("variant", variantParams)
  };
  return experiment;
}
|
|
3942
|
+
/**
 * Fresh metrics record for one experiment arm, with every counter and rate
 * set to zero.
 *
 * @returns {{total: number, successes: number, acceptRate: number,
 *   avgTimeToAccept: number, compilableRate: number, timeSum: number,
 *   compilableCount: number}}
 */
function emptyMetrics() {
  const fields = [
    "total",
    "successes",
    "acceptRate",
    "avgTimeToAccept",
    "compilableRate",
    "timeSum",
    "compilableCount"
  ];
  return Object.fromEntries(fields.map((field) => [field, 0]));
}
|
|
3953
|
+
/**
 * Deterministically assign a task to the control or variant arm.
 *
 * The first 32 bits of SHA-256("<experiment id>:<task>") are scaled to [0, 1];
 * values below `trafficSplit` go to control, the rest to variant, so the same
 * task always lands in the same arm of a given experiment.
 *
 * @param {object} experiment - Experiment record.
 * @param {string} task - Task description used as the hashing key.
 * @returns {{group: string, params: object, experimentId: string}|null}
 *   null when the experiment is not running.
 */
function assignGroup(experiment, task) {
  if (experiment.status !== "running") {
    return null;
  }
  const digest = createHash4("sha256").update(`${experiment.id}:${task}`).digest();
  const bucket = digest.readUInt32BE(0) / 4294967295;
  const inControl = bucket < experiment.trafficSplit;
  const arm = inControl ? experiment.control : experiment.variant;
  return {
    group: inControl ? "control" : "variant",
    params: arm.params,
    experimentId: experiment.id
  };
}
|
|
3964
|
+
/**
 * Record one observed outcome for an arm and conclude the experiment once
 * both arms have enough observations and the difference is significant.
 *
 * The arm's metrics object is updated in place. Note that the time and
 * compilable rates are both computed over the arm's total observation count.
 *
 * @param {object} experiment - Experiment record.
 * @param {string} group - "control" selects the control arm; anything else the variant.
 * @param {{accepted: boolean, timeToAcceptMs?: number, compilable?: boolean}} outcome
 * @returns {object} The same experiment object, or a new concluded copy when
 *   the significance test passes. Non-running experiments are returned untouched.
 */
function recordOutcome(experiment, group, outcome) {
  if (experiment.status !== "running") {
    return experiment;
  }
  const arm = group === "control" ? experiment.control : experiment.variant;
  const metrics = arm.metrics;

  metrics.total++;
  if (outcome.accepted) {
    metrics.successes++;
  }
  metrics.acceptRate = metrics.total > 0 ? metrics.successes / metrics.total : 0;

  if (outcome.timeToAcceptMs !== void 0) {
    metrics.timeSum += outcome.timeToAcceptMs;
    metrics.avgTimeToAccept = metrics.timeSum / metrics.total;
  }

  if (outcome.compilable !== void 0) {
    if (outcome.compilable) {
      metrics.compilableCount++;
    }
    metrics.compilableRate = metrics.total > 0 ? metrics.compilableCount / metrics.total : 0;
  }

  const required = experiment.minObservations;
  const enoughData = experiment.control.metrics.total >= required && experiment.variant.metrics.total >= required;
  if (!enoughData) {
    return experiment;
  }
  const sig = testSignificance(experiment);
  return sig.pValue < experiment.significanceThreshold ? concludeExperiment(experiment, sig) : experiment;
}
|
|
3986
|
+
/**
 * Two-proportion z-test on the accept rates of the two arms.
 *
 * The z statistic uses the pooled standard error; the 95% confidence interval
 * of the difference (variant minus control) uses the unpooled standard error.
 *
 * @param {object} experiment - Experiment with `control.metrics`,
 *   `variant.metrics` and `significanceThreshold`.
 * @returns {{pValue: number, zScore: number, effectSize: number,
 *   confidenceInterval: number[], significant: boolean}}
 *   A neutral result (p = 1) when either arm has no observations.
 */
function testSignificance(experiment) {
  const control = experiment.control.metrics;
  const variant = experiment.variant.metrics;
  const nControl = control.total;
  const nVariant = variant.total;
  if (nControl === 0 || nVariant === 0) {
    return {
      pValue: 1,
      zScore: 0,
      effectSize: 0,
      confidenceInterval: [0, 0],
      significant: false
    };
  }
  const rateControl = control.acceptRate;
  const rateVariant = variant.acceptRate;
  const pooled = (control.successes + variant.successes) / (nControl + nVariant);
  const se = Math.sqrt(pooled * (1 - pooled) * (1 / nControl + 1 / nVariant));
  // A zero standard error (all successes or all failures) yields z = 0.
  const zScore = se > 0 ? (rateVariant - rateControl) / se : 0;
  const pValue = 2 * (1 - normalCDF(Math.abs(zScore)));
  const effectSize = rateVariant - rateControl;
  const seDiff = Math.sqrt(rateControl * (1 - rateControl) / nControl + rateVariant * (1 - rateVariant) / nVariant);
  const confidenceInterval = [effectSize - 1.96 * seDiff, effectSize + 1.96 * seDiff];
  const significant = pValue < experiment.significanceThreshold;
  return { pValue, zScore, effectSize, confidenceInterval, significant };
}
|
|
4017
|
+
/**
 * Cumulative distribution function of the standard normal distribution,
 * computed as 0.5 * (1 + erf(x / sqrt(2))) with the Abramowitz–Stegun
 * formula 7.1.26 approximation of erf (absolute error about 1.5e-7).
 *
 * @param {number} x - Value (z-score) to evaluate.
 * @returns {number} P(Z <= x); exactly 0 below -8 and exactly 1 above 8.
 */
function normalCDF(x) {
  if (x < -8) return 0;
  if (x > 8) return 1;
  // Coefficients of the A&S 7.1.26 polynomial.
  const a1 = 0.254829592;
  const a2 = -0.284496736;
  const a3 = 1.421413741;
  const a4 = -1.453152027;
  const a5 = 1.061405429;
  const p = 0.3275911;
  const sign = x < 0 ? -1 : 1;
  // The approximation is for erf(z) with z = |x| / sqrt(2). Both t and the
  // exponential must use z; using |x| in t (as before) gave Phi(1.96) ~ 0.981
  // instead of 0.975 and therefore understated p-values.
  const z = Math.abs(x) / Math.SQRT2;
  const t = 1 / (1 + p * z);
  const erf = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-z * z);
  return 0.5 * (1 + sign * erf);
}
|
|
4032
|
+
/**
 * Produce a concluded copy of an experiment from a significance result.
 *
 * The winner is the variant for a positive effect size, the control for a
 * negative one, and "no_difference" otherwise. The input experiment is not
 * modified.
 *
 * @param {object} experiment - Experiment record.
 * @param {{pValue: number, effectSize: number, confidenceInterval: number[]}} sig
 * @returns {object} Copy of the experiment with status "concluded", a
 *   `concludedAt` timestamp and a `conclusion` (winner, statistics, summary).
 */
function concludeExperiment(experiment, sig) {
  const { pValue, effectSize, confidenceInterval } = sig;
  let winner = "no_difference";
  let winnerName = "neither";
  if (effectSize > 0) {
    winner = "variant";
    winnerName = experiment.variant.name;
  } else if (effectSize < 0) {
    winner = "control";
    winnerName = experiment.control.name;
  }
  const improvement = (Math.abs(effectSize) * 100).toFixed(1);
  const ciLow = confidenceInterval[0].toFixed(3);
  const ciHigh = confidenceInterval[1].toFixed(3);
  const summary = `Experiment "${experiment.name}" concluded: ${winnerName} wins with ${improvement}% improvement (p=${pValue.toFixed(4)}, CI=[${ciLow}, ${ciHigh}])`;
  return {
    ...experiment,
    status: "concluded",
    concludedAt: new Date().toISOString(),
    conclusion: { winner, pValue, effectSize, confidenceInterval, summary }
  };
}
|
|
4049
|
+
/**
 * Find the first experiment that is still running.
 *
 * @param {Array<object>} experiments - Experiment records.
 * @returns {object|null} The first record with status "running", or null.
 */
function getActiveExperiment(experiments) {
  for (const candidate of experiments) {
    if (candidate.status === "running") {
      return candidate;
    }
  }
  return null;
}
|
|
4052
|
+
/**
 * Collect every experiment that has been concluded, in their original order.
 *
 * @param {Array<object>} experiments - Experiment records.
 * @returns {Array<object>} A new array with the records whose status is "concluded".
 */
function getConcludedExperiments(experiments) {
  const concluded = [];
  for (const experiment of experiments) {
    if (experiment.status === "concluded") {
      concluded.push(experiment);
    }
  }
  return concluded;
}
|
|
4055
|
+
function renderExperimentSummary(experiment) {
|
|
4056
|
+
const lines = [];
|
|
4057
|
+
lines.push(`Experiment: ${experiment.name} (${experiment.status})`);
|
|
4058
|
+
lines.push(` ${experiment.description}`);
|
|
4059
|
+
lines.push("");
|
|
4060
|
+
lines.push(` Control (${experiment.control.name}):`);
|
|
4061
|
+
lines.push(` Observations: ${experiment.control.metrics.total}`);
|
|
4062
|
+
lines.push(` Accept rate: ${(experiment.control.metrics.acceptRate * 100).toFixed(1)}%`);
|
|
4063
|
+
lines.push(` Variant (${experiment.variant.name}):`);
|
|
4064
|
+
lines.push(` Observations: ${experiment.variant.metrics.total}`);
|
|
4065
|
+
lines.push(` Accept rate: ${(experiment.variant.metrics.acceptRate * 100).toFixed(1)}%`);
|
|
4066
|
+
if (experiment.status === "running") {
|
|
4067
|
+
const sig = testSignificance(experiment);
|
|
4068
|
+
lines.push("");
|
|
4069
|
+
lines.push(` Current p-value: ${sig.pValue.toFixed(4)}`);
|
|
4070
|
+
lines.push(` Effect size: ${(sig.effectSize * 100).toFixed(1)}%`);
|
|
4071
|
+
lines.push(` Significant: ${sig.significant ? "YES" : "not yet"}`);
|
|
4072
|
+
}
|
|
4073
|
+
if (experiment.conclusion) {
|
|
4074
|
+
lines.push("");
|
|
4075
|
+
lines.push(` CONCLUSION: ${experiment.conclusion.summary}`);
|
|
4076
|
+
}
|
|
4077
|
+
return lines.join("\n");
|
|
4078
|
+
}
|
|
4079
|
+
|
|
4080
|
+
// src/engine/index.ts
|
|
4081
|
+
// Calls the multi-repo initializer once, as a top-level statement of this bundle.
// NOTE(review): init_multi_repo is defined outside this view — presumably an
// __esm lazy-init wrapper like the helper at the top of the bundle; confirm.
init_multi_repo();
|
|
4082
|
+
|
|
2429
4083
|
// src/engine/logger.ts
|
|
2430
4084
|
// Maps each log level name to a numeric rank: debug is lowest (0), error is highest (3).
var LEVEL_ORDER = { debug: 0, info: 1, warn: 2, error: 3 };
|
|
2431
4085
|
// Starting log level: taken from the CTO_LOG_LEVEL environment variable,
// falling back to "warn" when that variable is not set.
var currentLevel = process.env.CTO_LOG_LEVEL ?? "warn";
|
|
@@ -2505,34 +4159,55 @@ function wrapError(err, code, module, context) {
|
|
|
2505
4159
|
export {
|
|
2506
4160
|
CtoError,
|
|
2507
4161
|
analyzeProject,
|
|
4162
|
+
assignGroup,
|
|
2508
4163
|
auditProject,
|
|
2509
4164
|
bfsBidirectional,
|
|
2510
4165
|
boostByPath,
|
|
2511
4166
|
buildAdjacencyList,
|
|
2512
4167
|
buildIndex,
|
|
4168
|
+
buildIndexCached,
|
|
2513
4169
|
buildProjectGraph,
|
|
2514
4170
|
calculateCoverage,
|
|
2515
4171
|
classifyFileKind,
|
|
2516
4172
|
countTokensChars4,
|
|
2517
4173
|
countTokensTiktoken,
|
|
4174
|
+
createExperiment,
|
|
2518
4175
|
createLogger,
|
|
2519
4176
|
createProject,
|
|
4177
|
+
detectLanguage,
|
|
2520
4178
|
detectStack,
|
|
4179
|
+
discoverSiblingRepos,
|
|
4180
|
+
estimateComplexity,
|
|
2521
4181
|
estimateFileTokens,
|
|
2522
4182
|
estimateTokens,
|
|
2523
4183
|
extractPattern,
|
|
2524
4184
|
freeEncoder,
|
|
4185
|
+
getActiveExperiment,
|
|
4186
|
+
getCacheInfo,
|
|
4187
|
+
getConcludedExperiments,
|
|
2525
4188
|
getLearnerBoosts,
|
|
2526
4189
|
getLearnerStats,
|
|
2527
4190
|
getPruneLevelForRisk,
|
|
4191
|
+
invalidateCache,
|
|
2528
4192
|
isCtoError,
|
|
4193
|
+
loadExperiments,
|
|
2529
4194
|
loadLearner,
|
|
2530
4195
|
optimizeBudget,
|
|
4196
|
+
parseAllPolyglotImports,
|
|
4197
|
+
parseImports,
|
|
4198
|
+
parseSiblingPaths,
|
|
2531
4199
|
pruneFile,
|
|
2532
4200
|
pruneFiles,
|
|
2533
4201
|
query,
|
|
4202
|
+
querySiblingRepos,
|
|
4203
|
+
recordOutcome,
|
|
2534
4204
|
recordSelection,
|
|
4205
|
+
renderExperimentSummary,
|
|
4206
|
+
renderMultiRepoSummary,
|
|
4207
|
+
rerank,
|
|
4208
|
+
runContextPipeline,
|
|
2535
4209
|
sanitizeContent,
|
|
4210
|
+
saveExperiments,
|
|
2536
4211
|
saveLearner,
|
|
2537
4212
|
scanContentForSecrets,
|
|
2538
4213
|
scanFileForSecrets,
|
|
@@ -2543,6 +4218,7 @@ export {
|
|
|
2543
4218
|
setJsonLogging,
|
|
2544
4219
|
setLogLevel,
|
|
2545
4220
|
similarity,
|
|
4221
|
+
testSignificance,
|
|
2546
4222
|
tokenize,
|
|
2547
4223
|
walkProject,
|
|
2548
4224
|
wrapError
|