cto-ai-cli 6.1.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,530 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropNames = Object.getOwnPropertyNames;
3
+ var __esm = (fn, res) => function __init() {
4
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
5
+ };
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+
11
+ // src/engine/tfidf.ts
12
+ function buildIndex(files) {
13
+ const documents = /* @__PURE__ */ new Map();
14
+ const docFreq = /* @__PURE__ */ new Map();
15
+ for (const file of files) {
16
+ const terms = tokenize(file.content);
17
+ const termCounts = /* @__PURE__ */ new Map();
18
+ for (const term of terms) {
19
+ termCounts.set(term, (termCounts.get(term) ?? 0) + 1);
20
+ }
21
+ documents.set(file.relativePath, { terms: termCounts, length: terms.length });
22
+ for (const term of termCounts.keys()) {
23
+ docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
24
+ }
25
+ }
26
+ const totalDocs = files.length;
27
+ const idf = /* @__PURE__ */ new Map();
28
+ for (const [term, df] of docFreq) {
29
+ idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
30
+ }
31
+ let totalLength = 0;
32
+ for (const doc of documents.values()) totalLength += doc.length;
33
+ const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
34
+ return { documents, idf, avgDocLength, totalDocs };
35
+ }
36
+ function query(index, taskDescription, maxResults = 50) {
37
+ const queryTerms = tokenize(taskDescription);
38
+ if (queryTerms.length === 0) return [];
39
+ const querySet = /* @__PURE__ */ new Map();
40
+ for (const term of queryTerms) {
41
+ querySet.set(term, (querySet.get(term) ?? 0) + 1);
42
+ }
43
+ const results = [];
44
+ const k1 = 1.5;
45
+ const b = 0.75;
46
+ for (const [filePath, doc] of index.documents) {
47
+ let score = 0;
48
+ const matchedTerms = [];
49
+ for (const [qTerm, qCount] of querySet) {
50
+ const tf = doc.terms.get(qTerm) ?? 0;
51
+ if (tf === 0) continue;
52
+ const termIdf = index.idf.get(qTerm) ?? 0;
53
+ if (termIdf <= 0) continue;
54
+ const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
55
+ score += termIdf * tfNorm * qCount;
56
+ matchedTerms.push(qTerm);
57
+ }
58
+ if (score > 0) {
59
+ results.push({ filePath, score, matchedTerms });
60
+ }
61
+ }
62
+ const maxScore = results.reduce((max, r) => Math.max(max, r.score), 0);
63
+ if (maxScore > 0) {
64
+ for (const r of results) r.score = r.score / maxScore;
65
+ }
66
+ return results.sort((a, b2) => b2.score - a.score).slice(0, maxResults);
67
+ }
68
+ function similarity(index, pathA, pathB) {
69
+ const docA = index.documents.get(pathA);
70
+ const docB = index.documents.get(pathB);
71
+ if (!docA || !docB) return 0;
72
+ let dotProduct = 0;
73
+ let normA = 0;
74
+ let normB = 0;
75
+ const allTerms = /* @__PURE__ */ new Set([...docA.terms.keys(), ...docB.terms.keys()]);
76
+ for (const term of allTerms) {
77
+ const idf = index.idf.get(term) ?? 0;
78
+ const wA = (docA.terms.get(term) ?? 0) * idf;
79
+ const wB = (docB.terms.get(term) ?? 0) * idf;
80
+ dotProduct += wA * wB;
81
+ normA += wA * wA;
82
+ normB += wB * wB;
83
+ }
84
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
85
+ return denom > 0 ? dotProduct / denom : 0;
86
+ }
87
+ function tokenize(text) {
88
+ const tokens = [];
89
+ const rawTokens = text.match(/[a-zA-Z][a-zA-Z0-9]*|[0-9]+/g) ?? [];
90
+ for (const raw of rawTokens) {
91
+ const parts = raw.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/);
92
+ for (const part of parts) {
93
+ if (part.length < 2) continue;
94
+ const stemmed = stem(part);
95
+ if (stemmed.length < 2) continue;
96
+ if (STOP_WORDS.has(stemmed)) continue;
97
+ tokens.push(stemmed);
98
+ }
99
+ }
100
+ return tokens;
101
+ }
102
+ function stem(word) {
103
+ let w = word.toLowerCase();
104
+ for (const [prefix, root] of TERM_FAMILIES) {
105
+ if (w.startsWith(prefix) || w === root) return root;
106
+ }
107
+ if (w.endsWith("ication") && w.length > 9) return w.slice(0, -7);
108
+ if (w.endsWith("ation") && w.length > 7) return w.slice(0, -5);
109
+ if (w.endsWith("tion") && w.length > 6) return w.slice(0, -4);
110
+ if (w.endsWith("sion") && w.length > 6) return w.slice(0, -4);
111
+ if (w.endsWith("ment") && w.length > 6) return w.slice(0, -4);
112
+ if (w.endsWith("ness") && w.length > 6) return w.slice(0, -4);
113
+ if (w.endsWith("able") && w.length > 6) return w.slice(0, -4);
114
+ if (w.endsWith("ible") && w.length > 6) return w.slice(0, -4);
115
+ if (w.endsWith("ator") && w.length > 6) return w.slice(0, -4);
116
+ if (w.endsWith("izer") && w.length > 6) return w.slice(0, -4);
117
+ if (w.endsWith("ing") && w.length > 5) return w.slice(0, -3);
118
+ if (w.endsWith("ies") && w.length > 4) return w.slice(0, -3) + "y";
119
+ if (w.endsWith("ous") && w.length > 5) return w.slice(0, -3);
120
+ if (w.endsWith("ful") && w.length > 5) return w.slice(0, -3);
121
+ if (w.endsWith("ity") && w.length > 5) return w.slice(0, -3);
122
+ if (w.endsWith("ive") && w.length > 5) return w.slice(0, -3);
123
+ if (w.endsWith("ion") && w.length > 5) return w.slice(0, -3);
124
+ if (w.endsWith("ed") && w.length > 4) return w.slice(0, -2);
125
+ if (w.endsWith("er") && w.length > 4) return w.slice(0, -2);
126
+ if (w.endsWith("ly") && w.length > 4) return w.slice(0, -2);
127
+ if (w.endsWith("al") && w.length > 4) return w.slice(0, -2);
128
+ if (w.endsWith("s") && !w.endsWith("ss") && w.length > 3) return w.slice(0, -1);
129
+ return w;
130
+ }
131
+ function boostByPath(matches, allFiles, taskDescription) {
132
+ const queryTerms = new Set(tokenize(taskDescription));
133
+ const boosted = /* @__PURE__ */ new Map();
134
+ for (const m of matches) {
135
+ boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
136
+ }
137
+ for (const filePath of allFiles) {
138
+ const pathTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
139
+ const pathMatches = pathTerms.filter((t) => queryTerms.has(t));
140
+ if (pathMatches.length > 0) {
141
+ const existing = boosted.get(filePath);
142
+ const pathBoost = pathMatches.length * 0.3;
143
+ if (existing) {
144
+ existing.score = Math.min(1, existing.score + pathBoost);
145
+ for (const t of pathMatches) {
146
+ if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
147
+ }
148
+ } else {
149
+ boosted.set(filePath, {
150
+ filePath,
151
+ score: Math.min(1, pathBoost),
152
+ matchedTerms: pathMatches
153
+ });
154
+ }
155
+ }
156
+ }
157
+ return [...boosted.values()].sort((a, b) => b.score - a.score);
158
+ }
159
+ var STOP_WORDS, TERM_FAMILIES;
160
+ var init_tfidf = __esm({
161
+ "src/engine/tfidf.ts"() {
162
+ "use strict";
163
+ STOP_WORDS = /* @__PURE__ */ new Set([
164
+ // Language keywords
165
+ "import",
166
+ "export",
167
+ "from",
168
+ "const",
169
+ "let",
170
+ "var",
171
+ "function",
172
+ "class",
173
+ "interface",
174
+ "type",
175
+ "return",
176
+ "async",
177
+ "await",
178
+ "new",
179
+ "this",
180
+ "that",
181
+ "true",
182
+ "false",
183
+ "null",
184
+ "undefined",
185
+ "void",
186
+ "string",
187
+ "number",
188
+ "boolean",
189
+ "any",
190
+ "unknown",
191
+ "never",
192
+ "object",
193
+ "array",
194
+ "promise",
195
+ "if",
196
+ "else",
197
+ "for",
198
+ "while",
199
+ "do",
200
+ "switch",
201
+ "case",
202
+ "break",
203
+ "continue",
204
+ "try",
205
+ "catch",
206
+ "throw",
207
+ "finally",
208
+ "default",
209
+ "extends",
210
+ "implements",
211
+ "static",
212
+ "private",
213
+ "public",
214
+ "protected",
215
+ "readonly",
216
+ "abstract",
217
+ "override",
218
+ "super",
219
+ "typeof",
220
+ "instanceof",
221
+ "in",
222
+ "of",
223
+ "as",
224
+ "is",
225
+ "keyof",
226
+ "enum",
227
+ "namespace",
228
+ "module",
229
+ "declare",
230
+ // Python
231
+ "def",
232
+ "self",
233
+ "cls",
234
+ "none",
235
+ "pass",
236
+ "yield",
237
+ "lambda",
238
+ "with",
239
+ "elif",
240
+ "except",
241
+ "raise",
242
+ "assert",
243
+ "global",
244
+ "nonlocal",
245
+ // Natural language stop words only — NOT domain terms that carry signal
246
+ "the",
247
+ "and",
248
+ "for",
249
+ "with",
250
+ "not",
251
+ "but",
252
+ "are",
253
+ "was",
254
+ "were",
255
+ "has",
256
+ "have",
257
+ "had",
258
+ "will",
259
+ "would",
260
+ "could",
261
+ "should",
262
+ "may",
263
+ "can",
264
+ "its",
265
+ "also",
266
+ "than",
267
+ "then",
268
+ "into",
269
+ "only",
270
+ "very",
271
+ "just",
272
+ "about",
273
+ "being",
274
+ "been",
275
+ "does",
276
+ "did",
277
+ "doing",
278
+ "todo",
279
+ "fixme",
280
+ "hack",
281
+ "note",
282
+ "xxx"
283
+ ]);
284
+ TERM_FAMILIES = [
285
+ ["authenticat", "auth"],
286
+ ["authori", "auth"],
287
+ ["configur", "config"],
288
+ ["connect", "connect"],
289
+ ["request", "request"],
290
+ ["response", "respons"],
291
+ ["middlewar", "middlewar"],
292
+ ["validat", "valid"],
293
+ ["initiali", "init"],
294
+ ["subscri", "subscrib"],
295
+ ["transform", "transform"],
296
+ ["seriali", "serial"],
297
+ ["deseriali", "serial"],
298
+ ["dependen", "depend"],
299
+ ["environ", "environ"],
300
+ ["permiss", "permiss"],
301
+ ["migrat", "migrat"],
302
+ ["transact", "transact"],
303
+ ["encryp", "encrypt"],
304
+ ["decryp", "encrypt"]
305
+ ];
306
+ }
307
+ });
308
+
309
+ // src/engine/multi-repo.ts
310
+ var multi_repo_exports = {};
311
+ __export(multi_repo_exports, {
312
+ discoverSiblingRepos: () => discoverSiblingRepos,
313
+ parseSiblingPaths: () => parseSiblingPaths,
314
+ querySiblingRepos: () => querySiblingRepos,
315
+ renderMultiRepoSummary: () => renderMultiRepoSummary
316
+ });
317
+ import { readdirSync, readFileSync as readFileSync5, statSync as statSync2, existsSync as existsSync5 } from "fs";
318
+ import { join as join7, basename as basename3, resolve as resolve5, relative as relative5 } from "path";
319
+ function discoverSiblingRepos(projectPath) {
320
+ const absProject = resolve5(projectPath);
321
+ const parentDir = join7(absProject, "..");
322
+ const projectName = basename3(absProject);
323
+ const siblings = [];
324
+ let entries;
325
+ try {
326
+ entries = readdirSync(parentDir);
327
+ } catch {
328
+ return [];
329
+ }
330
+ for (const entry of entries) {
331
+ if (entry === projectName) continue;
332
+ if (entry.startsWith(".")) continue;
333
+ if (SKIP_DIRS.has(entry)) continue;
334
+ const candidatePath = join7(parentDir, entry);
335
+ try {
336
+ if (!statSync2(candidatePath).isDirectory()) continue;
337
+ } catch {
338
+ continue;
339
+ }
340
+ const hasMarker = REPO_MARKERS.some((marker) => {
341
+ try {
342
+ return existsSync5(join7(candidatePath, marker));
343
+ } catch {
344
+ return false;
345
+ }
346
+ });
347
+ if (!hasMarker) continue;
348
+ const stack = detectStack2(candidatePath);
349
+ siblings.push({
350
+ path: candidatePath,
351
+ name: entry,
352
+ stack,
353
+ fileCount: 0
354
+ // filled during indexing
355
+ });
356
+ }
357
+ return siblings;
358
+ }
359
+ function detectStack2(repoPath) {
360
+ const stack = [];
361
+ try {
362
+ if (existsSync5(join7(repoPath, "tsconfig.json"))) stack.push("TypeScript");
363
+ if (existsSync5(join7(repoPath, "package.json"))) stack.push("Node.js");
364
+ if (existsSync5(join7(repoPath, "Cargo.toml"))) stack.push("Rust");
365
+ if (existsSync5(join7(repoPath, "go.mod"))) stack.push("Go");
366
+ if (existsSync5(join7(repoPath, "pyproject.toml"))) stack.push("Python");
367
+ if (existsSync5(join7(repoPath, "pom.xml"))) stack.push("Java");
368
+ } catch {
369
+ }
370
+ return stack;
371
+ }
372
+ function listSourceFiles(repoPath, maxFiles = MAX_FILES_PER_REPO) {
373
+ const files = [];
374
+ function walk(dir, depth) {
375
+ if (depth > 8 || files.length >= maxFiles) return;
376
+ let entries;
377
+ try {
378
+ entries = readdirSync(dir);
379
+ } catch {
380
+ return;
381
+ }
382
+ for (const entry of entries) {
383
+ if (files.length >= maxFiles) return;
384
+ if (entry.startsWith(".")) continue;
385
+ if (SKIP_DIRS.has(entry)) continue;
386
+ const fullPath = join7(dir, entry);
387
+ try {
388
+ const stat3 = statSync2(fullPath);
389
+ if (stat3.isDirectory()) {
390
+ walk(fullPath, depth + 1);
391
+ } else if (stat3.isFile() && stat3.size <= MAX_FILE_SIZE) {
392
+ const ext = entry.split(".").pop()?.toLowerCase() ?? "";
393
+ if (SOURCE_EXTENSIONS.has(ext)) {
394
+ files.push(relative5(repoPath, fullPath));
395
+ }
396
+ }
397
+ } catch {
398
+ }
399
+ }
400
+ }
401
+ walk(repoPath, 0);
402
+ return files;
403
+ }
404
+ function indexSiblingRepo(repo) {
405
+ const filePaths = listSourceFiles(repo.path);
406
+ repo.fileCount = filePaths.length;
407
+ const contents = [];
408
+ const contentMap = /* @__PURE__ */ new Map();
409
+ for (const relPath of filePaths) {
410
+ try {
411
+ const content = readFileSync5(join7(repo.path, relPath), "utf-8");
412
+ contents.push({ relativePath: relPath, content });
413
+ contentMap.set(relPath, content);
414
+ } catch {
415
+ contents.push({ relativePath: relPath, content: "" });
416
+ }
417
+ }
418
+ return { contents, contentMap };
419
+ }
420
+ function querySiblingRepos(siblings, task, maxPerRepo = 5, minScore = 0.3) {
421
+ const startTime = performance.now();
422
+ const allMatches = [];
423
+ for (const repo of siblings) {
424
+ const { contents, contentMap } = indexSiblingRepo(repo);
425
+ if (contents.length === 0) continue;
426
+ const index = buildIndex(contents);
427
+ const matches = query(index, task, maxPerRepo * 2);
428
+ const boosted = boostByPath(
429
+ matches,
430
+ contents.map((c) => c.relativePath),
431
+ task
432
+ );
433
+ for (const match of boosted.slice(0, maxPerRepo)) {
434
+ if (match.score < minScore) continue;
435
+ const content = contentMap.get(match.filePath) ?? "";
436
+ const tokens = Math.ceil(content.length / 4);
437
+ allMatches.push({
438
+ repoName: repo.name,
439
+ repoPath: repo.path,
440
+ relativePath: match.filePath,
441
+ absolutePath: join7(repo.path, match.filePath),
442
+ score: match.score,
443
+ content,
444
+ tokens
445
+ });
446
+ }
447
+ }
448
+ allMatches.sort((a, b) => b.score - a.score);
449
+ return {
450
+ siblings,
451
+ matches: allMatches,
452
+ timeMs: Math.round(performance.now() - startTime)
453
+ };
454
+ }
455
+ function parseSiblingPaths(pathsStr, projectPath) {
456
+ const absProject = resolve5(projectPath);
457
+ return pathsStr.split(",").map((p) => p.trim()).filter((p) => p.length > 0).map((p) => {
458
+ const absPath = resolve5(join7(absProject, ".."), p);
459
+ return {
460
+ path: absPath,
461
+ name: basename3(absPath),
462
+ stack: detectStack2(absPath),
463
+ fileCount: 0
464
+ };
465
+ }).filter((repo) => existsSync5(repo.path));
466
+ }
467
+ function renderMultiRepoSummary(result) {
468
+ const lines = [];
469
+ if (result.siblings.length === 0) {
470
+ lines.push(" No sibling repos found.");
471
+ return lines.join("\n");
472
+ }
473
+ lines.push(` Sibling repos scanned: ${result.siblings.length} (${result.timeMs}ms)`);
474
+ for (const repo of result.siblings) {
475
+ lines.push(` ${repo.name}/ \u2014 ${repo.fileCount} files [${repo.stack.join(", ") || "unknown"}]`);
476
+ }
477
+ if (result.matches.length === 0) {
478
+ lines.push(" No relevant files found in sibling repos.");
479
+ } else {
480
+ lines.push(` Cross-repo matches: ${result.matches.length}`);
481
+ for (const m of result.matches.slice(0, 10)) {
482
+ const pct = Math.round(m.score * 100);
483
+ lines.push(` ${m.repoName}/${m.relativePath} sem: ${pct}% (~${Math.round(m.tokens / 1e3)}K tok)`);
484
+ }
485
+ }
486
+ return lines.join("\n");
487
+ }
488
+ var REPO_MARKERS, SKIP_DIRS, SOURCE_EXTENSIONS, MAX_FILES_PER_REPO, MAX_FILE_SIZE;
489
+ var init_multi_repo = __esm({
490
+ "src/engine/multi-repo.ts"() {
491
+ "use strict";
492
+ init_tfidf();
493
+ REPO_MARKERS = ["package.json", "tsconfig.json", "Cargo.toml", "go.mod", "pyproject.toml", "pom.xml"];
494
+ SKIP_DIRS = /* @__PURE__ */ new Set(["node_modules", ".git", "dist", "build", ".next", "__pycache__", "target", "vendor"]);
495
+ SOURCE_EXTENSIONS = /* @__PURE__ */ new Set([
496
+ "ts",
497
+ "tsx",
498
+ "js",
499
+ "jsx",
500
+ "mjs",
501
+ "cjs",
502
+ "py",
503
+ "rs",
504
+ "go",
505
+ "java",
506
+ "kt",
507
+ "rb",
508
+ "c",
509
+ "cpp",
510
+ "h",
511
+ "hpp",
512
+ "cs",
513
+ "json",
514
+ "yaml",
515
+ "yml",
516
+ "toml",
517
+ "md",
518
+ "txt"
519
+ ]);
520
+ MAX_FILES_PER_REPO = 500;
521
+ MAX_FILE_SIZE = 1e5;
522
+ }
523
+ });
524
+
1
525
  // src/engine/analyzer.ts
2
526
  import { readFile as readFile2, readdir, stat as stat2 } from "fs/promises";
3
- import { join as join2, extname, relative as relative2, resolve as resolve2, basename as basename2 } from "path";
527
+ import { join as join3, extname, relative as relative3, resolve as resolve3, basename as basename2 } from "path";
4
528
  import { createHash } from "crypto";
5
529
 
6
530
  // src/types/engine.ts
@@ -100,12 +624,727 @@ function freeEncoder() {
100
624
 
101
625
  // src/engine/graph.ts
102
626
  import { Project, SyntaxKind } from "ts-morph";
103
- import { resolve, relative, dirname, join } from "path";
104
- import { existsSync } from "fs";
627
+ import { resolve as resolve2, relative as relative2, dirname as dirname2, join as join2 } from "path";
628
+ import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
629
+
630
+ // src/engine/polyglot-graph.ts
631
+ import { readFileSync } from "fs";
632
+ import { join, dirname } from "path";
633
+ var LANG_EXTENSIONS = {
634
+ "py": "python",
635
+ "pyw": "python",
636
+ "go": "go",
637
+ "java": "java",
638
+ "rs": "rust",
639
+ "ts": "typescript",
640
+ "tsx": "typescript",
641
+ "js": "typescript",
642
+ "jsx": "typescript",
643
+ "mts": "typescript",
644
+ "mjs": "typescript",
645
+ "cts": "typescript",
646
+ "cjs": "typescript"
647
+ };
648
+ function detectLanguage(filePath) {
649
+ const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
650
+ return LANG_EXTENSIONS[ext] ?? null;
651
+ }
652
+ function parseImports(filePath, relativePath, projectPath, allRelativePaths, content) {
653
+ const lang = detectLanguage(relativePath);
654
+ if (!lang || lang === "typescript") return [];
655
+ const src = content ?? safeReadFile(filePath);
656
+ if (!src) return [];
657
+ const edges = [];
658
+ let specs;
659
+ switch (lang) {
660
+ case "python":
661
+ specs = parsePythonImports(src);
662
+ break;
663
+ case "go":
664
+ specs = parseGoImports(src);
665
+ break;
666
+ case "java":
667
+ specs = parseJavaImports(src);
668
+ break;
669
+ case "rust":
670
+ specs = parseRustImports(src);
671
+ break;
672
+ default:
673
+ return [];
674
+ }
675
+ for (const spec of specs) {
676
+ const resolved = resolveImportSpec(spec, relativePath, projectPath, allRelativePaths, lang);
677
+ if (resolved) {
678
+ edges.push({ from: relativePath, to: resolved, type: "import" });
679
+ }
680
+ }
681
+ return edges;
682
+ }
683
+ function parseAllPolyglotImports(files, projectPath) {
684
+ const allPaths = new Set(files.map((f) => f.relativePath));
685
+ const edges = [];
686
+ for (const file of files) {
687
+ const lang = detectLanguage(file.relativePath);
688
+ if (!lang || lang === "typescript") continue;
689
+ const fileEdges = parseImports(
690
+ file.absolutePath,
691
+ file.relativePath,
692
+ projectPath,
693
+ allPaths,
694
+ file.content
695
+ );
696
+ edges.push(...fileEdges);
697
+ }
698
+ return edges;
699
+ }
700
+ function estimateComplexity(content, lang) {
701
+ let complexity = 1;
702
+ const lines = content.split("\n");
703
+ const patterns = {
704
+ python: [
705
+ /^\s*if\s/,
706
+ /^\s*elif\s/,
707
+ /^\s*for\s/,
708
+ /^\s*while\s/,
709
+ /^\s*except\s/,
710
+ /\sif\s.*\selse\s/,
711
+ // ternary
712
+ /\sand\s/,
713
+ /\sor\s/
714
+ ],
715
+ go: [
716
+ /^\s*if\s/,
717
+ /^\s*for\s/,
718
+ /^\s*case\s/,
719
+ /^\s*select\s*{/,
720
+ /&&/,
721
+ /\|\|/
722
+ ],
723
+ java: [
724
+ /^\s*if\s*\(/,
725
+ /^\s*for\s*\(/,
726
+ /^\s*while\s*\(/,
727
+ /^\s*case\s/,
728
+ /^\s*catch\s*\(/,
729
+ /\?\s/,
730
+ // ternary
731
+ /&&/,
732
+ /\|\|/
733
+ ],
734
+ rust: [
735
+ /^\s*if\s/,
736
+ /^\s*for\s/,
737
+ /^\s*while\s/,
738
+ /^\s*match\s/,
739
+ /=>\s/,
740
+ // match arms
741
+ /&&/,
742
+ /\|\|/
743
+ ],
744
+ typescript: []
745
+ // handled by ts-morph
746
+ };
747
+ const langPatterns = patterns[lang];
748
+ for (const line of lines) {
749
+ for (const pattern of langPatterns) {
750
+ if (pattern.test(line)) {
751
+ complexity++;
752
+ break;
753
+ }
754
+ }
755
+ }
756
+ return complexity;
757
+ }
758
+ var PYTHON_STDLIB = /* @__PURE__ */ new Set([
759
+ "__future__",
760
+ "abc",
761
+ "aifc",
762
+ "argparse",
763
+ "array",
764
+ "ast",
765
+ "asynchat",
766
+ "asyncio",
767
+ "asyncore",
768
+ "atexit",
769
+ "audioop",
770
+ "base64",
771
+ "bdb",
772
+ "binascii",
773
+ "binhex",
774
+ "bisect",
775
+ "builtins",
776
+ "bz2",
777
+ "calendar",
778
+ "cgi",
779
+ "cgitb",
780
+ "chunk",
781
+ "cmath",
782
+ "cmd",
783
+ "code",
784
+ "codecs",
785
+ "codeop",
786
+ "collections",
787
+ "colorsys",
788
+ "compileall",
789
+ "concurrent",
790
+ "configparser",
791
+ "contextlib",
792
+ "contextvars",
793
+ "copy",
794
+ "copyreg",
795
+ "cProfile",
796
+ "crypt",
797
+ "csv",
798
+ "ctypes",
799
+ "curses",
800
+ "dataclasses",
801
+ "datetime",
802
+ "dbm",
803
+ "decimal",
804
+ "difflib",
805
+ "dis",
806
+ "distutils",
807
+ "doctest",
808
+ "email",
809
+ "encodings",
810
+ "enum",
811
+ "errno",
812
+ "faulthandler",
813
+ "fcntl",
814
+ "filecmp",
815
+ "fileinput",
816
+ "fnmatch",
817
+ "fractions",
818
+ "ftplib",
819
+ "functools",
820
+ "gc",
821
+ "getopt",
822
+ "getpass",
823
+ "gettext",
824
+ "glob",
825
+ "grp",
826
+ "gzip",
827
+ "hashlib",
828
+ "heapq",
829
+ "hmac",
830
+ "html",
831
+ "http",
832
+ "idlelib",
833
+ "imaplib",
834
+ "imghdr",
835
+ "imp",
836
+ "importlib",
837
+ "inspect",
838
+ "io",
839
+ "ipaddress",
840
+ "itertools",
841
+ "json",
842
+ "keyword",
843
+ "lib2to3",
844
+ "linecache",
845
+ "locale",
846
+ "logging",
847
+ "lzma",
848
+ "mailbox",
849
+ "mailcap",
850
+ "marshal",
851
+ "math",
852
+ "mimetypes",
853
+ "mmap",
854
+ "modulefinder",
855
+ "multiprocessing",
856
+ "netrc",
857
+ "nis",
858
+ "nntplib",
859
+ "numbers",
860
+ "operator",
861
+ "optparse",
862
+ "os",
863
+ "ossaudiodev",
864
+ "pathlib",
865
+ "pdb",
866
+ "pickle",
867
+ "pickletools",
868
+ "pipes",
869
+ "pkgutil",
870
+ "platform",
871
+ "plistlib",
872
+ "poplib",
873
+ "posix",
874
+ "posixpath",
875
+ "pprint",
876
+ "profile",
877
+ "pstats",
878
+ "pty",
879
+ "pwd",
880
+ "py_compile",
881
+ "pyclbr",
882
+ "pydoc",
883
+ "queue",
884
+ "quopri",
885
+ "random",
886
+ "re",
887
+ "readline",
888
+ "reprlib",
889
+ "resource",
890
+ "rlcompleter",
891
+ "runpy",
892
+ "sched",
893
+ "secrets",
894
+ "select",
895
+ "selectors",
896
+ "shelve",
897
+ "shlex",
898
+ "shutil",
899
+ "signal",
900
+ "site",
901
+ "smtpd",
902
+ "smtplib",
903
+ "sndhdr",
904
+ "socket",
905
+ "socketserver",
906
+ "spwd",
907
+ "sqlite3",
908
+ "ssl",
909
+ "stat",
910
+ "statistics",
911
+ "string",
912
+ "stringprep",
913
+ "struct",
914
+ "subprocess",
915
+ "sunau",
916
+ "symtable",
917
+ "sys",
918
+ "sysconfig",
919
+ "syslog",
920
+ "tabnanny",
921
+ "tarfile",
922
+ "telnetlib",
923
+ "tempfile",
924
+ "termios",
925
+ "test",
926
+ "textwrap",
927
+ "threading",
928
+ "time",
929
+ "timeit",
930
+ "tkinter",
931
+ "token",
932
+ "tokenize",
933
+ "tomllib",
934
+ "trace",
935
+ "traceback",
936
+ "tracemalloc",
937
+ "tty",
938
+ "turtle",
939
+ "turtledemo",
940
+ "types",
941
+ "typing",
942
+ "unicodedata",
943
+ "unittest",
944
+ "urllib",
945
+ "uu",
946
+ "uuid",
947
+ "venv",
948
+ "warnings",
949
+ "wave",
950
+ "weakref",
951
+ "webbrowser",
952
+ "winreg",
953
+ "winsound",
954
+ "wsgiref",
955
+ "xdrlib",
956
+ "xml",
957
+ "xmlrpc",
958
+ "zipapp",
959
+ "zipfile",
960
+ "zipimport",
961
+ "zlib",
962
+ "_thread"
963
+ ]);
964
+ function isPythonStdlib(module) {
965
+ const topLevel = module.split(".")[0];
966
+ return PYTHON_STDLIB.has(topLevel);
967
+ }
968
+ function parsePythonImports(content) {
969
+ const specs = [];
970
+ const joined = content.replace(/\(\s*\n([^)]*?)\)/gs, (_, inner) => {
971
+ return "(" + inner.replace(/\n/g, " ").replace(/\s+/g, " ") + ")";
972
+ });
973
+ const lines = joined.split("\n");
974
+ for (const line of lines) {
975
+ const trimmed = line.trimStart();
976
+ if (trimmed.startsWith("#")) continue;
977
+ const fromMatch = trimmed.match(/^from\s+(\.{0,10}[\w.]*)\s+import\s+(.+)/);
978
+ if (fromMatch) {
979
+ const raw = fromMatch[1];
980
+ const isRelative = raw.startsWith(".");
981
+ if (!isRelative && isPythonStdlib(raw)) continue;
982
+ specs.push({ raw, isRelative });
983
+ continue;
984
+ }
985
+ const importMatch = trimmed.match(/^import\s+(.+)/);
986
+ if (importMatch) {
987
+ const modules = importMatch[1].split(",").map((m) => m.trim().split(/\s+as\s+/)[0].trim());
988
+ for (const mod of modules) {
989
+ if (!mod || !mod.match(/^[\w.]+$/)) continue;
990
+ if (isPythonStdlib(mod)) continue;
991
+ specs.push({ raw: mod, isRelative: false });
992
+ }
993
+ }
994
+ }
995
+ return specs;
996
+ }
997
+ var GO_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
998
+ "archive",
999
+ "bufio",
1000
+ "bytes",
1001
+ "cmp",
1002
+ "compress",
1003
+ "container",
1004
+ "context",
1005
+ "crypto",
1006
+ "database",
1007
+ "debug",
1008
+ "embed",
1009
+ "encoding",
1010
+ "errors",
1011
+ "expvar",
1012
+ "flag",
1013
+ "fmt",
1014
+ "go",
1015
+ "hash",
1016
+ "html",
1017
+ "image",
1018
+ "index",
1019
+ "internal",
1020
+ "io",
1021
+ "iter",
1022
+ "log",
1023
+ "maps",
1024
+ "math",
1025
+ "mime",
1026
+ "net",
1027
+ "os",
1028
+ "path",
1029
+ "plugin",
1030
+ "reflect",
1031
+ "regexp",
1032
+ "runtime",
1033
+ "slices",
1034
+ "sort",
1035
+ "strconv",
1036
+ "strings",
1037
+ "structs",
1038
+ "sync",
1039
+ "syscall",
1040
+ "testing",
1041
+ "text",
1042
+ "time",
1043
+ "unicode",
1044
+ "unsafe"
1045
+ ]);
1046
+ function isGoStdlib(importPath) {
1047
+ const firstSegment = importPath.split("/")[0];
1048
+ if (firstSegment.includes(".")) return false;
1049
+ return GO_STDLIB_PREFIXES.has(firstSegment);
1050
+ }
1051
+ function parseGoImports(content) {
1052
+ const specs = [];
1053
+ const singlePattern = /^\s*import\s+(?:[\w_.]+\s+)?"([^"]+)"/gm;
1054
+ let match;
1055
+ while ((match = singlePattern.exec(content)) !== null) {
1056
+ const pkg = match[1];
1057
+ if (isGoStdlib(pkg)) continue;
1058
+ specs.push({ raw: pkg, isRelative: false });
1059
+ }
1060
+ const blockPattern = /import\s*\(([\s\S]*?)\)/g;
1061
+ while ((match = blockPattern.exec(content)) !== null) {
1062
+ const block = match[1];
1063
+ const linePattern = /(?:[\w_.]+\s+)?"([^"]+)"/g;
1064
+ let lineMatch;
1065
+ while ((lineMatch = linePattern.exec(block)) !== null) {
1066
+ const pkg = lineMatch[1];
1067
+ if (isGoStdlib(pkg)) continue;
1068
+ specs.push({ raw: pkg, isRelative: false });
1069
+ }
1070
+ }
1071
+ return specs;
1072
+ }
1073
+ var JAVA_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
1074
+ "java",
1075
+ "javax",
1076
+ "jdk",
1077
+ "sun",
1078
+ "com.sun",
1079
+ "org.w3c",
1080
+ "org.xml",
1081
+ "org.ietf"
1082
+ ]);
1083
+ function isJavaStdlib(importPath) {
1084
+ for (const prefix of JAVA_STDLIB_PREFIXES) {
1085
+ if (importPath === prefix || importPath.startsWith(prefix + ".")) return true;
1086
+ }
1087
+ return false;
1088
+ }
1089
+ function parseJavaImports(content) {
1090
+ const specs = [];
1091
+ const pattern = /^\s*import\s+(?:static\s+)?([\w.*]+)\s*;/gm;
1092
+ let match;
1093
+ while ((match = pattern.exec(content)) !== null) {
1094
+ const raw = match[1];
1095
+ if (isJavaStdlib(raw)) continue;
1096
+ specs.push({ raw, isRelative: false });
1097
+ }
1098
+ return specs;
1099
+ }
1100
+ function parseRustImports(content) {
1101
+ const specs = [];
1102
+ const usePattern = /^\s*(?:pub\s+)?use\s+((?:crate|super|self)(?:::\w+)*)/gm;
1103
+ let match;
1104
+ while ((match = usePattern.exec(content)) !== null) {
1105
+ const raw = match[1];
1106
+ const isRelative = raw.startsWith("super") || raw.startsWith("self");
1107
+ specs.push({ raw, isRelative });
1108
+ }
1109
+ const modPattern = /^\s*(?:pub\s+)?mod\s+(\w+)\s*;/gm;
1110
+ while ((match = modPattern.exec(content)) !== null) {
1111
+ specs.push({ raw: `mod::${match[1]}`, isRelative: true });
1112
+ }
1113
+ return specs;
1114
+ }
1115
+ function resolveImportSpec(spec, fromRelativePath, projectPath, allPaths, lang) {
1116
+ switch (lang) {
1117
+ case "python":
1118
+ return resolvePythonImport(spec, fromRelativePath, allPaths);
1119
+ case "go":
1120
+ return resolveGoImport(spec, fromRelativePath, projectPath, allPaths);
1121
+ case "java":
1122
+ return resolveJavaImport(spec, allPaths);
1123
+ case "rust":
1124
+ return resolveRustImport(spec, fromRelativePath, allPaths);
1125
+ default:
1126
+ return null;
1127
+ }
1128
+ }
1129
+ function resolvePythonImport(spec, fromRelativePath, allPaths) {
1130
+ if (spec.isRelative) {
1131
+ const dots = spec.raw.match(/^\.+/)?.[0].length ?? 0;
1132
+ const modulePart = spec.raw.slice(dots);
1133
+ let baseDir = dirname(fromRelativePath);
1134
+ for (let i = 1; i < dots; i++) {
1135
+ baseDir = dirname(baseDir);
1136
+ }
1137
+ if (!modulePart) {
1138
+ return tryResolvePython(baseDir, "", allPaths);
1139
+ }
1140
+ const modulePath2 = modulePart.replace(/\./g, "/");
1141
+ return tryResolvePython(baseDir, modulePath2, allPaths);
1142
+ }
1143
+ const modulePath = spec.raw.replace(/\./g, "/");
1144
+ return tryResolvePython("", modulePath, allPaths);
1145
+ }
1146
+ function tryResolvePython(baseDir, modulePath, allPaths) {
1147
+ const candidates = [];
1148
+ if (!modulePath) {
1149
+ candidates.push(join(baseDir, "__init__.py"));
1150
+ } else {
1151
+ candidates.push(
1152
+ join(baseDir, `${modulePath}.py`),
1153
+ join(baseDir, modulePath, "__init__.py")
1154
+ );
1155
+ if (baseDir) {
1156
+ candidates.push(
1157
+ `${modulePath}.py`,
1158
+ join(modulePath, "__init__.py")
1159
+ );
1160
+ }
1161
+ for (const prefix of ["src", "lib", "app"]) {
1162
+ candidates.push(
1163
+ join(prefix, `${modulePath}.py`),
1164
+ join(prefix, modulePath, "__init__.py")
1165
+ );
1166
+ }
1167
+ }
1168
+ const normalized = candidates.map((p) => p.replace(/^\.[\\/]/, ""));
1169
+ for (const candidate of normalized) {
1170
+ if (allPaths.has(candidate)) return candidate;
1171
+ }
1172
+ return null;
1173
+ }
1174
+ function resolveGoImport(spec, fromRelativePath, projectPath, allPaths) {
1175
+ const dirFiles = /* @__PURE__ */ new Map();
1176
+ for (const p of allPaths) {
1177
+ if (!p.endsWith(".go")) continue;
1178
+ if (p.endsWith("_test.go")) continue;
1179
+ const dir = dirname(p);
1180
+ const existing = dirFiles.get(dir);
1181
+ if (existing) existing.push(p);
1182
+ else dirFiles.set(dir, [p]);
1183
+ }
1184
+ const importParts = spec.raw.split("/");
1185
+ const pkgName = importParts[importParts.length - 1];
1186
+ let goModModule = "";
1187
+ for (const p of allPaths) {
1188
+ if (p === "go.mod" || p.endsWith("/go.mod")) {
1189
+ try {
1190
+ const goModContent = safeReadFile(join(projectPath, p));
1191
+ if (goModContent) {
1192
+ const modMatch = goModContent.match(/^module\s+(\S+)/m);
1193
+ if (modMatch) goModModule = modMatch[1];
1194
+ }
1195
+ } catch {
1196
+ }
1197
+ break;
1198
+ }
1199
+ }
1200
+ if (goModModule && spec.raw.startsWith(goModModule + "/")) {
1201
+ const localPath = spec.raw.slice(goModModule.length + 1);
1202
+ const files = dirFiles.get(localPath);
1203
+ if (files && files.length > 0) return files.sort()[0];
1204
+ for (const prefix of ["", "cmd/", "pkg/", "internal/"]) {
1205
+ const tryPath = prefix + localPath;
1206
+ const tryFiles = dirFiles.get(tryPath);
1207
+ if (tryFiles && tryFiles.length > 0) return tryFiles.sort()[0];
1208
+ }
1209
+ }
1210
+ for (const [dir, files] of dirFiles) {
1211
+ const dirName = dir.split("/").pop();
1212
+ if (dirName === pkgName) return files.sort()[0];
1213
+ }
1214
+ for (let depth = 2; depth <= Math.min(importParts.length, 4); depth++) {
1215
+ const suffix = importParts.slice(-depth).join("/");
1216
+ for (const [dir, files] of dirFiles) {
1217
+ if (dir === suffix || dir.endsWith("/" + suffix)) {
1218
+ return files.sort()[0];
1219
+ }
1220
+ }
1221
+ }
1222
+ return null;
1223
+ }
1224
+ function resolveJavaImport(spec, allPaths) {
1225
+ const parts = spec.raw.split(".");
1226
+ if (parts[parts.length - 1] === "*") {
1227
+ const packagePath2 = parts.slice(0, -1).join("/");
1228
+ for (const prefix of ["src/main/java/", "src/", ""]) {
1229
+ for (const path of allPaths) {
1230
+ if (path.startsWith(prefix + packagePath2 + "/") && path.endsWith(".java")) {
1231
+ return path;
1232
+ }
1233
+ }
1234
+ }
1235
+ for (const path of allPaths) {
1236
+ if (path.includes(packagePath2 + "/") && path.endsWith(".java")) {
1237
+ return path;
1238
+ }
1239
+ }
1240
+ return null;
1241
+ }
1242
+ let className = parts[parts.length - 1];
1243
+ let packageParts = parts.slice(0, -1);
1244
+ if (className[0] && className[0] === className[0].toLowerCase() && packageParts.length > 0) {
1245
+ className = packageParts[packageParts.length - 1];
1246
+ packageParts = packageParts.slice(0, -1);
1247
+ }
1248
+ if (packageParts.length > 0) {
1249
+ const maybeOuter = packageParts[packageParts.length - 1];
1250
+ if (maybeOuter[0] && maybeOuter[0] === maybeOuter[0].toUpperCase() && className[0] && className[0] === className[0].toUpperCase()) {
1251
+ className = maybeOuter;
1252
+ packageParts = packageParts.slice(0, -1);
1253
+ }
1254
+ }
1255
+ const packagePath = packageParts.join("/");
1256
+ const javaFile = `${className}.java`;
1257
+ const fullPath = packagePath ? `${packagePath}/${javaFile}` : javaFile;
1258
+ const prefixes = ["src/main/java/", "src/main/kotlin/", "src/", "app/src/main/java/", ""];
1259
+ for (const prefix of prefixes) {
1260
+ const candidate = prefix + fullPath;
1261
+ if (allPaths.has(candidate)) return candidate;
1262
+ }
1263
+ for (const path of allPaths) {
1264
+ if (path.endsWith(fullPath)) return path;
1265
+ }
1266
+ if (packageParts.length >= 2) {
1267
+ const lastTwoPkg = packageParts.slice(-2).join("/");
1268
+ for (const path of allPaths) {
1269
+ if (path.endsWith(javaFile) && path.includes(lastTwoPkg)) return path;
1270
+ }
1271
+ }
1272
+ return null;
1273
+ }
1274
+ function resolveRustImport(spec, fromRelativePath, allPaths) {
1275
+ if (spec.raw.startsWith("mod::")) {
1276
+ const modName = spec.raw.slice(5);
1277
+ const dir = dirname(fromRelativePath);
1278
+ const candidates = [
1279
+ join(dir, `${modName}.rs`),
1280
+ join(dir, modName, "mod.rs")
1281
+ ];
1282
+ for (const c of candidates) {
1283
+ if (allPaths.has(c)) return c;
1284
+ }
1285
+ return null;
1286
+ }
1287
+ if (spec.raw.startsWith("crate::")) {
1288
+ const parts = spec.raw.replace("crate::", "").split("::");
1289
+ for (let i = parts.length; i >= 1; i--) {
1290
+ const modulePath = parts.slice(0, i).join("/");
1291
+ const candidates = [
1292
+ `src/${modulePath}.rs`,
1293
+ `src/${modulePath}/mod.rs`,
1294
+ `${modulePath}.rs`,
1295
+ `${modulePath}/mod.rs`
1296
+ ];
1297
+ for (const c of candidates) {
1298
+ if (allPaths.has(c)) return c;
1299
+ }
1300
+ }
1301
+ return null;
1302
+ }
1303
+ if (spec.raw.startsWith("super::")) {
1304
+ const parts = spec.raw.replace("super::", "").split("::");
1305
+ const parentDir = dirname(dirname(fromRelativePath));
1306
+ for (let i = parts.length; i >= 1; i--) {
1307
+ const modulePath = parts.slice(0, i).join("/");
1308
+ const candidates = [
1309
+ join(parentDir, `${modulePath}.rs`),
1310
+ join(parentDir, modulePath, "mod.rs")
1311
+ ];
1312
+ for (const c of candidates) {
1313
+ if (allPaths.has(c)) return c;
1314
+ }
1315
+ }
1316
+ return null;
1317
+ }
1318
+ if (spec.raw.startsWith("self::")) {
1319
+ const parts = spec.raw.replace("self::", "").split("::");
1320
+ const dir = dirname(fromRelativePath);
1321
+ for (let i = parts.length; i >= 1; i--) {
1322
+ const modulePath = parts.slice(0, i).join("/");
1323
+ const candidates = [
1324
+ join(dir, `${modulePath}.rs`),
1325
+ join(dir, modulePath, "mod.rs")
1326
+ ];
1327
+ for (const c of candidates) {
1328
+ if (allPaths.has(c)) return c;
1329
+ }
1330
+ }
1331
+ return null;
1332
+ }
1333
+ return null;
1334
+ }
1335
+ function safeReadFile(path) {
1336
+ try {
1337
+ return readFileSync(path, "utf-8");
1338
+ } catch {
1339
+ return null;
1340
+ }
1341
+ }
1342
+
1343
+ // src/engine/graph.ts
105
1344
  var TS_EXTENSIONS = /* @__PURE__ */ new Set(["ts", "tsx", "js", "jsx", "mts", "mjs", "cts", "cjs"]);
106
1345
  function createProject(projectPath, filePaths) {
107
- const tsConfigPath = join(projectPath, "tsconfig.json");
108
- const hasTsConfig = existsSync(tsConfigPath);
1346
+ const tsConfigPath = join2(projectPath, "tsconfig.json");
1347
+ const hasTsConfig = existsSync2(tsConfigPath);
109
1348
  const project = new Project({
110
1349
  tsConfigFilePath: hasTsConfig ? tsConfigPath : void 0,
111
1350
  skipAddingFilesFromTsConfig: true,
@@ -131,9 +1370,11 @@ function createProject(projectPath, filePaths) {
131
1370
  return project;
132
1371
  }
133
1372
  function buildProjectGraph(projectPath, files) {
134
- const absPath = resolve(projectPath);
1373
+ const absPath = resolve2(projectPath);
135
1374
  const tsFiles = files.filter((f) => TS_EXTENSIONS.has(f.extension)).map((f) => f.path);
136
- if (tsFiles.length === 0) {
1375
+ const polyglotFiles = files.filter((f) => !TS_EXTENSIONS.has(f.extension)).map((f) => ({ relativePath: f.relativePath, absolutePath: f.path }));
1376
+ const polyglotEdges = parseAllPolyglotImports(polyglotFiles, absPath);
1377
+ if (tsFiles.length === 0 && polyglotEdges.length === 0) {
137
1378
  return emptyGraph(files);
138
1379
  }
139
1380
  let project;
@@ -145,7 +1386,7 @@ function buildProjectGraph(projectPath, files) {
145
1386
  const edges = [];
146
1387
  const nodeSet = /* @__PURE__ */ new Set();
147
1388
  for (const sourceFile of project.getSourceFiles()) {
148
- const fromRel = relative(absPath, sourceFile.getFilePath());
1389
+ const fromRel = relative2(absPath, sourceFile.getFilePath());
149
1390
  if (fromRel.startsWith("..") || fromRel.includes("node_modules")) continue;
150
1391
  nodeSet.add(fromRel);
151
1392
  for (const imp of sourceFile.getImportDeclarations()) {
@@ -167,6 +1408,11 @@ function buildProjectGraph(projectPath, files) {
167
1408
  }
168
1409
  }
169
1410
  }
1411
+ for (const edge of polyglotEdges) {
1412
+ nodeSet.add(edge.from);
1413
+ nodeSet.add(edge.to);
1414
+ edges.push(edge);
1415
+ }
170
1416
  const nodes = Array.from(nodeSet);
171
1417
  const importedByCount = /* @__PURE__ */ new Map();
172
1418
  const importCount = /* @__PURE__ */ new Map();
@@ -199,6 +1445,7 @@ function buildProjectGraph(projectPath, files) {
199
1445
  const orphans = Array.from(allFileNodes).filter((n) => !connectedNodes.has(n));
200
1446
  const clusters = detectClusters(nodes, edges, files);
201
1447
  enrichComplexity(project, absPath, files);
1448
+ enrichPolyglotComplexity(files);
202
1449
  return { nodes, edges, hubs, leaves, orphans, clusters };
203
1450
  }
204
1451
  var UnionFind = class {
@@ -291,7 +1538,7 @@ function commonPrefix(paths) {
291
1538
  function enrichComplexity(project, absPath, files) {
292
1539
  const fileMap = new Map(files.map((f) => [f.relativePath, f]));
293
1540
  for (const sourceFile of project.getSourceFiles()) {
294
- const relPath = relative(absPath, sourceFile.getFilePath());
1541
+ const relPath = relative2(absPath, sourceFile.getFilePath());
295
1542
  if (relPath.startsWith("..") || relPath.includes("node_modules")) continue;
296
1543
  const file = fileMap.get(relPath);
297
1544
  if (!file) continue;
@@ -342,22 +1589,34 @@ function calculateCyclomaticComplexity(node) {
342
1589
  });
343
1590
  return complexity;
344
1591
  }
1592
+ function enrichPolyglotComplexity(files) {
1593
+ for (const file of files) {
1594
+ if (TS_EXTENSIONS.has(file.extension)) continue;
1595
+ const lang = detectLanguage(file.relativePath);
1596
+ if (!lang) continue;
1597
+ try {
1598
+ const content = readFileSync2(file.path, "utf-8");
1599
+ file.complexity = Math.max(1, estimateComplexity(content, lang));
1600
+ } catch {
1601
+ }
1602
+ }
1603
+ }
345
1604
  function resolveImport(sourceFile, moduleSpecifier, projectRoot) {
346
1605
  if (!moduleSpecifier.startsWith(".")) return null;
347
- const sourceDir = dirname(sourceFile.getFilePath());
348
- const basePath = resolve(sourceDir, moduleSpecifier);
1606
+ const sourceDir = dirname2(sourceFile.getFilePath());
1607
+ const basePath = resolve2(sourceDir, moduleSpecifier);
349
1608
  const extensions = [".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js", "/index.jsx"];
350
1609
  for (const ext of extensions) {
351
1610
  const candidate = basePath.endsWith(ext) ? basePath : basePath + ext;
352
- if (existsSync(candidate)) {
353
- const rel = relative(projectRoot, candidate);
1611
+ if (existsSync2(candidate)) {
1612
+ const rel = relative2(projectRoot, candidate);
354
1613
  if (!rel.startsWith("..")) return rel;
355
1614
  }
356
1615
  }
357
1616
  if (moduleSpecifier.endsWith(".js")) {
358
1617
  const tsPath = basePath.replace(/\.js$/, ".ts");
359
- if (existsSync(tsPath)) {
360
- const rel = relative(projectRoot, tsPath);
1618
+ if (existsSync2(tsPath)) {
1619
+ const rel = relative2(projectRoot, tsPath);
361
1620
  if (!rel.startsWith("..")) return rel;
362
1621
  }
363
1622
  }
@@ -525,7 +1784,7 @@ async function walkProject(rootPath, options) {
525
1784
  }
526
1785
  const promises = [];
527
1786
  for (const entry of entries) {
528
- const fullPath = join2(dir, entry.name);
1787
+ const fullPath = join3(dir, entry.name);
529
1788
  if (entry.isDirectory()) {
530
1789
  if (!ignoreDirSet.has(entry.name) && !entry.name.startsWith(".")) {
531
1790
  promises.push(walk(fullPath, depth + 1));
@@ -546,7 +1805,7 @@ async function walkProject(rootPath, options) {
546
1805
  }
547
1806
  results.push({
548
1807
  path: fullPath,
549
- relativePath: relative2(rootPath, fullPath),
1808
+ relativePath: relative3(rootPath, fullPath),
550
1809
  extension: ext,
551
1810
  size: fileStat.size,
552
1811
  lastModified: fileStat.mtime,
@@ -595,7 +1854,7 @@ function detectStack(files) {
595
1854
  return stack;
596
1855
  }
597
1856
  async function analyzeProject(projectPath, config) {
598
- const absPath = resolve2(projectPath);
1857
+ const absPath = resolve3(projectPath);
599
1858
  const projectName = basename2(absPath);
600
1859
  const mergedConfig = mergeConfig(DEFAULT_CONFIG, config);
601
1860
  const allExtensions = [
@@ -787,8 +2046,8 @@ import { createHash as createHash3 } from "crypto";
787
2046
 
788
2047
  // src/govern/secrets.ts
789
2048
  import { readFile as readFile3 } from "fs/promises";
790
- import { readFileSync, existsSync as existsSync2, mkdirSync, writeFileSync } from "fs";
791
- import { resolve as resolve3, relative as relative3, join as join3, dirname as dirname2 } from "path";
2049
+ import { readFileSync as readFileSync3, existsSync as existsSync3, mkdirSync, writeFileSync } from "fs";
2050
+ import { resolve as resolve4, relative as relative4, join as join4, dirname as dirname3 } from "path";
792
2051
  import { createHash as createHash2 } from "crypto";
793
2052
  var BUILTIN_PATTERNS = [
794
2053
  // API Keys
@@ -916,7 +2175,7 @@ function scanContentForSecrets(content, filePath, customPatterns = [], extraPiiS
916
2175
  async function scanFileForSecrets(filePath, projectPath, customPatterns = []) {
917
2176
  try {
918
2177
  const content = await readFile3(filePath, "utf-8");
919
- const relPath = relative3(resolve3(projectPath), resolve3(filePath));
2178
+ const relPath = relative4(resolve4(projectPath), resolve4(filePath));
920
2179
  return scanContentForSecrets(content, relPath, customPatterns);
921
2180
  } catch {
922
2181
  return [];
@@ -1012,13 +2271,13 @@ function fingerprintFinding(f) {
1012
2271
  return createHash2("sha256").update(`${f.file}:${f.type}:${f.match}`).digest("hex").slice(0, 32);
1013
2272
  }
1014
2273
  function getAllowlistPath(projectPath) {
1015
- return join3(projectPath, ".cto", "audit", "allowlist.json");
2274
+ return join4(projectPath, ".cto", "audit", "allowlist.json");
1016
2275
  }
1017
2276
  function loadAllowlist(projectPath) {
1018
2277
  const filePath = getAllowlistPath(projectPath);
1019
- if (!existsSync2(filePath)) return [];
2278
+ if (!existsSync3(filePath)) return [];
1020
2279
  try {
1021
- return JSON.parse(readFileSync(filePath, "utf-8"));
2280
+ return JSON.parse(readFileSync3(filePath, "utf-8"));
1022
2281
  } catch {
1023
2282
  return [];
1024
2283
  }
@@ -1039,20 +2298,20 @@ function filterByAllowlist(findings, projectPath) {
1039
2298
  return { filtered, allowed };
1040
2299
  }
1041
2300
  function getHashCachePath(projectPath) {
1042
- return join3(projectPath, ".cto", "audit", ".hashcache.json");
2301
+ return join4(projectPath, ".cto", "audit", ".hashcache.json");
1043
2302
  }
1044
2303
  function loadHashCache(projectPath) {
1045
2304
  const filePath = getHashCachePath(projectPath);
1046
- if (!existsSync2(filePath)) return {};
2305
+ if (!existsSync3(filePath)) return {};
1047
2306
  try {
1048
- return JSON.parse(readFileSync(filePath, "utf-8"));
2307
+ return JSON.parse(readFileSync3(filePath, "utf-8"));
1049
2308
  } catch {
1050
2309
  return {};
1051
2310
  }
1052
2311
  }
1053
2312
  function saveHashCache(projectPath, cache) {
1054
2313
  const filePath = getHashCachePath(projectPath);
1055
- mkdirSync(dirname2(filePath), { recursive: true });
2314
+ mkdirSync(dirname3(filePath), { recursive: true });
1056
2315
  writeFileSync(filePath, JSON.stringify(cache));
1057
2316
  }
1058
2317
  function hashContent(content) {
@@ -1065,8 +2324,8 @@ function getChangedFiles(projectPath, filePaths) {
1065
2324
  const unchanged = [];
1066
2325
  for (const fp of filePaths) {
1067
2326
  try {
1068
- const content = readFileSync(fp, "utf-8");
1069
- const relPath = relative3(resolve3(projectPath), resolve3(fp));
2327
+ const content = readFileSync3(fp, "utf-8");
2328
+ const relPath = relative4(resolve4(projectPath), resolve4(fp));
1070
2329
  const hash = hashContent(content);
1071
2330
  newCache[relPath] = hash;
1072
2331
  if (oldCache[relPath] === hash) {
@@ -1089,13 +2348,13 @@ var DEFAULT_AUDIT_CONFIG = {
1089
2348
  incrementalScan: true
1090
2349
  };
1091
2350
  function getAuditConfigPath(projectPath) {
1092
- return join3(projectPath, ".cto", "audit", "config.json");
2351
+ return join4(projectPath, ".cto", "audit", "config.json");
1093
2352
  }
1094
2353
  function loadAuditConfig(projectPath) {
1095
2354
  const filePath = getAuditConfigPath(projectPath);
1096
- if (!existsSync2(filePath)) return { ...DEFAULT_AUDIT_CONFIG };
2355
+ if (!existsSync3(filePath)) return { ...DEFAULT_AUDIT_CONFIG };
1097
2356
  try {
1098
- const loaded = JSON.parse(readFileSync(filePath, "utf-8"));
2357
+ const loaded = JSON.parse(readFileSync3(filePath, "utf-8"));
1099
2358
  return { ...DEFAULT_AUDIT_CONFIG, ...loaded };
1100
2359
  } catch {
1101
2360
  return { ...DEFAULT_AUDIT_CONFIG };
@@ -1193,7 +2452,7 @@ async function auditProject(projectPath, filePaths, options = {}) {
1193
2452
  for (const fp of filesToScan) {
1194
2453
  try {
1195
2454
  const content = await readFile3(fp, "utf-8");
1196
- const relPath = relative3(resolve3(projectPath), resolve3(fp));
2455
+ const relPath = relative4(resolve4(projectPath), resolve4(fp));
1197
2456
  const isTestFile = /\.(test|spec|mock)\.[jt]sx?$/.test(relPath) || relPath.includes("__tests__");
1198
2457
  const isDtsFile = relPath.endsWith(".d.ts");
1199
2458
  let findings = scanContentForSecrets(content, relPath, customPatterns, extraPiiDomains);
@@ -1800,19 +3059,8 @@ async function selectContext(input) {
1800
3059
  for (const s of input.semanticScores ?? []) semanticMap.set(s.filePath, s.score);
1801
3060
  const learnerMap = /* @__PURE__ */ new Map();
1802
3061
  for (const b of input.learnerBoosts ?? []) learnerMap.set(b.filePath, b.boost);
1803
- let targetPaths = identifyTargetFiles(task, analysis.files);
1804
- if (targetPaths.length === 0 && semanticMap.size > 0) {
1805
- const sorted = [...semanticMap.entries()].sort((a, b) => b[1] - a[1]);
1806
- const threshold = 0.5;
1807
- targetPaths = sorted.filter(([, score]) => score >= threshold).slice(0, 10).map(([path]) => path);
1808
- if (targetPaths.length > 0) {
1809
- decisions.push({
1810
- file: targetPaths.join(", "),
1811
- action: "include-full",
1812
- reason: `Top ${targetPaths.length} file(s) identified via semantic matching (score \u2265 ${threshold})`
1813
- });
1814
- }
1815
- } else if (targetPaths.length > 0) {
3062
+ const targetPaths = identifyTargetFiles(task, analysis.files);
3063
+ if (targetPaths.length > 0) {
1816
3064
  decisions.push({
1817
3065
  file: targetPaths.join(", "),
1818
3066
  action: "include-full",
@@ -1844,7 +3092,7 @@ async function selectContext(input) {
1844
3092
  }
1845
3093
  const { mustInclude, mustExclude } = applyPolicies(analysis.files, policies);
1846
3094
  const candidateSet = /* @__PURE__ */ new Set([...expandedPaths, ...mustInclude]);
1847
- if (targetPaths.length === 0) {
3095
+ if (semanticMap.size > 0 || targetPaths.length === 0) {
1848
3096
  for (const f of analysis.files) {
1849
3097
  candidateSet.add(f.relativePath);
1850
3098
  }
@@ -1883,22 +3131,32 @@ async function selectContext(input) {
1883
3131
  const riskNorm = file.riskScore / maxRisk;
1884
3132
  const semantic = semanticMap.get(file.relativePath) ?? 0;
1885
3133
  const learner = ((learnerMap.get(file.relativePath) ?? 0) + 1) / 2;
1886
- return riskNorm * 0.4 + semantic * 0.4 + learner * 0.2;
3134
+ return semantic * 0.55 + riskNorm * 0.25 + learner * 0.2;
1887
3135
  }
3136
+ const targetSet = new Set(targetPaths);
1888
3137
  const candidates = Array.from(candidateSet).map((p) => allFileMap.get(p)).filter((f) => f !== void 0).sort((a, b) => {
1889
- const aIsTarget = targetPaths.includes(a.relativePath) ? 0 : 1;
1890
- const bIsTarget = targetPaths.includes(b.relativePath) ? 0 : 1;
1891
- if (aIsTarget !== bIsTarget) return aIsTarget - bIsTarget;
1892
- const aIsMust = mustInclude.has(a.relativePath) ? 0 : 1;
1893
- const bIsMust = mustInclude.has(b.relativePath) ? 0 : 1;
1894
- if (aIsMust !== bIsMust) return aIsMust - bIsMust;
1895
- return compositeScore(b) - compositeScore(a);
3138
+ const aBonus = (targetSet.has(a.relativePath) ? 0.3 : 0) + (mustInclude.has(a.relativePath) ? 0.15 : 0);
3139
+ const bBonus = (targetSet.has(b.relativePath) ? 0.3 : 0) + (mustInclude.has(b.relativePath) ? 0.15 : 0);
3140
+ return compositeScore(b) + bBonus - (compositeScore(a) + aBonus);
1896
3141
  });
1897
3142
  const selectedFiles = [];
1898
3143
  let usedTokens = 0;
3144
+ const hasSemanticSignal = semanticMap.size > 0;
1899
3145
  for (const file of candidates) {
1900
- const isTarget = targetPaths.includes(file.relativePath);
3146
+ const isTarget = targetSet.has(file.relativePath);
1901
3147
  const isMustInclude = mustInclude.has(file.relativePath);
3148
+ if (hasSemanticSignal && !isTarget && !isMustInclude) {
3149
+ const semScore = semanticMap.get(file.relativePath) ?? 0;
3150
+ const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
3151
+ if (semScore === 0 && lrnBoost === 0) {
3152
+ decisions.push({
3153
+ file: file.relativePath,
3154
+ action: "exclude",
3155
+ reason: "Skipped: no semantic relevance to task"
3156
+ });
3157
+ continue;
3158
+ }
3159
+ }
1902
3160
  const defaultLevel = isTarget ? "full" : getPruneLevelForRisk(file.riskScore);
1903
3161
  const levels = getCascadeLevels2(defaultLevel);
1904
3162
  let included = false;
@@ -2022,281 +3280,394 @@ function buildReason(file, level, isTarget, isMustInclude) {
2022
3280
  return `Low relevance (risk ${file.riskScore}) \u2014 ${levelStr}`;
2023
3281
  }
2024
3282
 
2025
- // src/engine/tfidf.ts
2026
- var STOP_WORDS = /* @__PURE__ */ new Set([
2027
- // Language keywords
2028
- "import",
2029
- "export",
2030
- "from",
2031
- "const",
2032
- "let",
2033
- "var",
2034
- "function",
2035
- "class",
2036
- "interface",
2037
- "type",
2038
- "return",
2039
- "async",
2040
- "await",
2041
- "new",
2042
- "this",
2043
- "that",
2044
- "true",
2045
- "false",
2046
- "null",
2047
- "undefined",
2048
- "void",
2049
- "string",
2050
- "number",
2051
- "boolean",
2052
- "any",
2053
- "unknown",
2054
- "never",
2055
- "object",
2056
- "array",
2057
- "promise",
2058
- "if",
2059
- "else",
2060
- "for",
2061
- "while",
2062
- "do",
2063
- "switch",
2064
- "case",
2065
- "break",
2066
- "continue",
2067
- "try",
2068
- "catch",
2069
- "throw",
2070
- "finally",
2071
- "default",
2072
- "extends",
2073
- "implements",
2074
- "static",
2075
- "private",
2076
- "public",
2077
- "protected",
2078
- "readonly",
2079
- "abstract",
2080
- "override",
2081
- "super",
2082
- "typeof",
2083
- "instanceof",
2084
- "in",
2085
- "of",
2086
- "as",
2087
- "is",
2088
- "keyof",
2089
- "enum",
2090
- "namespace",
2091
- "module",
2092
- "declare",
2093
- // Python
2094
- "def",
2095
- "self",
2096
- "cls",
2097
- "none",
2098
- "pass",
2099
- "yield",
2100
- "lambda",
2101
- "with",
2102
- "elif",
2103
- "except",
2104
- "raise",
2105
- "assert",
2106
- "global",
2107
- "nonlocal",
2108
- // Natural language stop words only — NOT domain terms that carry signal
2109
- "the",
2110
- "and",
2111
- "for",
2112
- "with",
2113
- "not",
2114
- "but",
2115
- "are",
2116
- "was",
2117
- "were",
2118
- "has",
2119
- "have",
2120
- "had",
2121
- "will",
2122
- "would",
2123
- "could",
2124
- "should",
2125
- "may",
2126
- "can",
2127
- "its",
2128
- "also",
2129
- "than",
2130
- "then",
2131
- "into",
2132
- "only",
2133
- "very",
2134
- "just",
2135
- "about",
2136
- "being",
2137
- "been",
2138
- "does",
2139
- "did",
2140
- "doing",
2141
- "todo",
2142
- "fixme",
2143
- "hack",
2144
- "note",
2145
- "xxx"
2146
- ]);
2147
- function buildIndex(files) {
3283
+ // src/engine/context-pipeline.ts
3284
+ import { readFileSync as readFileSync6 } from "fs";
3285
+ init_tfidf();
3286
+
3287
+ // src/engine/index-cache.ts
3288
+ init_tfidf();
3289
+ import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, statSync } from "fs";
3290
+ import { join as join5 } from "path";
3291
+ var CACHE_VERSION = 2;
3292
+ var CACHE_DIR = ".cto";
3293
+ var CACHE_FILE = "index-cache.json";
3294
+ function buildIndexCached(projectPath, files) {
3295
+ const startTime = Date.now();
3296
+ const cachePath = join5(projectPath, CACHE_DIR, CACHE_FILE);
3297
+ const existing = loadCache(cachePath);
3298
+ const cacheHit = existing !== null;
3299
+ const cachedFiles = existing?.files ?? {};
3300
+ const newCachedFiles = {};
3301
+ let updatedFiles = 0;
3302
+ let removedFiles = 0;
3303
+ let cachedCount = 0;
3304
+ const currentPaths = new Set(files.map((f) => f.relativePath));
3305
+ if (existing) {
3306
+ for (const path of Object.keys(cachedFiles)) {
3307
+ if (!currentPaths.has(path)) {
3308
+ removedFiles++;
3309
+ }
3310
+ }
3311
+ }
3312
+ for (const file of files) {
3313
+ const cached = cachedFiles[file.relativePath];
3314
+ let currentMtime = 0;
3315
+ try {
3316
+ const st = statSync(file.absolutePath);
3317
+ currentMtime = st.mtimeMs;
3318
+ } catch {
3319
+ continue;
3320
+ }
3321
+ if (cached && cached.mtime === currentMtime) {
3322
+ newCachedFiles[file.relativePath] = cached;
3323
+ cachedCount++;
3324
+ } else {
3325
+ let content = file.content;
3326
+ if (content === void 0) {
3327
+ try {
3328
+ content = readFileSync4(file.absolutePath, "utf-8");
3329
+ } catch {
3330
+ continue;
3331
+ }
3332
+ }
3333
+ const terms = tokenize(content);
3334
+ const termCounts = {};
3335
+ for (const term of terms) {
3336
+ termCounts[term] = (termCounts[term] ?? 0) + 1;
3337
+ }
3338
+ newCachedFiles[file.relativePath] = {
3339
+ mtime: currentMtime,
3340
+ terms: termCounts,
3341
+ length: terms.length
3342
+ };
3343
+ updatedFiles++;
3344
+ }
3345
+ }
3346
+ const index = rebuildIndex(newCachedFiles);
3347
+ saveCache(cachePath, newCachedFiles);
3348
+ const stats = {
3349
+ totalFiles: Object.keys(newCachedFiles).length,
3350
+ updatedFiles,
3351
+ removedFiles,
3352
+ cachedFiles: cachedCount,
3353
+ cacheHit,
3354
+ buildTimeMs: Date.now() - startTime
3355
+ };
3356
+ return { index, stats };
3357
+ }
3358
+ function invalidateCache(projectPath) {
3359
+ const cachePath = join5(projectPath, CACHE_DIR, CACHE_FILE);
3360
+ try {
3361
+ if (existsSync4(cachePath)) {
3362
+ writeFileSync2(cachePath, "{}");
3363
+ }
3364
+ } catch {
3365
+ }
3366
+ }
3367
+ function getCacheInfo(projectPath) {
3368
+ const cachePath = join5(projectPath, CACHE_DIR, CACHE_FILE);
3369
+ const data = loadCache(cachePath);
3370
+ if (!data) return { exists: false, fileCount: 0, builtAt: null };
3371
+ return {
3372
+ exists: true,
3373
+ fileCount: Object.keys(data.files).length,
3374
+ builtAt: data.builtAt
3375
+ };
3376
+ }
3377
+ function loadCache(cachePath) {
3378
+ try {
3379
+ if (!existsSync4(cachePath)) return null;
3380
+ const raw = readFileSync4(cachePath, "utf-8");
3381
+ const data = JSON.parse(raw);
3382
+ if (data.version !== CACHE_VERSION) return null;
3383
+ if (!data.files || typeof data.files !== "object") return null;
3384
+ return data;
3385
+ } catch {
3386
+ return null;
3387
+ }
3388
+ }
3389
+ function saveCache(cachePath, files) {
3390
+ try {
3391
+ const dir = cachePath.substring(0, cachePath.lastIndexOf("/"));
3392
+ if (!existsSync4(dir)) {
3393
+ mkdirSync2(dir, { recursive: true });
3394
+ }
3395
+ const data = {
3396
+ version: CACHE_VERSION,
3397
+ builtAt: (/* @__PURE__ */ new Date()).toISOString(),
3398
+ files
3399
+ };
3400
+ writeFileSync2(cachePath, JSON.stringify(data));
3401
+ } catch {
3402
+ }
3403
+ }
3404
+ function rebuildIndex(cachedFiles) {
2148
3405
  const documents = /* @__PURE__ */ new Map();
2149
3406
  const docFreq = /* @__PURE__ */ new Map();
2150
- for (const file of files) {
2151
- const terms = tokenize(file.content);
2152
- const termCounts = /* @__PURE__ */ new Map();
2153
- for (const term of terms) {
2154
- termCounts.set(term, (termCounts.get(term) ?? 0) + 1);
3407
+ let totalLength = 0;
3408
+ for (const [path, cached] of Object.entries(cachedFiles)) {
3409
+ const termMap = /* @__PURE__ */ new Map();
3410
+ for (const [term, count] of Object.entries(cached.terms)) {
3411
+ termMap.set(term, count);
2155
3412
  }
2156
- documents.set(file.relativePath, { terms: termCounts, length: terms.length });
2157
- for (const term of termCounts.keys()) {
3413
+ documents.set(path, { terms: termMap, length: cached.length });
3414
+ totalLength += cached.length;
3415
+ for (const term of termMap.keys()) {
2158
3416
  docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
2159
3417
  }
2160
3418
  }
2161
- const totalDocs = files.length;
3419
+ const totalDocs = documents.size;
3420
+ const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
2162
3421
  const idf = /* @__PURE__ */ new Map();
2163
3422
  for (const [term, df] of docFreq) {
2164
3423
  idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
2165
3424
  }
2166
- let totalLength = 0;
2167
- for (const doc of documents.values()) totalLength += doc.length;
2168
- const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
2169
3425
  return { documents, idf, avgDocLength, totalDocs };
2170
3426
  }
2171
- function query(index, taskDescription, maxResults = 50) {
2172
- const queryTerms = tokenize(taskDescription);
2173
- if (queryTerms.length === 0) return [];
2174
- const querySet = /* @__PURE__ */ new Map();
2175
- for (const term of queryTerms) {
2176
- querySet.set(term, (querySet.get(term) ?? 0) + 1);
3427
+
3428
+ // src/engine/reranker.ts
3429
+ init_tfidf();
3430
+ var WEIGHTS = {
3431
+ termCoverage: 0.35,
3432
+ // What fraction of query terms does the file match?
3433
+ termSpecificity: 0.25,
3434
+ // Are matched terms rare or common?
3435
+ bigramProximity: 0.15,
3436
+ // Do query terms appear near each other?
3437
+ dependencySignal: 0.1,
3438
+ // Is this file connected to a top match?
3439
+ pathRelevance: 0.15
3440
+ // Does the file path match query terms?
3441
+ };
3442
+ var ABSOLUTE_FLOOR = 0.18;
3443
+ var ELBOW_DROP_RATIO = 0.35;
3444
+ var MIN_TERM_COVERAGE = 0.3;
3445
+ function rerank(input) {
3446
+ const startTime = Date.now();
3447
+ const { task, candidates, index, fileContents, dependencies, allFilePaths } = input;
3448
+ const emptyTelemetry = {
3449
+ candidatesIn: candidates.length,
3450
+ candidatesOut: 0,
3451
+ candidatesFiltered: 0,
3452
+ durationMs: 0,
3453
+ weights: { ...WEIGHTS },
3454
+ gateConfig: { absoluteFloor: ABSOLUTE_FLOOR, elbowDropRatio: ELBOW_DROP_RATIO, minTermCoverage: MIN_TERM_COVERAGE },
3455
+ signalStats: {
3456
+ termCoverage: { min: 0, max: 0, mean: 0, median: 0 },
3457
+ termSpecificity: { min: 0, max: 0, mean: 0, median: 0 },
3458
+ bigramProximity: { min: 0, max: 0, mean: 0, median: 0 },
3459
+ dependencySignal: { min: 0, max: 0, mean: 0, median: 0 },
3460
+ pathRelevance: { min: 0, max: 0, mean: 0, median: 0 }
3461
+ },
3462
+ filterReasons: {},
3463
+ scoreDistribution: [0, 0, 0, 0, 0],
3464
+ queryTermCount: 0,
3465
+ relevanceConeSize: 0
3466
+ };
3467
+ if (candidates.length === 0) {
3468
+ return { files: [], filtered: [], qualityThreshold: 0, telemetry: { ...emptyTelemetry, durationMs: Date.now() - startTime } };
2177
3469
  }
2178
- const results = [];
2179
- const k1 = 1.5;
2180
- const b = 0.75;
2181
- for (const [filePath, doc] of index.documents) {
2182
- let score = 0;
2183
- const matchedTerms = [];
2184
- for (const [qTerm, qCount] of querySet) {
2185
- const tf = doc.terms.get(qTerm) ?? 0;
2186
- if (tf === 0) continue;
2187
- const termIdf = index.idf.get(qTerm) ?? 0;
2188
- if (termIdf <= 0) continue;
2189
- const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
2190
- score += termIdf * tfNorm * qCount;
2191
- matchedTerms.push(qTerm);
3470
+ const queryTerms = tokenize(task);
3471
+ const uniqueQueryTerms = [...new Set(queryTerms)];
3472
+ if (uniqueQueryTerms.length === 0) {
3473
+ return { files: [], filtered: [], qualityThreshold: 0, telemetry: { ...emptyTelemetry, durationMs: Date.now() - startTime } };
3474
+ }
3475
+ const queryTermIdfs = /* @__PURE__ */ new Map();
3476
+ for (const term of uniqueQueryTerms) {
3477
+ queryTermIdfs.set(term, index.idf.get(term) ?? 0);
3478
+ }
3479
+ const maxIdf = Math.max(1, ...queryTermIdfs.values());
3480
+ const pathTermsCache = /* @__PURE__ */ new Map();
3481
+ for (const fp of allFilePaths) {
3482
+ pathTermsCache.set(fp, new Set(tokenize(fp.replace(/[/\\.]/g, " "))));
3483
+ }
3484
+ const scored = [];
3485
+ for (const candidate of candidates) {
3486
+ const doc = index.documents.get(candidate.filePath);
3487
+ if (!doc) continue;
3488
+ const matchedQueryTerms = /* @__PURE__ */ new Set();
3489
+ for (const term of uniqueQueryTerms) {
3490
+ if ((doc.terms.get(term) ?? 0) > 0) {
3491
+ matchedQueryTerms.add(term);
3492
+ }
2192
3493
  }
2193
- if (score > 0) {
2194
- results.push({ filePath, score, matchedTerms });
3494
+ const termCoverage = matchedQueryTerms.size / uniqueQueryTerms.length;
3495
+ let specificitySum = 0;
3496
+ let specificityMax = 0;
3497
+ for (const term of matchedQueryTerms) {
3498
+ const idf = queryTermIdfs.get(term) ?? 0;
3499
+ specificitySum += idf;
3500
+ specificityMax += maxIdf;
2195
3501
  }
3502
+ const termSpecificity = specificityMax > 0 ? specificitySum / specificityMax : 0;
3503
+ const content = fileContents.get(candidate.filePath) ?? "";
3504
+ const bigramProximity = computeBigramProximity(content, uniqueQueryTerms);
3505
+ const dependencySignal = 0;
3506
+ const pathTerms = pathTermsCache.get(candidate.filePath) ?? /* @__PURE__ */ new Set();
3507
+ const queryTermSet = new Set(uniqueQueryTerms);
3508
+ let pathHits = 0;
3509
+ for (const pt of pathTerms) {
3510
+ if (queryTermSet.has(pt)) pathHits++;
3511
+ }
3512
+ const pathRelevance = Math.min(1, pathHits / Math.max(1, uniqueQueryTerms.length) * 2);
3513
+ const score = termCoverage * WEIGHTS.termCoverage + termSpecificity * WEIGHTS.termSpecificity + bigramProximity * WEIGHTS.bigramProximity + dependencySignal * WEIGHTS.dependencySignal + pathRelevance * WEIGHTS.pathRelevance;
3514
+ scored.push({
3515
+ filePath: candidate.filePath,
3516
+ score,
3517
+ bm25Score: candidate.score,
3518
+ signals: {
3519
+ termCoverage,
3520
+ termSpecificity,
3521
+ bigramProximity,
3522
+ dependencySignal,
3523
+ pathRelevance
3524
+ }
3525
+ });
2196
3526
  }
2197
- const maxScore = results.reduce((max, r) => Math.max(max, r.score), 0);
2198
- if (maxScore > 0) {
2199
- for (const r of results) r.score = r.score / maxScore;
3527
+ const topByScore = [...scored].sort((a, b) => b.score - a.score).slice(0, 5);
3528
+ const relevanceCone = /* @__PURE__ */ new Set();
3529
+ for (const top of topByScore) {
3530
+ relevanceCone.add(top.filePath);
3531
+ const deps = dependencies.get(top.filePath) ?? [];
3532
+ for (const dep of deps) relevanceCone.add(dep);
3533
+ for (const [from, tos] of dependencies) {
3534
+ if (tos.includes(top.filePath)) relevanceCone.add(from);
3535
+ }
2200
3536
  }
2201
- return results.sort((a, b2) => b2.score - a.score).slice(0, maxResults);
2202
- }
2203
- function similarity(index, pathA, pathB) {
2204
- const docA = index.documents.get(pathA);
2205
- const docB = index.documents.get(pathB);
2206
- if (!docA || !docB) return 0;
2207
- let dotProduct = 0;
2208
- let normA = 0;
2209
- let normB = 0;
2210
- const allTerms = /* @__PURE__ */ new Set([...docA.terms.keys(), ...docB.terms.keys()]);
2211
- for (const term of allTerms) {
2212
- const idf = index.idf.get(term) ?? 0;
2213
- const wA = (docA.terms.get(term) ?? 0) * idf;
2214
- const wB = (docB.terms.get(term) ?? 0) * idf;
2215
- dotProduct += wA * wB;
2216
- normA += wA * wA;
2217
- normB += wB * wB;
3537
+ for (const item of scored) {
3538
+ const inCone = relevanceCone.has(item.filePath) ? 1 : 0;
3539
+ item.signals.dependencySignal = inCone;
3540
+ item.score = item.signals.termCoverage * WEIGHTS.termCoverage + item.signals.termSpecificity * WEIGHTS.termSpecificity + item.signals.bigramProximity * WEIGHTS.bigramProximity + item.signals.dependencySignal * WEIGHTS.dependencySignal + item.signals.pathRelevance * WEIGHTS.pathRelevance;
2218
3541
  }
2219
- const denom = Math.sqrt(normA) * Math.sqrt(normB);
2220
- return denom > 0 ? dotProduct / denom : 0;
3542
+ scored.sort((a, b) => b.score - a.score);
3543
+ const { passed, filtered, threshold } = applyQualityGate(scored);
3544
+ const filterReasons = {};
3545
+ for (const f of filtered) {
3546
+ const reason = f.reason.replace(/\([^)]+\)/g, "").trim();
3547
+ filterReasons[reason] = (filterReasons[reason] ?? 0) + 1;
3548
+ }
3549
+ const allScores = scored.map((s) => s.score).sort((a, b) => a - b);
3550
+ const signalNames = ["termCoverage", "termSpecificity", "bigramProximity", "dependencySignal", "pathRelevance"];
3551
+ const signalStats = {};
3552
+ for (const name of signalNames) {
3553
+ const vals = scored.map((s) => s.signals[name]).sort((a, b) => a - b);
3554
+ signalStats[name] = {
3555
+ min: vals[0] ?? 0,
3556
+ max: vals[vals.length - 1] ?? 0,
3557
+ mean: vals.length > 0 ? vals.reduce((a, b) => a + b, 0) / vals.length : 0,
3558
+ median: vals.length > 0 ? vals[Math.floor(vals.length / 2)] : 0
3559
+ };
3560
+ }
3561
+ const telemetry = {
3562
+ candidatesIn: candidates.length,
3563
+ candidatesOut: passed.length,
3564
+ candidatesFiltered: filtered.length,
3565
+ durationMs: Date.now() - startTime,
3566
+ weights: { ...WEIGHTS },
3567
+ gateConfig: { absoluteFloor: ABSOLUTE_FLOOR, elbowDropRatio: ELBOW_DROP_RATIO, minTermCoverage: MIN_TERM_COVERAGE },
3568
+ signalStats,
3569
+ filterReasons,
3570
+ scoreDistribution: [
3571
+ allScores[0] ?? 0,
3572
+ allScores[Math.floor(allScores.length * 0.25)] ?? 0,
3573
+ allScores[Math.floor(allScores.length * 0.5)] ?? 0,
3574
+ allScores[Math.floor(allScores.length * 0.75)] ?? 0,
3575
+ allScores[allScores.length - 1] ?? 0
3576
+ ],
3577
+ queryTermCount: uniqueQueryTerms.length,
3578
+ relevanceConeSize: relevanceCone.size
3579
+ };
3580
+ return {
3581
+ files: passed,
3582
+ filtered,
3583
+ qualityThreshold: threshold,
3584
+ telemetry
3585
+ };
2221
3586
  }
2222
- function tokenize(text) {
2223
- const tokens = [];
2224
- const rawTokens = text.match(/[a-zA-Z][a-zA-Z0-9]*|[0-9]+/g) ?? [];
2225
- for (const raw of rawTokens) {
2226
- const parts = raw.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2").toLowerCase().split(/\s+/);
2227
- for (const part of parts) {
2228
- if (part.length < 2) continue;
2229
- const stemmed = stem(part);
2230
- if (stemmed.length < 2) continue;
2231
- if (STOP_WORDS.has(stemmed)) continue;
2232
- tokens.push(stemmed);
3587
/**
 * Mean pairwise proximity of query terms inside `content`.
 * For every pair of query terms that both occur in the tokenized content,
 * score max(0, 1 - minTokenDistance / 20), then average over co-occurring
 * pairs. Returns 0 when fewer than two query terms are given, content is
 * empty, or no pair co-occurs.
 *
 * @param {string} content - raw file content (tokenized with the shared tokenizer)
 * @param {string[]} queryTerms - unique query terms (duplicates are tolerated)
 * @returns {number} proximity score in [0, 1]
 */
function computeBigramProximity(content, queryTerms) {
  if (queryTerms.length < 2 || !content) return 0;
  const contentTokens = tokenize(content);
  // Set membership is O(1) per token; the original Array.includes scanned the
  // query list for every content token (O(tokens * queryTerms)).
  const querySet = new Set(queryTerms);
  const termPositions = /* @__PURE__ */ new Map();
  for (let i = 0; i < contentTokens.length; i++) {
    const token = contentTokens[i];
    if (querySet.has(token)) {
      const positions = termPositions.get(token) ?? [];
      positions.push(i);
      termPositions.set(token, positions);
    }
  }
  let totalScore = 0;
  let pairCount = 0;
  for (let i = 0; i < queryTerms.length; i++) {
    for (let j = i + 1; j < queryTerms.length; j++) {
      const posA = termPositions.get(queryTerms[i]);
      const posB = termPositions.get(queryTerms[j]);
      if (!posA || !posB) continue;
      // Position lists are ascending (built by one forward scan), so the
      // closest pair is found by a linear merge instead of the original
      // O(|posA| * |posB|) nested loop. Result is identical.
      let minDist = Infinity;
      let a = 0;
      let b = 0;
      while (a < posA.length && b < posB.length) {
        minDist = Math.min(minDist, Math.abs(posA[a] - posB[b]));
        if (posA[a] < posB[b]) a++;
        else b++;
      }
      if (minDist < Infinity) {
        // Pairs 20+ tokens apart contribute nothing; adjacent pairs score ~1.
        totalScore += Math.max(0, 1 - minDist / 20);
        pairCount++;
      }
    }
  }
  return pairCount > 0 ? totalScore / pairCount : 0;
}
2262
- function boostByPath(matches, allFiles, taskDescription) {
2263
- const queryTerms = new Set(tokenize(taskDescription));
2264
- const boosted = /* @__PURE__ */ new Map();
2265
- for (const m of matches) {
2266
- boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
3620
/**
 * Split a score-sorted candidate list into kept vs. rejected entries.
 * Rejection reasons, in priority order: score below ABSOLUTE_FLOOR, term
 * coverage below MIN_TERM_COVERAGE, or ranked past the detected score
 * "elbow" while scoring under half the top score.
 *
 * @param {Array<{filePath: string, score: number, signals: {termCoverage: number}}>} scored
 *   candidates sorted by descending score
 * @returns {{passed: object[], filtered: {filePath: string, score: number, reason: string}[], threshold: number}}
 */
function applyQualityGate(scored) {
  const kept = [];
  const rejected = [];
  if (scored.length === 0) {
    return { passed: kept, filtered: rejected, threshold: 0 };
  }
  // Locate the steepest relative score drop that clears ELBOW_DROP_RATIO;
  // only meaningful with three or more entries.
  let elbowAt = scored.length;
  if (scored.length >= 3) {
    let steepest = 0;
    for (let rank = 1; rank < scored.length; rank++) {
      const prev = scored[rank - 1].score;
      if (prev <= 0) continue;
      const relDrop = (prev - scored[rank].score) / prev;
      if (relDrop > steepest && relDrop >= ELBOW_DROP_RATIO) {
        steepest = relDrop;
        elbowAt = rank;
      }
    }
  }
  // Reported threshold: the score at the elbow, but never below the floor.
  const cutoff = Math.max(
    ABSOLUTE_FLOOR,
    elbowAt < scored.length ? scored[elbowAt].score : 0
  );
  const topScore = scored[0].score;
  scored.forEach((item, rank) => {
    if (item.score < ABSOLUTE_FLOOR) {
      rejected.push({ filePath: item.filePath, score: item.score, reason: `Below absolute floor (${item.score.toFixed(3)} < ${ABSOLUTE_FLOOR})` });
    } else if (item.signals.termCoverage < MIN_TERM_COVERAGE) {
      rejected.push({ filePath: item.filePath, score: item.score, reason: `Low term coverage (${(item.signals.termCoverage * 100).toFixed(0)}% < ${MIN_TERM_COVERAGE * 100}%)` });
    } else if (rank >= elbowAt && item.score < topScore * 0.5) {
      rejected.push({ filePath: item.filePath, score: item.score, reason: `Below elbow cutoff (rank ${rank + 1}, score ${item.score.toFixed(3)})` });
    } else {
      kept.push(item);
    }
  });
  return { passed: kept, filtered: rejected, threshold: cutoff };
}
2290
3661
 
2291
3662
  // src/engine/learner.ts
2292
3663
  import { readFile as readFile5, writeFile as writeFile2, mkdir } from "fs/promises";
2293
- import { join as join4 } from "path";
3664
+ import { join as join6 } from "path";
2294
3665
  var DECAY_FACTOR = 0.95;
2295
3666
  var MODEL_DIR = ".cto";
2296
3667
  var MODEL_FILE = "learner.json";
2297
3668
  var MIN_OBSERVATIONS = 3;
2298
3669
  async function loadLearner(projectPath) {
2299
- const modelPath = join4(projectPath, MODEL_DIR, MODEL_FILE);
3670
+ const modelPath = join6(projectPath, MODEL_DIR, MODEL_FILE);
2300
3671
  try {
2301
3672
  const raw = await readFile5(modelPath, "utf-8");
2302
3673
  const parsed = JSON.parse(raw);
@@ -2306,10 +3677,10 @@ async function loadLearner(projectPath) {
2306
3677
  return createEmptyModel();
2307
3678
  }
2308
3679
/**
 * Persist the learner model under <projectPath>/.cto/learner.json.
 * Stamps model.updatedAt (mutating the caller's model) before writing,
 * creating the directory if needed.
 */
async function saveLearner(projectPath, model) {
  const modelDir = join6(projectPath, MODEL_DIR);
  await mkdir(modelDir, { recursive: true });
  model.updatedAt = /* @__PURE__ */ new Date().toISOString();
  const payload = JSON.stringify(model, null, 2);
  await writeFile2(join6(modelDir, MODEL_FILE), payload);
}
2314
3685
  function recordSelection(model, taskType, selectedFiles, excludedFiles) {
2315
3686
  const now = (/* @__PURE__ */ new Date()).toISOString();
@@ -2426,6 +3797,289 @@ function extractPattern(filePath) {
2426
3797
  return `*${ext}`;
2427
3798
  }
2428
3799
 
3800
+ // src/interact/router.ts
3801
// Keyword lexicon per task category; declaration order breaks score ties
// (earlier category wins).
var TASK_KEYWORDS = {
  debug: ["debug", "fix", "bug", "error", "issue", "broken", "crash", "failing", "wrong"],
  review: ["review", "check", "assess", "evaluate", "audit", "inspect", "critique"],
  refactor: ["refactor", "restructure", "reorganize", "clean up", "simplify", "extract", "move"],
  test: ["test", "spec", "coverage", "unit test", "integration test", "e2e"],
  docs: ["document", "docs", "readme", "jsdoc", "comment", "explain"],
  feature: ["add", "implement", "create", "build", "new", "feature", "endpoint"],
  architecture: ["architecture", "design", "system", "structure", "migrate", "pattern"],
  "simple-edit": ["rename", "typo", "update", "change", "modify", "tweak", "adjust"]
};
/**
 * Classify a free-text task description into a coarse task type by counting
 * case-insensitive keyword substring hits per category. The category with
 * the strictly highest hit count wins; no hits (or only ties with zero)
 * falls back to "simple-edit".
 *
 * @param {string} taskDescription
 * @returns {string} one of the TASK_KEYWORDS keys
 */
function classifyTask(taskDescription) {
  const haystack = taskDescription.toLowerCase();
  let winner = "simple-edit";
  let winnerHits = 0;
  for (const [type, keywords] of Object.entries(TASK_KEYWORDS)) {
    const hits = keywords.filter((kw) => haystack.includes(kw)).length;
    if (hits > winnerHits) {
      winnerHits = hits;
      winner = type;
    }
  }
  return winner;
}
3827
+
3828
+ // src/engine/context-pipeline.ts
3829
/**
 * End-to-end context selection pipeline: classify the task, read file
 * contents, build/reuse the BM25 index, retrieve + path-boost + rerank
 * candidates, apply learner boosts, and hand everything to selectContext.
 *
 * @param {{projectPath: string, task: string, analysis: object, budget?: number, siblingRepos?: object[]}} input
 * @returns {Promise<{selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats}>}
 */
async function runContextPipeline(input) {
  const { projectPath, task, analysis, budget = 5e4 } = input;
  const taskType = classifyTask(task);
  // Read every analyzed file once; unreadable files get "" so downstream
  // scoring degrades gracefully instead of throwing.
  const fileContentMap = /* @__PURE__ */ new Map();
  // NOTE(review): fileContents is populated but never read below — candidate
  // for removal; confirm no external consumer before deleting.
  const fileContents = [];
  for (const file of analysis.files) {
    try {
      const content = readFileSync6(file.path, "utf-8");
      fileContentMap.set(file.relativePath, content);
      fileContents.push({ relativePath: file.relativePath, content });
    } catch {
      fileContents.push({ relativePath: file.relativePath, content: "" });
    }
  }
  const indexFiles = analysis.files.map((f) => ({
    relativePath: f.relativePath,
    absolutePath: f.path,
    content: fileContentMap.get(f.relativePath)
  }));
  // Cached BM25 index build, then retrieval (top 50) and path-term boosting.
  const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);
  const semanticMatches = query(index, task, 50);
  const boostedMatches = boostByPath(
    semanticMatches,
    analysis.files.map((f) => f.relativePath),
    task
  );
  // relativePath -> imports, consumed by the reranker's dependency signal.
  const depMap = /* @__PURE__ */ new Map();
  for (const file of analysis.files) {
    depMap.set(file.relativePath, file.imports);
  }
  const rerankResult = rerank({
    task,
    candidates: boostedMatches,
    index,
    fileContents: fileContentMap,
    dependencies: depMap,
    allFilePaths: analysis.files.map((f) => f.relativePath)
  });
  const rerankedMatches = rerankResult.files.map((rf) => ({
    filePath: rf.filePath,
    score: rf.bm25Score,
    // Keep original BM25 score for composite
    matchedTerms: boostedMatches.find((m) => m.filePath === rf.filePath)?.matchedTerms ?? []
  }));
  // Per-file boosts learned from past accept/reject decisions for this task type.
  const learner = await loadLearner(projectPath);
  const learnerBoosts = getLearnerBoosts(
    learner,
    taskType,
    analysis.files.map((f) => f.relativePath)
  );
  const semanticScores = rerankedMatches.map((m) => ({ filePath: m.filePath, score: m.score }));
  const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
  const selection = await selectContext({
    task,
    analysis,
    budget,
    semanticScores,
    learnerBoosts: learnerBoostInputs
  });
  // Lookup maps returned to callers for per-file score/boost introspection.
  const semanticMap = new Map(rerankedMatches.map((m) => [m.filePath, m]));
  const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));
  let multiRepo;
  if (input.siblingRepos && input.siblingRepos.length > 0) {
    // Lazy-load the multi-repo module (bundler __esm init) only when needed.
    const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
    multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
  }
  return { selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats };
}
3897
+
3898
+ // src/engine/index.ts
3899
+ init_tfidf();
3900
+
3901
+ // src/engine/ab-testing.ts
3902
+ import { createHash as createHash4 } from "crypto";
3903
+ import { readFileSync as readFileSync7, writeFileSync as writeFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync3 } from "fs";
3904
+ import { join as join8 } from "path";
3905
+ var EXPERIMENTS_FILE = "experiments.json";
3906
/**
 * Load the persisted A/B experiment list from <projectPath>/.cto/experiments.json.
 * Any missing file, read error, or malformed JSON yields an empty list.
 * @returns {object[]}
 */
function loadExperiments(projectPath) {
  const filePath = join8(projectPath, ".cto", EXPERIMENTS_FILE);
  try {
    return existsSync6(filePath) ? JSON.parse(readFileSync7(filePath, "utf-8")) : [];
  } catch {
    return [];
  }
}
3915
/**
 * Persist the experiment list to <projectPath>/.cto/experiments.json,
 * creating the state directory on first use. Pretty-printed for diffing.
 */
function saveExperiments(projectPath, experiments) {
  const stateDir = join8(projectPath, ".cto");
  if (!existsSync6(stateDir)) {
    mkdirSync3(stateDir, { recursive: true });
  }
  const target = join8(stateDir, EXPERIMENTS_FILE);
  writeFileSync3(target, JSON.stringify(experiments, null, 2));
}
3920
/**
 * Build a new A/B experiment record in the "running" state with zeroed
 * metrics on both arms.
 *
 * @param {string} id - stable identifier (also salts group assignment)
 * @param {string} name
 * @param {string} description
 * @param {object} controlParams - parameter set for the control arm (stored by reference)
 * @param {object} variantParams - parameter set for the variant arm (stored by reference)
 * @param {{trafficSplit?: number, minObservations?: number, significanceThreshold?: number}} [options]
 * @returns {object} experiment record
 */
function createExperiment(id, name, description, controlParams, variantParams, options = {}) {
  const startedAt = /* @__PURE__ */ new Date().toISOString();
  return {
    id,
    name,
    description,
    status: "running",
    // ?? (not destructuring defaults) so an explicit null also falls back.
    startedAt,
    trafficSplit: options.trafficSplit ?? 0.5,
    minObservations: options.minObservations ?? 30,
    significanceThreshold: options.significanceThreshold ?? 0.05,
    control: { name: "control", params: controlParams, metrics: emptyMetrics() },
    variant: { name: "variant", params: variantParams, metrics: emptyMetrics() }
  };
}
/** Zeroed metrics accumulator for one experiment arm. */
function emptyMetrics() {
  const zeroed = {
    total: 0,
    successes: 0,
    acceptRate: 0,
    avgTimeToAccept: 0,
    compilableRate: 0,
    // Internal running sums backing the derived averages above.
    timeSum: 0,
    compilableCount: 0
  };
  return zeroed;
}
3953
/**
 * Deterministically bucket a task into control or variant by hashing
 * "<experimentId>:<task>" (SHA-256) into [0, 1] and comparing against the
 * traffic split. The same task always lands in the same group. Returns null
 * when the experiment is not running.
 *
 * @returns {{group: "control"|"variant", params: object, experimentId: string}|null}
 */
function assignGroup(experiment, task) {
  if (experiment.status !== "running") return null;
  const digest = createHash4("sha256").update(`${experiment.id}:${task}`).digest();
  // First 4 big-endian bytes scaled by 2^32 - 1 -> uniform fraction in [0, 1].
  const fraction = digest.readUInt32BE(0) / 4294967295;
  const bucket = fraction < experiment.trafficSplit ? "control" : "variant";
  return {
    group: bucket,
    params: bucket === "control" ? experiment.control.params : experiment.variant.params,
    experimentId: experiment.id
  };
}
3964
/**
 * Fold one observed outcome into the metrics of the given arm, then
 * auto-conclude the experiment once both arms reach minObservations and the
 * two-proportion test clears the significance threshold (early stopping).
 * Mutates the selected arm's metrics in place; returns either the same
 * (still-running) experiment or a new concluded copy.
 *
 * @param {object} experiment
 * @param {"control"|"variant"} group
 * @param {{accepted: boolean, timeToAcceptMs?: number, compilable?: boolean}} outcome
 */
function recordOutcome(experiment, group, outcome) {
  if (experiment.status !== "running") return experiment;
  const metrics = group === "control" ? experiment.control.metrics : experiment.variant.metrics;
  metrics.total++;
  if (outcome.accepted) metrics.successes++;
  metrics.acceptRate = metrics.total > 0 ? metrics.successes / metrics.total : 0;
  if (outcome.timeToAcceptMs !== void 0) {
    // NOTE(review): divides by metrics.total (all observations), not by the
    // count of outcomes that actually reported a time — if timeToAcceptMs is
    // only sometimes provided this underestimates the average; confirm intent.
    metrics.timeSum += outcome.timeToAcceptMs;
    metrics.avgTimeToAccept = metrics.timeSum / metrics.total;
  }
  if (outcome.compilable !== void 0) {
    if (outcome.compilable) metrics.compilableCount++;
    metrics.compilableRate = metrics.total > 0 ? metrics.compilableCount / metrics.total : 0;
  }
  if (experiment.control.metrics.total >= experiment.minObservations && experiment.variant.metrics.total >= experiment.minObservations) {
    const sig = testSignificance(experiment);
    if (sig.pValue < experiment.significanceThreshold) {
      return concludeExperiment(experiment, sig);
    }
  }
  return experiment;
}
3986
/**
 * Two-proportion z-test comparing variant vs. control accept rates.
 * Returns the two-tailed p-value, z-score, effect size (variant - control),
 * a 95% confidence interval for the effect, and whether the p-value clears
 * the experiment's significance threshold. With no data on either arm the
 * null result (p=1, zero effect) is reported.
 */
function testSignificance(experiment) {
  const control = experiment.control.metrics;
  const variant = experiment.variant.metrics;
  const n1 = control.total;
  const n2 = variant.total;
  const p1 = control.acceptRate;
  const p2 = variant.acceptRate;
  if (n1 === 0 || n2 === 0) {
    return {
      pValue: 1,
      zScore: 0,
      effectSize: 0,
      confidenceInterval: [0, 0],
      significant: false
    };
  }
  // Pooled proportion gives the standard error under H0 (p1 == p2).
  const pooled = (control.successes + variant.successes) / (n1 + n2);
  const se = Math.sqrt(pooled * (1 - pooled) * (1 / n1 + 1 / n2));
  const zScore = se > 0 ? (p2 - p1) / se : 0;
  const pValue = 2 * (1 - normalCDF(Math.abs(zScore)));
  const effectSize = p2 - p1;
  // Unpooled standard error for the CI on the difference of proportions.
  const seDiff = Math.sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2);
  const confidenceInterval = [effectSize - 1.96 * seDiff, effectSize + 1.96 * seDiff];
  return {
    pValue,
    zScore,
    effectSize,
    confidenceInterval,
    significant: pValue < experiment.significanceThreshold
  };
}
4017
/**
 * Standard normal CDF, Phi(x) = 0.5 * (1 + erf(x / sqrt(2))), using the
 * Abramowitz & Stegun 7.1.26 polynomial approximation of erf (|error| <= 1.5e-7).
 *
 * Bug fixed: the original fed the raw |x| into the erf polynomial's
 * t = 1/(1 + p*|x|) while using exp(-x^2/2) — mixing the erf argument with
 * the Phi exponent. Both must use z = |x| / sqrt(2); the hybrid was off by
 * ~0.006 at x = 1.96, inflating reported significance (p ~ 0.038 where the
 * true two-tailed p is 0.05).
 *
 * @param {number} x - standard normal quantile
 * @returns {number} P(Z <= x) in [0, 1]
 */
function normalCDF(x) {
  // Beyond +/-8 sigma the CDF saturates within double precision.
  if (x < -8) return 0;
  if (x > 8) return 1;
  const a1 = 0.254829592;
  const a2 = -0.284496736;
  const a3 = 1.421413741;
  const a4 = -1.453152027;
  const a5 = 1.061405429;
  const p = 0.3275911;
  const sign = x < 0 ? -1 : 1;
  // erf argument: z = |x| / sqrt(2), so exp(-z*z) === exp(-x*x/2).
  const z = Math.abs(x) / Math.SQRT2;
  const t = 1 / (1 + p * z);
  const poly = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
  const erf = 1 - poly * Math.exp(-z * z);
  return 0.5 * (1 + sign * erf);
}
4032
/**
 * Mark an experiment as concluded, choosing the winner by the sign of the
 * effect size and composing a human-readable summary. Returns a new object
 * (shallow copy); the input experiment is not mutated.
 *
 * @param {object} experiment - running experiment
 * @param {{pValue: number, effectSize: number, confidenceInterval: number[]}} sig
 */
function concludeExperiment(experiment, sig) {
  let winner;
  if (sig.effectSize > 0) winner = "variant";
  else if (sig.effectSize < 0) winner = "control";
  else winner = "no_difference";
  let winnerName;
  if (winner === "control") winnerName = experiment.control.name;
  else if (winner === "variant") winnerName = experiment.variant.name;
  else winnerName = "neither";
  const [lo, hi] = sig.confidenceInterval;
  const summary = `Experiment "${experiment.name}" concluded: ${winnerName} wins with ${(Math.abs(sig.effectSize) * 100).toFixed(1)}% improvement (p=${sig.pValue.toFixed(4)}, CI=[${lo.toFixed(3)}, ${hi.toFixed(3)}])`;
  return {
    ...experiment,
    status: "concluded",
    concludedAt: /* @__PURE__ */ new Date().toISOString(),
    conclusion: {
      winner,
      pValue: sig.pValue,
      effectSize: sig.effectSize,
      confidenceInterval: sig.confidenceInterval,
      summary
    }
  };
}
4049
/** First experiment still in the "running" state, or null when none is. */
function getActiveExperiment(experiments) {
  for (const experiment of experiments) {
    if (experiment.status === "running") return experiment;
  }
  return null;
}
4052
/** All experiments that have reached the "concluded" state, in input order. */
function getConcludedExperiments(experiments) {
  const concluded = [];
  for (const experiment of experiments) {
    if (experiment.status === "concluded") concluded.push(experiment);
  }
  return concluded;
}
4055
/**
 * Render a multi-line, human-readable status report for one experiment:
 * header, per-arm observation counts and accept rates, live significance
 * numbers while running, and the conclusion summary once concluded.
 * NOTE(review): the leading single spaces in the label strings may have been
 * wider (indented) in the original source — the diff rendering this was
 * recovered from can collapse whitespace; confirm against upstream.
 *
 * @param {object} experiment
 * @returns {string} newline-joined report
 */
function renderExperimentSummary(experiment) {
  const lines = [];
  lines.push(`Experiment: ${experiment.name} (${experiment.status})`);
  lines.push(` ${experiment.description}`);
  lines.push("");
  lines.push(` Control (${experiment.control.name}):`);
  lines.push(` Observations: ${experiment.control.metrics.total}`);
  lines.push(` Accept rate: ${(experiment.control.metrics.acceptRate * 100).toFixed(1)}%`);
  lines.push(` Variant (${experiment.variant.name}):`);
  lines.push(` Observations: ${experiment.variant.metrics.total}`);
  lines.push(` Accept rate: ${(experiment.variant.metrics.acceptRate * 100).toFixed(1)}%`);
  if (experiment.status === "running") {
    // Live read-out of the current two-proportion test while data accrues.
    const sig = testSignificance(experiment);
    lines.push("");
    lines.push(` Current p-value: ${sig.pValue.toFixed(4)}`);
    lines.push(` Effect size: ${(sig.effectSize * 100).toFixed(1)}%`);
    lines.push(` Significant: ${sig.significant ? "YES" : "not yet"}`);
  }
  if (experiment.conclusion) {
    lines.push("");
    lines.push(` CONCLUSION: ${experiment.conclusion.summary}`);
  }
  return lines.join("\n");
}
4079
+
4080
+ // src/engine/index.ts
4081
+ init_multi_repo();
4082
+
2429
4083
  // src/engine/logger.ts
2430
4084
  var LEVEL_ORDER = { debug: 0, info: 1, warn: 2, error: 3 };
2431
4085
  var currentLevel = process.env.CTO_LOG_LEVEL ?? "warn";
@@ -2505,34 +4159,55 @@ function wrapError(err, code, module, context) {
2505
4159
  export {
2506
4160
  CtoError,
2507
4161
  analyzeProject,
4162
+ assignGroup,
2508
4163
  auditProject,
2509
4164
  bfsBidirectional,
2510
4165
  boostByPath,
2511
4166
  buildAdjacencyList,
2512
4167
  buildIndex,
4168
+ buildIndexCached,
2513
4169
  buildProjectGraph,
2514
4170
  calculateCoverage,
2515
4171
  classifyFileKind,
2516
4172
  countTokensChars4,
2517
4173
  countTokensTiktoken,
4174
+ createExperiment,
2518
4175
  createLogger,
2519
4176
  createProject,
4177
+ detectLanguage,
2520
4178
  detectStack,
4179
+ discoverSiblingRepos,
4180
+ estimateComplexity,
2521
4181
  estimateFileTokens,
2522
4182
  estimateTokens,
2523
4183
  extractPattern,
2524
4184
  freeEncoder,
4185
+ getActiveExperiment,
4186
+ getCacheInfo,
4187
+ getConcludedExperiments,
2525
4188
  getLearnerBoosts,
2526
4189
  getLearnerStats,
2527
4190
  getPruneLevelForRisk,
4191
+ invalidateCache,
2528
4192
  isCtoError,
4193
+ loadExperiments,
2529
4194
  loadLearner,
2530
4195
  optimizeBudget,
4196
+ parseAllPolyglotImports,
4197
+ parseImports,
4198
+ parseSiblingPaths,
2531
4199
  pruneFile,
2532
4200
  pruneFiles,
2533
4201
  query,
4202
+ querySiblingRepos,
4203
+ recordOutcome,
2534
4204
  recordSelection,
4205
+ renderExperimentSummary,
4206
+ renderMultiRepoSummary,
4207
+ rerank,
4208
+ runContextPipeline,
2535
4209
  sanitizeContent,
4210
+ saveExperiments,
2536
4211
  saveLearner,
2537
4212
  scanContentForSecrets,
2538
4213
  scanFileForSecrets,
@@ -2543,6 +4218,7 @@ export {
2543
4218
  setJsonLogging,
2544
4219
  setLogLevel,
2545
4220
  similarity,
4221
+ testSignificance,
2546
4222
  tokenize,
2547
4223
  walkProject,
2548
4224
  wrapError