cto-ai-cli 7.1.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,152 @@ var __export = (target, all) => {
8
8
  __defProp(target, name, { get: all[name], enumerable: true });
9
9
  };
10
10
 
11
+ // src/engine/synonyms.ts
12
+ function buildBidirectionalIndex() {
13
+ for (const [canonical, synonyms] of Object.entries(SYNONYM_MAP)) {
14
+ if (!BIDIRECTIONAL_INDEX.has(canonical)) {
15
+ BIDIRECTIONAL_INDEX.set(canonical, /* @__PURE__ */ new Set());
16
+ }
17
+ const canonicalSet = BIDIRECTIONAL_INDEX.get(canonical);
18
+ for (const syn of synonyms) {
19
+ canonicalSet.add(syn);
20
+ }
21
+ canonicalSet.add(canonical);
22
+ for (const syn of synonyms) {
23
+ if (!BIDIRECTIONAL_INDEX.has(syn)) {
24
+ BIDIRECTIONAL_INDEX.set(syn, /* @__PURE__ */ new Set());
25
+ }
26
+ const synSet = BIDIRECTIONAL_INDEX.get(syn);
27
+ synSet.add(canonical);
28
+ for (const otherSyn of synonyms) {
29
+ if (otherSyn !== syn) synSet.add(otherSyn);
30
+ }
31
+ }
32
+ }
33
+ }
34
+ function expandTerm(term) {
35
+ const normalized = term.toLowerCase().trim();
36
+ const related = BIDIRECTIONAL_INDEX.get(normalized);
37
+ if (!related) return [normalized];
38
+ return [normalized, ...Array.from(related)];
39
+ }
40
+ function expandQuery(query2) {
41
+ const terms = query2.toLowerCase().match(/[a-z][a-z0-9]*/g) ?? [];
42
+ const expanded = /* @__PURE__ */ new Set();
43
+ for (const term of terms) {
44
+ const related = expandTerm(term);
45
+ for (const r of related) expanded.add(r);
46
+ }
47
+ return Array.from(expanded);
48
+ }
49
+ function getExpansionDetails(query2) {
50
+ const terms = query2.toLowerCase().match(/[a-z][a-z0-9]*/g) ?? [];
51
+ const seen = /* @__PURE__ */ new Set();
52
+ const results = [];
53
+ for (const term of terms) {
54
+ if (seen.has(term)) continue;
55
+ seen.add(term);
56
+ const expanded = expandTerm(term);
57
+ if (expanded.length > 1) {
58
+ results.push({ original: term, expanded: expanded.filter((e) => e !== term) });
59
+ }
60
+ }
61
+ return results;
62
+ }
63
+ function getSynonymStats() {
64
+ const canonicalTerms = Object.keys(SYNONYM_MAP).length;
65
+ let totalSynonyms = 0;
66
+ for (const syns of Object.values(SYNONYM_MAP)) {
67
+ totalSynonyms += syns.length;
68
+ }
69
+ return {
70
+ canonicalTerms,
71
+ totalSynonyms,
72
+ avgSynonymsPerTerm: totalSynonyms / canonicalTerms,
73
+ bidirectionalEntries: BIDIRECTIONAL_INDEX.size
74
+ };
75
+ }
76
+ var SYNONYM_MAP, BIDIRECTIONAL_INDEX;
77
+ var init_synonyms = __esm({
78
+ "src/engine/synonyms.ts"() {
79
+ "use strict";
80
+ SYNONYM_MAP = {
81
+ // Authentication & Authorization
82
+ "auth": ["authentication", "authorize", "login", "signin", "session", "jwt", "token", "oauth", "sso", "identity", "credential"],
83
+ "permission": ["authorization", "access", "role", "acl", "rbac", "policy", "grant"],
84
+ // Database & Storage
85
+ "database": ["db", "repository", "store", "storage", "persistence", "orm", "sql", "query", "prisma", "sequelize", "typeorm", "mongo", "postgres", "mysql"],
86
+ "cache": ["redis", "memcached", "ttl", "invalidation", "memoize", "store"],
87
+ "migration": ["schema", "upgrade", "version", "evolution"],
88
+ // API & Networking
89
+ "api": ["endpoint", "route", "handler", "controller", "rest", "graphql", "rpc", "service"],
90
+ "request": ["req", "http", "call", "fetch", "axios"],
91
+ "response": ["res", "reply", "result", "output"],
92
+ "middleware": ["interceptor", "filter", "plugin", "hook"],
93
+ "gateway": ["proxy", "router", "load-balancer", "reverse-proxy"],
94
+ // Frontend & UI
95
+ "component": ["widget", "element", "view", "template"],
96
+ "state": ["store", "redux", "zustand", "context", "model"],
97
+ "render": ["paint", "draw", "display", "show"],
98
+ "style": ["css", "theme", "design", "layout", "tailwind"],
99
+ // Testing & Quality
100
+ "test": ["spec", "suite", "case", "assertion", "mock", "stub", "fixture", "vitest", "jest", "mocha"],
101
+ "validate": ["verify", "check", "assert", "ensure", "sanitize"],
102
+ "error": ["exception", "failure", "bug", "issue", "crash"],
103
+ // Performance & Optimization
104
+ "optimize": ["performance", "speed", "fast", "efficient", "improve", "enhance"],
105
+ "latency": ["delay", "lag", "slowness", "response-time"],
106
+ "throughput": ["capacity", "volume", "rate", "bandwidth"],
107
+ // Data & Collections
108
+ "dataset": ["data", "record", "row", "entry", "item", "collection"],
109
+ "empty": ["null", "blank", "missing", "absent", "none", "zero"],
110
+ // Data Processing
111
+ "parse": ["decode", "deserialize", "extract", "read"],
112
+ "serialize": ["encode", "stringify", "format", "marshal"],
113
+ "transform": ["map", "convert", "translate", "process"],
114
+ "filter": ["select", "where", "match", "find"],
115
+ // Configuration & Setup
116
+ "config": ["configuration", "setting", "option", "preference", "env", "environment"],
117
+ "init": ["initialize", "setup", "bootstrap", "start", "create"],
118
+ "deploy": ["deployment", "release", "publish", "ship", "launch"],
119
+ // Logging & Monitoring
120
+ "log": ["logger", "logging", "trace", "debug", "info", "warn", "error"],
121
+ "metric": ["measurement", "stat", "telemetry", "analytics", "tracking"],
122
+ "monitor": ["observe", "watch", "track", "alert"],
123
+ // Security
124
+ "secret": ["credential", "key", "password", "token", "apikey", "sensitive"],
125
+ "encrypt": ["cipher", "encode", "hash", "crypto"],
126
+ "sanitize": ["escape", "clean", "validate", "filter"],
127
+ // File System & I/O
128
+ "file": ["document", "asset", "resource", "path"],
129
+ "read": ["load", "fetch", "get", "retrieve"],
130
+ "write": ["save", "persist", "store", "put"],
131
+ "delete": ["remove", "unlink", "destroy", "drop"],
132
+ // Async & Concurrency
133
+ "async": ["asynchronous", "promise", "await", "concurrent", "parallel"],
134
+ "queue": ["buffer", "backlog", "pending", "deferred"],
135
+ "lock": ["mutex", "semaphore", "synchronize", "atomic"],
136
+ // Architecture & Patterns
137
+ "service": ["microservice", "api", "backend", "server", "daemon"],
138
+ "client": ["consumer", "frontend", "user", "caller"],
139
+ "event": ["message", "signal", "notification", "trigger"],
140
+ "stream": ["flow", "pipe", "channel", "observable"],
141
+ // Business Logic
142
+ "user": ["account", "profile", "member", "customer"],
143
+ "order": ["purchase", "transaction", "checkout", "cart"],
144
+ "payment": ["billing", "invoice", "charge", "stripe", "paypal"],
145
+ "notification": ["alert", "message", "email", "push", "sms"],
146
+ // DevOps & Infrastructure
147
+ "docker": ["container", "image", "dockerfile", "compose"],
148
+ "kubernetes": ["k8s", "cluster", "pod", "deployment", "helm"],
149
+ "ci": ["continuous-integration", "pipeline", "build", "github-actions", "jenkins"],
150
+ "cd": ["continuous-deployment", "release", "deploy", "rollout"]
151
+ };
152
+ BIDIRECTIONAL_INDEX = /* @__PURE__ */ new Map();
153
+ buildBidirectionalIndex();
154
+ }
155
+ });
156
+
11
157
  // src/engine/tfidf.ts
12
158
  function buildIndex(files) {
13
159
  const documents = /* @__PURE__ */ new Map();
@@ -31,15 +177,29 @@ function buildIndex(files) {
31
177
  let totalLength = 0;
32
178
  for (const doc of documents.values()) totalLength += doc.length;
33
179
  const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
34
- return { documents, idf, avgDocLength, totalDocs };
180
+ return { documents, idf, docFreq, avgDocLength, totalDocs };
35
181
  }
36
- function query(index, taskDescription, maxResults = 50) {
182
+ function query(index, taskDescription, maxResults = 50, expandSynonyms = true) {
37
183
  const queryTerms = tokenize(taskDescription);
38
184
  if (queryTerms.length === 0) return [];
39
185
  const querySet = /* @__PURE__ */ new Map();
40
186
  for (const term of queryTerms) {
41
187
  querySet.set(term, (querySet.get(term) ?? 0) + 1);
42
188
  }
189
+ if (expandSynonyms) {
190
+ const expandedSet = /* @__PURE__ */ new Map();
191
+ for (const [term, count] of querySet) {
192
+ const synonyms = expandTerm(term);
193
+ for (const syn of synonyms) {
194
+ const weight = syn === term ? count : count * 0.7;
195
+ expandedSet.set(syn, (expandedSet.get(syn) ?? 0) + weight);
196
+ }
197
+ }
198
+ querySet.clear();
199
+ for (const [term, weight] of expandedSet) {
200
+ querySet.set(term, weight);
201
+ }
202
+ }
43
203
  const results = [];
44
204
  const k1 = 1.5;
45
205
  const b = 0.75;
@@ -51,8 +211,11 @@ function query(index, taskDescription, maxResults = 50) {
51
211
  if (tf === 0) continue;
52
212
  const termIdf = index.idf.get(qTerm) ?? 0;
53
213
  if (termIdf <= 0) continue;
214
+ const df = index.docFreq.get(qTerm) ?? 0;
215
+ const dfRatio = index.totalDocs > 0 ? df / index.totalDocs : 0;
216
+ const domainDamp = dfRatio > 0.5 ? (1 - dfRatio) * (1 - dfRatio) : 1;
54
217
  const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
55
- score += termIdf * tfNorm * qCount;
218
+ score += termIdf * tfNorm * qCount * domainDamp;
56
219
  matchedTerms.push(qTerm);
57
220
  }
58
221
  if (score > 0) {
@@ -134,32 +297,224 @@ function boostByPath(matches, allFiles, taskDescription) {
134
297
  for (const m of matches) {
135
298
  boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
136
299
  }
300
+ const pathTermDocFreq = /* @__PURE__ */ new Map();
301
+ const allPathTokenSets = /* @__PURE__ */ new Map();
302
+ for (const filePath of allFiles) {
303
+ const tokens = new Set(tokenize(filePath.replace(/[/\\.]/g, " ")));
304
+ allPathTokenSets.set(filePath, tokens);
305
+ for (const t of tokens) {
306
+ pathTermDocFreq.set(t, (pathTermDocFreq.get(t) ?? 0) + 1);
307
+ }
308
+ }
309
+ const N = allFiles.length;
310
+ function pathIdf(term) {
311
+ const df = pathTermDocFreq.get(term) ?? 0;
312
+ if (df === 0) return 0;
313
+ return Math.log((N + 1) / (df + 1));
314
+ }
137
315
  for (const filePath of allFiles) {
138
- const pathTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
139
- const pathMatches = pathTerms.filter((t) => queryTerms.has(t));
140
- if (pathMatches.length > 0) {
316
+ const parts = filePath.replace(/\\/g, "/").split("/");
317
+ const fileName = parts.pop() ?? "";
318
+ const dirSegments = parts;
319
+ const dirTerms = tokenize(dirSegments.join(" ").replace(/[/\\.]/g, " "));
320
+ const fileTerms = tokenize(fileName.replace(/[.\-_]/g, " "));
321
+ const dirMatches = dirTerms.filter((t) => queryTerms.has(t));
322
+ const fileMatches = fileTerms.filter((t) => queryTerms.has(t));
323
+ const allPathMatches = [.../* @__PURE__ */ new Set([...dirMatches, ...fileMatches])];
324
+ if (allPathMatches.length > 0) {
325
+ const uniqueDirMatches = [...new Set(dirMatches)];
326
+ const uniqueFileMatches = [...new Set(fileMatches)].filter((t) => !uniqueDirMatches.includes(t));
327
+ const maxIdf = Math.log(N + 1);
328
+ let pathBoost = 0;
329
+ for (const t of uniqueDirMatches) {
330
+ pathBoost += 0.4 * (pathIdf(t) / maxIdf);
331
+ }
332
+ for (const t of uniqueFileMatches) {
333
+ pathBoost += 0.25 * (pathIdf(t) / maxIdf);
334
+ }
141
335
  const existing = boosted.get(filePath);
142
- const pathBoost = pathMatches.length * 0.3;
143
336
  if (existing) {
144
- existing.score = Math.min(1, existing.score + pathBoost);
145
- for (const t of pathMatches) {
337
+ existing.score = existing.score + pathBoost;
338
+ for (const t of allPathMatches) {
146
339
  if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
147
340
  }
148
341
  } else {
149
342
  boosted.set(filePath, {
150
343
  filePath,
151
- score: Math.min(1, pathBoost),
152
- matchedTerms: pathMatches
344
+ score: pathBoost,
345
+ matchedTerms: allPathMatches
346
+ });
347
+ }
348
+ }
349
+ }
350
+ return [...boosted.values()].sort((a, b) => b.score - a.score);
351
+ }
352
+ function boostByLayer(matches, allFiles, taskDescription) {
353
+ const queryTerms = tokenize(taskDescription);
354
+ const targetDirTerms = /* @__PURE__ */ new Set();
355
+ for (const term of queryTerms) {
356
+ const layers = LAYER_MAP[term];
357
+ if (layers) {
358
+ for (const l of layers) targetDirTerms.add(l);
359
+ }
360
+ }
361
+ if (targetDirTerms.size === 0) return matches;
362
+ const boosted = /* @__PURE__ */ new Map();
363
+ for (const m of matches) {
364
+ boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
365
+ }
366
+ for (const filePath of allFiles) {
367
+ const dirTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
368
+ const layerHits = dirTerms.filter((t) => targetDirTerms.has(t));
369
+ if (layerHits.length > 0) {
370
+ const layerBoost = Math.min(0.5, layerHits.length * 0.2);
371
+ const existing = boosted.get(filePath);
372
+ if (existing) {
373
+ existing.score = existing.score + layerBoost;
374
+ } else {
375
+ boosted.set(filePath, {
376
+ filePath,
377
+ score: layerBoost,
378
+ matchedTerms: [`[layer:${layerHits[0]}]`]
153
379
  });
154
380
  }
155
381
  }
156
382
  }
157
383
  return [...boosted.values()].sort((a, b) => b.score - a.score);
158
384
  }
159
- var STOP_WORDS, TERM_FAMILIES;
385
+ function boostByImports(matches, dependencies, topK = 10, boostFactor = 0.4) {
386
+ if (matches.length === 0 || dependencies.size === 0) return matches;
387
+ const boosted = /* @__PURE__ */ new Map();
388
+ for (const m of matches) {
389
+ boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
390
+ }
391
+ const reverseDeps = /* @__PURE__ */ new Map();
392
+ for (const [from, tos] of dependencies) {
393
+ for (const to of tos) {
394
+ const existing = reverseDeps.get(to) ?? [];
395
+ existing.push(from);
396
+ reverseDeps.set(to, existing);
397
+ }
398
+ }
399
+ const topMatches = matches.slice(0, topK);
400
+ const maxDepsPerParent = 5;
401
+ for (const parent of topMatches) {
402
+ const boost = parent.score * boostFactor;
403
+ const imports = dependencies.get(parent.filePath) ?? [];
404
+ const sortedImports = [...imports].sort((a, b) => {
405
+ const sa = boosted.get(a)?.score ?? 0;
406
+ const sb = boosted.get(b)?.score ?? 0;
407
+ return sb - sa;
408
+ });
409
+ for (const dep of sortedImports.slice(0, maxDepsPerParent)) {
410
+ applyImportBoost(boosted, dep, boost, parent.filePath, "imported-by");
411
+ }
412
+ const importers = reverseDeps.get(parent.filePath) ?? [];
413
+ const sortedImporters = [...importers].sort((a, b) => {
414
+ const sa = boosted.get(a)?.score ?? 0;
415
+ const sb = boosted.get(b)?.score ?? 0;
416
+ return sb - sa;
417
+ });
418
+ for (const imp of sortedImporters.slice(0, maxDepsPerParent)) {
419
+ applyImportBoost(boosted, imp, boost * 0.7, parent.filePath, "imports");
420
+ }
421
+ }
422
+ return [...boosted.values()].sort((a, b) => b.score - a.score);
423
+ }
424
+ function applyImportBoost(boosted, filePath, boost, parentPath, relation) {
425
+ const existing = boosted.get(filePath);
426
+ if (existing) {
427
+ existing.score = existing.score + boost;
428
+ } else {
429
+ boosted.set(filePath, {
430
+ filePath,
431
+ score: boost,
432
+ matchedTerms: [`[${relation}:${parentPath.split("/").pop()}]`]
433
+ });
434
+ }
435
+ }
436
+ function reciprocalRankFusion(bm25Matches, allFiles, taskDescription, dependencies, k = 60) {
437
+ const bm25Ranked = [...bm25Matches].sort((a, b) => b.score - a.score);
438
+ const bm25RankMap = /* @__PURE__ */ new Map();
439
+ bm25Ranked.forEach((m, i) => bm25RankMap.set(m.filePath, i + 1));
440
+ const queryTerms = new Set(tokenize(taskDescription));
441
+ const pathScores = [];
442
+ for (const filePath of allFiles) {
443
+ const parts = filePath.replace(/\\/g, "/").split("/");
444
+ const fileName = parts.pop() ?? "";
445
+ const dirTerms = tokenize(parts.join(" ").replace(/[/\\.]/g, " "));
446
+ const fileTerms = tokenize(fileName.replace(/[.\-_]/g, " "));
447
+ const dirHits = [...new Set(dirTerms.filter((t) => queryTerms.has(t)))].length;
448
+ const fileHits = [...new Set(fileTerms.filter((t) => queryTerms.has(t)))].length;
449
+ const score = dirHits * 2 + fileHits;
450
+ if (score > 0) pathScores.push({ filePath, score });
451
+ }
452
+ pathScores.sort((a, b) => b.score - a.score);
453
+ const pathRankMap = /* @__PURE__ */ new Map();
454
+ pathScores.forEach((p, i) => pathRankMap.set(p.filePath, i + 1));
455
+ const reverseDeps = /* @__PURE__ */ new Map();
456
+ for (const [from, tos] of dependencies) {
457
+ for (const to of tos) {
458
+ const existing = reverseDeps.get(to) ?? [];
459
+ existing.push(from);
460
+ reverseDeps.set(to, existing);
461
+ }
462
+ }
463
+ const top10 = new Set(bm25Ranked.slice(0, 10).map((m) => m.filePath));
464
+ const importScores = [];
465
+ for (const filePath of allFiles) {
466
+ let connections = 0;
467
+ for (const dep of dependencies.get(filePath) ?? []) {
468
+ if (top10.has(dep)) connections++;
469
+ }
470
+ for (const imp of reverseDeps.get(filePath) ?? []) {
471
+ if (top10.has(imp)) connections++;
472
+ }
473
+ if (top10.has(filePath)) connections += 2;
474
+ if (connections > 0) importScores.push({ filePath, score: connections });
475
+ }
476
+ importScores.sort((a, b) => b.score - a.score);
477
+ const importRankMap = /* @__PURE__ */ new Map();
478
+ importScores.forEach((p, i) => importRankMap.set(p.filePath, i + 1));
479
+ const classNameScores = [];
480
+ for (const filePath of allFiles) {
481
+ const fileName = filePath.split("/").pop() ?? "";
482
+ const baseName = fileName.replace(/\.[^.]+$/, "");
483
+ const classTokens = tokenize(baseName.replace(/([a-z])([A-Z])/g, "$1 $2"));
484
+ const hits = classTokens.filter((t) => queryTerms.has(t));
485
+ const coverage = queryTerms.size > 0 ? hits.length / queryTerms.size : 0;
486
+ if (hits.length > 0) {
487
+ classNameScores.push({ filePath, score: hits.length + coverage });
488
+ }
489
+ }
490
+ classNameScores.sort((a, b) => b.score - a.score);
491
+ const classRankMap = /* @__PURE__ */ new Map();
492
+ classNameScores.forEach((p, i) => classRankMap.set(p.filePath, i + 1));
493
+ const WEIGHTS2 = { bm25: 0.4, path: 0.25, imports: 0.2, className: 0.15 };
494
+ const allCandidates = /* @__PURE__ */ new Set();
495
+ for (const m of bm25Ranked) allCandidates.add(m.filePath);
496
+ for (const p of pathScores) allCandidates.add(p.filePath);
497
+ for (const p of importScores) allCandidates.add(p.filePath);
498
+ for (const p of classNameScores) allCandidates.add(p.filePath);
499
+ const defaultRank = allFiles.length + 1;
500
+ const fused = [];
501
+ for (const filePath of allCandidates) {
502
+ const bm25Rank = bm25RankMap.get(filePath) ?? defaultRank;
503
+ const pathRank = pathRankMap.get(filePath) ?? defaultRank;
504
+ const importRank = importRankMap.get(filePath) ?? defaultRank;
505
+ const classRank = classRankMap.get(filePath) ?? defaultRank;
506
+ const rrfScore = WEIGHTS2.bm25 / (k + bm25Rank) + WEIGHTS2.path / (k + pathRank) + WEIGHTS2.imports / (k + importRank) + WEIGHTS2.className / (k + classRank);
507
+ const bm25Match = bm25Ranked.find((m) => m.filePath === filePath);
508
+ const matchedTerms = bm25Match ? [...bm25Match.matchedTerms] : [];
509
+ fused.push({ filePath, score: rrfScore, matchedTerms });
510
+ }
511
+ return fused.sort((a, b) => b.score - a.score);
512
+ }
513
+ var STOP_WORDS, TERM_FAMILIES, LAYER_MAP;
160
514
  var init_tfidf = __esm({
161
515
  "src/engine/tfidf.ts"() {
162
516
  "use strict";
517
+ init_synonyms();
163
518
  STOP_WORDS = /* @__PURE__ */ new Set([
164
519
  // Language keywords
165
520
  "import",
@@ -303,6 +658,29 @@ var init_tfidf = __esm({
303
658
  ["encryp", "encrypt"],
304
659
  ["decryp", "encrypt"]
305
660
  ];
661
+ LAYER_MAP = {
662
+ // Query terms → directory segments that should be boosted
663
+ "endpoint": ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
664
+ "api": ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
665
+ "controller": ["endpoint", "controller", "handler", "route", "router"],
666
+ "repositori": ["repositori", "dao", "store", "persist"],
667
+ "databas": ["repositori", "dao", "store", "persist", "migrat"],
668
+ "storag": ["repositori", "dao", "store", "persist"],
669
+ "cach": ["cach", "redis", "memcach", "store"],
670
+ "servic": ["servic", "usecas", "core"],
671
+ "usecas": ["usecas", "servic", "core"],
672
+ "config": ["config", "inject", "setup", "bootstrap"],
673
+ "inject": ["config", "inject", "setup"],
674
+ "depend": ["config", "inject", "setup"],
675
+ "event": ["event", "listen", "handler", "subscrib"],
676
+ "error": ["error", "except", "handler", "fault"],
677
+ "except": ["except", "error", "handler", "fault"],
678
+ "model": ["model", "entiti", "dto", "domain", "schema"],
679
+ "entiti": ["entiti", "model", "dto", "domain"],
680
+ "metric": ["metric", "monitor", "observ", "telemetri"],
681
+ "test": ["test", "spec", "mock", "fixtur"],
682
+ "migrat": ["migrat", "schema", "databas"]
683
+ };
306
684
  }
307
685
  });
308
686
 
@@ -548,7 +926,19 @@ var DEFAULT_CONFIG = {
548
926
  },
549
927
  ignore: {
550
928
  dirs: ["node_modules", "dist", "build", ".git", "coverage", "__pycache__", ".next", "vendor", ".cto"],
551
- patterns: ["*.min.js", "*.map", "*.lock", "*.generated.*"]
929
+ patterns: [
930
+ "*.min.js",
931
+ "*.map",
932
+ "*.lock",
933
+ "*.generated.*",
934
+ "CHANGELOG*",
935
+ "LICENSE*",
936
+ "CONTRIBUTING*",
937
+ "CODE_OF_CONDUCT*",
938
+ "AUTHORS*",
939
+ "CODEOWNERS",
940
+ "SECURITY*"
941
+ ]
552
942
  },
553
943
  maxDepth: 20
554
944
  },
@@ -1760,11 +2150,15 @@ function computeTypeProviderUsage(files, graph) {
1760
2150
 
1761
2151
  // src/engine/analyzer.ts
1762
2152
  function matchesPattern(filename, patterns) {
2153
+ const lower = filename.toLowerCase();
1763
2154
  for (const pattern of patterns) {
1764
2155
  if (pattern.startsWith("*.")) {
1765
2156
  const ext = pattern.slice(1);
1766
2157
  if (filename.endsWith(ext)) return true;
1767
- } else if (filename === pattern) {
2158
+ } else if (pattern.endsWith("*")) {
2159
+ const prefix = pattern.slice(0, -1).toLowerCase();
2160
+ if (lower.startsWith(prefix)) return true;
2161
+ } else if (lower === pattern.toLowerCase()) {
1768
2162
  return true;
1769
2163
  }
1770
2164
  }
@@ -3142,17 +3536,19 @@ async function selectContext(input) {
3142
3536
  const selectedFiles = [];
3143
3537
  let usedTokens = 0;
3144
3538
  const hasSemanticSignal = semanticMap.size > 0;
3539
+ const maxSemanticScore = hasSemanticSignal ? Math.max(...Array.from(semanticMap.values())) : 0;
3540
+ const semanticFloor = maxSemanticScore * 0.1;
3145
3541
  for (const file of candidates) {
3146
3542
  const isTarget = targetSet.has(file.relativePath);
3147
3543
  const isMustInclude = mustInclude.has(file.relativePath);
3148
3544
  if (hasSemanticSignal && !isTarget && !isMustInclude) {
3149
3545
  const semScore = semanticMap.get(file.relativePath) ?? 0;
3150
3546
  const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
3151
- if (semScore === 0 && lrnBoost === 0) {
3547
+ if (semScore < semanticFloor && lrnBoost <= 0) {
3152
3548
  decisions.push({
3153
3549
  file: file.relativePath,
3154
3550
  action: "exclude",
3155
- reason: "Skipped: no semantic relevance to task"
3551
+ reason: `Skipped: semantic score ${semScore.toFixed(3)} below floor ${semanticFloor.toFixed(3)}`
3156
3552
  });
3157
3553
  continue;
3158
3554
  }
@@ -3284,6 +3680,222 @@ function buildReason(file, level, isTarget, isMustInclude) {
3284
3680
  import { readFileSync as readFileSync6 } from "fs";
3285
3681
  init_tfidf();
3286
3682
 
3683
+ // src/engine/ast-tokenizer.ts
3684
+ init_tfidf();
3685
+ var ANNOTATION_LAYER_MAP = {
3686
+ "repository": ["repositori", "dao", "store", "persist", "databas"],
3687
+ "service": ["servic", "usecas", "busi", "logic"],
3688
+ "controller": ["control", "endpoint", "api", "rest", "handler"],
3689
+ "restcontroller": ["control", "endpoint", "api", "rest", "handler"],
3690
+ "component": ["compon", "bean", "inject"],
3691
+ "entity": ["entiti", "model", "domain", "persist"],
3692
+ "configuration": ["config", "setup", "inject", "wire"],
3693
+ "bean": ["config", "inject", "wire", "bean"],
3694
+ "autowired": ["inject", "wire", "depend"],
3695
+ "inject": ["inject", "wire", "depend"],
3696
+ "provides": ["inject", "wire", "depend", "config"],
3697
+ "singleton": ["singleton", "scope", "lifecycl"],
3698
+ "test": ["test", "spec", "assert", "mock"],
3699
+ "override": ["overrid", "inherit", "polymorph"],
3700
+ "transactional": ["transact", "databas", "commit", "rollback"],
3701
+ "cacheable": ["cach", "ttl", "evict", "invalidat"],
3702
+ "async": ["async", "concurr", "thread", "parallel"],
3703
+ "eventlistener": ["event", "listen", "handler", "subscrib"],
3704
+ "scheduled": ["schedul", "cron", "timer", "job"],
3705
+ "slf4j": ["log", "metric", "observ"],
3706
+ "data": ["model", "entiti", "dto", "data"],
3707
+ "getter": ["model", "entiti", "dto", "accessor"],
3708
+ "setter": ["model", "entiti", "dto", "mutator"],
3709
+ "builder": ["build", "pattern", "fluent"],
3710
+ "value": ["model", "entiti", "dto", "immut"]
3711
+ };
3712
+ function extractStructuralTokens(content, filePath) {
3713
+ const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
3714
+ const language = detectLanguage2(ext);
3715
+ switch (language) {
3716
+ case "java":
3717
+ return extractJava(content);
3718
+ case "python":
3719
+ return extractPython(content);
3720
+ case "go":
3721
+ return extractGo(content);
3722
+ case "typescript":
3723
+ return extractTypeScript(content);
3724
+ default:
3725
+ return { classNames: [], methodNames: [], annotations: [], parents: [], packageName: null, language: "unknown" };
3726
+ }
3727
+ }
3728
+ function detectLanguage2(ext) {
3729
+ switch (ext) {
3730
+ case "java":
3731
+ return "java";
3732
+ case "py":
3733
+ return "python";
3734
+ case "go":
3735
+ return "go";
3736
+ case "ts":
3737
+ case "tsx":
3738
+ case "js":
3739
+ case "jsx":
3740
+ return "typescript";
3741
+ default:
3742
+ return "unknown";
3743
+ }
3744
+ }
3745
+ function extractJava(content) {
3746
+ const classNames = [];
3747
+ const methodNames = [];
3748
+ const annotations = [];
3749
+ const parents = [];
3750
+ let packageName = null;
3751
+ const pkgMatch = content.match(/^package\s+([\w.]+)\s*;/m);
3752
+ if (pkgMatch) packageName = pkgMatch[1];
3753
+ const annRegex = /@(\w+)/g;
3754
+ let annMatch;
3755
+ while ((annMatch = annRegex.exec(content)) !== null) {
3756
+ const ann = annMatch[1].toLowerCase();
3757
+ if (ann !== "override" && ann.length > 2) {
3758
+ annotations.push(ann);
3759
+ }
3760
+ }
3761
+ const classRegex = /(?:public|private|protected|abstract|final|static)?\s*(?:class|interface|enum)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
3762
+ let classMatch;
3763
+ while ((classMatch = classRegex.exec(content)) !== null) {
3764
+ classNames.push(classMatch[1]);
3765
+ if (classMatch[2]) parents.push(classMatch[2]);
3766
+ if (classMatch[3]) {
3767
+ for (const impl of classMatch[3].split(",")) {
3768
+ const trimmed = impl.trim();
3769
+ if (trimmed) parents.push(trimmed);
3770
+ }
3771
+ }
3772
+ }
3773
+ const methodRegex = /(?:public|private|protected|static|abstract|final|synchronized|default)\s+(?:<[\w\s,?]+>\s+)?(?:[\w<>\[\]?,\s]+)\s+(\w+)\s*\(/g;
3774
+ let methodMatch;
3775
+ while ((methodMatch = methodRegex.exec(content)) !== null) {
3776
+ const name = methodMatch[1];
3777
+ if (!["equals", "hashCode", "toString", "main", "get", "set"].includes(name)) {
3778
+ methodNames.push(name);
3779
+ }
3780
+ }
3781
+ return { classNames, methodNames, annotations, parents, packageName, language: "java" };
3782
+ }
3783
+ function extractPython(content) {
3784
+ const classNames = [];
3785
+ const methodNames = [];
3786
+ const annotations = [];
3787
+ const parents = [];
3788
+ const classRegex = /^\s*class\s+(\w+)(?:\(([^)]+)\))?/gm;
3789
+ let classMatch;
3790
+ while ((classMatch = classRegex.exec(content)) !== null) {
3791
+ classNames.push(classMatch[1]);
3792
+ if (classMatch[2]) {
3793
+ for (const parent of classMatch[2].split(",")) {
3794
+ const trimmed = parent.trim().split("[")[0];
3795
+ if (trimmed && trimmed !== "object") parents.push(trimmed);
3796
+ }
3797
+ }
3798
+ }
3799
+ const decRegex = /^\s*@(\w+)/gm;
3800
+ let decMatch;
3801
+ while ((decMatch = decRegex.exec(content)) !== null) {
3802
+ annotations.push(decMatch[1].toLowerCase());
3803
+ }
3804
+ const funcRegex = /^\s*(?:async\s+)?def\s+(\w+)/gm;
3805
+ let funcMatch;
3806
+ while ((funcMatch = funcRegex.exec(content)) !== null) {
3807
+ const name = funcMatch[1];
3808
+ if (!name.startsWith("__") || name === "__init__") {
3809
+ methodNames.push(name.replace(/^_+|_+$/g, ""));
3810
+ }
3811
+ }
3812
+ return { classNames, methodNames, annotations, parents, packageName: null, language: "python" };
3813
+ }
3814
+ function extractGo(content) {
3815
+ const classNames = [];
3816
+ const methodNames = [];
3817
+ const parents = [];
3818
+ const pkgMatch = content.match(/^package\s+(\w+)/m);
3819
+ const packageName = pkgMatch ? pkgMatch[1] : null;
3820
+ const typeRegex = /type\s+(\w+)\s+(?:struct|interface)/g;
3821
+ let typeMatch;
3822
+ while ((typeMatch = typeRegex.exec(content)) !== null) {
3823
+ classNames.push(typeMatch[1]);
3824
+ }
3825
+ const funcRegex = /func\s+(?:\(\w+\s+\*?(\w+)\)\s+)?(\w+)\s*\(/g;
3826
+ let funcMatch;
3827
+ while ((funcMatch = funcRegex.exec(content)) !== null) {
3828
+ methodNames.push(funcMatch[2]);
3829
+ if (funcMatch[1]) {
3830
+ parents.push(funcMatch[1]);
3831
+ }
3832
+ }
3833
+ return { classNames, methodNames, annotations: [], parents, packageName, language: "go" };
3834
+ }
3835
+ function extractTypeScript(content) {
3836
+ const classNames = [];
3837
+ const methodNames = [];
3838
+ const annotations = [];
3839
+ const parents = [];
3840
+ const classRegex = /(?:export\s+)?(?:abstract\s+)?(?:class|interface)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
3841
+ let classMatch;
3842
+ while ((classMatch = classRegex.exec(content)) !== null) {
3843
+ classNames.push(classMatch[1]);
3844
+ if (classMatch[2]) parents.push(classMatch[2]);
3845
+ if (classMatch[3]) {
3846
+ for (const impl of classMatch[3].split(",")) {
3847
+ const trimmed = impl.trim();
3848
+ if (trimmed) parents.push(trimmed);
3849
+ }
3850
+ }
3851
+ }
3852
+ const decRegex = /@(\w+)/g;
3853
+ let decMatch;
3854
+ while ((decMatch = decRegex.exec(content)) !== null) {
3855
+ annotations.push(decMatch[1].toLowerCase());
3856
+ }
3857
+ const funcRegex = /(?:export\s+)?(?:async\s+)?function\s+(\w+)/g;
3858
+ let funcMatch;
3859
+ while ((funcMatch = funcRegex.exec(content)) !== null) {
3860
+ methodNames.push(funcMatch[1]);
3861
+ }
3862
+ return { classNames, methodNames, annotations, parents, packageName: null, language: "typescript" };
3863
+ }
3864
+ function augmentContentWithStructure(content, filePath) {
3865
+ const struct = extractStructuralTokens(content, filePath);
3866
+ const augmentParts = [];
3867
+ for (const name of struct.classNames) {
3868
+ const words = name.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
3869
+ augmentParts.push(words, words, words);
3870
+ }
3871
+ for (const name of struct.methodNames) {
3872
+ const words = name.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
3873
+ augmentParts.push(words, words);
3874
+ }
3875
+ for (const parent of struct.parents) {
3876
+ const words = parent.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
3877
+ augmentParts.push(words);
3878
+ }
3879
+ for (const ann of struct.annotations) {
3880
+ const layerTerms = ANNOTATION_LAYER_MAP[ann];
3881
+ if (layerTerms) {
3882
+ augmentParts.push(...layerTerms);
3883
+ }
3884
+ }
3885
+ if (augmentParts.length === 0) return content;
3886
+ return augmentParts.join(" ") + "\n" + content;
3887
+ }
3888
+ function getStructuralSummary(content, filePath) {
3889
+ const s = extractStructuralTokens(content, filePath);
3890
+ const parts = [];
3891
+ if (s.classNames.length > 0) parts.push(`classes: [${s.classNames.join(", ")}]`);
3892
+ if (s.methodNames.length > 0) parts.push(`methods: [${s.methodNames.join(", ")}]`);
3893
+ if (s.annotations.length > 0) parts.push(`annotations: [@${s.annotations.join(", @")}]`);
3894
+ if (s.parents.length > 0) parts.push(`extends/implements: [${s.parents.join(", ")}]`);
3895
+ if (s.packageName) parts.push(`package: ${s.packageName}`);
3896
+ return parts.length > 0 ? `[${s.language}] ${parts.join(" | ")}` : `[${s.language}] (no structural tokens)`;
3897
+ }
3898
+
3287
3899
  // src/engine/index-cache.ts
3288
3900
  init_tfidf();
3289
3901
  import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, statSync } from "fs";
@@ -3422,7 +4034,7 @@ function rebuildIndex(cachedFiles) {
3422
4034
  for (const [term, df] of docFreq) {
3423
4035
  idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
3424
4036
  }
3425
- return { documents, idf, avgDocLength, totalDocs };
4037
+ return { documents, idf, docFreq, avgDocLength, totalDocs };
3426
4038
  }
3427
4039
 
3428
4040
  // src/engine/reranker.ts
@@ -3825,130 +4437,1668 @@ function classifyTask(taskDescription) {
3825
4437
  return bestType;
3826
4438
  }
3827
4439
 
3828
- // src/engine/context-pipeline.ts
3829
- async function runContextPipeline(input) {
3830
- const { projectPath, task, analysis, budget = 5e4 } = input;
3831
- const taskType = classifyTask(task);
3832
- const fileContentMap = /* @__PURE__ */ new Map();
3833
- const fileContents = [];
3834
- for (const file of analysis.files) {
3835
- try {
3836
- const content = readFileSync6(file.path, "utf-8");
3837
- fileContentMap.set(file.relativePath, content);
3838
- fileContents.push({ relativePath: file.relativePath, content });
3839
- } catch {
3840
- fileContents.push({ relativePath: file.relativePath, content: "" });
3841
- }
3842
- }
3843
- const indexFiles = analysis.files.map((f) => ({
3844
- relativePath: f.relativePath,
3845
- absolutePath: f.path,
3846
- content: fileContentMap.get(f.relativePath)
3847
- }));
3848
- const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);
3849
- const semanticMatches = query(index, task, 50);
3850
- const boostedMatches = boostByPath(
3851
- semanticMatches,
3852
- analysis.files.map((f) => f.relativePath),
3853
- task
3854
- );
3855
- const depMap = /* @__PURE__ */ new Map();
3856
- for (const file of analysis.files) {
3857
- depMap.set(file.relativePath, file.imports);
4440
+ // src/engine/call-graph.ts
4441
// Maps a file path's extension to the coarse language family understood by
// the call-graph extractors ("java" | "ts" | "python" | "go"), or null for
// unsupported file types. Extension matching is case-insensitive.
// Fix: the JS/TS list covered .mts/.mjs but not the CommonJS variants
// .cts/.cjs — now all TypeScript/JavaScript flavors resolve to "ts".
function getLanguage(filePath) {
  const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
  if (ext === "java") return "java";
  if (["ts", "tsx", "js", "jsx", "mts", "mjs", "cts", "cjs"].includes(ext)) return "ts";
  if (ext === "py") return "python";
  if (ext === "go") return "go";
  return null;
}
4449
// Extracts method definitions from Java source using regex heuristics.
// Returns { name, className, filePath, isExported } records; `isExported`
// reflects whether the matched declaration text carries a `public` modifier.
// Fix: removed the dead `linePrefix` local — a 200-char substring computed
// on every match and never read.
function extractJavaDefinitions(content, filePath) {
  const defs = [];
  // First public/abstract class or interface; its name also lets us skip
  // constructors (which share the class name).
  const classMatch = content.match(/(?:public|abstract)\s+(?:class|interface)\s+(\w+)/);
  const className = classMatch?.[1];
  // Loose signature shape: modifier(s) + return type + identifier + "(".
  const methodRegex = /(?:public|protected|private|static|\s)+\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm;
  let match;
  while ((match = methodRegex.exec(content)) !== null) {
    const name = match[1];
    // Skip constructors and control-flow keywords the loose regex can catch.
    if (name === className || name === "if" || name === "for" || name === "while" || name === "switch" || name === "catch" || name === "return") continue;
    const isPublic = /public\s/.test(match[0]);
    defs.push({
      name,
      className,
      filePath,
      isExported: isPublic
    });
  }
  return defs;
}
4469
// Extracts function, class-method, and exported-arrow-function definitions
// from TypeScript/JavaScript source using regex heuristics.
// Fix: the per-class method scan previously ran to EOF (capped only at 10k
// chars, and only AFTER pushing), so methods of later classes were also
// attributed — duplicated — to every earlier class within the window. The
// scan now stops at the start of the next class declaration.
function extractTsDefinitions(content, filePath) {
  const defs = [];
  // Top-level (possibly exported/async) function declarations.
  const funcRegex = /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(/gm;
  let match;
  while ((match = funcRegex.exec(content)) !== null) {
    const isExported = match[0].startsWith("export");
    defs.push({ name: match[1], filePath, isExported });
  }
  // Collect all class declarations first so each method scan window can be
  // bounded by the next class's position.
  const classRegex = /(?:export\s+)?class\s+(\w+)/gm;
  const classes = [];
  while ((match = classRegex.exec(content)) !== null) {
    classes.push({
      name: match[1],
      isExported: match[0].startsWith("export"),
      start: match.index,
      bodyStart: match.index + match[0].length
    });
  }
  const KEYWORDS = ["constructor", "if", "for", "while", "catch", "return", "function", "class"];
  for (let i = 0; i < classes.length; i++) {
    const { name: className, isExported: isExportedClass, bodyStart } = classes[i];
    // Window ends at the next class start, or 10k chars — whichever is first.
    const nextClassStart = i + 1 < classes.length ? classes[i + 1].start : content.length;
    const limit = Math.min(nextClassStart, bodyStart + 1e4);
    const methodInClassRegex = /(?:async\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*\w[\w<>\[\]|,\s]*\s*)?{/gm;
    methodInClassRegex.lastIndex = bodyStart;
    let methodMatch;
    while ((methodMatch = methodInClassRegex.exec(content)) !== null) {
      if (methodMatch.index >= limit) break; // past this class's window
      const name = methodMatch[1];
      if (KEYWORDS.includes(name)) continue;
      defs.push({ name, className, filePath, isExported: isExportedClass });
    }
  }
  // `export const foo = (…) =>` style definitions.
  const arrowRegex = /export\s+const\s+(\w+)\s*=\s*(?:async\s+)?\(/gm;
  while ((match = arrowRegex.exec(content)) !== null) {
    defs.push({ name: match[1], filePath, isExported: true });
  }
  return defs;
}
4498
+ function extractPythonDefinitions(content, filePath) {
4499
+ const defs = [];
4500
+ const classRegex = /^class\s+(\w+)/gm;
4501
+ let currentClass;
4502
+ let match;
4503
+ const funcRegex = /^(\s*)def\s+(\w+)\s*\(/gm;
4504
+ while ((match = funcRegex.exec(content)) !== null) {
4505
+ const indent = match[1];
4506
+ const name = match[2];
4507
+ if (name.startsWith("_") && name !== "__init__") continue;
4508
+ const before = content.substring(0, match.index);
4509
+ const lastClass = before.match(/^class\s+(\w+)/gm);
4510
+ const isMethod = indent.length > 0 && lastClass;
4511
+ const className = isMethod ? lastClass[lastClass.length - 1].replace(/^class\s+/, "") : void 0;
4512
+ defs.push({
4513
+ name: name === "__init__" ? className ?? name : name,
4514
+ className,
4515
+ filePath,
4516
+ isExported: !name.startsWith("_")
4517
+ });
3894
4518
  }
3895
- return { selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats };
4519
+ return defs;
3896
4520
  }
3897
-
3898
- // src/engine/index.ts
3899
- init_tfidf();
3900
-
3901
- // src/engine/ab-testing.ts
3902
- import { createHash as createHash4 } from "crypto";
3903
- import { readFileSync as readFileSync7, writeFileSync as writeFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync3 } from "fs";
3904
- import { join as join8 } from "path";
3905
- var EXPERIMENTS_FILE = "experiments.json";
3906
- function loadExperiments(projectPath) {
3907
- const path = join8(projectPath, ".cto", EXPERIMENTS_FILE);
3908
- try {
3909
- if (!existsSync6(path)) return [];
3910
- return JSON.parse(readFileSync7(path, "utf-8"));
3911
- } catch {
3912
- return [];
4521
// Extracts top-level functions and receiver methods from Go source.
// Go's capitalization convention determines `isExported`; a method's
// receiver type becomes its `className`.
function extractGoDefinitions(content, filePath) {
  const defs = [];
  const isCapitalized = (name) => name[0] === name[0].toUpperCase();
  // Plain functions: `func Name(` at line start.
  const funcPattern = /^func\s+(\w+)\s*\(/gm;
  for (let m = funcPattern.exec(content); m !== null; m = funcPattern.exec(content)) {
    defs.push({
      name: m[1],
      filePath,
      isExported: isCapitalized(m[1])
    });
  }
  // Methods: `func (r *Recv) Name(` — capture receiver type and method name.
  const methodPattern = /^func\s+\(\s*\w+\s+\*?(\w+)\s*\)\s+(\w+)\s*\(/gm;
  for (let m = methodPattern.exec(content); m !== null; m = methodPattern.exec(content)) {
    defs.push({
      name: m[2],
      className: m[1],
      filePath,
      isExported: isCapitalized(m[2])
    });
  }
  return defs;
}
3915
- function saveExperiments(projectPath, experiments) {
3916
- const dir = join8(projectPath, ".cto");
3917
- if (!existsSync6(dir)) mkdirSync3(dir, { recursive: true });
3918
- writeFileSync3(join8(dir, EXPERIMENTS_FILE), JSON.stringify(experiments, null, 2));
4544
// Extracts `receiver.method(` call sites from Java source, filtering out
// JDK/logging/test receivers and ubiquitous utility methods that would only
// add noise to the call graph. Two passes: lowercase receivers (instance
// calls) and Capitalized receivers (static calls).
// Fix: the skip lists were Arrays scanned with O(k) `.includes` on every
// regex match; they are now Sets with O(1) `.has` — identical membership.
function extractJavaCalls(content, filePath) {
  const calls = [];
  const SKIP_INSTANCE_RECEIVERS = new Set([
    "System", "LOG", "LOGGER", "logger", "log", "this", "super", "String",
    "Integer", "Long", "Boolean", "Double", "Float", "Math", "Arrays",
    "Collections", "Objects", "Optional", "List", "Map", "Set", "Stream"
  ]);
  const SKIP_INSTANCE_METHODS = new Set([
    "toString", "hashCode", "equals", "getClass", "wait", "notify", "length",
    "size", "isEmpty", "get", "set", "add", "remove", "contains", "put",
    "stream", "map", "filter", "collect", "orElse", "orElseGet",
    "orElseThrow", "isPresent", "ifPresent", "of", "valueOf", "format",
    "println", "append", "build", "builder", "thenReturn", "when", "verify",
    "mock", "given"
  ]);
  // Instance calls: lowercase receiver, lowercase method.
  const callRegex = /(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm;
  let match;
  while ((match = callRegex.exec(content)) !== null) {
    const receiver = match[1];
    const method = match[2];
    if (SKIP_INSTANCE_RECEIVERS.has(receiver)) continue;
    if (SKIP_INSTANCE_METHODS.has(method)) continue;
    calls.push({ callerFile: filePath, receiverName: receiver, methodName: method });
  }
  const SKIP_STATIC_RECEIVERS = new Set([
    "System", "Math", "Arrays", "Collections", "Objects", "Optional",
    "String", "Integer", "Long", "Boolean", "Double", "Float",
    "LoggerFactory", "Logger", "Assert", "Mockito", "Assertions",
    "ResponseEntity", "HttpStatus"
  ]);
  const SKIP_STATIC_METHODS = new Set([
    "of", "valueOf", "format", "parse", "toString", "getLogger", "builder",
    "newBuilder", "create", "getInstance"
  ]);
  // Static calls: Capitalized receiver, lowercase method.
  const staticRegex = /(?<!\w)([A-Z]\w+)\.([a-z]\w+)\s*\(/gm;
  while ((match = staticRegex.exec(content)) !== null) {
    const receiver = match[1];
    const method = match[2];
    if (SKIP_STATIC_RECEIVERS.has(receiver)) continue;
    if (SKIP_STATIC_METHODS.has(method)) continue;
    calls.push({ callerFile: filePath, receiverName: receiver, methodName: method });
  }
  return calls;
}
4656
// Extracts `receiver.method(` call sites from TS/JS source, filtering out
// globals (console, JSON, …), test harness objects, and ubiquitous
// builtin/collection methods that would only add noise to the call graph.
// Fix: the skip lists were Arrays scanned with O(k) `.includes` on every
// regex match; they are now Sets with O(1) `.has` — identical membership.
function extractTsCalls(content, filePath) {
  const calls = [];
  const SKIP_RECEIVERS = new Set([
    "console", "process", "Math", "JSON", "Promise", "Object", "Array",
    "String", "Number", "Date", "Error", "RegExp", "Buffer", "this",
    "super", "window", "document", "expect", "describe", "it", "test",
    "vi", "jest"
  ]);
  const SKIP_METHODS = new Set([
    "toString", "valueOf", "hasOwnProperty", "length", "push", "pop",
    "shift", "unshift", "slice", "splice", "map", "filter", "reduce",
    "forEach", "find", "findIndex", "some", "every", "includes", "indexOf",
    "join", "split", "replace", "match", "trim", "toLowerCase",
    "toUpperCase", "startsWith", "endsWith", "keys", "values", "entries",
    "has", "get", "set", "delete", "add", "size", "then", "catch",
    "finally", "resolve", "reject", "stringify", "parse", "log", "warn",
    "error", "info", "debug"
  ]);
  // Lowercase receiver + lowercase method, not preceded by a word char.
  const callRegex = /(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm;
  let match;
  while ((match = callRegex.exec(content)) !== null) {
    const receiver = match[1];
    const method = match[2];
    if (SKIP_RECEIVERS.has(receiver)) continue;
    if (SKIP_METHODS.has(method)) continue;
    calls.push({ callerFile: filePath, receiverName: receiver, methodName: method });
  }
  return calls;
}
4744
// Extracts `receiver.method(` call sites from Python source (an optional
// `self.` prefix is consumed so attribute calls resolve to the attribute
// name), filtering out builtins/stdlib receivers and ubiquitous
// list/dict/str methods.
// Fix: the skip lists were Arrays scanned with O(k) `.includes` on every
// regex match; they are now Sets with O(1) `.has` — identical membership.
function extractPythonCalls(content, filePath) {
  const calls = [];
  const SKIP_RECEIVERS = new Set([
    "self", "cls", "os", "sys", "json", "logging", "print", "str", "int",
    "float", "list", "dict", "set", "tuple", "super", "type", "isinstance",
    "len", "range", "enumerate"
  ]);
  const SKIP_METHODS = new Set([
    "append", "extend", "insert", "remove", "pop", "clear", "get", "keys",
    "values", "items", "update", "format", "join", "split", "strip",
    "replace", "lower", "upper", "startswith", "endswith", "encode",
    "decode"
  ]);
  const callRegex = /(?<!\w)(?:self\.)?([a-z_]\w+)\.([a-z_]\w+)\s*\(/gm;
  let match;
  while ((match = callRegex.exec(content)) !== null) {
    const receiver = match[1];
    const method = match[2];
    if (SKIP_RECEIVERS.has(receiver)) continue;
    if (SKIP_METHODS.has(method)) continue;
    calls.push({ callerFile: filePath, receiverName: receiver, methodName: method });
  }
  return calls;
}
4801
// Extracts `pkgOrVar.ExportedFunc(` call sites from Go source (lowercase
// receiver, Capitalized method — Go's exported-call shape), filtering out
// common standard-library package names.
// Fix: the skip list was an Array scanned with O(k) `.includes` on every
// regex match; it is now a Set with O(1) `.has` — identical membership.
function extractGoCalls(content, filePath) {
  const calls = [];
  const SKIP_RECEIVERS = new Set([
    "fmt", "log", "os", "io", "strings", "strconv", "bytes", "context",
    "errors", "sync", "time", "math", "sort", "http", "json", "testing",
    "reflect"
  ]);
  const callRegex = /(?<!\w)([a-z]\w+)\.([A-Z]\w+)\s*\(/gm;
  let match;
  while ((match = callRegex.exec(content)) !== null) {
    const receiver = match[1];
    const method = match[2];
    if (SKIP_RECEIVERS.has(receiver)) continue;
    calls.push({ callerFile: filePath, receiverName: receiver, methodName: method });
  }
  return calls;
}
3920
- function createExperiment(id, name, description, controlParams, variantParams, options = {}) {
3921
- return {
3922
- id,
3923
- name,
3924
- description,
3925
- status: "running",
3926
- startedAt: (/* @__PURE__ */ new Date()).toISOString(),
3927
- trafficSplit: options.trafficSplit ?? 0.5,
3928
- minObservations: options.minObservations ?? 30,
3929
- significanceThreshold: options.significanceThreshold ?? 0.05,
3930
- control: {
3931
- name: "control",
3932
- params: controlParams,
3933
- metrics: emptyMetrics()
3934
- },
3935
- variant: {
3936
- name: "variant",
3937
- params: variantParams,
3938
- metrics: emptyMetrics()
4831
// Maps imported Java class names (and their conventional camelCase variable
// spellings, plus declared field names) to project file paths, so call
// receivers can later be resolved to concrete files.
function buildJavaImportMap(content, allFiles) {
  const importMap = new Map();
  // Locate the project file whose basename matches an imported class name.
  const fileForClass = (className) =>
    allFiles.find((candidate) => {
      const base = candidate.split("/").pop()?.replace(".java", "") ?? "";
      return base === className;
    });
  // Pass 1: explicit imports. Register both `Foo` and its likely variable
  // spelling `foo`.
  const importRegex = /^import\s+(?:static\s+)?[\w.]+\.(\w+)\s*;/gm;
  for (let m = importRegex.exec(content); m !== null; m = importRegex.exec(content)) {
    const className = m[1];
    const target = fileForClass(className);
    if (!target) continue;
    importMap.set(className, target);
    importMap.set(className.charAt(0).toLowerCase() + className.slice(1), target);
  }
  // Pass 2: injected/declared fields — map each field name to its type's
  // file when the type was already resolved in pass 1.
  const fieldRegex = /(?:private|protected)\s+(?:final\s+)?(\w+)\s+(\w+)\s*[;=]/gm;
  for (let m = fieldRegex.exec(content); m !== null; m = fieldRegex.exec(content)) {
    const knownTarget = importMap.get(m[1]);
    if (knownTarget) importMap.set(m[2], knownTarget);
  }
  return importMap;
}
3942
- function emptyMetrics() {
3943
- return {
3944
- total: 0,
3945
- successes: 0,
3946
- acceptRate: 0,
3947
- avgTimeToAccept: 0,
3948
- compilableRate: 0,
3949
- timeSum: 0,
3950
- compilableCount: 0
3951
- };
4858
// Resolves TS/JS import statements to project files and maps each imported
// binding name (named, aliased, or default) to the file it came from.
function buildTsImportMap(content, allFiles) {
  const importMap = new Map();
  // Captures: [1] named-import list, [2] default import, [3] module path.
  const importRegex = /import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]/gm;
  let m;
  while ((m = importRegex.exec(content)) !== null) {
    const [, namedImports, defaultImport, modulePath] = m;
    const relative = modulePath.replace(/^\.\//, "").replace(/^\.\.\//, "");
    const target = allFiles.find((f) => {
      const withoutExt = f.replace(/\.(ts|tsx|js|jsx|mts|mjs)$/, "");
      if (withoutExt.endsWith(relative)) return true;
      // Barrel imports: `./dir` resolving to `dir/index.ts`.
      return f.endsWith(modulePath.replace(/^\.\//, "") + "/index.ts");
    });
    if (!target) continue;
    if (namedImports) {
      for (const rawName of namedImports.split(",")) {
        // `foo as bar` binds `bar` locally.
        const binding = rawName.trim().split(" as ").pop()?.trim() ?? rawName.trim();
        if (binding) importMap.set(binding, target);
      }
    }
    if (defaultImport) {
      importMap.set(defaultImport, target);
    }
  }
  return importMap;
}
4884
// Resolves `from x.y import a, b as c` statements to project files and maps
// each imported name (plus a snake_case variant of CamelCase names) to the
// file it came from.
function buildPythonImportMap(content, allFiles) {
  const importMap = new Map();
  const fromRegex = /^from\s+([\w.]+)\s+import\s+(.+)$/gm;
  for (let m = fromRegex.exec(content); m !== null; m = fromRegex.exec(content)) {
    const modulePath = m[1].replace(/\./g, "/");
    const target = allFiles.find(
      (f) => f.includes(modulePath + ".py") || f.includes(modulePath + "/__init__.py")
    );
    if (!target) continue;
    for (const segment of m[2].split(",")) {
      // `Foo as F` binds `F` locally.
      const name = segment.trim().split(" as ").pop()?.trim() ?? "";
      if (!name) continue;
      importMap.set(name, target);
      // `UserService` is often instantiated as `user_service` in Python.
      const snakeName = name.replace(/([A-Z])/g, "_$1").toLowerCase().replace(/^_/, "");
      if (snakeName !== name) importMap.set(snakeName, target);
    }
  }
  return importMap;
}
4902
// Builds a lightweight cross-file call graph. Per-file definitions and call
// sites are extracted with the language-appropriate regex extractor, then
// each call is resolved to a target file via (in order): the caller's import
// map, a Class.method qualified definition, a capitalized-receiver import,
// or a globally unique method name. Returns deduplicated "call" edges.
function buildCallGraph(files) {
  const allPaths = files.map((f) => f.relativePath);
  const allDefinitions = [];
  const allCalls = [];
  const extractors = {
    java: [extractJavaDefinitions, extractJavaCalls],
    ts: [extractTsDefinitions, extractTsCalls],
    python: [extractPythonDefinitions, extractPythonCalls],
    go: [extractGoDefinitions, extractGoCalls]
  };
  for (const file of files) {
    const lang = getLanguage(file.relativePath);
    if (!lang) continue;
    const [extractDefs, extractCalls] = extractors[lang];
    allDefinitions.push(...extractDefs(file.content, file.relativePath));
    allCalls.push(...extractCalls(file.content, file.relativePath));
  }
  // Index definitions by bare method name …
  const defByMethod = new Map();
  for (const def of allDefinitions) {
    const bucket = defByMethod.get(def.name) ?? [];
    bucket.push(def);
    defByMethod.set(def.name, bucket);
  }
  // … and by `Class.method` for qualified lookups.
  const defByQualified = new Map();
  for (const def of allDefinitions) {
    if (def.className) {
      defByQualified.set(`${def.className}.${def.name}`, def);
    }
  }
  // Go has no import-map builder; it falls through to an empty map.
  const importBuilders = {
    java: buildJavaImportMap,
    ts: buildTsImportMap,
    python: buildPythonImportMap
  };
  const edges = [];
  const edgeSet = new Set();
  for (const file of files) {
    const lang = getLanguage(file.relativePath);
    if (!lang) continue;
    const buildImports = importBuilders[lang];
    const importMap = buildImports ? buildImports(file.content, allPaths) : new Map();
    for (const call of allCalls) {
      if (call.callerFile !== file.relativePath) continue;
      // Resolution order: imported receiver → qualified Class.method →
      // capitalized receiver import → globally unique method name.
      let targetFile = importMap.get(call.receiverName);
      if (!targetFile) {
        targetFile = defByQualified.get(`${call.receiverName}.${call.methodName}`)?.filePath;
      }
      if (!targetFile) {
        const typeName = call.receiverName.charAt(0).toUpperCase() + call.receiverName.slice(1);
        targetFile = importMap.get(typeName);
      }
      if (!targetFile) {
        const candidates = defByMethod.get(call.methodName);
        if (candidates && candidates.length === 1 && candidates[0].filePath !== file.relativePath) {
          targetFile = candidates[0].filePath;
        }
      }
      // Drop unresolved and self-referential calls; dedupe edges per pair.
      if (!targetFile || targetFile === file.relativePath) continue;
      const key = `${file.relativePath}\u2192${targetFile}`;
      if (!edgeSet.has(key)) {
        edgeSet.add(key);
        edges.push({ from: file.relativePath, to: targetFile, type: "call" });
      }
    }
  }
  return { definitions: allDefinitions, calls: allCalls, edges };
}
4992
// Spreads relevance from the top-ranked matches along call-graph edges:
// files a hot file calls receive `score * boostFactor`; files that call it
// receive 70% of that. Unmatched neighbors are added with the boost as
// their whole score. Returns a new list sorted by adjusted score.
function boostByCallGraph(matches, callEdges, topK = 10, boostFactor = 0.3) {
  if (matches.length === 0 || callEdges.length === 0) return matches;
  const adjusted = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );
  // Forward (callsTo) and reverse (calledBy) adjacency lists over "call" edges.
  const callsTo = new Map();
  const calledBy = new Map();
  for (const edge of callEdges) {
    if (edge.type !== "call") continue;
    if (!callsTo.has(edge.from)) callsTo.set(edge.from, []);
    callsTo.get(edge.from).push(edge.to);
    if (!calledBy.has(edge.to)) calledBy.set(edge.to, []);
    calledBy.get(edge.to).push(edge.from);
  }
  const maxBoostPerParent = 5;
  const applyBoost = (filePath, amount, tag) => {
    const entry = adjusted.get(filePath);
    if (entry) {
      entry.score += amount;
      if (!entry.matchedTerms.includes(tag)) entry.matchedTerms.push(tag);
    } else {
      adjusted.set(filePath, { filePath, score: amount, matchedTerms: [tag] });
    }
  };
  for (const parent of matches.slice(0, topK)) {
    // Boosts derive from the parent's ORIGINAL score, not its adjusted one.
    const boost = parent.score * boostFactor;
    const callees = callsTo.get(parent.filePath) ?? [];
    for (const callee of callees.slice(0, maxBoostPerParent)) {
      applyBoost(callee, boost, "[call-graph:called-by-match]");
    }
    const callers = calledBy.get(parent.filePath) ?? [];
    for (const caller of callers.slice(0, maxBoostPerParent)) {
      applyBoost(caller, boost * 0.7, "[call-graph:calls-match]");
    }
  }
  return [...adjusted.values()].sort((a, b) => b.score - a.score);
}
5049
+
5050
+ // src/engine/git-relevance.ts
5051
+ import { execSync } from "child_process";
5052
// Mines `git log` for files that historically change together. Returns a
// matrix of Jaccard-style co-change similarities keyed by file path, plus
// per-file commit counts; returns an empty matrix when git is unavailable
// or the directory is not a repository.
function buildCoChangeMatrix(projectPath, maxCommits = 500, minCoChanges = 2) {
  const emptyMatrix = {
    entries: new Map(),
    fileCommitCounts: new Map(),
    totalCommits: 0
  };
  let gitOutput;
  try {
    gitOutput = execSync(
      `git log --no-merges --diff-filter=ACMR --name-only --format="---COMMIT---" -n ${maxCommits}`,
      { cwd: projectPath, encoding: "utf-8", maxBuffer: 10 * 1024 * 1024, timeout: 15e3 }
    );
  } catch {
    // Best-effort: git missing, not a repo, or timeout.
    return emptyMatrix;
  }
  // Split the output into one file-path list per commit.
  const commits = [];
  let pending = [];
  for (const rawLine of gitOutput.split("\n")) {
    const line = rawLine.trim();
    if (line === "---COMMIT---") {
      if (pending.length > 0) commits.push(pending);
      pending = [];
    } else if (line.length > 0) {
      pending.push(line);
    }
  }
  if (pending.length > 0) commits.push(pending);
  if (commits.length === 0) return emptyMatrix;
  // Count per-file commits and co-change pairs. Each commit's pair
  // expansion is capped at 20 files so huge refactor commits don't
  // explode quadratically.
  const fileCommitCounts = new Map();
  const coChangeCounts = new Map();
  for (const commitFiles of commits) {
    const unique = [...new Set(commitFiles)];
    for (const file of unique) {
      fileCommitCounts.set(file, (fileCommitCounts.get(file) ?? 0) + 1);
    }
    const capped = unique.slice(0, 20);
    for (let i = 0; i < capped.length; i++) {
      for (let j = i + 1; j < capped.length; j++) {
        // Order the pair lexicographically so (a,b) and (b,a) share a key.
        const [a, b] = capped[i] < capped[j] ? [capped[i], capped[j]] : [capped[j], capped[i]];
        const key = `${a}\0${b}`;
        coChangeCounts.set(key, (coChangeCounts.get(key) ?? 0) + 1);
      }
    }
  }
  // Keep pairs seen at least `minCoChanges` times; similarity is Jaccard
  // over commit sets: co / (|A| + |B| - co). Each pair is stored under
  // BOTH files, mirrored so `fileA` is always the lookup key.
  const entries = new Map();
  for (const [key, coCommits] of coChangeCounts) {
    if (coCommits < minCoChanges) continue;
    const [fileA, fileB] = key.split("\0");
    const commitsA = fileCommitCounts.get(fileA) ?? 0;
    const commitsB = fileCommitCounts.get(fileB) ?? 0;
    const union = commitsA + commitsB - coCommits;
    const similarity2 = union > 0 ? coCommits / union : 0;
    const forward = { fileA, fileB, coCommits, similarity: similarity2 };
    const listA = entries.get(fileA) ?? [];
    listA.push(forward);
    entries.set(fileA, listA);
    const listB = entries.get(fileB) ?? [];
    listB.push({ ...forward, fileA: fileB, fileB: fileA });
    entries.set(fileB, listB);
  }
  // Similarity-sorted lists let consumers break early at a threshold.
  for (const [, list] of entries) {
    list.sort((a, b) => b.similarity - a.similarity);
  }
  return { entries, fileCommitCounts, totalCommits: commits.length };
}
5121
// Boosts files that historically co-change with the current top matches.
// Partner lists are similarity-sorted, so iteration stops at the first
// partner below `minSimilarity`. At most 5 partners per parent are boosted;
// unmatched partners are added with the boost as their whole score.
function boostByGitCoChange(matches, coChangeMatrix, topK = 10, boostFactor = 0.25, minSimilarity = 0.15) {
  if (matches.length === 0 || coChangeMatrix.entries.size === 0) return matches;
  const adjusted = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );
  const maxBoostTargets = 5;
  for (const parent of matches.slice(0, topK)) {
    const partners = coChangeMatrix.entries.get(parent.filePath) ?? [];
    let applied = 0;
    for (const partner of partners) {
      if (applied >= maxBoostTargets) break;
      // Sorted descending — everything after this is weaker too.
      if (partner.similarity < minSimilarity) break;
      // Scale by both the parent's score and the co-change strength.
      const boost = parent.score * boostFactor * partner.similarity;
      const entry = adjusted.get(partner.fileB);
      if (entry) {
        entry.score += boost;
        if (!entry.matchedTerms.includes("[git-cochange]")) {
          entry.matchedTerms.push("[git-cochange]");
        }
      } else {
        adjusted.set(partner.fileB, {
          filePath: partner.fileB,
          score: boost,
          matchedTerms: ["[git-cochange]"]
        });
      }
      applied++;
    }
  }
  return [...adjusted.values()].sort((a, b) => b.score - a.score);
}
5154
// Scores files by how recently they were committed, using exponential decay
// with a half-life of days/3. Returns a Map of path → score in (0, 1];
// files with no commits in the window — or any git failure — are absent.
function getGitRecency(projectPath, days = 30) {
  const recency = new Map();
  try {
    const output = execSync(
      `git log --no-merges --diff-filter=ACMR --name-only --format="%aI" --since="${days} days ago"`,
      { cwd: projectPath, encoding: "utf-8", maxBuffer: 5 * 1024 * 1024, timeout: 1e4 }
    );
    const now = Date.now();
    const msPerDay = 864e5;
    const halfLife = days / 3;
    let commitTime = null;
    for (const rawLine of output.split("\n")) {
      const line = rawLine.trim();
      if (line.length === 0) continue;
      // ISO timestamp lines mark the start of a commit's file list.
      if (/^\d{4}-\d{2}-\d{2}/.test(line)) {
        commitTime = new Date(line).getTime();
        continue;
      }
      if (commitTime === null) continue;
      const ageDays = (now - commitTime) / msPerDay;
      const score = Math.exp(-ageDays / halfLife);
      // A file may appear in many commits; keep its best (most recent) score.
      if (score > (recency.get(line) ?? 0)) {
        recency.set(line, score);
      }
    }
  } catch {
    // Best-effort: git missing, not a repo, or timeout → empty map.
  }
  return recency;
}
5187
+
5188
+ // src/engine/multi-hop.ts
5189
+ init_tfidf();
5190
+ var DEFAULT_CONFIG2 = {
5191
+ maxHops: 2,
5192
+ topKPerHop: 5,
5193
+ decayFactor: 0.5,
5194
+ minScoreThreshold: 0.2
5195
+ };
5196
+ function multiHopQuery(index, task, deps, callEdges, fileContents, config = {}) {
5197
+ const cfg = { ...DEFAULT_CONFIG2, ...config };
5198
+ const hops = [];
5199
+ const callsTo = /* @__PURE__ */ new Map();
5200
+ const calledBy = /* @__PURE__ */ new Map();
5201
+ for (const edge of callEdges) {
5202
+ const fwd = callsTo.get(edge.from) ?? /* @__PURE__ */ new Set();
5203
+ fwd.add(edge.to);
5204
+ callsTo.set(edge.from, fwd);
5205
+ const rev = calledBy.get(edge.to) ?? /* @__PURE__ */ new Set();
5206
+ rev.add(edge.from);
5207
+ calledBy.set(edge.to, rev);
5208
+ }
5209
+ const aggregateScores = /* @__PURE__ */ new Map();
5210
+ const aggregateTerms = /* @__PURE__ */ new Map();
5211
+ const explored = /* @__PURE__ */ new Set();
5212
+ const initialResults = query(index, task, 50);
5213
+ for (const m of initialResults) {
5214
+ aggregateScores.set(m.filePath, m.score);
5215
+ aggregateTerms.set(m.filePath, new Set(m.matchedTerms));
5216
+ explored.add(m.filePath);
5217
+ }
5218
+ hops.push({
5219
+ hop: 0,
5220
+ seedFiles: [],
5221
+ newFiles: initialResults.slice(0, cfg.topKPerHop).map((m) => m.filePath),
5222
+ expandedTerms: tokenize(task)
5223
+ });
5224
+ let currentSeeds = initialResults.slice(0, cfg.topKPerHop);
5225
+ for (let hop = 1; hop <= cfg.maxHops; hop++) {
5226
+ if (currentSeeds.length === 0) break;
5227
+ const seedFiles = currentSeeds.map((m) => m.filePath);
5228
+ const newFiles = [];
5229
+ const expandedTerms = [];
5230
+ const connectedFiles = /* @__PURE__ */ new Set();
5231
+ for (const seed of seedFiles) {
5232
+ const importDeps = deps.get(seed) ?? [];
5233
+ for (const dep of importDeps) {
5234
+ if (!explored.has(dep)) connectedFiles.add(dep);
5235
+ }
5236
+ const calls = callsTo.get(seed) ?? /* @__PURE__ */ new Set();
5237
+ for (const called of calls) {
5238
+ if (!explored.has(called)) connectedFiles.add(called);
5239
+ }
5240
+ const callers = calledBy.get(seed) ?? /* @__PURE__ */ new Set();
5241
+ for (const caller of callers) {
5242
+ if (!explored.has(caller)) connectedFiles.add(caller);
5243
+ }
5244
+ }
5245
+ for (const seed of seedFiles) {
5246
+ const content = fileContents.get(seed);
5247
+ if (!content) continue;
5248
+ const identifiers = extractKeyIdentifiers(content, seed);
5249
+ expandedTerms.push(...identifiers);
5250
+ }
5251
+ const decayMultiplier = Math.pow(cfg.decayFactor, hop);
5252
+ const uniqueExpandedTerms = [...new Set(expandedTerms)];
5253
+ const expandedQuery = task + " " + uniqueExpandedTerms.slice(0, 10).join(" ");
5254
+ const expandedResults = query(index, expandedQuery, 30);
5255
+ for (const connected of connectedFiles) {
5256
+ const expandedMatch = expandedResults.find((r) => r.filePath === connected);
5257
+ const graphScore = 0.3;
5258
+ const bm25Score = expandedMatch?.score ?? 0;
5259
+ const hopScore = (graphScore + bm25Score) * decayMultiplier;
5260
+ if (hopScore >= cfg.minScoreThreshold * decayMultiplier) {
5261
+ const existing = aggregateScores.get(connected) ?? 0;
5262
+ aggregateScores.set(connected, existing + hopScore);
5263
+ const terms = aggregateTerms.get(connected) ?? /* @__PURE__ */ new Set();
5264
+ terms.add(`[hop-${hop}]`);
5265
+ if (expandedMatch) {
5266
+ for (const t of expandedMatch.matchedTerms) terms.add(t);
5267
+ }
5268
+ aggregateTerms.set(connected, terms);
5269
+ if (!explored.has(connected)) {
5270
+ newFiles.push(connected);
5271
+ explored.add(connected);
5272
+ }
5273
+ }
5274
+ }
5275
+ for (const r of expandedResults) {
5276
+ if (!explored.has(r.filePath)) {
5277
+ const hopScore = r.score * decayMultiplier * 0.5;
5278
+ if (hopScore >= cfg.minScoreThreshold * decayMultiplier) {
5279
+ const existing = aggregateScores.get(r.filePath) ?? 0;
5280
+ aggregateScores.set(r.filePath, existing + hopScore);
5281
+ const terms = aggregateTerms.get(r.filePath) ?? /* @__PURE__ */ new Set();
5282
+ terms.add(`[hop-${hop}-bm25]`);
5283
+ for (const t of r.matchedTerms) terms.add(t);
5284
+ aggregateTerms.set(r.filePath, terms);
5285
+ newFiles.push(r.filePath);
5286
+ explored.add(r.filePath);
5287
+ }
5288
+ }
5289
+ }
5290
+ hops.push({ hop, seedFiles, newFiles, expandedTerms: uniqueExpandedTerms.slice(0, 20) });
5291
+ const newScored = newFiles.map((f) => ({ filePath: f, score: aggregateScores.get(f) ?? 0 })).sort((a, b) => b.score - a.score).slice(0, cfg.topKPerHop);
5292
+ currentSeeds = newScored.map((s) => ({
5293
+ filePath: s.filePath,
5294
+ score: s.score,
5295
+ matchedTerms: [...aggregateTerms.get(s.filePath) ?? []]
5296
+ }));
5297
+ }
5298
+ const matches = [];
5299
+ for (const [filePath, score] of aggregateScores) {
5300
+ const terms = aggregateTerms.get(filePath) ?? /* @__PURE__ */ new Set();
5301
+ matches.push({ filePath, score, matchedTerms: [...terms] });
5302
+ }
5303
+ matches.sort((a, b) => b.score - a.score);
5304
+ return {
5305
+ matches,
5306
+ hops,
5307
+ totalFilesExplored: explored.size
5308
+ };
5309
+ }
5310
/**
 * Pull searchable identifier fragments (class/function/type names) out of a
 * source file, dispatching on the file extension (java, ts/js, py, go).
 *
 * @param {string} content - Raw file text.
 * @param {string} filePath - Used only to derive the extension.
 * @returns {string[]} Up to 30 unique fragments (length >= 3) with common
 *   noise words removed.
 */
function extractKeyIdentifiers(content, filePath) {
  const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
  const collected = [];
  // Shared helper: strip the declaration prefix from each regex match and
  // split the remaining identifier into word fragments.
  const harvest = (matches, prefixRe, splitter) => {
    for (const raw of matches) {
      collected.push(...splitter(raw.replace(prefixRe, "")));
    }
  };
  if (ext === "java") {
    harvest(
      content.match(/(?:class|interface)\s+(\w+)/g) ?? [],
      /(?:class|interface)\s+/,
      splitCamelCase
    );
    // Public/protected method names need an exec loop to reach group 1.
    const methodRegex = /(?:public|protected)\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm;
    for (let m = methodRegex.exec(content); m !== null; m = methodRegex.exec(content)) {
      collected.push(...splitCamelCase(m[1]));
    }
  } else if (["ts", "tsx", "js", "jsx"].includes(ext)) {
    harvest(
      content.match(/export\s+(?:class|function|const|interface|type)\s+(\w+)/g) ?? [],
      /export\s+(?:class|function|const|interface|type)\s+/,
      splitCamelCase
    );
  } else if (ext === "py") {
    harvest(
      content.match(/^(?:class|def)\s+(\w+)/gm) ?? [],
      /^(?:class|def)\s+/,
      splitSnakeCase
    );
  } else if (ext === "go") {
    // Exported (capitalized) funcs, optionally with a receiver, plus types.
    harvest(
      content.match(/^func\s+(?:\([^)]+\)\s+)?([A-Z]\w+)/gm) ?? [],
      /^func\s+(?:\([^)]+\)\s+)?/,
      splitCamelCase
    );
    harvest(content.match(/^type\s+([A-Z]\w+)/gm) ?? [], /^type\s+/, splitCamelCase);
  }
  // Dedupe, drop short/noisy fragments, and cap the result size.
  return [...new Set(collected)]
    .filter((id) => id.length >= 3 && !NOISE_IDENTIFIERS.has(id.toLowerCase()))
    .slice(0, 30);
}
5350
/**
 * Break a camelCase/PascalCase identifier into lowercase word fragments.
 * A second pass handles acronym runs ("HTTPServer" -> "http server").
 * Fragments shorter than 3 characters are dropped.
 *
 * @param {string} name
 * @returns {string[]}
 */
function splitCamelCase(name) {
  const spaced = name
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
  const fragments = [];
  for (const part of spaced.toLowerCase().split(/\s+/)) {
    if (part.length >= 3) fragments.push(part);
  }
  return fragments;
}
5353
/**
 * Break a snake_case identifier into fragments, dropping pieces shorter
 * than 3 characters.
 *
 * @param {string} name
 * @returns {string[]}
 */
function splitSnakeCase(name) {
  const fragments = [];
  for (const piece of name.split("_")) {
    if (piece.length >= 3) fragments.push(piece);
  }
  return fragments;
}
5356
// Identifier fragments too generic to help ranking: accessor prefixes,
// stop-word-like tokens, language keywords, common type names, and
// ubiquitous architecture terms. Checked lowercase.
var NOISE_IDENTIFIERS = /* @__PURE__ */ new Set([
  "get", "set", "has", "add", "put", "new", "run", "map", "for",
  "the", "and", "not", "with", "from", "this", "that",
  "test", "spec", "mock",
  "void", "null", "true", "false", "string", "number", "boolean", "int",
  "impl", "default", "abstract", "base", "main", "init", "setup",
  "util", "utils", "helper", "helpers", "common", "config",
  "model", "entity", "service", "repository", "controller", "handler",
  "interface", "type", "class", "function", "const", "return",
  "import", "export", "private", "public", "protected", "static",
  "final", "override", "async", "await",
]);
5420
+
5421
+ // src/engine/query-intent.ts
5422
// Ordered (pattern, label) pairs used to classify a task's dominant verb.
// Order matters: detectAction returns the label of the FIRST matching pattern.
var ACTION_PATTERNS = [
  [/\b(fix|bug|debug|repair|resolve|broken|crash|error|issue|wrong)\b/i, "fix"],
  [/\b(add|implement|create|build|new|feature|introduce|wire)\b/i, "add"],
  [/\b(refactor|restructure|clean|extract|split|move|rename|simplify)\b/i, "refactor"],
  [/\b(trace|follow|understand|find|where|how|flow|path|chain)\b/i, "trace"],
  [/\b(test|spec|coverage|assert|mock|verify)\b/i, "test"],
  [/\b(doc|document|describe|explain|readme|comment)\b/i, "docs"],
  [/\b(remove|delete|deprecate|drop|kill|eliminate)\b/i, "remove"],
  [/\b(optimize|performance|speed|fast|slow|latency|efficient)\b/i, "optimize"]
];
/**
 * Classify the dominant action of a task description.
 * @param {string} task
 * @returns {string} Action label, or "unknown" when nothing matched.
 */
function detectAction(task) {
  const hit = ACTION_PATTERNS.find(([pattern]) => pattern.test(task));
  return hit ? hit[1] : "unknown";
}
5438
// Substring keywords that map task wording onto architectural layer names.
var LAYER_KEYWORDS = [
  [["controller", "endpoint", "handler", "router", "route", "api", "rest", "entrypoint"], "endpoint"],
  [["usecase", "use case", "use-case", "interactor", "application service"], "usecase"],
  [["service", "domain service", "business logic"], "service"],
  [["repository", "repo", "dao", "data access", "persistence", "database", "db", "store"], "repository"],
  [["cache", "redis", "memcached", "caching", "ttl", "invalidat"], "cache"],
  [["client", "http client", "api client", "rest client", "feign", "retrofit"], "client"],
  [["model", "entity", "dto", "domain object", "value object", "pojo"], "model"],
  [["config", "configuration", "injector", "module", "bean", "provider", "dependency injection"], "config"],
  [["queue", "kafka", "rabbit", "sqs", "event", "listener", "consumer", "producer", "message"], "queue"],
  [["middleware", "interceptor", "filter", "guard", "pipe"], "middleware"]
];
/**
 * Detect which architectural layers a task mentions, by case-insensitive
 * substring match against LAYER_KEYWORDS.
 * @param {string} task
 * @returns {string[]} Unique layer names in LAYER_KEYWORDS order.
 */
function detectLayers(task) {
  const lower = task.toLowerCase();
  const found = new Set();
  for (const [keywords, layer] of LAYER_KEYWORDS) {
    if (keywords.some((kw) => lower.includes(kw))) {
      found.add(layer);
    }
  }
  return [...found];
}
5460
// English stop words stripped from task text before entity extraction.
// (Named with a "2" suffix to avoid colliding with the tfidf module's set.)
// Fix over the previous literal: "nor" and "so" were each listed twice;
// the duplicates were redundant inside a Set and have been removed.
var STOP_WORDS2 = /* @__PURE__ */ new Set([
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did",
  "will", "would", "shall", "should", "may", "might", "must", "can", "could", "need",
  "not", "and", "but", "or", "nor", "for", "yet", "so",
  "in", "on", "at", "to", "from", "by", "with", "about", "between",
  "through", "during", "before", "after", "above", "below",
  "up", "down", "out", "off", "over", "under",
  "again", "further", "then", "once", "here", "there",
  "when", "where", "why", "how",
  "all", "each", "every", "both", "few", "more", "most", "other", "some", "such",
  "no", "only", "own", "same", "than", "too", "very", "just", "because",
  "this", "that", "these", "those", "it", "its", "of", "if",
]);
5556
// Words describing WHAT TO DO (rather than what to touch); excluded from
// entity extraction so "fix user login" yields entities like "login".
var ACTION_WORDS = /* @__PURE__ */ new Set([
  "fix", "add", "create", "build", "implement", "refactor",
  "trace", "follow", "find", "update", "modify", "change",
  "remove", "delete", "debug", "test", "check", "verify", "validate",
  "handle", "process", "resolve", "repair", "optimize", "improve",
  "speed", "clean", "bug", "error", "issue", "problem",
  "flow", "path", "chain",
]);
5592
// Domain operation verbs (CRUD, messaging, crypto, data plumbing)
// recognized in task text by extractOperations.
var OPERATION_WORDS = /* @__PURE__ */ new Set([
  "create", "read", "update", "delete",
  "save", "load", "fetch", "retrieve", "store", "persist", "insert", "remove",
  "invalidate", "validate", "parse", "transform", "convert", "render", "display",
  "send", "receive", "publish", "subscribe", "emit", "listen",
  "authenticate", "authorize", "encrypt", "decrypt", "hash",
  "serialize", "deserialize", "encode", "decode", "compress",
  "replicate", "sync", "migrate", "export", "import", "upload", "download",
  "search", "index", "query", "filter", "sort",
]);
5641
/**
 * Extract candidate entity terms from a task description: words of length
 * >= 3 that are not stop words, action words, operation words, or layer
 * keywords (layers are handled separately by detectLayers).
 * @param {string} task
 * @returns {string[]} Unique entity words, in order of first appearance.
 */
function extractEntities(task) {
  const seen = new Set();
  const tokens = task.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/);
  for (const token of tokens) {
    if (token.length < 3) continue;
    if (STOP_WORDS2.has(token) || ACTION_WORDS.has(token) || OPERATION_WORDS.has(token)) {
      continue;
    }
    if (LAYER_KEYWORDS.some(([kws]) => kws.includes(token))) continue;
    seen.add(token);
  }
  return [...seen];
}
5655
/**
 * Find operation verbs ("save", "invalidate", ...) mentioned in a task.
 * A second pass scans words following "on/after/before/during" on the raw
 * lowercased text; because `\w+` stops at hyphens while the first pass keeps
 * them, this can surface an operation hidden inside a hyphen-joined token.
 * @param {string} task
 * @returns {string[]} Unique operations in order found.
 */
function extractOperations(task) {
  const lowered = task.toLowerCase();
  const found = [];
  for (const word of lowered.replace(/[^a-z0-9\s-]/g, " ").split(/\s+/)) {
    if (OPERATION_WORDS.has(word)) found.push(word);
  }
  const prepositionHits = lowered.match(/\b(on|after|before|during)\s+(\w+)/g);
  if (prepositionHits) {
    for (const hit of prepositionHits) {
      const tokens = hit.split(/\s+/);
      if (tokens.length >= 2 && OPERATION_WORDS.has(tokens[1])) {
        found.push(tokens[1]);
      }
    }
  }
  return [...new Set(found)];
}
5674
/**
 * Extract prepositional qualifier phrases (up to two words following
 * "on/for/in/via/from/through"), lowercased and stop-word filtered.
 * @param {string} task
 * @returns {string[]} Unique qualifiers in order found.
 */
function extractQualifiers(task) {
  const found = [];
  const hits = task.match(/\b(on|for|in|via|from|through)\s+(\w+(?:\s+\w+)?)/gi) ?? [];
  for (const hit of hits) {
    const tokens = hit.split(/\s+/);
    if (tokens.length < 2) continue;
    const phrase = tokens.slice(1).join(" ").toLowerCase();
    if (phrase.length >= 2 && !STOP_WORDS2.has(phrase)) {
      found.push(phrase);
    }
  }
  return [...new Set(found)];
}
5690
/**
 * Parse a free-text task into a structured intent: action verb, entities,
 * operations, architectural layers, and prepositional qualifiers.
 * Confidence is the fraction of the four main signals that fired
 * (action detected, any entities, any operations, any layers).
 * @param {string} task
 * @returns {{original: string, action: string, entities: string[],
 *   operations: string[], layers: string[], qualifiers: string[],
 *   confidence: number}}
 */
function parseQueryIntent(task) {
  const action = detectAction(task);
  const entities = extractEntities(task);
  const operations = extractOperations(task);
  const layers = detectLayers(task);
  const qualifiers = extractQualifiers(task);
  let fired = 0;
  if (action !== "unknown") fired += 1;
  if (entities.length > 0) fired += 1;
  if (operations.length > 0) fired += 1;
  if (layers.length > 0) fired += 1;
  const confidence = fired / 4;
  return { original: task, action, entities, operations, layers, qualifiers, confidence };
}
5705
/**
 * Build a term-frequency-weighted query string from a parsed intent:
 * entities are repeated x3, operations x2, layers and qualifiers x1.
 * Falls back to the original task text when nothing was extracted.
 * @param {object} intent - Result of parseQueryIntent.
 * @returns {string}
 */
function buildWeightedQuery(intent) {
  const weighted = [
    ...intent.entities.flatMap((entity) => [entity, entity, entity]),
    ...intent.operations.flatMap((op) => [op, op]),
    ...intent.layers,
    ...intent.qualifiers,
  ];
  return weighted.length === 0 ? intent.original : weighted.join(" ");
}
5722
/**
 * Expand a list of architectural layers with the layers usually adjacent to
 * them in a request's call chain (endpoint -> usecase/service, etc.).
 * Layers without an adjacency entry are kept as-is.
 * @param {string[]} layers
 * @returns {string[]} Unique layers: originals first, then additions.
 */
function expandLayers(layers) {
  // Adjacency table replacing the original switch; same pairs, same order.
  const adjacency = {
    endpoint: ["usecase", "service"],
    usecase: ["service", "repository"],
    service: ["usecase", "repository"],
    repository: ["service", "model"],
    cache: ["repository", "service"],
    client: ["service", "config"],
  };
  const expanded = new Set(layers);
  for (const layer of layers) {
    for (const neighbor of adjacency[layer] ?? []) {
      expanded.add(neighbor);
    }
  }
  return [...expanded];
}
5754
+
5755
+ // src/engine/embeddings.ts
5756
/**
 * Build a lightweight "embedding" index over an existing tf-idf index: each
 * document becomes an L2-normalized tf-idf vector and queries are scored by
 * cosine similarity (dot product of normalized vectors).
 *
 * Fix over the previous version: a `docNorms` map was computed and stored
 * but never read or returned; that dead state has been removed.
 *
 * @param {object} index - tf-idf index with `idf` (Map term -> idf weight)
 *   and `documents` (Map filePath -> { terms: Map term -> tf }).
 * @returns {{backend: string, dimensions: number, documentCount: number,
 *   query: (text: string, topK: number) => {filePath: string, score: number}[]}}
 */
function buildTfIdfEmbeddingIndex(index) {
  const allTerms = [...index.idf.keys()];
  const termToIdx = new Map(allTerms.map((t, i) => [t, i]));
  const dimensions = allTerms.length;
  const docVectors = /* @__PURE__ */ new Map();
  for (const [filePath, doc] of index.documents) {
    const vec = new Float32Array(dimensions);
    let norm = 0;
    for (const [term, tf] of doc.terms) {
      const idx = termToIdx.get(term);
      if (idx === void 0) continue;
      const weight = tf * (index.idf.get(term) ?? 0);
      vec[idx] = weight;
      norm += weight * weight;
    }
    // L2-normalize so a plain dot product is cosine similarity.
    norm = Math.sqrt(norm);
    if (norm > 0) {
      for (let i = 0; i < dimensions; i++) {
        vec[i] /= norm;
      }
    }
    docVectors.set(filePath, vec);
  }
  /**
   * Score all documents against `text` by cosine similarity. Only terms
   * present in the query are visited, so the dot product stays sparse even
   * though document vectors are dense.
   */
  function queryFn(text, topK) {
    const termCounts = /* @__PURE__ */ new Map();
    for (const t of tokenizeForEmbedding(text)) {
      termCounts.set(t, (termCounts.get(t) ?? 0) + 1);
    }
    const queryVec = new Float32Array(dimensions);
    let queryNorm = 0;
    for (const [term, count] of termCounts) {
      const idx = termToIdx.get(term);
      if (idx === void 0) continue;
      const weight = count * (index.idf.get(term) ?? 0);
      queryVec[idx] = weight;
      queryNorm += weight * weight;
    }
    queryNorm = Math.sqrt(queryNorm);
    if (queryNorm > 0) {
      for (let i = 0; i < dimensions; i++) {
        queryVec[i] /= queryNorm;
      }
    }
    const results = [];
    for (const [filePath, docVec] of docVectors) {
      let dot = 0;
      for (const [term] of termCounts) {
        const idx = termToIdx.get(term);
        if (idx !== void 0) {
          dot += queryVec[idx] * docVec[idx];
        }
      }
      if (dot > 0) {
        results.push({ filePath, score: dot });
      }
    }
    return results.sort((a, b) => b.score - a.score).slice(0, topK);
  }
  return {
    backend: "tfidf-cosine",
    dimensions,
    documentCount: docVectors.size,
    query: queryFn
  };
}
5826
/**
 * Fuse two ranked lists with weighted Reciprocal Rank Fusion:
 * score(f) = sum of weight / (k + rank + 1) over every list containing f.
 * @param {{filePath: string}[]} bm25Results - BM25 ranking, best first.
 * @param {{filePath: string}[]} embeddingResults - Embedding ranking, best first.
 * @param {number} [k=60] - RRF damping constant.
 * @param {number} [bm25Weight=0.6]
 * @param {number} [embeddingWeight=0.4]
 * @returns {{filePath: string, score: number}[]} Fused ranking, best first.
 */
function reciprocalRankFusion2(bm25Results, embeddingResults, k = 60, bm25Weight = 0.6, embeddingWeight = 0.4) {
  const fused = /* @__PURE__ */ new Map();
  const accumulate = (ranked, weight) => {
    ranked.forEach((entry, rank) => {
      const prior = fused.get(entry.filePath) ?? 0;
      fused.set(entry.filePath, prior + weight / (k + rank + 1));
    });
  };
  accumulate(bm25Results, bm25Weight);
  accumulate(embeddingResults, embeddingWeight);
  return [...fused.entries()]
    .map(([filePath, score]) => ({ filePath, score }))
    .sort((a, b) => b.score - a.score);
}
5840
/**
 * Tokenize text for the tf-idf embedding index: split camelCase boundaries,
 * lowercase, replace non-alphanumerics with spaces, and keep tokens of
 * length >= 2.
 *
 * Fix: previously `.toLowerCase()` ran BEFORE the ([a-z])([A-Z]) camelCase
 * split, so the split regex could never match and "getUser" stayed a single
 * token. The split now runs first, matching the evident intent.
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeForEmbedding(text) {
  return text
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .toLowerCase()
    .replace(/[^a-z0-9]/g, " ")
    .split(/\s+/)
    .filter((t) => t.length >= 2);
}
5843
/**
 * Probe whether the optional onnxruntime-node package can be loaded.
 * The dynamic import goes through the Function constructor so bundlers do
 * not try to resolve the optional dependency statically.
 * @returns {Promise<boolean>} true when the import succeeds.
 */
async function isOnnxAvailable() {
  try {
    await Function('return import("onnxruntime-node")')();
  } catch {
    return false;
  }
  return true;
}
5851
/**
 * Placeholder for a neural embedding backend. Always resolves to null in
 * this version: even when onnxruntime-node loads and a model path is given,
 * no inference is implemented yet.
 * @param {unknown} _files - Unused.
 * @param {string} [modelPath] - Path to an ONNX model.
 * @returns {Promise<null>}
 */
async function buildNeuralEmbeddingIndex(_files, modelPath) {
  try {
    const runtime = await Function('return import("onnxruntime-node")')();
    if (!modelPath || !runtime) return null;
    // Runtime is available, but model execution is not wired up yet.
    return null;
  } catch {
    return null;
  }
}
5860
+
5861
+ // src/engine/context-pipeline.ts
5862
// Basename patterns for repository-meta files that pollute semantic
// rankings: changelogs, licenses, governance docs, and lockfiles.
var RANKING_NOISE_PATTERNS = [
  /^changelog/i,
  /^license/i,
  /^contributing/i,
  /^code_of_conduct/i,
  /^authors/i,
  /^codeowners$/i,
  /^security/i,
  /\.lock$/,
  /^package-lock\.json$/,
  /^yarn\.lock$/,
  /^pnpm-lock\.yaml$/,
  /^Gemfile\.lock$/
];
/**
 * True when a file's basename matches a known ranking-noise pattern.
 * @param {string} filePath
 * @returns {boolean}
 */
function isRankingNoise(filePath) {
  const segments = filePath.split("/");
  const base = segments[segments.length - 1] ?? filePath;
  return RANKING_NOISE_PATTERNS.some((re) => re.test(base));
}
5880
/**
 * Score multiplier for a file based on its kind (test / doc / config) and
 * the task type: tests are down-weighted for debugging but boosted for test
 * tasks, docs are boosted for docs tasks, etc. Returns 1 when neutral.
 * @param {string} filePath
 * @param {string} taskType - e.g. "debug", "test", "docs", "feature", "refactor".
 * @returns {number}
 */
function fileTypePenalty(filePath, taskType) {
  const lower = filePath.toLowerCase();
  const isTest = /[/\\]test[s]?[/\\]|\.test\.|\.spec\.|_test\./i.test(lower);
  const isDoc = /\.md$|\.txt$|\.rst$|^docs[/\\]/i.test(lower);
  const isConfig = /\.xml$|\.yml$|\.yaml$|\.properties$|\.gradle$/i.test(lower);
  switch (taskType) {
    case "debug":
      if (isTest) return 0.4;
      if (isDoc) return 0.2;
      if (isConfig) return 0.6;
      break;
    case "test":
      if (isTest) return 1.2;
      if (isDoc) return 0.3;
      break;
    case "docs":
      if (isDoc) return 1.2;
      if (isTest) return 0.3;
      break;
    case "feature":
    case "refactor":
      if (isTest) return 0.5;
      if (isDoc) return 0.4;
      break;
  }
  return 1;
}
5901
/**
 * Heuristically decide whether a task describes a multi-step / cross-layer
 * change (which benefits from multi-hop graph expansion) rather than a
 * single-spot edit. Any one signal is sufficient.
 * @param {string} task
 * @returns {boolean}
 */
function detectComplexQuery(task) {
  const lower = task.toLowerCase();
  // Signal 1: wording that implies following a call chain or data flow.
  if (/\b(when|after|then|through|from .+ to|via|chain|flow|trace|path|propagat|cascade|invalidat\w+ on|calls?|invokes?)\b/.test(lower)) {
    return true;
  }
  // Signal 2: at least two architectural layers mentioned.
  const layerNames = [
    "controller", "endpoint", "router", "handler", "service", "usecase",
    "use case", "repository", "repo", "cache", "database", "queue",
    "client", "adapter", "gateway", "interceptor", "middleware",
    "listener", "consumer", "producer", "publisher", "subscriber"
  ];
  let mentioned = 0;
  for (const name of layerNames) {
    if (lower.includes(name)) mentioned++;
  }
  if (mentioned >= 2) return true;
  // Signal 3: long descriptions (>= 10 words of 3+ chars) tend to be multi-step.
  const significantWords = lower.split(/\s+/).filter((w) => w.length > 2);
  if (significantWords.length >= 10) return true;
  // Signal 4: three or more connective prepositions.
  const connectors = lower.match(/\b(on|for|in|from|to|with|after|before|during)\b/g);
  return connectors !== null && connectors.length >= 3;
}
5937
/**
 * End-to-end context selection pipeline: read and index the project, run
 * hybrid (BM25 + tf-idf-cosine) retrieval with optional multi-hop graph
 * expansion for complex queries, apply a stack of heuristic boosts, rerank,
 * fold in learned per-project preferences, and select files within budget.
 *
 * @param {object} input - { projectPath, task, analysis, budget?, siblingRepos? }
 * @returns {Promise<object>} Selection plus intermediate maps used by
 *   callers for explanation output.
 */
async function runContextPipeline(input) {
  const { projectPath, task, analysis, budget = 5e4 } = input;
  const taskType = classifyTask(task);
  const queryIntent = parseQueryIntent(task);
  const weightedQuery = buildWeightedQuery(queryIntent);

  // Read every file once; read failures are recorded as empty content so
  // the file still participates in path-based signals.
  const fileContentMap = /* @__PURE__ */ new Map();
  const fileContents = [];
  for (const file of analysis.files) {
    try {
      const text = readFileSync6(file.path, "utf-8");
      fileContentMap.set(file.relativePath, text);
      fileContents.push({ relativePath: file.relativePath, content: text });
    } catch {
      fileContents.push({ relativePath: file.relativePath, content: "" });
    }
  }

  // Index structure-augmented content.
  const indexFiles = analysis.files.map((f) => {
    const raw = fileContentMap.get(f.relativePath);
    return {
      relativePath: f.relativePath,
      absolutePath: f.path,
      content: raw ? augmentContentWithStructure(raw, f.relativePath) : void 0
    };
  });
  const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);

  // Retrieve ~15% of the repo, clamped to [20, 100] candidates.
  const fileCount = analysis.files.length;
  const adaptiveTopK = Math.min(Math.max(20, Math.round(fileCount * 0.15)), 100);
  const allFilePaths = analysis.files.map((f) => f.relativePath);

  // Dependency map and call graph used by the graph-based boosts.
  const depMap = /* @__PURE__ */ new Map();
  for (const file of analysis.files) {
    depMap.set(file.relativePath, file.imports);
  }
  const callGraph = buildCallGraph(fileContents.filter((f) => f.content.length > 0));
  const callEdges = [...analysis.graph.edges.filter((e) => e.type === "call"), ...callGraph.edges];

  const isComplexQuery = detectComplexQuery(task);
  const embeddingIndex = buildTfIdfEmbeddingIndex(index);
  const embeddingResults = embeddingIndex.query(weightedQuery, adaptiveTopK);

  // Complex (cross-layer) tasks use multi-hop expansion; simple ones plain BM25.
  let bm25Matches;
  if (isComplexQuery) {
    const hopResult = multiHopQuery(index, weightedQuery, depMap, callEdges, fileContentMap, {
      maxHops: 2,
      topKPerHop: 5,
      decayFactor: 0.5,
      minScoreThreshold: 0.15
    });
    bm25Matches = hopResult.matches.slice(0, adaptiveTopK);
  } else {
    bm25Matches = query(index, weightedQuery, adaptiveTopK);
  }

  // Hybrid fusion of lexical and embedding rankings.
  const fusedResults = reciprocalRankFusion2(bm25Matches, embeddingResults, 60, 0.6, 0.4);
  const rawMatches = fusedResults.slice(0, adaptiveTopK).map((r) => {
    const bm25Match = bm25Matches.find((m) => m.filePath === r.filePath);
    return {
      filePath: r.filePath,
      score: r.score,
      matchedTerms: bm25Match?.matchedTerms ?? ["[embedding-only]"]
    };
  });

  // Boost stack: path/layer heuristics, import & call-graph proximity,
  // then git co-change history.
  const semanticMatches = rawMatches.filter((m) => !isRankingNoise(m.filePath));
  const pathBoosted = boostByPath(semanticMatches, allFilePaths, task);
  const layerBoosted = boostByLayer(pathBoosted, allFilePaths, task);
  const importBoosted = boostByImports(layerBoosted, depMap, 10, 0.4);
  const callBoosted = boostByCallGraph(importBoosted, callEdges, 10, 0.3);
  const coChangeMatrix = buildCoChangeMatrix(projectPath, 500, 2);
  const boostedMatches = boostByGitCoChange(callBoosted, coChangeMatrix, 10, 0.25, 0.15);

  // Reranker approval multiplies the score by 1.5; file-type penalties then
  // adjust per task type (tests vs docs vs code).
  const rerankResult = rerank({
    task,
    candidates: boostedMatches,
    index,
    fileContents: fileContentMap,
    dependencies: depMap,
    allFilePaths: analysis.files.map((f) => f.relativePath)
  });
  const rerankerApproved = new Set(rerankResult.files.map((rf) => rf.filePath));
  const rerankedMatches = boostedMatches.map((m) => ({
    filePath: m.filePath,
    score: rerankerApproved.has(m.filePath) ? m.score * 1.5 : m.score,
    matchedTerms: [...m.matchedTerms]
  }));
  for (const m of rerankedMatches) {
    m.score *= fileTypePenalty(m.filePath, taskType);
  }

  // Fold in per-project learned boosts, then select within the token budget.
  const learner = await loadLearner(projectPath);
  const learnerBoosts = getLearnerBoosts(
    learner,
    taskType,
    analysis.files.map((f) => f.relativePath)
  );
  const semanticScores = rerankedMatches.map((m) => ({ filePath: m.filePath, score: m.score }));
  const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
  const selection = await selectContext({
    task,
    analysis,
    budget,
    semanticScores,
    learnerBoosts: learnerBoostInputs
  });

  const semanticMap = new Map(rerankedMatches.map((m) => [m.filePath, m]));
  const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));

  // Optional cross-repo hits from sibling repositories (lazy module load).
  let multiRepo;
  if (input.siblingRepos && input.siblingRepos.length > 0) {
    const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
    multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
  }

  return { selection, taskType, fileContentMap, semanticMap, learnerMap, queryIntent, multiRepo, indexCacheStats };
}
6046
+
6047
// src/engine/index.ts
// Module-level side effects: run the init_* wrappers (presumably
// bundler-generated lazy-module initializers — confirm against build setup)
// for the tfidf and synonyms chunks before anything below uses them.
init_tfidf();
init_synonyms();
6050
+
6051
+ // src/engine/ab-testing.ts
6052
+ import { createHash as createHash4 } from "crypto";
6053
+ import { readFileSync as readFileSync7, writeFileSync as writeFileSync3, existsSync as existsSync6, mkdirSync as mkdirSync3 } from "fs";
6054
+ import { join as join8 } from "path";
6055
// Filename (under <project>/.cto/) where A/B experiments are persisted.
var EXPERIMENTS_FILE = "experiments.json";
/**
 * Load persisted experiments for a project.
 * @param {string} projectPath
 * @returns {object[]} Parsed experiments; [] when the file is missing or
 *   unreadable/corrupt (best-effort read, never throws).
 */
function loadExperiments(projectPath) {
  const filePath = join8(projectPath, ".cto", EXPERIMENTS_FILE);
  try {
    return existsSync6(filePath) ? JSON.parse(readFileSync7(filePath, "utf-8")) : [];
  } catch {
    // Corrupt JSON or unreadable file: behave as "no experiments yet".
    return [];
  }
}
/**
 * Persist experiments to <projectPath>/.cto/experiments.json, creating the
 * .cto directory when needed.
 * @param {string} projectPath
 * @param {object[]} experiments
 */
function saveExperiments(projectPath, experiments) {
  const dir = join8(projectPath, ".cto");
  if (!existsSync6(dir)) mkdirSync3(dir, { recursive: true });
  writeFileSync3(join8(dir, EXPERIMENTS_FILE), JSON.stringify(experiments, null, 2));
}
6070
/**
 * Create a new A/B experiment comparing a control parameter set against a
 * variant, starting immediately in "running" state.
 * @param {string} id
 * @param {string} name
 * @param {string} description
 * @param {object} controlParams - Parameters for the control arm.
 * @param {object} variantParams - Parameters for the variant arm.
 * @param {{trafficSplit?: number, minObservations?: number,
 *   significanceThreshold?: number}} [options]
 * @returns {object} The experiment record with zeroed metrics on both arms.
 */
function createExperiment(id, name, description, controlParams, variantParams, options = {}) {
  const arm = (armName, params) => ({ name: armName, params, metrics: emptyMetrics() });
  return {
    id,
    name,
    description,
    status: "running",
    startedAt: new Date().toISOString(),
    // `??` (not destructuring defaults) so an explicit null also falls back.
    trafficSplit: options.trafficSplit ?? 0.5,
    minObservations: options.minObservations ?? 30,
    significanceThreshold: options.significanceThreshold ?? 0.05,
    control: arm("control", controlParams),
    variant: arm("variant", variantParams)
  };
}
/**
 * Fresh, all-zero metrics accumulator for one experiment arm.
 * @returns {object}
 */
function emptyMetrics() {
  return {
    total: 0,
    successes: 0,
    acceptRate: 0,
    avgTimeToAccept: 0,
    compilableRate: 0,
    timeSum: 0,
    compilableCount: 0
  };
}
3953
6103
  function assignGroup(experiment, task) {
3954
6104
  if (experiment.status !== "running") return null;
@@ -4080,6 +6230,757 @@ function renderExperimentSummary(experiment) {
4080
6230
  // src/engine/index.ts
4081
6231
  init_multi_repo();
4082
6232
 
6233
+ // src/engine/semantic-expand.ts
6234
+ init_tfidf();
6235
// Expand a query with corpus terms that co-occur with the query terms,
// scored by pointwise mutual information: PMI(a,b) = log2(P(a,b) / (P(a)P(b)))
// over document-level occurrence. Returns { original, expanded, expansions }
// where `expanded` maps term -> weight (query terms get weight 1) and
// `expansions` records provenance for each added term.
function expandQueryWithPMI(index, queryTerms, topK = 3, minPmi = 1, expansionWeight = 0.5) {
  const uniqueQuery = [...new Set(queryTerms)];
  const expanded = /* @__PURE__ */ new Map();
  const expansions = [];
  // Original query terms always carry full weight.
  for (const term of uniqueQuery) {
    expanded.set(term, 1);
  }
  const N = index.totalDocs;
  // PMI is too noisy on tiny corpora; skip expansion entirely below 5 docs.
  if (N < 5) return { original: uniqueQuery, expanded, expansions };
  // Precompute, per query term, the set of documents containing it.
  const queryTermDocs = /* @__PURE__ */ new Map();
  for (const term of uniqueQuery) {
    const docs = /* @__PURE__ */ new Set();
    for (const [filePath, doc] of index.documents) {
      if ((doc.terms.get(term) ?? 0) > 0) docs.add(filePath);
    }
    queryTermDocs.set(term, docs);
  }
  const queryTermSet = new Set(uniqueQuery);
  for (const queryTerm of uniqueQuery) {
    const queryDocs = queryTermDocs.get(queryTerm);
    // A term absent from the corpus cannot contribute co-occurrence evidence.
    if (!queryDocs || queryDocs.size === 0) continue;
    const pA = queryDocs.size / N;
    const candidates = [];
    // Every indexed term (except query terms and very short tokens) is a
    // candidate expansion for this query term.
    for (const [candidateTerm] of index.idf) {
      if (queryTermSet.has(candidateTerm)) continue;
      if (candidateTerm.length < 3) continue;
      let cooccurrence = 0;
      let candidateCount = 0;
      for (const [filePath, doc] of index.documents) {
        const hasCand = (doc.terms.get(candidateTerm) ?? 0) > 0;
        if (hasCand) {
          candidateCount++;
          if (queryDocs.has(filePath)) cooccurrence++;
        }
      }
      // Require more supporting documents on larger corpora.
      const minCooc = N >= 20 ? 2 : 1;
      if (cooccurrence < minCooc) continue;
      if (candidateCount < minCooc) continue;
      const pB = candidateCount / N;
      const pAB = cooccurrence / N;
      const pmi = Math.log2(pAB / (pA * pB));
      if (pmi >= minPmi) {
        candidates.push({ term: candidateTerm, pmi });
      }
    }
    // Keep only the strongest topK associations for this query term.
    candidates.sort((a, b) => b.pmi - a.pmi);
    for (const cand of candidates.slice(0, topK)) {
      // Scale weight by PMI strength, saturating at PMI >= 3.
      const weight = expansionWeight * Math.min(1, cand.pmi / 3);
      const existing = expanded.get(cand.term) ?? 0;
      // A term suggested by several query terms keeps its best weight.
      expanded.set(cand.term, Math.max(existing, weight));
      expansions.push({
        term: cand.term,
        source: queryTerm,
        pmi: cand.pmi,
        weight
      });
    }
  }
  return { original: uniqueQuery, expanded, expansions };
}
6295
// Build sparse-random-projection embeddings for every document in the index.
// Each term is hashed to `nnz` random dimensions with random signs; a document
// embedding is the tf-idf-weighted sum of its terms' sparse vectors.
// NOTE(review): reproducibility depends on the seeded LCG below AND on the
// iteration order of index.idf — the same seed with a differently-ordered
// index yields different term vectors.
function buildCorpusEmbeddings(index, dimension = 128, nnz = 6, seed = 42) {
  const termVectors = /* @__PURE__ */ new Map();
  let rngState = seed;
  // Deterministic linear congruential generator (numerical-recipes constants),
  // masked to 31 bits; returns a float in [0, 1].
  function nextRandom() {
    rngState = rngState * 1664525 + 1013904223 & 2147483647;
    return rngState / 2147483647;
  }
  for (const [term] of index.idf) {
    const indices = [];
    const signs = [];
    const usedIndices = /* @__PURE__ */ new Set();
    // Draw nnz distinct dimensions for this term (rejection sampling).
    // Assumes nnz <= dimension; otherwise this loop cannot terminate.
    for (let i = 0; i < nnz; i++) {
      let idx;
      do {
        idx = Math.floor(nextRandom() * dimension);
      } while (usedIndices.has(idx));
      usedIndices.add(idx);
      indices.push(idx);
      signs.push(nextRandom() > 0.5 ? 1 : -1);
    }
    termVectors.set(term, { indices, signs });
  }
  const documents = /* @__PURE__ */ new Map();
  for (const [filePath, doc] of index.documents) {
    const embedding = new Float64Array(dimension);
    for (const [term, count] of doc.terms) {
      const idf = index.idf.get(term) ?? 0;
      const tfidfWeight = count * idf;
      const vec = termVectors.get(term);
      if (!vec) continue;
      // Scatter-add the term's signed contribution into its nnz dimensions.
      for (let i = 0; i < vec.indices.length; i++) {
        embedding[vec.indices[i]] += tfidfWeight * vec.signs[i];
      }
    }
    documents.set(filePath, embedding);
  }
  // termVectors is returned so queries can be embedded in the same space.
  return { documents, dimension, termVectors };
}
6333
// Embed a query string into the corpus embedding space: tokenize, then
// scatter-add each known term's signed sparse vector (unweighted).
// Terms absent from the corpus vocabulary are ignored.
function embedQuery(query2, embeddings) {
  const queryVec = new Float64Array(embeddings.dimension);
  for (const term of tokenize(query2)) {
    const termVec = embeddings.termVectors.get(term);
    if (!termVec) continue;
    termVec.indices.forEach((dim, k) => {
      queryVec[dim] += termVec.signs[k];
    });
  }
  return queryVec;
}
6345
// Rank documents by cosine similarity against a query embedding.
// Zero-norm query or document vectors are skipped; only strictly positive
// similarities are returned, best first, capped at maxResults.
function queryByEmbedding(queryVec, embeddings, maxResults = 50) {
  const scored = [];
  let queryNormSq = 0;
  for (const component of queryVec) {
    queryNormSq += component * component;
  }
  const queryNorm = Math.sqrt(queryNormSq);
  if (queryNorm === 0) return scored;
  for (const [filePath, docVec] of embeddings.documents) {
    let dot = 0;
    let docNormSq = 0;
    for (let d = 0; d < docVec.length; d++) {
      dot += queryVec[d] * docVec[d];
      docNormSq += docVec[d] * docVec[d];
    }
    const docNorm = Math.sqrt(docNormSq);
    if (docNorm === 0) continue;
    const similarity = dot / (queryNorm * docNorm);
    if (similarity > 0) {
      scored.push({ filePath, similarity });
    }
  }
  scored.sort((a, b) => b.similarity - a.similarity);
  return scored.slice(0, maxResults);
}
6364
+
6365
+ // src/engine/weight-tuner.ts
6366
+ import { readFileSync as readFileSync8, writeFileSync as writeFileSync4, existsSync as existsSync7, mkdirSync as mkdirSync4 } from "fs";
6367
+ import { join as join9 } from "path";
6368
+ var MODEL_DIR2 = ".cto";
6369
+ var MODEL_FILE2 = "weight-tuner.json";
6370
+ var MIN_FEEDBACK = 5;
6371
+ var MAX_HISTORY = 20;
6372
+ var DEFAULT_SIGNALS = [
6373
+ { name: "bm25", defaultWeight: 0.4 },
6374
+ { name: "path", defaultWeight: 0.25 },
6375
+ { name: "imports", defaultWeight: 0.2 },
6376
+ { name: "className", defaultWeight: 0.15 }
6377
+ ];
6378
// Load the persisted weight-tuner model; any missing, unreadable, or
// wrong-version file falls back to a fresh default model.
function loadWeightTuner(projectPath) {
  const modelPath = join9(projectPath, MODEL_DIR2, MODEL_FILE2);
  try {
    if (existsSync7(modelPath)) {
      const parsed = JSON.parse(readFileSync8(modelPath, "utf-8"));
      if (parsed.version === 1) return parsed;
    }
  } catch {
    // Corrupt file: treat the same as a missing one.
  }
  return createFreshModel();
}
6390
// Persist the weight-tuner model as pretty-printed JSON, creating the
// model directory if needed.
function saveWeightTuner(projectPath, model) {
  const modelDir = join9(projectPath, MODEL_DIR2);
  mkdirSync4(modelDir, { recursive: true });
  const payload = JSON.stringify(model, null, 2);
  writeFileSync4(join9(modelDir, MODEL_FILE2), payload);
}
6395
// Build a pristine tuner model: each default signal gets a uniform
// Beta(1, 1) prior, with no feedback or history recorded yet.
function createFreshModel() {
  const signals = DEFAULT_SIGNALS.map((signal) => ({
    ...signal,
    alpha: 1,
    beta: 1
  }));
  return {
    version: 1,
    updatedAt: new Date().toISOString(),
    signals,
    totalFeedback: 0,
    history: []
  };
}
6409
// Fold a batch of accept/reject feedback into the Beta posteriors of the
// matching signals and append the resulting weight snapshot to history.
// Returns a new model object; the input model is not mutated.
function recordFeedback(model, feedback) {
  const updated = {
    ...model,
    signals: model.signals.map((s) => ({ ...s })),
    // Fix: the spread above shared `history` with the input model, so the
    // push below mutated the caller's model. Copy the array as well.
    history: [...model.history]
  };
  for (const fb of feedback) {
    const signal = updated.signals.find((s) => s.name === fb.signal);
    if (!signal) continue;
    if (fb.accepted) {
      signal.alpha += 1;
    } else {
      signal.beta += 1;
    }
  }
  updated.totalFeedback += feedback.length;
  updated.updatedAt = new Date().toISOString();
  const currentWeights = getOptimizedWeights(updated);
  updated.history.push({
    timestamp: updated.updatedAt,
    weights: currentWeights.weights
  });
  if (updated.history.length > MAX_HISTORY) {
    updated.history = updated.history.slice(-MAX_HISTORY);
  }
  return updated;
}
6432
// Derive normalized signal weights from the Beta posteriors.
// Below MIN_FEEDBACK observations the hand-tuned defaults are returned
// with zero confidence; otherwise posterior means are normalized to sum
// to 1 and confidence grows linearly, saturating at 50 observations.
function getOptimizedWeights(model) {
  if (model.totalFeedback < MIN_FEEDBACK) {
    const defaults = {};
    for (const signal of model.signals) {
      defaults[signal.name] = signal.defaultWeight;
    }
    return { weights: defaults, confidence: 0, source: "default" };
  }
  const posteriorMeans = {};
  let meanSum = 0;
  for (const signal of model.signals) {
    const mean = signal.alpha / (signal.alpha + signal.beta);
    posteriorMeans[signal.name] = mean;
    meanSum += mean;
  }
  const signalCount = Object.keys(posteriorMeans).length;
  const weights = {};
  for (const [name, mean] of Object.entries(posteriorMeans)) {
    // Degenerate all-zero case falls back to a uniform split.
    weights[name] = meanSum > 0 ? mean / meanSum : 1 / signalCount;
  }
  const confidence = Math.min(1, model.totalFeedback / 50);
  return { weights, confidence, source: "learned" };
}
6454
// Return the signal with the lowest (best) rank; ties keep the first
// entry seen. An empty rank map yields the empty string.
function attributeToSignal(signalRanks) {
  let winner = "";
  let winnerRank = Infinity;
  for (const [signalName, rank] of Object.entries(signalRanks)) {
    if (rank < winnerRank) {
      winnerRank = rank;
      winner = signalName;
    }
  }
  return winner;
}
6465
// Render a human-readable bar chart of the current signal weights,
// including each signal's delta against its default weight.
function renderWeightStatus(model) {
  const tuned = getOptimizedWeights(model);
  const out = [` Signal Weights (${tuned.source}, confidence: ${(tuned.confidence * 100).toFixed(0)}%):`];
  for (const signal of model.signals) {
    const weight = tuned.weights[signal.name] ?? 0;
    const filled = Math.round(weight * 20);
    const bar = "\u2588".repeat(filled) + "\u2591".repeat(20 - filled);
    const delta = weight - signal.defaultWeight;
    // Positive deltas get an explicit "+"; negatives already carry "-".
    const deltaStr = `${delta >= 0 ? "+" : ""}${(delta * 100).toFixed(1)}`;
    out.push(` ${signal.name.padEnd(12)} ${bar} ${(weight * 100).toFixed(1)}% (${deltaStr}% vs default)`);
  }
  out.push(` Total feedback: ${model.totalFeedback} observations`);
  return out.join("\n");
}
6480
+
6481
+ // src/engine/ide-telemetry.ts
6482
+ import { existsSync as existsSync8, readFileSync as readFileSync9, writeFileSync as writeFileSync5, mkdirSync as mkdirSync5 } from "fs";
6483
+ import { join as join10 } from "path";
6484
+ var EMPTY_MODEL = {
6485
+ version: 1,
6486
+ sessions: [],
6487
+ fileOpenCounts: {},
6488
+ fileTaskCounts: {},
6489
+ lastUpdated: 0
6490
+ };
6491
+ var MAX_SESSIONS = 200;
6492
// Load the telemetry model from <project>/.cto/telemetry.json.
// Any missing, unreadable, or wrong-version file yields a fresh model.
function loadTelemetry(projectPath) {
  // Fix: the previous fallback `{ ...EMPTY_MODEL }` was a shallow copy, so
  // every caller shared EMPTY_MODEL's sessions array and count objects and
  // mutations (recordSession/recordFileOpen) leaked into the module constant.
  // Build fully fresh nested containers instead.
  const freshModel = () => ({
    version: 1,
    sessions: [],
    fileOpenCounts: {},
    fileTaskCounts: {},
    lastUpdated: 0
  });
  const filePath = join10(projectPath, ".cto", "telemetry.json");
  try {
    if (!existsSync8(filePath)) return freshModel();
    const raw = readFileSync9(filePath, "utf-8");
    const data = JSON.parse(raw);
    if (data.version !== 1) return freshModel();
    return data;
  } catch {
    return freshModel();
  }
}
6504
// Persist the telemetry model (stamping lastUpdated) under <project>/.cto/.
// Persistence is best-effort: disk errors are deliberately swallowed.
function saveTelemetry(projectPath, model) {
  const ctoDir = join10(projectPath, ".cto");
  try {
    if (!existsSync8(ctoDir)) {
      mkdirSync5(ctoDir, { recursive: true });
    }
    model.lastUpdated = Date.now();
    const payload = JSON.stringify(model, null, 2);
    writeFileSync5(join10(ctoDir, "telemetry.json"), payload);
  } catch {
    // Telemetry must never break the main flow; ignore write failures.
  }
}
6513
// Count a file-open event, and — when a task context is given — also bump
// the per-task-type counter for that file. Mutates and returns the model.
function recordFileOpen(model, filePath, taskContext) {
  const opens = model.fileOpenCounts;
  opens[filePath] = (opens[filePath] ?? 0) + 1;
  if (taskContext) {
    const taskType = extractTaskType(taskContext);
    const perTask = model.fileTaskCounts[filePath] ?? (model.fileTaskCounts[filePath] = {});
    perTask[taskType] = (perTask[taskType] ?? 0) + 1;
  }
  return model;
}
6524
// Append one suggestion session to the model (bounded at MAX_SESSIONS,
// oldest dropped) and record an open event for every file actually opened.
// Mutates and returns the model.
function recordSession(model, taskDescription, suggestedFiles, openedFiles) {
  const session = {
    taskDescription,
    suggestedFiles,
    openedFiles,
    timestamp: Date.now()
  };
  model.sessions.push(session);
  if (model.sessions.length > MAX_SESSIONS) {
    model.sessions = model.sessions.slice(-MAX_SESSIONS);
  }
  openedFiles.forEach((file) => recordFileOpen(model, file, taskDescription));
  return model;
}
6539
// Compute per-file score boosts from historical accept behavior:
// boost = acceptRate - 0.5, so files opened more than half the time they
// were suggested get a positive boost and ignored files a negative one.
// Sessions whose task type matches get 1.5x weight; files suggested fewer
// than 2 (weighted) times are skipped for lack of evidence.
function getTelemetryBoosts(model, taskType, candidateFiles) {
  const boosts = new Map();
  if (model.sessions.length === 0) return boosts;
  const suggestCount = new Map();
  const openCount = new Map();
  for (const session of model.sessions) {
    const sessionWeight = extractTaskType(session.taskDescription) === taskType ? 1.5 : 1;
    const openedSet = new Set(session.openedFiles);
    for (const file of session.suggestedFiles) {
      suggestCount.set(file, (suggestCount.get(file) ?? 0) + sessionWeight);
      if (openedSet.has(file)) {
        openCount.set(file, (openCount.get(file) ?? 0) + sessionWeight);
      }
    }
  }
  for (const file of candidateFiles) {
    const timesSuggested = suggestCount.get(file) ?? 0;
    if (timesSuggested < 2) continue;
    const acceptRate = (openCount.get(file) ?? 0) / timesSuggested;
    boosts.set(file, acceptRate - 0.5);
  }
  return boosts;
}
6565
// Classify a free-text task description into a coarse task type via
// keyword matching. Rules are checked in priority order; first hit wins,
// and anything unmatched falls through to "general".
function extractTaskType(taskDescription) {
  const lower = taskDescription.toLowerCase();
  const rules = [
    ["debug", /fix|bug|error|debug|crash|broken/],
    ["test", /test|spec|coverage/],
    ["refactor", /refactor|restructure|clean/],
    ["docs", /doc|readme|comment/],
    ["feature", /add|implement|create|build|feature/]
  ];
  for (const [type, pattern] of rules) {
    if (pattern.test(lower)) return type;
  }
  return "general";
}
6574
// Render a plain-text telemetry report: session count, the top-10 most
// opened files, and the acceptance rate over the last 10 sessions.
function renderTelemetrySummary(model) {
  const out = [`IDE Telemetry: ${model.sessions.length} sessions tracked`];
  const topFiles = Object.entries(model.fileOpenCounts)
    .sort(([, a], [, b]) => b - a)
    .slice(0, 10);
  if (topFiles.length > 0) {
    out.push("Most opened files:");
    for (const [file, count] of topFiles) {
      out.push(` ${count}\xD7 ${file}`);
    }
  }
  if (model.sessions.length > 0) {
    const recent = model.sessions.slice(-10);
    let totalSuggested = 0;
    let totalOpened = 0;
    for (const sess of recent) {
      totalSuggested += sess.suggestedFiles.length;
      totalOpened += sess.openedFiles.length;
    }
    const rate = totalSuggested > 0 ? (totalOpened / totalSuggested * 100).toFixed(1) : "0";
    out.push(`Last 10 sessions: ${rate}% acceptance rate (${totalOpened}/${totalSuggested} files used)`);
  }
  return out.join("\n");
}
6593
+
6594
+ // src/engine/chunk-retrieval.ts
6595
+ init_tfidf();
6596
// Split Java source into retrievable chunks: one chunk for the import block,
// one per class/interface/enum declaration, and one per method (including any
// annotation lines directly above it). Chunk line numbers are 1-based.
// NOTE(review): class chunks deliberately overlap the method chunks inside
// them — the scan index is not advanced past a class declaration.
function chunkJava(content, filePath) {
  const lines = content.split("\n");
  const chunks = [];
  let i = 0;
  // Collect the first contiguous run of `import` lines (blank lines allowed
  // within the run).
  const importStart = lines.findIndex((l) => l.trim().startsWith("import "));
  if (importStart >= 0) {
    let importEnd = importStart;
    while (importEnd < lines.length && (lines[importEnd].trim().startsWith("import ") || lines[importEnd].trim() === "")) {
      importEnd++;
    }
    if (importEnd > importStart) {
      chunks.push({
        filePath,
        startLine: importStart + 1,
        endLine: importEnd,
        content: lines.slice(importStart, importEnd).join("\n"),
        kind: "import",
        name: "imports",
        score: 0,
        tokens: estimateTokens2(lines.slice(importStart, importEnd).join("\n"))
      });
    }
  }
  i = 0;
  while (i < lines.length) {
    const line = lines[i].trim();
    // Type declarations: optional visibility/static/final, then class-like keyword.
    const classMatch = line.match(/^(?:public|abstract|private|protected)?\s*(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+(\w+)/);
    if (classMatch) {
      const classEnd = findBraceEnd(lines, i);
      chunks.push({
        filePath,
        startLine: i + 1,
        endLine: classEnd + 1,
        content: lines.slice(i, classEnd + 1).join("\n"),
        kind: line.includes("interface") ? "interface" : "class",
        name: classMatch[1],
        score: 0,
        tokens: estimateTokens2(lines.slice(i, classEnd + 1).join("\n"))
      });
    }
    // Heuristic method signature: modifiers, a return type, then `name(`.
    const methodMatch = line.match(/^(?:@\w+\s+)*(?:public|private|protected|static|\s)+\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/);
    if (methodMatch && !classMatch) {
      const name = methodMatch[1];
      // The regex can also match control-flow statements; skip those names.
      if (!["if", "for", "while", "switch", "catch", "return"].includes(name)) {
        // Extend the chunk upward over any annotation lines (@Override etc.).
        let methodStart = i;
        while (methodStart > 0 && lines[methodStart - 1].trim().startsWith("@")) {
          methodStart--;
        }
        const methodEnd = findBraceEnd(lines, i);
        const className = findEnclosingClass(lines, i);
        chunks.push({
          filePath,
          startLine: methodStart + 1,
          endLine: methodEnd + 1,
          content: lines.slice(methodStart, methodEnd + 1).join("\n"),
          kind: "method",
          name,
          className,
          score: 0,
          tokens: estimateTokens2(lines.slice(methodStart, methodEnd + 1).join("\n"))
        });
        // Skip past the method body; nothing inside it starts a new chunk.
        i = methodEnd + 1;
        continue;
      }
    }
    i++;
  }
  return chunks;
}
6665
// Split TypeScript/JavaScript source into chunks: the leading import block,
// plus one chunk per top-level function, class, interface/type alias, or
// arrow-function const. Chunk line numbers are 1-based.
function chunkTypeScript(content, filePath) {
  const lines = content.split("\n");
  const chunks = [];
  let i = 0;
  // Gather the leading import block; blank lines inside it are tolerated,
  // and the first non-import, non-blank line after it ends the scan.
  const importLines = [];
  let importStart = -1;
  for (let j = 0; j < lines.length; j++) {
    if (lines[j].trim().startsWith("import ")) {
      if (importStart === -1) importStart = j;
      importLines.push(lines[j]);
    } else if (importStart >= 0 && lines[j].trim() === "") {
      continue;
    } else if (importStart >= 0) {
      break;
    }
  }
  if (importLines.length > 0) {
    chunks.push({
      filePath,
      startLine: importStart + 1,
      // NOTE(review): endLine assumes the import lines are contiguous from
      // importStart; blank lines inside the block shift real positions.
      endLine: importStart + importLines.length,
      content: importLines.join("\n"),
      kind: "import",
      name: "imports",
      score: 0,
      tokens: estimateTokens2(importLines.join("\n"))
    });
  }
  i = 0;
  while (i < lines.length) {
    const line = lines[i].trim();
    const funcMatch = line.match(/^(?:export\s+)?(?:async\s+)?function\s+(\w+)/);
    const classMatch = line.match(/^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/);
    const interfaceMatch = line.match(/^(?:export\s+)?(?:interface|type)\s+(\w+)/);
    const arrowMatch = line.match(/^(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?\(/);
    if (funcMatch || classMatch || arrowMatch) {
      const name = (funcMatch || classMatch || arrowMatch)[1];
      const blockEnd = findBraceEnd(lines, i);
      const kind = classMatch ? "class" : "function";
      chunks.push({
        filePath,
        startLine: i + 1,
        endLine: blockEnd + 1,
        content: lines.slice(i, blockEnd + 1).join("\n"),
        kind,
        name,
        score: 0,
        tokens: estimateTokens2(lines.slice(i, blockEnd + 1).join("\n"))
      });
      // Functions are skipped wholesale; classes are NOT, so members inside
      // a class can still produce their own chunks on later iterations.
      if (kind === "function") {
        i = blockEnd + 1;
        continue;
      }
    }
    if (interfaceMatch) {
      const blockEnd = findBraceEnd(lines, i);
      chunks.push({
        filePath,
        startLine: i + 1,
        endLine: blockEnd + 1,
        content: lines.slice(i, blockEnd + 1).join("\n"),
        kind: "interface",
        name: interfaceMatch[1],
        score: 0,
        tokens: estimateTokens2(lines.slice(i, blockEnd + 1).join("\n"))
      });
      i = blockEnd + 1;
      continue;
    }
    i++;
  }
  return chunks;
}
6738
// Split Python source into chunks: one chunk covering all import lines,
// plus one per class or (async) def, extended upward over decorators and
// downward while indentation stays deeper than the definition line.
function chunkPython(content, filePath) {
  const lines = content.split("\n");
  const chunks = [];
  // All import/from lines anywhere in the file.
  const importLines = [];
  for (let j = 0; j < lines.length; j++) {
    if (lines[j].trim().startsWith("import ") || lines[j].trim().startsWith("from ")) {
      importLines.push(j);
    }
  }
  if (importLines.length > 0) {
    const start = importLines[0];
    const end = importLines[importLines.length - 1];
    // NOTE(review): this spans first-to-last import line, so any code
    // between scattered imports is swept into the import chunk.
    chunks.push({
      filePath,
      startLine: start + 1,
      endLine: end + 1,
      content: lines.slice(start, end + 1).join("\n"),
      kind: "import",
      name: "imports",
      score: 0,
      tokens: estimateTokens2(lines.slice(start, end + 1).join("\n"))
    });
  }
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const trimmed = line.trim();
    const classMatch = trimmed.match(/^class\s+(\w+)/);
    const funcMatch = trimmed.match(/^(?:async\s+)?def\s+(\w+)/);
    // NOTE(review): decoratorStart can only be >= 0 when the current line is
    // a decorator, in which case classMatch/funcMatch are null — so the
    // `decoratorStart >= 0` branch below appears unreachable; decorators are
    // actually picked up by the backward scan.
    const decoratorStart = trimmed.startsWith("@") ? i : -1;
    if (classMatch || funcMatch) {
      const name = (classMatch || funcMatch)[1];
      const indent = line.length - line.trimStart().length;
      let blockStart = decoratorStart >= 0 ? decoratorStart : i;
      // Extend the chunk upward over contiguous decorator lines.
      if (decoratorStart < 0 && i > 0 && lines[i - 1].trim().startsWith("@")) {
        blockStart = i - 1;
        while (blockStart > 0 && lines[blockStart - 1].trim().startsWith("@")) blockStart--;
      }
      // Advance to the first non-blank line indented at or shallower than
      // the definition line; that line ends this block.
      let end = i + 1;
      while (end < lines.length) {
        const nextLine = lines[end];
        if (nextLine.trim() === "") {
          end++;
          continue;
        }
        const nextIndent = nextLine.length - nextLine.trimStart().length;
        if (nextIndent <= indent && nextLine.trim() !== "") break;
        end++;
      }
      const kind = classMatch ? "class" : "function";
      // An indented def is assumed to be a method; find its class by
      // scanning backward for the nearest class declaration.
      let className;
      if (funcMatch && indent > 0) {
        for (let j = i - 1; j >= 0; j--) {
          const prevMatch = lines[j].trim().match(/^class\s+(\w+)/);
          if (prevMatch) {
            className = prevMatch[1];
            break;
          }
        }
      }
      chunks.push({
        filePath,
        startLine: blockStart + 1,
        endLine: end,
        content: lines.slice(blockStart, end).join("\n"),
        kind: className ? "method" : kind,
        name,
        className,
        score: 0,
        tokens: estimateTokens2(lines.slice(blockStart, end).join("\n"))
      });
      i = end;
      continue;
    }
    i++;
  }
  return chunks;
}
6816
// Split Go source into chunks: one per func (methods detected via a
// receiver and tagged with their receiver type) and one per struct or
// interface type declaration. Chunk line numbers are 1-based.
function chunkGo(content, filePath) {
  const lines = content.split("\n");
  const chunks = [];
  let lineNo = 0;
  while (lineNo < lines.length) {
    const trimmed = lines[lineNo].trim();
    const funcMatch = trimmed.match(/^func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/);
    if (funcMatch) {
      const end = findBraceEnd(lines, lineNo);
      // A receiver clause like `func (s *Server) ...` marks a method.
      const receiverMatch = trimmed.match(/^func\s+\(\s*\w+\s+\*?(\w+)\s*\)/);
      const body = lines.slice(lineNo, end + 1).join("\n");
      chunks.push({
        filePath,
        startLine: lineNo + 1,
        endLine: end + 1,
        content: body,
        kind: receiverMatch ? "method" : "function",
        name: funcMatch[1],
        className: receiverMatch?.[1],
        score: 0,
        tokens: estimateTokens2(body)
      });
      lineNo = end + 1;
      continue;
    }
    const typeMatch = trimmed.match(/^type\s+(\w+)\s+(struct|interface)/);
    if (typeMatch) {
      const end = findBraceEnd(lines, lineNo);
      const body = lines.slice(lineNo, end + 1).join("\n");
      chunks.push({
        filePath,
        startLine: lineNo + 1,
        endLine: end + 1,
        content: body,
        kind: typeMatch[2] === "interface" ? "interface" : "class",
        name: typeMatch[1],
        score: 0,
        tokens: estimateTokens2(body)
      });
      lineNo = end + 1;
      continue;
    }
    lineNo++;
  }
  return chunks;
}
6860
// Scan forward from `start` and return the index of the line on which the
// brace depth returns to zero after the first "{" was seen. If no balanced
// block is found, fall back to 30 lines ahead (clamped to the last line).
function findBraceEnd(lines, start) {
  let depth = 0;
  let sawOpenBrace = false;
  for (let lineIdx = start; lineIdx < lines.length; lineIdx++) {
    const text = lines[lineIdx];
    for (let c = 0; c < text.length; c++) {
      const ch = text[c];
      if (ch === "{") {
        depth += 1;
        sawOpenBrace = true;
      } else if (ch === "}") {
        depth -= 1;
      }
      if (sawOpenBrace && depth === 0) return lineIdx;
    }
  }
  return Math.min(start + 30, lines.length - 1);
}
6877
// Walk upward from the method's line and return the name of the nearest
// class/interface/enum declaration, or undefined when none precedes it.
function findEnclosingClass(lines, methodLine) {
  for (let lineIdx = methodLine - 1; lineIdx >= 0; lineIdx--) {
    const declaration = lines[lineIdx].trim().match(/(?:class|interface|enum)\s+(\w+)/);
    if (declaration) return declaration[1];
  }
  return void 0;
}
6884
// Cheap token estimate: roughly four characters per token, rounded up.
function estimateTokens2(content) {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(content.length / CHARS_PER_TOKEN);
}
6887
// Map a file path to a supported chunking language via its extension
// (case-insensitive); unknown extensions yield null.
function getLanguage2(filePath) {
  const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
  switch (ext) {
    case "java":
      return "java";
    case "ts":
    case "tsx":
    case "js":
    case "jsx":
    case "mts":
    case "mjs":
      return "ts";
    case "py":
      return "python";
    case "go":
      return "go";
    default:
      return null;
  }
}
6895
// Dispatch a file to the language-appropriate chunker; unsupported file
// types produce no chunks.
function chunkFile(content, filePath) {
  const chunkers = {
    java: chunkJava,
    ts: chunkTypeScript,
    python: chunkPython,
    go: chunkGo
  };
  const lang = getLanguage2(filePath);
  return lang ? chunkers[lang](content, filePath) : [];
}
6909
// Score chunks against a task description (in place) and return them sorted
// best-first. Score = (query-term coverage + name-match bonus + kind bonus)
// scaled by a size penalty for very large chunks.
function scoreChunks(chunks, task) {
  const queryTerms = tokenize(task);
  const queryTermSet = new Set(queryTerms);
  for (const chunk of chunks) {
    const chunkTerms = tokenize(chunk.content);
    const chunkTermSet = new Set(chunkTerms);
    // Fraction of distinct query terms present anywhere in the chunk.
    let termOverlap = 0;
    for (const qt of queryTermSet) {
      if (chunkTermSet.has(qt)) termOverlap++;
    }
    const termCoverage = queryTermSet.size > 0 ? termOverlap / queryTermSet.size : 0;
    // +0.3 per chunk/class name token that appears in the query.
    let nameBonus = 0;
    const nameTerms = tokenize(chunk.name + (chunk.className ? " " + chunk.className : ""));
    for (const nt of nameTerms) {
      if (queryTermSet.has(nt)) nameBonus += 0.3;
    }
    // Prefer callable code over type declarations, both over imports.
    const kindBonus = chunk.kind === "method" || chunk.kind === "function" ? 0.1 : chunk.kind === "class" || chunk.kind === "interface" ? 0.05 : 0;
    // Fix: the larger threshold must be checked first. The previous ordering
    // (`tokens > 500 ? 0.9 : tokens > 1e3 ? 0.7 : 1`) made the 0.7 penalty
    // unreachable, since any count over 1000 is also over 500.
    const sizePenalty = chunk.tokens > 1e3 ? 0.7 : chunk.tokens > 500 ? 0.9 : 1;
    chunk.score = (termCoverage + nameBonus + kindBonus) * sizePenalty;
  }
  return chunks.sort((a, b) => b.score - a.score);
}
6931
// Chunk every file, score all chunks against the task, then greedily select
// (in score order) the chunks that clear minScore and fit the token budget.
// Over-budget chunks are skipped rather than stopping, so smaller chunks
// further down can still fill the remaining budget. Per-file chunk lists
// are returned sorted by start line for rendering.
function retrieveChunks(files, task, tokenBudget = 3e4, minScore = 0.1) {
  const allChunks = [];
  for (const file of files) {
    allChunks.push(...chunkFile(file.content, file.relativePath));
  }
  scoreChunks(allChunks, task);
  const selectedChunks = [];
  const fileChunks = new Map();
  let tokensUsed = 0;
  for (const chunk of allChunks) {
    if (chunk.score < minScore) continue;
    if (tokensUsed + chunk.tokens > tokenBudget) continue;
    selectedChunks.push(chunk);
    tokensUsed += chunk.tokens;
    if (!fileChunks.has(chunk.filePath)) {
      fileChunks.set(chunk.filePath, []);
    }
    fileChunks.get(chunk.filePath).push(chunk);
  }
  for (const perFile of fileChunks.values()) {
    perFile.sort((a, b) => a.startLine - b.startLine);
  }
  return {
    chunks: selectedChunks,
    fileChunks,
    totalChunks: allChunks.length,
    totalTokensUsed: tokensUsed
  };
}
6960
// Render one file's selected chunks as a fenced markdown code block.
// Each chunk gets an `// Lstart-end:` header, and gaps between consecutive
// chunks are replaced by an "omitted" marker. Empty input renders nothing.
function renderFileChunks(filePath, chunks, ext) {
  if (chunks.length === 0) return "";
  const parts = [`### ${filePath}\n\`\`\`${ext}\n`];
  let prevEnd = null;
  for (const chunk of chunks) {
    if (prevEnd !== null && chunk.startLine > prevEnd + 1) {
      parts.push(`\n// ... lines ${prevEnd + 1}-${chunk.startLine - 1} omitted ...\n\n`);
    }
    parts.push(`// L${chunk.startLine}-${chunk.endLine}: ${chunk.kind} ${chunk.name}\n`);
    parts.push(chunk.content + "\n");
    prevEnd = chunk.endLine;
  }
  parts.push("```\n\n");
  return parts.join("");
}
6983
+
4083
6984
  // src/engine/logger.ts
4084
6985
  var LEVEL_ORDER = { debug: 0, info: 1, warn: 2, error: 3 };
4085
6986
  var currentLevel = process.env.CTO_LOG_LEVEL ?? "warn";
@@ -4160,59 +7061,103 @@ export {
4160
7061
  CtoError,
4161
7062
  analyzeProject,
4162
7063
  assignGroup,
7064
+ attributeToSignal,
4163
7065
  auditProject,
7066
+ augmentContentWithStructure,
4164
7067
  bfsBidirectional,
7068
+ boostByCallGraph,
7069
+ boostByGitCoChange,
7070
+ boostByImports,
7071
+ boostByLayer,
4165
7072
  boostByPath,
4166
7073
  buildAdjacencyList,
7074
+ buildCallGraph,
7075
+ buildCoChangeMatrix,
7076
+ buildCorpusEmbeddings,
4167
7077
  buildIndex,
4168
7078
  buildIndexCached,
7079
+ buildNeuralEmbeddingIndex,
4169
7080
  buildProjectGraph,
7081
+ buildTfIdfEmbeddingIndex,
7082
+ buildWeightedQuery,
4170
7083
  calculateCoverage,
7084
+ chunkFile,
4171
7085
  classifyFileKind,
4172
7086
  countTokensChars4,
4173
7087
  countTokensTiktoken,
4174
7088
  createExperiment,
7089
+ createFreshModel,
4175
7090
  createLogger,
4176
7091
  createProject,
4177
7092
  detectLanguage,
4178
7093
  detectStack,
4179
7094
  discoverSiblingRepos,
7095
+ embedQuery,
7096
+ reciprocalRankFusion2 as embeddingRRF,
4180
7097
  estimateComplexity,
4181
7098
  estimateFileTokens,
4182
7099
  estimateTokens,
7100
+ expandLayers,
7101
+ expandQuery,
7102
+ expandQueryWithPMI,
7103
+ expandTerm,
4183
7104
  extractPattern,
7105
+ extractStructuralTokens,
4184
7106
  freeEncoder,
4185
7107
  getActiveExperiment,
4186
7108
  getCacheInfo,
4187
7109
  getConcludedExperiments,
7110
+ getExpansionDetails,
7111
+ getGitRecency,
4188
7112
  getLearnerBoosts,
4189
7113
  getLearnerStats,
7114
+ getOptimizedWeights,
4190
7115
  getPruneLevelForRisk,
7116
+ getStructuralSummary,
7117
+ getSynonymStats,
7118
+ getTelemetryBoosts,
4191
7119
  invalidateCache,
4192
7120
  isCtoError,
7121
+ isOnnxAvailable,
4193
7122
  loadExperiments,
4194
7123
  loadLearner,
7124
+ loadTelemetry,
7125
+ loadWeightTuner,
7126
+ multiHopQuery,
4195
7127
  optimizeBudget,
4196
7128
  parseAllPolyglotImports,
4197
7129
  parseImports,
7130
+ parseQueryIntent,
4198
7131
  parseSiblingPaths,
4199
7132
  pruneFile,
4200
7133
  pruneFiles,
4201
7134
  query,
7135
+ queryByEmbedding,
4202
7136
  querySiblingRepos,
7137
+ reciprocalRankFusion,
7138
+ recordFeedback,
7139
+ recordFileOpen,
4203
7140
  recordOutcome,
4204
7141
  recordSelection,
7142
+ recordSession,
4205
7143
  renderExperimentSummary,
7144
+ renderFileChunks,
4206
7145
  renderMultiRepoSummary,
7146
+ renderTelemetrySummary,
7147
+ renderWeightStatus,
4207
7148
  rerank,
7149
+ retrieveChunks,
4208
7150
  runContextPipeline,
4209
7151
  sanitizeContent,
4210
7152
  saveExperiments,
4211
7153
  saveLearner,
7154
+ saveTelemetry,
7155
+ saveWeightTuner,
4212
7156
  scanContentForSecrets,
4213
7157
  scanFileForSecrets,
4214
7158
  scanProjectForSecrets,
4215
7159
  scoreAllFiles,
7160
+ scoreChunks,
4216
7161
  scoreFile,
4217
7162
  selectContext,
4218
7163
  setJsonLogging,