cto-ai-cli 6.1.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +264 -63
- package/dist/cli/index.js +7732 -1729
- package/dist/engine/index.d.ts +1373 -14
- package/dist/engine/index.js +6731 -2110
- package/dist/mcp/index.js +3750 -430
- package/package.json +1 -1
package/dist/mcp/index.js
CHANGED
|
@@ -1,15 +1,783 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
// Bundler runtime helpers shared by the lazily-initialised modules below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Turns a one-entry object such as `{ "src/foo.ts"() { ... } }` into a
 * run-once initializer.
 *
 * @param {object} fn - Object whose first own property is the module body.
 * @param {*} [res] - Cached result slot (callers normally omit it).
 * @returns {Function} `__init`, which runs the body on first call and returns
 *   the cached result on every later call.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initializer = fn[__getOwnPropNames(fn)[0]];
    // Cleared before the body runs, so re-entrant or repeated calls never
    // execute the module body a second time.
    fn = 0;
    res = initializer(0);
  }
  return res;
};

/**
 * Defines one enumerable getter on `target` for every key of `all`.
 *
 * @param {object} target - Namespace object receiving the getters.
 * @param {Object<string, Function>} all - Map of export name to getter.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};
|
|
11
|
+
|
|
12
|
+
// src/engine/synonyms.ts
|
|
13
|
+
/**
 * Populates BIDIRECTIONAL_INDEX from SYNONYM_MAP so that every member of a
 * synonym group maps to the other members of that group.
 *
 * A canonical key's set also contains the key itself; a synonym's set holds
 * the canonical key plus its sibling synonyms, but not the synonym itself
 * (unless another group adds it).
 */
function buildBidirectionalIndex() {
  // Returns the Set registered for `term`, creating it on first use.
  const relatedOf = (term) => {
    let related = BIDIRECTIONAL_INDEX.get(term);
    if (related === undefined) {
      related = new Set();
      BIDIRECTIONAL_INDEX.set(term, related);
    }
    return related;
  };

  Object.entries(SYNONYM_MAP).forEach(([canonical, synonyms]) => {
    const canonicalRelated = relatedOf(canonical);
    synonyms.forEach((syn) => canonicalRelated.add(syn));
    canonicalRelated.add(canonical);

    synonyms.forEach((syn) => {
      const synRelated = relatedOf(syn);
      synRelated.add(canonical);
      synonyms
        .filter((other) => other !== syn)
        .forEach((other) => synRelated.add(other));
    });
  });
}
|
|
35
|
+
/**
 * Expands a term into itself plus every related term in BIDIRECTIONAL_INDEX.
 *
 * @param {string} term - Term to expand; lower-cased and trimmed before lookup.
 * @returns {string[]} The normalized term first, followed by its related
 *   terms, with no repeated entries.
 */
function expandTerm(term) {
  const normalized = term.toLowerCase().trim();
  const related = BIDIRECTIONAL_INDEX.get(normalized);
  if (!related) return [normalized];
  // A canonical key's related set includes the key itself, so de-duplicate;
  // otherwise callers that sum weights per entry count the term twice.
  return [...new Set([normalized, ...related])];
}
|
|
41
|
+
var SYNONYM_MAP, BIDIRECTIONAL_INDEX;

// Lazy initializer for src/engine/synonyms.ts: assigns the synonym table,
// creates the empty lookup map and fills it via buildBidirectionalIndex().
var init_synonyms = __esm({
  "src/engine/synonyms.ts"() {
    "use strict";
    // canonical term -> terms treated as related to it
    SYNONYM_MAP = {
      // --- authentication & authorization ---
      auth: ["authentication", "authorize", "login", "signin", "session", "jwt", "token", "oauth", "sso", "identity", "credential"],
      permission: ["authorization", "access", "role", "acl", "rbac", "policy", "grant"],

      // --- database & storage ---
      database: ["db", "repository", "store", "storage", "persistence", "orm", "sql", "query", "prisma", "sequelize", "typeorm", "mongo", "postgres", "mysql"],
      cache: ["redis", "memcached", "ttl", "invalidation", "memoize", "store"],
      migration: ["schema", "upgrade", "version", "evolution"],

      // --- API & networking ---
      api: ["endpoint", "route", "handler", "controller", "rest", "graphql", "rpc", "service"],
      request: ["req", "http", "call", "fetch", "axios"],
      response: ["res", "reply", "result", "output"],
      middleware: ["interceptor", "filter", "plugin", "hook"],
      gateway: ["proxy", "router", "load-balancer", "reverse-proxy"],

      // --- frontend & UI ---
      component: ["widget", "element", "view", "template"],
      state: ["store", "redux", "zustand", "context", "model"],
      render: ["paint", "draw", "display", "show"],
      style: ["css", "theme", "design", "layout", "tailwind"],

      // --- testing & quality ---
      test: ["spec", "suite", "case", "assertion", "mock", "stub", "fixture", "vitest", "jest", "mocha"],
      validate: ["verify", "check", "assert", "ensure", "sanitize"],
      error: ["exception", "failure", "bug", "issue", "crash"],

      // --- performance & optimization ---
      optimize: ["performance", "speed", "fast", "efficient", "improve", "enhance"],
      latency: ["delay", "lag", "slowness", "response-time"],
      throughput: ["capacity", "volume", "rate", "bandwidth"],

      // --- data & collections ---
      dataset: ["data", "record", "row", "entry", "item", "collection"],
      empty: ["null", "blank", "missing", "absent", "none", "zero"],

      // --- data processing ---
      parse: ["decode", "deserialize", "extract", "read"],
      serialize: ["encode", "stringify", "format", "marshal"],
      transform: ["map", "convert", "translate", "process"],
      filter: ["select", "where", "match", "find"],

      // --- configuration & setup ---
      config: ["configuration", "setting", "option", "preference", "env", "environment"],
      init: ["initialize", "setup", "bootstrap", "start", "create"],
      deploy: ["deployment", "release", "publish", "ship", "launch"],

      // --- logging & monitoring ---
      log: ["logger", "logging", "trace", "debug", "info", "warn", "error"],
      metric: ["measurement", "stat", "telemetry", "analytics", "tracking"],
      monitor: ["observe", "watch", "track", "alert"],

      // --- security ---
      secret: ["credential", "key", "password", "token", "apikey", "sensitive"],
      encrypt: ["cipher", "encode", "hash", "crypto"],
      sanitize: ["escape", "clean", "validate", "filter"],

      // --- file system & I/O ---
      file: ["document", "asset", "resource", "path"],
      read: ["load", "fetch", "get", "retrieve"],
      write: ["save", "persist", "store", "put"],
      delete: ["remove", "unlink", "destroy", "drop"],

      // --- async & concurrency ---
      async: ["asynchronous", "promise", "await", "concurrent", "parallel"],
      queue: ["buffer", "backlog", "pending", "deferred"],
      lock: ["mutex", "semaphore", "synchronize", "atomic"],

      // --- architecture & patterns ---
      service: ["microservice", "api", "backend", "server", "daemon"],
      client: ["consumer", "frontend", "user", "caller"],
      event: ["message", "signal", "notification", "trigger"],
      stream: ["flow", "pipe", "channel", "observable"],

      // --- business logic ---
      user: ["account", "profile", "member", "customer"],
      order: ["purchase", "transaction", "checkout", "cart"],
      payment: ["billing", "invoice", "charge", "stripe", "paypal"],
      notification: ["alert", "message", "email", "push", "sms"],

      // --- devops & infrastructure ---
      docker: ["container", "image", "dockerfile", "compose"],
      kubernetes: ["k8s", "cluster", "pod", "deployment", "helm"],
      ci: ["continuous-integration", "pipeline", "build", "github-actions", "jenkins"],
      cd: ["continuous-deployment", "release", "deploy", "rollout"],
    };
    BIDIRECTIONAL_INDEX = new Map();
    buildBidirectionalIndex();
  },
});
|
|
121
|
+
|
|
122
|
+
// src/engine/tfidf.ts
|
|
123
|
+
/**
 * Builds the term index used by query().
 *
 * @param {Array<{relativePath: string, content: string}>} files - Files to index.
 * @returns {{documents: Map, idf: Map, docFreq: Map, avgDocLength: number, totalDocs: number}}
 *   `documents` maps each path to its per-term counts and token count,
 *   `docFreq` counts how many files contain a term, `idf` holds
 *   log((N - df + 0.5) / (df + 0.5) + 1) per term, and `avgDocLength` is the
 *   mean token count (1 when there are no files).
 */
function buildIndex(files) {
  const documents = new Map();
  const docFreq = new Map();
  const increment = (counter, key) => counter.set(key, (counter.get(key) ?? 0) + 1);

  for (const file of files) {
    const tokens = tokenize(file.content);
    const termCounts = new Map();
    tokens.forEach((token) => increment(termCounts, token));
    documents.set(file.relativePath, { terms: termCounts, length: tokens.length });
    // Each distinct term counts once per document.
    [...termCounts.keys()].forEach((token) => increment(docFreq, token));
  }

  const totalDocs = files.length;
  const idf = new Map(
    [...docFreq].map(([term, df]) => [term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1)])
  );

  const totalLength = [...documents.values()].reduce((sum, doc) => sum + doc.length, 0);
  const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;

  return { documents, idf, docFreq, avgDocLength, totalDocs };
}
|
|
147
|
+
/**
 * Ranks indexed documents against a free-text task description using a
 * BM25-style score (k1 = 1.5, b = 0.75).
 *
 * @param {object} index - Index produced by buildIndex().
 * @param {string} taskDescription - Text to search for.
 * @param {number} [maxResults=50] - Maximum number of results returned.
 * @param {boolean} [expandSynonyms=true] - Also search for related terms from
 *   expandTerm(), weighted at 0.7 of the original term.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   Matches sorted by descending score, with scores divided by the best
 *   score so the top result is 1.
 */
function query(index, taskDescription, maxResults = 50, expandSynonyms = true) {
  const queryTerms = tokenize(taskDescription);
  if (queryTerms.length === 0) return [];

  const termCounts = new Map();
  for (const term of queryTerms) {
    termCounts.set(term, (termCounts.get(term) ?? 0) + 1);
  }

  let queryWeights = termCounts;
  if (expandSynonyms) {
    queryWeights = new Map();
    for (const [term, count] of termCounts) {
      // Per-term expansion: the original term keeps its full weight and each
      // distinct expanded token is added once at 0.7, however many synonyms
      // collapse onto it.
      const expansion = new Map([[term, count]]);
      for (const synonym of expandTerm(term)) {
        if (synonym === term) continue;
        // Synonyms are plain words, while the index holds tokenize() output;
        // normalise them the same way or they can never match a document.
        for (const token of tokenize(synonym)) {
          if (!expansion.has(token)) expansion.set(token, count * 0.7);
        }
      }
      for (const [token, weight] of expansion) {
        queryWeights.set(token, (queryWeights.get(token) ?? 0) + weight);
      }
    }
  }

  const results = [];
  const k1 = 1.5;
  const b = 0.75;
  for (const [filePath, doc] of index.documents) {
    let score = 0;
    const matchedTerms = [];
    for (const [qTerm, qWeight] of queryWeights) {
      const tf = doc.terms.get(qTerm) ?? 0;
      if (tf === 0) continue;
      const termIdf = index.idf.get(qTerm) ?? 0;
      if (termIdf <= 0) continue;
      const df = index.docFreq.get(qTerm) ?? 0;
      const dfRatio = index.totalDocs > 0 ? df / index.totalDocs : 0;
      // Terms present in more than half of the documents are damped
      // quadratically.
      const domainDamp = dfRatio > 0.5 ? (1 - dfRatio) * (1 - dfRatio) : 1;
      const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
      score += termIdf * tfNorm * qWeight * domainDamp;
      matchedTerms.push(qTerm);
    }
    if (score > 0) {
      results.push({ filePath, score, matchedTerms });
    }
  }

  const maxScore = results.reduce((max, r) => Math.max(max, r.score), 0);
  if (maxScore > 0) {
    for (const r of results) r.score = r.score / maxScore;
  }
  return results.sort((a, b2) => b2.score - a.score).slice(0, maxResults);
}
|
|
196
|
+
/**
 * Splits text into normalized search tokens.
 *
 * Identifiers are broken on camelCase / acronym boundaries, lower-cased and
 * stemmed. Parts shorter than two characters and stop words are dropped.
 *
 * @param {string} text - Source code, a path, or a natural-language query.
 * @returns {string[]} Stemmed tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  const tokens = [];
  const rawTokens = text.match(/[a-zA-Z][a-zA-Z0-9]*|[0-9]+/g) ?? [];
  for (const raw of rawTokens) {
    const parts = raw
      .replace(/([a-z])([A-Z])/g, "$1 $2")
      .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
      .toLowerCase()
      .split(/\s+/);
    for (const part of parts) {
      if (part.length < 2) continue;
      // STOP_WORDS holds whole words. Checking only the stem would let through
      // any stop word whose stem differs from the listed form.
      if (STOP_WORDS.has(part)) continue;
      const stemmed = stem(part);
      if (stemmed.length < 2) continue;
      // Still check the stem so inflected forms of a stop word are dropped.
      if (STOP_WORDS.has(stemmed)) continue;
      tokens.push(stemmed);
    }
  }
  return tokens;
}
|
|
211
|
+
/**
 * Reduces a word to a crude stem.
 *
 * TERM_FAMILIES is consulted first: a word that starts with a family prefix,
 * or equals a family root, becomes that root. Otherwise the first matching
 * suffix rule below is applied once.
 *
 * @param {string} word - Word to stem (case-insensitive).
 * @returns {string} The lower-cased stem.
 */
function stem(word) {
  const lower = word.toLowerCase();

  const family = TERM_FAMILIES.find(([prefix, root]) => lower.startsWith(prefix) || lower === root);
  if (family) return family[1];

  // [suffix, word must be longer than this, text appended after stripping]
  // Order matters: the first rule that matches wins.
  const suffixRules = [
    ["ication", 9, ""],
    ["ation", 7, ""],
    ["tion", 6, ""],
    ["sion", 6, ""],
    ["ment", 6, ""],
    ["ness", 6, ""],
    ["able", 6, ""],
    ["ible", 6, ""],
    ["ator", 6, ""],
    ["izer", 6, ""],
    ["ing", 5, ""],
    ["ies", 4, "y"],
    ["ous", 5, ""],
    ["ful", 5, ""],
    ["ity", 5, ""],
    ["ive", 5, ""],
    ["ion", 5, ""],
    ["ed", 4, ""],
    ["er", 4, ""],
    ["ly", 4, ""],
    ["al", 4, ""],
  ];
  for (const [suffix, minLength, tail] of suffixRules) {
    if (lower.endsWith(suffix) && lower.length > minLength) {
      return lower.slice(0, -suffix.length) + tail;
    }
  }

  // Plain plural: strip one trailing "s", but never from a word ending in "ss".
  const isPlural = lower.endsWith("s") && !lower.endsWith("ss") && lower.length > 3;
  return isPlural ? lower.slice(0, -1) : lower;
}
|
|
240
|
+
/**
 * Adds a score bonus to files whose path contains query terms.
 *
 * Directory-segment hits are worth up to 0.4 each and file-name hits up to
 * 0.25 each, both scaled by how rare the term is across all paths. Files with
 * path hits that were not in `matches` are added with the bonus as their score.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Existing matches; not mutated.
 * @param {string[]} allFiles - Every candidate file path.
 * @param {string} taskDescription - Query text.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   Copies of the matches plus newly found files, sorted by descending score.
 */
function boostByPath(matches, allFiles, taskDescription) {
  const queryTerms = new Set(tokenize(taskDescription));
  const boosted = new Map();
  for (const m of matches) {
    boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
  }

  // Number of paths each path token occurs in.
  const pathTermDocFreq = new Map();
  for (const filePath of allFiles) {
    const tokens = new Set(tokenize(filePath.replace(/[/\\.]/g, " ")));
    for (const t of tokens) {
      pathTermDocFreq.set(t, (pathTermDocFreq.get(t) ?? 0) + 1);
    }
  }

  const N = allFiles.length;
  // Upper bound of pathIdf; used to scale each term's rarity into [0, 1).
  const maxIdf = Math.log(N + 1);
  const pathIdf = (term) => {
    const df = pathTermDocFreq.get(term) ?? 0;
    if (df === 0) return 0;
    return Math.log((N + 1) / (df + 1));
  };

  for (const filePath of allFiles) {
    const parts = filePath.replace(/\\/g, "/").split("/");
    const fileName = parts.pop() ?? "";
    const dirTerms = tokenize(parts.join(" ").replace(/[/\\.]/g, " "));
    const fileTerms = tokenize(fileName.replace(/[.\-_]/g, " "));
    const dirMatches = dirTerms.filter((t) => queryTerms.has(t));
    const fileMatches = fileTerms.filter((t) => queryTerms.has(t));
    const allPathMatches = [...new Set([...dirMatches, ...fileMatches])];
    if (allPathMatches.length === 0) continue;

    const uniqueDirMatches = [...new Set(dirMatches)];
    // A term already credited as a directory hit is not credited again.
    const uniqueFileMatches = [...new Set(fileMatches)].filter((t) => !uniqueDirMatches.includes(t));
    let pathBoost = 0;
    for (const t of uniqueDirMatches) {
      pathBoost += 0.4 * (pathIdf(t) / maxIdf);
    }
    for (const t of uniqueFileMatches) {
      pathBoost += 0.25 * (pathIdf(t) / maxIdf);
    }

    const existing = boosted.get(filePath);
    if (existing) {
      existing.score = existing.score + pathBoost;
      for (const t of allPathMatches) {
        if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
      }
    } else {
      boosted.set(filePath, {
        filePath,
        score: pathBoost,
        matchedTerms: allPathMatches,
      });
    }
  }
  return [...boosted.values()].sort((a, b) => b.score - a.score);
}
|
|
298
|
+
/**
 * Adds a score bonus to files whose path contains a layer name associated
 * (through LAYER_MAP) with one of the query terms.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Existing matches; not mutated.
 * @param {string[]} allFiles - Every candidate file path.
 * @param {string} taskDescription - Query text.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   `matches` itself when no query term maps to a layer; otherwise copies of
 *   the matches plus newly found files, sorted by descending score.
 */
function boostByLayer(matches, allFiles, taskDescription) {
  const queryTerms = tokenize(taskDescription);
  const targetDirTerms = new Set();
  for (const term of queryTerms) {
    // LAYER_MAP is a plain object, so a bare LAYER_MAP[term] would also find
    // inherited members: "constructor" yields a function, and iterating it
    // throws. Only own entries count.
    if (!Object.prototype.hasOwnProperty.call(LAYER_MAP, term)) continue;
    for (const layer of LAYER_MAP[term]) targetDirTerms.add(layer);
  }
  if (targetDirTerms.size === 0) return matches;

  const boosted = new Map();
  for (const m of matches) {
    boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
  }

  for (const filePath of allFiles) {
    const dirTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
    const layerHits = dirTerms.filter((t) => targetDirTerms.has(t));
    if (layerHits.length === 0) continue;

    // 0.2 per hit, capped at 0.5.
    const layerBoost = Math.min(0.5, layerHits.length * 0.2);
    const existing = boosted.get(filePath);
    if (existing) {
      existing.score = existing.score + layerBoost;
    } else {
      boosted.set(filePath, {
        filePath,
        score: layerBoost,
        matchedTerms: [`[layer:${layerHits[0]}]`],
      });
    }
  }
  return [...boosted.values()].sort((a, b) => b.score - a.score);
}
|
|
331
|
+
/**
 * Spreads part of each top match's score to the files it imports and to the
 * files that import it.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Matches, best first; not mutated.
 * @param {Map<string, string[]>} dependencies - File path -> paths it imports.
 * @param {number} [topK=10] - How many leading matches act as boost sources.
 * @param {number} [boostFactor=0.4] - Fraction of a source's score passed to
 *   each file it imports; importers receive 0.7 of that amount.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   `matches` itself when either input is empty; otherwise copies plus newly
 *   added neighbours, sorted by descending score.
 */
function boostByImports(matches, dependencies, topK = 10, boostFactor = 0.4) {
  if (matches.length === 0 || dependencies.size === 0) return matches;

  const maxDepsPerParent = 5;
  const boosted = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  // Invert the graph: imported file -> files that import it.
  const importersOf = new Map();
  for (const [importer, imported] of dependencies) {
    for (const target of imported) {
      if (!importersOf.has(target)) importersOf.set(target, []);
      importersOf.get(target).push(importer);
    }
  }

  const currentScore = (path) => boosted.get(path)?.score ?? 0;
  // The strongest neighbours by their score at the moment of the call.
  const strongest = (paths) =>
    [...paths].sort((x, y) => currentScore(y) - currentScore(x)).slice(0, maxDepsPerParent);

  for (const { filePath, score } of matches.slice(0, topK)) {
    const boost = score * boostFactor;
    for (const dep of strongest(dependencies.get(filePath) ?? [])) {
      applyImportBoost(boosted, dep, boost, filePath, "imported-by");
    }
    for (const importer of strongest(importersOf.get(filePath) ?? [])) {
      applyImportBoost(boosted, importer, boost * 0.7, filePath, "imports");
    }
  }

  return [...boosted.values()].sort((a, b) => b.score - a.score);
}
|
|
370
|
+
/**
 * Adds `boost` to a file's entry in `boosted`, creating the entry if needed.
 *
 * @param {Map<string, {filePath: string, score: number, matchedTerms: string[]}>} boosted
 *   Working result map; mutated in place.
 * @param {string} filePath - File receiving the boost.
 * @param {number} boost - Amount added to the score.
 * @param {string} parentPath - Match the boost originates from.
 * @param {string} relation - Label prefix, e.g. "imports" or "imported-by".
 */
function applyImportBoost(boosted, filePath, boost, parentPath, relation) {
  const existing = boosted.get(filePath);
  if (existing) {
    existing.score = existing.score + boost;
    return;
  }
  // Accept both separators, as boostByPath does, so the label shows the
  // parent's file name rather than a whole backslash-separated path.
  const parentName = parentPath.split(/[\\/]/).pop();
  boosted.set(filePath, {
    filePath,
    score: boost,
    matchedTerms: [`[${relation}:${parentName}]`],
  });
}
|
|
382
|
+
var STOP_WORDS, TERM_FAMILIES, LAYER_MAP;

// Lazy initializer for src/engine/tfidf.ts: runs the synonym module's
// initializer, then assigns the three lookup tables used by the tokenizer and
// the boost functions.
var init_tfidf = __esm({
  "src/engine/tfidf.ts"() {
    "use strict";
    init_synonyms();

    const languageKeywords = [
      "import", "export", "from", "const", "let", "var", "function", "class",
      "interface", "type", "return", "async", "await", "new", "this", "that",
      "true", "false", "null", "undefined", "void", "string", "number",
      "boolean", "any", "unknown", "never", "object", "array", "promise",
      "if", "else", "for", "while", "do", "switch", "case", "break",
      "continue", "try", "catch", "throw", "finally", "default", "extends",
      "implements", "static", "private", "public", "protected", "readonly",
      "abstract", "override", "super", "typeof", "instanceof", "in", "of",
      "as", "is", "keyof", "enum", "namespace", "module", "declare",
    ];
    const pythonKeywords = [
      "def", "self", "cls", "none", "pass", "yield", "lambda", "with", "elif",
      "except", "raise", "assert", "global", "nonlocal",
    ];
    // Natural-language stop words only, not domain terms that carry signal.
    // ("for" and "with" are already listed above.)
    const naturalLanguage = [
      "the", "and", "not", "but", "are", "was", "were", "has", "have", "had",
      "will", "would", "could", "should", "may", "can", "its", "also", "than",
      "then", "into", "only", "very", "just", "about", "being", "been", "does",
      "did", "doing", "todo", "fixme", "hack", "note", "xxx",
    ];
    STOP_WORDS = new Set([...languageKeywords, ...pythonKeywords, ...naturalLanguage]);

    // [prefix, root]: stem() maps a word starting with `prefix`, or equal to
    // `root`, to `root`.
    TERM_FAMILIES = [
      ["authenticat", "auth"],
      ["authori", "auth"],
      ["configur", "config"],
      ["connect", "connect"],
      ["request", "request"],
      ["response", "respons"],
      ["middlewar", "middlewar"],
      ["validat", "valid"],
      ["initiali", "init"],
      ["subscri", "subscrib"],
      ["transform", "transform"],
      ["seriali", "serial"],
      ["deseriali", "serial"],
      ["dependen", "depend"],
      ["environ", "environ"],
      ["permiss", "permiss"],
      ["migrat", "migrat"],
      ["transact", "transact"],
      ["encryp", "encrypt"],
      ["decryp", "encrypt"],
    ];

    // Query token -> path tokens that boostByLayer() should boost.
    // NOTE(review): several entries ("databas", "servic", "repositori",
    // "storag", "entiti", ...) look like stems from a different stemmer;
    // stem() in this file returns "database", "service" and "repository"
    // unchanged. Confirm these entries can match tokenize() output.
    LAYER_MAP = {
      endpoint: ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
      api: ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
      controller: ["endpoint", "controller", "handler", "route", "router"],
      repositori: ["repositori", "dao", "store", "persist"],
      databas: ["repositori", "dao", "store", "persist", "migrat"],
      storag: ["repositori", "dao", "store", "persist"],
      cach: ["cach", "redis", "memcach", "store"],
      servic: ["servic", "usecas", "core"],
      usecas: ["usecas", "servic", "core"],
      config: ["config", "inject", "setup", "bootstrap"],
      inject: ["config", "inject", "setup"],
      depend: ["config", "inject", "setup"],
      event: ["event", "listen", "handler", "subscrib"],
      error: ["error", "except", "handler", "fault"],
      except: ["except", "error", "handler", "fault"],
      model: ["model", "entiti", "dto", "domain", "schema"],
      entiti: ["entiti", "model", "dto", "domain"],
      metric: ["metric", "monitor", "observ", "telemetri"],
      test: ["test", "spec", "mock", "fixtur"],
      migrat: ["migrat", "schema", "databas"],
    };
  },
});
|
|
555
|
+
|
|
556
|
+
// src/engine/multi-repo.ts
|
|
557
|
+
// Namespace object for src/engine/multi-repo.ts. Each export is a getter, so
// it resolves to the module's function at the time it is read.
var multi_repo_exports = {};
__export(multi_repo_exports, {
  discoverSiblingRepos() { return discoverSiblingRepos; },
  parseSiblingPaths() { return parseSiblingPaths; },
  querySiblingRepos() { return querySiblingRepos; },
  renderMultiRepoSummary() { return renderMultiRepoSummary; },
});
|
|
564
|
+
import { readdirSync, readFileSync as readFileSync5, statSync as statSync2, existsSync as existsSync5 } from "fs";
|
|
565
|
+
import { join as join7, basename as basename3, resolve as resolve5, relative as relative5 } from "path";
|
|
566
|
+
/**
 * Finds repositories that sit next to the project in its parent directory.
 *
 * A sibling is a directory other than the project itself whose name does not
 * start with "." and is not in SKIP_DIRS, and which contains at least one
 * REPO_MARKERS file.
 *
 * @param {string} projectPath - Path of the current project.
 * @returns {Array<{path: string, name: string, stack: string[], fileCount: number}>}
 *   One descriptor per sibling (fileCount starts at 0), or [] when the parent
 *   directory cannot be read.
 */
function discoverSiblingRepos(projectPath) {
  const absProject = resolve5(projectPath);
  const parentDir = join7(absProject, "..");
  const ownName = basename3(absProject);

  let names;
  try {
    names = readdirSync(parentDir);
  } catch {
    return [];
  }

  // Entries that cannot be stat'ed are treated as "not a directory".
  const isDirectory = (path) => {
    try {
      return statSync2(path).isDirectory();
    } catch {
      return false;
    }
  };
  const hasRepoMarker = (dir) =>
    REPO_MARKERS.some((marker) => {
      try {
        return existsSync5(join7(dir, marker));
      } catch {
        return false;
      }
    });

  const siblings = [];
  for (const name of names) {
    const excluded = name === ownName || name.startsWith(".") || SKIP_DIRS.has(name);
    if (excluded) continue;

    const candidatePath = join7(parentDir, name);
    if (!isDirectory(candidatePath)) continue;
    if (!hasRepoMarker(candidatePath)) continue;

    siblings.push({
      path: candidatePath,
      name,
      stack: detectStack2(candidatePath),
      // filled during indexing
      fileCount: 0,
    });
  }
  return siblings;
}
|
|
606
|
+
/**
 * Guesses a repository's technology stack from well-known manifest files in
 * its root directory.
 *
 * @param {string} repoPath - Repository root.
 * @returns {string[]} Labels for the manifests found, in the order checked.
 */
function detectStack2(repoPath) {
  const manifests = [
    ["tsconfig.json", "TypeScript"],
    ["package.json", "Node.js"],
    ["Cargo.toml", "Rust"],
    ["go.mod", "Go"],
    ["pyproject.toml", "Python"],
    ["pom.xml", "Java"],
  ];
  const stack = [];
  try {
    for (const [manifest, label] of manifests) {
      if (existsSync5(join7(repoPath, manifest))) stack.push(label);
    }
  } catch {
    // Best effort: keep whatever was detected before the failure.
  }
  return stack;
}
|
|
619
|
+
/**
 * Recursively collects source files under a repository root.
 *
 * Skips names starting with "." and names in SKIP_DIRS, descends at most
 * eight directory levels below the root, ignores files larger than
 * MAX_FILE_SIZE bytes, and stops once `maxFiles` files are collected.
 * Unreadable directories and entries are skipped silently.
 *
 * @param {string} repoPath - Repository root.
 * @param {number} [maxFiles=MAX_FILES_PER_REPO] - Upper bound on results.
 * @returns {string[]} Paths relative to `repoPath`.
 */
function listSourceFiles(repoPath, maxFiles = MAX_FILES_PER_REPO) {
  const files = [];
  function walk(dir, depth) {
    if (depth > 8 || files.length >= maxFiles) return;
    let entries;
    try {
      entries = readdirSync(dir);
    } catch {
      return;
    }
    for (const entry of entries) {
      if (files.length >= maxFiles) return;
      if (entry.startsWith(".")) continue;
      if (SKIP_DIRS.has(entry)) continue;
      const fullPath = join7(dir, entry);
      try {
        const stat3 = statSync2(fullPath);
        if (stat3.isDirectory()) {
          walk(fullPath, depth + 1);
        } else if (stat3.isFile() && stat3.size <= MAX_FILE_SIZE) {
          // Require a real extension: a file named just "json" or "go" has
          // none and must not be mistaken for a source file.
          const dot = entry.lastIndexOf(".");
          if (dot === -1) continue;
          const ext = entry.slice(dot + 1).toLowerCase();
          if (SOURCE_EXTENSIONS.has(ext)) {
            files.push(relative5(repoPath, fullPath));
          }
        }
      } catch {
        // Best effort: an entry that cannot be stat'ed is skipped.
      }
    }
  }
  walk(repoPath, 0);
  return files;
}
|
|
651
|
+
/**
 * Reads every source file of a sibling repository into memory.
 *
 * Sets `repo.fileCount` to the number of files listed. A file that cannot be
 * read still appears in `contents` with empty content, but gets no entry in
 * `contentMap`.
 *
 * @param {{path: string, fileCount: number}} repo - Repository descriptor; mutated.
 * @returns {{contents: Array<{relativePath: string, content: string}>, contentMap: Map<string, string>}}
 */
function indexSiblingRepo(repo) {
  const relativePaths = listSourceFiles(repo.path);
  repo.fileCount = relativePaths.length;

  const contentMap = new Map();
  const contents = relativePaths.map((relativePath) => {
    let content = "";
    try {
      content = readFileSync5(join7(repo.path, relativePath), "utf-8");
      contentMap.set(relativePath, content);
    } catch {
      // Unreadable file: keep the empty placeholder and leave the map untouched.
    }
    return { relativePath, content };
  });

  return { contents, contentMap };
}
|
|
667
|
+
/**
 * Searches each sibling repository for files relevant to a task.
 *
 * Every repository is indexed on its own, queried for up to `maxPerRepo * 2`
 * candidates, re-ranked by path, and the leading `maxPerRepo` results scoring
 * at least `minScore` are kept.
 *
 * @param {Array<{path: string, name: string}>} siblings - Repositories to search.
 * @param {string} task - Task description used as the query.
 * @param {number} [maxPerRepo=5] - Maximum matches kept per repository.
 * @param {number} [minScore=0.3] - Matches scoring below this are discarded.
 * @returns {{siblings: Array, matches: Array, timeMs: number}} The input
 *   siblings, all kept matches sorted by descending score, and the elapsed
 *   time in whole milliseconds.
 */
function querySiblingRepos(siblings, task, maxPerRepo = 5, minScore = 0.3) {
  const startedAt = performance.now();
  const collected = [];

  for (const repo of siblings) {
    const { contents, contentMap } = indexSiblingRepo(repo);
    if (contents.length === 0) continue;

    const ranked = boostByPath(
      query(buildIndex(contents), task, maxPerRepo * 2),
      contents.map((entry) => entry.relativePath),
      task
    );

    ranked
      .slice(0, maxPerRepo)
      .filter((match) => !(match.score < minScore))
      .forEach((match) => {
        const content = contentMap.get(match.filePath) ?? "";
        collected.push({
          repoName: repo.name,
          repoPath: repo.path,
          relativePath: match.filePath,
          absolutePath: join7(repo.path, match.filePath),
          score: match.score,
          content,
          // Rough size estimate: four characters per token.
          tokens: Math.ceil(content.length / 4),
        });
      });
  }

  collected.sort((a, b) => b.score - a.score);
  const timeMs = Math.round(performance.now() - startedAt);
  return { siblings, matches: collected, timeMs };
}
|
|
702
|
+
/**
 * Turns a comma-separated list of paths into repository descriptors.
 *
 * Each entry is resolved against the project's parent directory; blank
 * entries and paths that do not exist are dropped.
 *
 * @param {string} pathsStr - Comma-separated paths.
 * @param {string} projectPath - Path of the current project.
 * @returns {Array<{path: string, name: string, stack: string[], fileCount: number}>}
 */
function parseSiblingPaths(pathsStr, projectPath) {
  const absProject = resolve5(projectPath);
  const repos = [];
  for (const piece of pathsStr.split(",")) {
    const trimmed = piece.trim();
    if (trimmed.length === 0) continue;

    const absPath = resolve5(join7(absProject, ".."), trimmed);
    const repo = {
      path: absPath,
      name: basename3(absPath),
      stack: detectStack2(absPath),
      fileCount: 0,
    };
    if (existsSync5(repo.path)) repos.push(repo);
  }
  return repos;
}
|
|
714
|
+
/**
 * Formats the result of querySiblingRepos() as a plain-text summary.
 *
 * @param {{siblings: Array, matches: Array, timeMs: number}} result
 * @returns {string} Newline-separated summary: the scanned repositories,
 *   followed by at most the first ten matches.
 */
function renderMultiRepoSummary(result) {
  const { siblings, matches } = result;
  if (siblings.length === 0) {
    return [" No sibling repos found."].join("\n");
  }

  const header = ` Sibling repos scanned: ${siblings.length} (${result.timeMs}ms)`;
  const repoLines = siblings.map(
    (repo) => ` ${repo.name}/ \u2014 ${repo.fileCount} files [${repo.stack.join(", ") || "unknown"}]`
  );

  const matchLines =
    matches.length === 0
      ? [" No relevant files found in sibling repos."]
      : [
          ` Cross-repo matches: ${matches.length}`,
          ...matches.slice(0, 10).map((m) => {
            const pct = Math.round(m.score * 100);
            return ` ${m.repoName}/${m.relativePath} sem: ${pct}% (~${Math.round(m.tokens / 1e3)}K tok)`;
          }),
        ];

  return [header, ...repoLines, ...matchLines].join("\n");
}
|
|
735
|
+
// Module-level settings for src/engine/multi-repo.ts. They stay undefined
// until init_multi_repo() runs; the __esm wrapper runs the initializer once.
var REPO_MARKERS, SKIP_DIRS, SOURCE_EXTENSIONS, MAX_FILES_PER_REPO, MAX_FILE_SIZE;
var init_multi_repo = __esm({
  "src/engine/multi-repo.ts"() {
    "use strict";
    init_tfidf();
    // Manifest file names listed as repo markers.
    REPO_MARKERS = ["package.json", "tsconfig.json", "Cargo.toml", "go.mod", "pyproject.toml", "pom.xml"];
    // Directory names in the skip set.
    SKIP_DIRS = /* @__PURE__ */ new Set(["node_modules", ".git", "dist", "build", ".next", "__pycache__", "target", "vendor"]);
    // File extensions (without the dot) in the source-extension set.
    SOURCE_EXTENSIONS = /* @__PURE__ */ new Set([
      "ts", "tsx", "js", "jsx", "mjs", "cjs",
      "py", "rs", "go", "java", "kt", "rb",
      "c", "cpp", "h", "hpp", "cs",
      "json", "yaml", "yml", "toml", "md", "txt"
    ]);
    MAX_FILES_PER_REPO = 500;
    MAX_FILE_SIZE = 100000;
  }
});
|
|
2
771
|
|
|
3
772
|
// src/mcp/index.ts
|
|
4
773
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
5
774
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
775
|
import { z } from "zod";
|
|
7
|
-
import { resolve as
|
|
8
|
-
import { readFileSync as readFileSync2 } from "fs";
|
|
776
|
+
import { resolve as resolve6 } from "path";
|
|
9
777
|
|
|
10
778
|
// src/engine/analyzer.ts
|
|
11
779
|
import { readFile as readFile2, readdir, stat as stat2 } from "fs/promises";
|
|
12
|
-
import { join as
|
|
780
|
+
import { join as join3, extname, relative as relative3, resolve as resolve3, basename as basename2 } from "path";
|
|
13
781
|
import { createHash } from "crypto";
|
|
14
782
|
|
|
15
783
|
// src/types/engine.ts
|
|
@@ -33,7 +801,19 @@ var DEFAULT_CONFIG = {
|
|
|
33
801
|
},
|
|
34
802
|
ignore: {
|
|
35
803
|
dirs: ["node_modules", "dist", "build", ".git", "coverage", "__pycache__", ".next", "vendor", ".cto"],
|
|
36
|
-
patterns: [
|
|
804
|
+
patterns: [
|
|
805
|
+
"*.min.js",
|
|
806
|
+
"*.map",
|
|
807
|
+
"*.lock",
|
|
808
|
+
"*.generated.*",
|
|
809
|
+
"CHANGELOG*",
|
|
810
|
+
"LICENSE*",
|
|
811
|
+
"CONTRIBUTING*",
|
|
812
|
+
"CODE_OF_CONDUCT*",
|
|
813
|
+
"AUTHORS*",
|
|
814
|
+
"CODEOWNERS",
|
|
815
|
+
"SECURITY*"
|
|
816
|
+
]
|
|
37
817
|
},
|
|
38
818
|
maxDepth: 20
|
|
39
819
|
},
|
|
@@ -93,27 +873,742 @@ function estimateTokens(content, sizeInBytes, method = "chars4") {
|
|
|
93
873
|
|
|
94
874
|
// src/engine/graph.ts
|
|
95
875
|
import { Project, SyntaxKind } from "ts-morph";
|
|
96
|
-
import { resolve, relative, dirname, join } from "path";
|
|
97
|
-
import { existsSync } from "fs";
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
876
|
+
import { resolve as resolve2, relative as relative2, dirname as dirname2, join as join2 } from "path";
|
|
877
|
+
import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
|
|
878
|
+
|
|
879
|
+
// src/engine/polyglot-graph.ts
|
|
880
|
+
import { readFileSync } from "fs";
|
|
881
|
+
import { join, dirname } from "path";
|
|
882
|
+
/**
 * Maps a lowercase file extension (without the dot) to a language family.
 * Every JS/TS flavour maps to "typescript".
 */
var LANG_EXTENSIONS = {
  "py": "python",
  "pyw": "python",
  "go": "go",
  "java": "java",
  "rs": "rust",
  "ts": "typescript",
  "tsx": "typescript",
  "js": "typescript",
  "jsx": "typescript",
  "mts": "typescript",
  "mjs": "typescript",
  "cts": "typescript",
  "cjs": "typescript"
};
/**
 * Detect the language of a file from its extension.
 *
 * @param {string} filePath - Relative or absolute file path.
 * @returns {string|null} Language family from LANG_EXTENSIONS, or null when
 *   the file has no extension or the extension is not recognised.
 */
function detectLanguage(filePath) {
  // Look only at the file name, so an extensionless file such as "bin/go"
  // is not mistaken for a Go source file.
  const fileName = filePath.split(/[\\/]/).pop() ?? "";
  const dotIndex = fileName.lastIndexOf(".");
  if (dotIndex === -1) return null;
  const ext = fileName.slice(dotIndex + 1).toLowerCase();
  // Own-property check: "x.constructor" must not resolve to Object.prototype members.
  return Object.prototype.hasOwnProperty.call(LANG_EXTENSIONS, ext) ? LANG_EXTENSIONS[ext] : null;
}
|
|
901
|
+
/**
 * Extract import edges for a single non-TypeScript source file.
 *
 * @param {string} filePath - Absolute path, used to read the file when `content` is not supplied.
 * @param {string} relativePath - Project-relative path; becomes the `from` of each edge.
 * @param {string} projectPath - Project root, forwarded to the resolver.
 * @param {Set<string>} allRelativePaths - Every known project-relative path.
 * @param {string} [content] - File content, if already loaded.
 * @returns {{from: string, to: string, type: string}[]} One edge per import that resolves to a project file.
 */
function parseImports(filePath, relativePath, projectPath, allRelativePaths, content) {
  const language = detectLanguage(relativePath);
  if (!language || language === "typescript") return [];
  const source = content ?? safeReadFile(filePath);
  if (!source) return [];
  // Language -> import-statement extractor.
  const extractors = /* @__PURE__ */ new Map([
    ["python", parsePythonImports],
    ["go", parseGoImports],
    ["java", parseJavaImports],
    ["rust", parseRustImports]
  ]);
  const extract = extractors.get(language);
  if (!extract) return [];
  const found = [];
  for (const spec of extract(source)) {
    const target = resolveImportSpec(spec, relativePath, projectPath, allRelativePaths, language);
    if (!target) continue;
    found.push({ from: relativePath, to: target, type: "import" });
  }
  return found;
}
|
|
932
|
+
/**
 * Collect import edges for every non-TypeScript file in `files`.
 *
 * @param {{relativePath: string, absolutePath: string, content?: string}[]} files - Files to scan.
 * @param {string} projectPath - Project root.
 * @returns {{from: string, to: string, type: string}[]} All resolved edges, in file order.
 */
function parseAllPolyglotImports(files, projectPath) {
  const knownPaths = new Set(files.map((f) => f.relativePath));
  const collected = [];
  for (const { absolutePath, relativePath, content } of files) {
    const language = detectLanguage(relativePath);
    if (language && language !== "typescript") {
      const fileEdges = parseImports(absolutePath, relativePath, projectPath, knownPaths, content);
      for (const edge of fileEdges) collected.push(edge);
    }
  }
  return collected;
}
|
|
949
|
+
/**
 * Rough, regex-based cyclomatic-complexity estimate for non-TypeScript code.
 *
 * Starts at 1 and adds 1 for every line that matches at least one branching
 * pattern for the language; a line counts at most once.
 *
 * @param {string} content - Source text.
 * @param {string} lang - "python", "go", "java", "rust" or "typescript".
 * @returns {number} Estimated complexity (>= 1). Languages without a pattern
 *   table, including "typescript", always yield 1.
 */
function estimateComplexity(content, lang) {
  const patterns = {
    python: [
      /^\s*if\s/,
      /^\s*elif\s/,
      /^\s*for\s/,
      /^\s*while\s/,
      // `\b` rather than `\s` so a bare `except:` handler is counted too.
      /^\s*except\b/,
      /\sif\s.*\selse\s/,
      // ternary
      /\sand\s/,
      /\sor\s/
    ],
    go: [
      /^\s*if\s/,
      /^\s*for\s/,
      /^\s*case\s/,
      /^\s*select\s*{/,
      /&&/,
      /\|\|/
    ],
    java: [
      /^\s*if\s*\(/,
      /^\s*for\s*\(/,
      /^\s*while\s*\(/,
      /^\s*case\s/,
      /^\s*catch\s*\(/,
      /\?\s/,
      // ternary
      /&&/,
      /\|\|/
    ],
    rust: [
      /^\s*if\s/,
      /^\s*for\s/,
      /^\s*while\s/,
      /^\s*match\s/,
      /=>\s/,
      // match arms
      /&&/,
      /\|\|/
    ],
    typescript: []
    // handled by ts-morph
  };
  // An unknown language has no patterns; iterating `undefined` used to throw.
  const langPatterns = Object.prototype.hasOwnProperty.call(patterns, lang) ? patterns[lang] : [];
  let complexity = 1;
  if (langPatterns.length === 0) return complexity;
  for (const line of content.split("\n")) {
    if (langPatterns.some((pattern) => pattern.test(line))) complexity++;
  }
  return complexity;
}
|
|
1007
|
+
/**
 * Top-level module names of the Python standard library. Imports of these are
 * never treated as project-internal dependencies.
 */
var PYTHON_STDLIB = /* @__PURE__ */ new Set([
  "__future__", "__main__", "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
  "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex", "bisect",
  "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk", "cmath", "cmd", "code",
  "codecs", "codeop", "collections", "colorsys", "compileall", "concurrent",
  "configparser", "contextlib", "contextvars", "copy", "copyreg", "cProfile", "crypt",
  "csv", "ctypes", "curses", "dataclasses", "datetime", "dbm", "decimal", "difflib",
  "dis", "distutils", "doctest", "email", "encodings", "ensurepip", "enum", "errno",
  "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions", "ftplib",
  "functools", "gc", "getopt", "getpass", "gettext", "glob", "graphlib", "grp", "gzip",
  "hashlib", "heapq", "hmac", "html", "http", "idlelib", "imaplib", "imghdr", "imp",
  "importlib", "inspect", "io", "ipaddress", "itertools", "json", "keyword", "lib2to3",
  "linecache", "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
  "mimetypes", "mmap", "modulefinder", "msilib", "msvcrt", "multiprocessing", "netrc",
  "nis", "nntplib", "ntpath", "numbers", "operator", "optparse", "os", "ossaudiodev",
  "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform", "plistlib",
  "poplib", "posix", "posixpath", "pprint", "profile", "pstats", "pty", "pwd",
  "py_compile", "pyclbr", "pydoc", "queue", "quopri", "random", "re", "readline",
  "reprlib", "resource", "rlcompleter", "runpy", "sched", "secrets", "select",
  "selectors", "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
  "sndhdr", "socket", "socketserver", "spwd", "sqlite3", "ssl", "stat", "statistics",
  "string", "stringprep", "struct", "subprocess", "sunau", "symtable", "sys",
  "sysconfig", "syslog", "tabnanny", "tarfile", "telnetlib", "tempfile", "termios",
  "test", "textwrap", "threading", "time", "timeit", "tkinter", "token", "tokenize",
  "tomllib", "trace", "traceback", "tracemalloc", "tty", "turtle", "turtledemo",
  "types", "typing", "unicodedata", "unittest", "urllib", "uu", "uuid", "venv",
  "warnings", "wave", "weakref", "webbrowser", "winreg", "winsound", "wsgiref",
  "xdrlib", "xml", "xmlrpc", "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
  "_thread"
]);
/**
 * @param {string} module - Dotted module name, e.g. "os.path".
 * @returns {boolean} True when the top-level package is in PYTHON_STDLIB.
 */
function isPythonStdlib(module) {
  const topLevel = module.split(".")[0];
  return PYTHON_STDLIB.has(topLevel);
}
/**
 * Extract non-stdlib import specs from Python source.
 *
 * Recognises `from X import ...` (including relative forms such as
 * `from ..pkg import y`) and `import a, b as c`. Lines starting with `#` are
 * skipped, and a trailing `# comment` on an import line is ignored.
 *
 * @param {string} content - Python source text.
 * @returns {{raw: string, isRelative: boolean}[]} Specs in source order.
 */
function parsePythonImports(content) {
  const specs = [];
  // Collapse parenthesised multi-line import lists onto one line.
  const joined = content.replace(/\(\s*\n([^)]*?)\)/gs, (_, inner) => {
    return "(" + inner.replace(/\n/g, " ").replace(/\s+/g, " ") + ")";
  });
  for (const line of joined.split("\n")) {
    const trimmed = line.trimStart();
    if (trimmed.startsWith("#")) continue;
    const fromMatch = trimmed.match(/^from\s+(\.{0,10}[\w.]*)\s+import\s+(.+)/);
    if (fromMatch) {
      const raw = fromMatch[1];
      const isRelative = raw.startsWith(".");
      if (!isRelative && isPythonStdlib(raw)) continue;
      specs.push({ raw, isRelative });
      continue;
    }
    // Capture up to (not including) a trailing comment; previously
    // `import requests  # noqa` failed the name check and was dropped.
    const importMatch = trimmed.match(/^import\s+([^#]+)/);
    if (!importMatch) continue;
    const modules = importMatch[1].split(",").map((m) => m.trim().split(/\s+as\s+/)[0].trim());
    for (const mod of modules) {
      if (!mod || !mod.match(/^[\w.]+$/)) continue;
      if (isPythonStdlib(mod)) continue;
      specs.push({ raw: mod, isRelative: false });
    }
  }
  return specs;
}
|
|
1246
|
+
/**
 * First path segments of Go standard-library import paths, plus the cgo
 * pseudo-package "C".
 */
var GO_STDLIB_PREFIXES = /* @__PURE__ */ new Set([
  "C", "archive", "bufio", "bytes", "cmp", "compress", "container", "context", "crypto",
  "database", "debug", "embed", "encoding", "errors", "expvar", "flag", "fmt", "go",
  "hash", "html", "image", "index", "internal", "io", "iter", "log", "maps", "math",
  "mime", "net", "os", "path", "plugin", "reflect", "regexp", "runtime", "slices",
  "sort", "strconv", "strings", "structs", "sync", "syscall", "testing", "text",
  "time", "unicode", "unique", "unsafe", "weak"
]);
/**
 * @param {string} importPath - Go import path, e.g. "net/http".
 * @returns {boolean} True when the first segment has no dot and is a known
 *   standard-library root.
 */
function isGoStdlib(importPath) {
  const firstSegment = importPath.split("/")[0];
  // A dotted first segment is a host name (github.com/...), never stdlib.
  if (firstSegment.includes(".")) return false;
  return GO_STDLIB_PREFIXES.has(firstSegment);
}
/**
 * Extract non-stdlib import paths from Go source.
 *
 * Handles single-line `import "x"` (with optional alias, `_` or `.`) and
 * `import ( ... )` blocks. Comments inside a block are removed first, so a
 * commented-out import is not reported.
 *
 * @param {string} content - Go source text.
 * @returns {{raw: string, isRelative: boolean}[]} Single-line imports first, then block imports.
 */
function parseGoImports(content) {
  const specs = [];
  const addSpec = (pkg) => {
    if (!isGoStdlib(pkg)) specs.push({ raw: pkg, isRelative: false });
  };
  const singlePattern = /^\s*import\s+(?:[\w_.]+\s+)?"([^"]+)"/gm;
  for (const match of content.matchAll(singlePattern)) {
    addSpec(match[1]);
  }
  const blockPattern = /import\s*\(([\s\S]*?)\)/g;
  for (const match of content.matchAll(blockPattern)) {
    // Drop /* ... */ and // ... comments so quoted paths inside them are ignored.
    const block = match[1].replace(/\/\*[\s\S]*?\*\//g, "").replace(/\/\/[^\n]*/g, "");
    for (const lineMatch of block.matchAll(/(?:[\w_.]+\s+)?"([^"]+)"/g)) {
      addSpec(lineMatch[1]);
    }
  }
  return specs;
}
|
|
1322
|
+
/** Package prefixes treated as Java standard library. */
var JAVA_STDLIB_PREFIXES = /* @__PURE__ */ new Set(["java", "javax", "jdk", "sun", "com.sun", "org.w3c", "org.xml", "org.ietf"]);
/**
 * @param {string} importPath - Fully qualified import, e.g. "java.util.List".
 * @returns {boolean} True when the import equals a stdlib prefix or is nested under one.
 */
function isJavaStdlib(importPath) {
  return [...JAVA_STDLIB_PREFIXES].some(
    (prefix) => importPath === prefix || importPath.startsWith(prefix + ".")
  );
}
/**
 * Extract non-stdlib imports (plain, static and wildcard) from Java source.
 *
 * @param {string} content - Java source text.
 * @returns {{raw: string, isRelative: boolean}[]} Specs in source order; `isRelative` is always false.
 */
function parseJavaImports(content) {
  const importRe = /^\s*import\s+(?:static\s+)?([\w.*]+)\s*;/gm;
  const collected = [];
  for (const hit of content.matchAll(importRe)) {
    const qualifiedName = hit[1];
    if (!isJavaStdlib(qualifiedName)) {
      collected.push({ raw: qualifiedName, isRelative: false });
    }
  }
  return collected;
}
|
|
1349
|
+
/**
 * Extract intra-crate references from Rust source.
 *
 * Reports `use crate::…`, `use super::…`, `use self::…` and out-of-line
 * `mod name;` declarations (as `mod::name`). Any visibility prefix is
 * accepted: `pub`, `pub(crate)`, `pub(super)`, `pub(in path)`.
 *
 * @param {string} content - Rust source text.
 * @returns {{raw: string, isRelative: boolean}[]} All `use` specs first, then `mod` specs.
 */
function parseRustImports(content) {
  const specs = [];
  // `pub(crate) use …` / `pub(crate) mod …;` used to be missed because only
  // a bare `pub` was allowed in front of the keyword.
  const usePattern = /^\s*(?:pub(?:\s*\([^)]*\))?\s+)?use\s+((?:crate|super|self)(?:::\w+)*)/gm;
  for (const match of content.matchAll(usePattern)) {
    const raw = match[1];
    const isRelative = raw.startsWith("super") || raw.startsWith("self");
    specs.push({ raw, isRelative });
  }
  const modPattern = /^\s*(?:pub(?:\s*\([^)]*\))?\s+)?mod\s+(\w+)\s*;/gm;
  for (const match of content.matchAll(modPattern)) {
    specs.push({ raw: `mod::${match[1]}`, isRelative: true });
  }
  return specs;
}
|
|
1364
|
+
/**
 * Dispatch an import spec to the resolver for its language.
 *
 * @param {{raw: string, isRelative: boolean}} spec - Parsed import.
 * @param {string} fromRelativePath - Project-relative path of the importing file.
 * @param {string} projectPath - Project root (only the Go resolver receives it).
 * @param {Set<string>} allPaths - Every known project-relative path.
 * @param {string} lang - "python", "go", "java" or "rust".
 * @returns {string|null} Resolved project-relative path, or null.
 */
function resolveImportSpec(spec, fromRelativePath, projectPath, allPaths, lang) {
  if (lang === "python") {
    return resolvePythonImport(spec, fromRelativePath, allPaths);
  }
  if (lang === "go") {
    return resolveGoImport(spec, fromRelativePath, projectPath, allPaths);
  }
  if (lang === "java") {
    return resolveJavaImport(spec, allPaths);
  }
  if (lang === "rust") {
    return resolveRustImport(spec, fromRelativePath, allPaths);
  }
  return null;
}
|
|
1378
|
+
/**
 * Resolve a Python import spec to a project file.
 *
 * Absolute imports are looked up from the project root. Relative imports
 * start in the importing file's directory and climb one level for every
 * leading dot after the first.
 *
 * @param {{raw: string, isRelative: boolean}} spec - Parsed import.
 * @param {string} fromRelativePath - Project-relative path of the importing file.
 * @param {Set<string>} allPaths - Every known project-relative path.
 * @returns {string|null} Matching path, or null.
 */
function resolvePythonImport(spec, fromRelativePath, allPaths) {
  if (!spec.isRelative) {
    return tryResolvePython("", spec.raw.replace(/\./g, "/"), allPaths);
  }
  const leadingDots = spec.raw.match(/^\.+/)?.[0].length ?? 0;
  const remainder = spec.raw.slice(leadingDots);
  let packageDir = dirname(fromRelativePath);
  // One dot = current package; each further dot moves up one directory.
  for (let up = leadingDots; up > 1; up--) {
    packageDir = dirname(packageDir);
  }
  // An empty remainder (`from . import x`) stays empty and selects the package's __init__.py.
  return tryResolvePython(packageDir, remainder.replace(/\./g, "/"), allPaths);
}
|
|
1395
|
+
/**
 * Probe the usual locations for a Python module and return the first that exists.
 *
 * Search order: under `baseDir`, then the project root (only when `baseDir`
 * is non-empty), then `src/`, `lib/` and `app/`. At each location the module
 * file `<module>.py` is tried before the package `<module>/__init__.py`.
 * An empty `modulePath` means "the package itself": only
 * `<baseDir>/__init__.py` is tried.
 *
 * @param {string} baseDir - Directory to search first ("" for the project root).
 * @param {string} modulePath - Slash-separated module path, or "".
 * @param {Set<string>} allPaths - Every known project-relative path.
 * @returns {string|null} First matching path, or null.
 */
function tryResolvePython(baseDir, modulePath, allPaths) {
  const options = [];
  if (modulePath) {
    const moduleFile = `${modulePath}.py`;
    const packageInit = join(modulePath, "__init__.py");
    options.push(join(baseDir, moduleFile), join(baseDir, packageInit));
    if (baseDir) {
      options.push(moduleFile, packageInit);
    }
    for (const root of ["src", "lib", "app"]) {
      options.push(join(root, moduleFile), join(root, packageInit));
    }
  } else {
    options.push(join(baseDir, "__init__.py"));
  }
  // Strip a leading "./" so candidates compare equal to project-relative paths.
  const hit = options.map((p) => p.replace(/^\.[\\/]/, "")).find((p) => allPaths.has(p));
  return hit ?? null;
}
|
|
1423
|
+
/**
 * Resolve a Go import path to a representative file of the imported package.
 *
 * Strategy, in order:
 *  1. If the import starts with the module path declared in the first go.mod
 *     found, map the remainder to a directory (also tried under cmd/, pkg/
 *     and internal/).
 *  2. Any directory whose name equals the last import segment.
 *  3. Any directory ending with the last 2 to 4 import segments.
 * The alphabetically first non-test .go file of the matched directory is returned.
 *
 * @param {{raw: string}} spec - Parsed import.
 * @param {string} fromRelativePath - Importing file (unused).
 * @param {string} projectPath - Project root, used to read go.mod.
 * @param {Set<string>} allPaths - Every known project-relative path.
 * @returns {string|null} Project-relative .go file, or null.
 */
function resolveGoImport(spec, fromRelativePath, projectPath, allPaths) {
  // Group non-test Go files by their directory.
  const goFilesByDir = /* @__PURE__ */ new Map();
  for (const filePath of allPaths) {
    if (!filePath.endsWith(".go") || filePath.endsWith("_test.go")) continue;
    const dir = dirname(filePath);
    if (!goFilesByDir.has(dir)) goFilesByDir.set(dir, []);
    goFilesByDir.get(dir).push(filePath);
  }
  const pickFirst = (list) => list.sort()[0];
  const segments = spec.raw.split("/");
  const packageName = segments[segments.length - 1];
  // Module path declared by the first go.mod encountered.
  let modulePrefix = "";
  for (const candidate of allPaths) {
    if (candidate !== "go.mod" && !candidate.endsWith("/go.mod")) continue;
    try {
      const text = safeReadFile(join(projectPath, candidate));
      const declared = text ? text.match(/^module\s+(\S+)/m) : null;
      if (declared) modulePrefix = declared[1];
    } catch {
    }
    break;
  }
  if (modulePrefix && spec.raw.startsWith(modulePrefix + "/")) {
    const localDir = spec.raw.slice(modulePrefix.length + 1);
    for (const root of ["", "cmd/", "pkg/", "internal/"]) {
      const hit = goFilesByDir.get(root + localDir);
      if (hit && hit.length > 0) return pickFirst(hit);
    }
  }
  for (const [dir, list] of goFilesByDir) {
    if (dir.split("/").pop() === packageName) return pickFirst(list);
  }
  const maxDepth = Math.min(segments.length, 4);
  for (let depth = 2; depth <= maxDepth; depth++) {
    const tail = segments.slice(-depth).join("/");
    for (const [dir, list] of goFilesByDir) {
      if (dir === tail || dir.endsWith("/" + tail)) return pickFirst(list);
    }
  }
  return null;
}
|
|
1473
|
+
/**
 * Resolve a Java import to a project-relative .java file.
 *
 * - Wildcard imports (`a.b.*`) return the first .java file found inside the
 *   package directory.
 * - Static-member imports (`a.b.Cls.member`) and nested-class imports
 *   (`a.b.Outer.Inner`) resolve to the outer class file.
 * - Otherwise the class file is searched under common source roots, then by
 *   path suffix, then by file name plus the last two package segments.
 *
 * All suffix and substring comparisons are aligned to whole path segments,
 * so `Bar` never resolves to `FooBar.java` and package `com/acme` never
 * matches a directory such as `telecom/acme`.
 *
 * @param {{raw: string}} spec - Parsed import.
 * @param {Set<string>} allPaths - Every known project-relative path (forward slashes).
 * @returns {string|null} Matching path, or null.
 */
function resolveJavaImport(spec, allPaths) {
  // True when `path` is `suffix` or ends with it on a "/" boundary.
  const endsWithSegments = (path, suffix) => path === suffix || path.endsWith("/" + suffix);
  // True when `dir` occurs in `path` as a run of whole directory segments.
  const containsDir = (path, dir) => ("/" + path).includes("/" + dir + "/");
  const parts = spec.raw.split(".");
  if (parts[parts.length - 1] === "*") {
    const packageDir = parts.slice(0, -1).join("/");
    for (const prefix of ["src/main/java/", "src/", ""]) {
      for (const path of allPaths) {
        if (path.startsWith(prefix + packageDir + "/") && path.endsWith(".java")) {
          return path;
        }
      }
    }
    for (const path of allPaths) {
      if (containsDir(path, packageDir) && path.endsWith(".java")) {
        return path;
      }
    }
    return null;
  }
  let className = parts[parts.length - 1];
  let packageParts = parts.slice(0, -1);
  // A lower-case last segment is a static member; the class is one segment to the left.
  if (className[0] && className[0] === className[0].toLowerCase() && packageParts.length > 0) {
    className = packageParts[packageParts.length - 1];
    packageParts = packageParts.slice(0, -1);
  }
  // Two capitalised segments in a row: Outer.Inner (or Class.CONSTANT) lives in Outer.java.
  if (packageParts.length > 0) {
    const maybeOuter = packageParts[packageParts.length - 1];
    if (maybeOuter[0] && maybeOuter[0] === maybeOuter[0].toUpperCase() && className[0] && className[0] === className[0].toUpperCase()) {
      className = maybeOuter;
      packageParts = packageParts.slice(0, -1);
    }
  }
  const packagePath = packageParts.join("/");
  const javaFile = `${className}.java`;
  const fullPath = packagePath ? `${packagePath}/${javaFile}` : javaFile;
  const prefixes = ["src/main/java/", "src/main/kotlin/", "src/", "app/src/main/java/", ""];
  for (const prefix of prefixes) {
    const candidate = prefix + fullPath;
    if (allPaths.has(candidate)) return candidate;
  }
  for (const path of allPaths) {
    if (endsWithSegments(path, fullPath)) return path;
  }
  if (packageParts.length >= 2) {
    const lastTwoPkg = packageParts.slice(-2).join("/");
    for (const path of allPaths) {
      if (endsWithSegments(path, javaFile) && containsDir(path, lastTwoPkg)) return path;
    }
  }
  return null;
}
|
|
1523
|
+
/**
 * Resolve a Rust `mod`/`use` spec to a project-relative source file.
 *
 * Where a module's children live depends on the file that defines it:
 *  - `mod.rs`, `lib.rs`, `main.rs`: children sit next to the file;
 *  - any other `name.rs`: children sit in the sibling directory `name/`.
 * `mod::x` and `self::x` are looked up among the current module's children.
 * `super::x` is looked up one level up per `super`. `crate::x` is looked up
 * under the nearest enclosing `src/` directory of the importing file, which
 * covers workspace members such as `crates/foo/src`.
 * The locations probed by earlier versions are still tried afterwards as a fallback.
 *
 * For a path `a::b::Item`, the longest prefix that maps to a file wins;
 * each prefix is tried as `<prefix>.rs` and then `<prefix>/mod.rs`.
 *
 * @param {{raw: string}} spec - Parsed spec (`mod::x`, `crate::…`, `super::…`, `self::…`).
 * @param {string} fromRelativePath - Project-relative path of the importing file.
 * @param {Set<string>} allPaths - Every known project-relative path.
 * @returns {string|null} Matching path, or null.
 */
function resolveRustImport(spec, fromRelativePath, allPaths) {
  const firstExisting = (candidates) => {
    for (const c of candidates) {
      if (allPaths.has(c)) return c;
    }
    return null;
  };
  // Try the longest module path first, in each directory in order.
  const resolveUnder = (dirs, parts) => {
    for (let i = parts.length; i >= 1; i--) {
      const modulePath = parts.slice(0, i).join("/");
      for (const dir of dirs) {
        const hit = firstExisting([join(dir, `${modulePath}.rs`), join(dir, modulePath, "mod.rs")]);
        if (hit) return hit;
      }
    }
    return null;
  };
  const fileDir = dirname(fromRelativePath);
  const fileName = fromRelativePath.split(/[\\/]/).pop() ?? "";
  const stem = fileName.replace(/\.rs$/, "");
  const ownsDirectory = stem === "mod" || stem === "lib" || stem === "main";
  // Directory holding this module's child modules.
  const childDir = ownsDirectory ? fileDir : join(fileDir, stem);

  if (spec.raw.startsWith("mod::")) {
    return resolveUnder([childDir, fileDir], [spec.raw.slice(5)]);
  }
  if (spec.raw.startsWith("crate::")) {
    const parts = spec.raw.slice("crate::".length).split("::");
    // Nearest enclosing src/ directory. Index 0 is the plain top-level "src"
    // that the legacy candidates below already cover.
    const dirSegments = fileDir.split(/[\\/]/);
    const srcIndex = dirSegments.lastIndexOf("src");
    const crateSrc = srcIndex > 0 ? dirSegments.slice(0, srcIndex + 1).join("/") : null;
    for (let i = parts.length; i >= 1; i--) {
      const modulePath = parts.slice(0, i).join("/");
      const candidates = [];
      if (crateSrc) {
        candidates.push(`${crateSrc}/${modulePath}.rs`, `${crateSrc}/${modulePath}/mod.rs`);
      }
      candidates.push(
        `src/${modulePath}.rs`,
        `src/${modulePath}/mod.rs`,
        `${modulePath}.rs`,
        `${modulePath}/mod.rs`
      );
      const hit = firstExisting(candidates);
      if (hit) return hit;
    }
    return null;
  }
  if (spec.raw.startsWith("super::")) {
    const segments = spec.raw.split("::");
    let ups = 0;
    while (segments[ups] === "super") ups++;
    const parts = segments.slice(ups);
    if (parts.length === 0) return null;
    // Each `super` moves one module up, i.e. one directory up from childDir.
    let parentChildDir = childDir;
    for (let i = 0; i < ups; i++) {
      parentChildDir = dirname(parentChildDir);
    }
    return resolveUnder([parentChildDir, dirname(fileDir)], parts);
  }
  if (spec.raw.startsWith("self::")) {
    const parts = spec.raw.slice("self::".length).split("::");
    return resolveUnder([childDir, fileDir], parts);
  }
  return null;
}
|
|
1584
|
+
/**
 * Read a file as UTF-8 text without throwing.
 *
 * @param {string} path - File to read.
 * @returns {string|null} File content, or null when the read fails for any reason.
 */
function safeReadFile(path) {
  let text = null;
  try {
    text = readFileSync(path, "utf-8");
  } catch {
    text = null;
  }
  return text;
}
|
|
1591
|
+
|
|
1592
|
+
// src/engine/graph.ts
|
|
1593
|
+
// Extensions (without the dot) in the TypeScript/JavaScript extension set.
var TS_EXTENSIONS = /* @__PURE__ */ new Set([
  "ts",
  "tsx",
  "js",
  "jsx",
  "mts",
  "mjs",
  "cts",
  "cjs"
]);
|
|
1594
|
+
function createProject(projectPath, filePaths) {
|
|
1595
|
+
const tsConfigPath = join2(projectPath, "tsconfig.json");
|
|
1596
|
+
const hasTsConfig = existsSync2(tsConfigPath);
|
|
1597
|
+
const project = new Project({
|
|
1598
|
+
tsConfigFilePath: hasTsConfig ? tsConfigPath : void 0,
|
|
1599
|
+
skipAddingFilesFromTsConfig: true,
|
|
1600
|
+
compilerOptions: hasTsConfig ? void 0 : {
|
|
1601
|
+
allowJs: true,
|
|
1602
|
+
jsx: 4,
|
|
1603
|
+
// JsxEmit.ReactJSX
|
|
1604
|
+
esModuleInterop: true,
|
|
1605
|
+
moduleResolution: 100
|
|
1606
|
+
// Bundler
|
|
1607
|
+
}
|
|
1608
|
+
});
|
|
1609
|
+
const tsFiles = filePaths.filter((f) => {
|
|
1610
|
+
const ext = f.split(".").pop()?.toLowerCase() ?? "";
|
|
1611
|
+
return TS_EXTENSIONS.has(ext);
|
|
117
1612
|
});
|
|
118
1613
|
for (const filePath of tsFiles) {
|
|
119
1614
|
try {
|
|
@@ -124,9 +1619,11 @@ function createProject(projectPath, filePaths) {
|
|
|
124
1619
|
return project;
|
|
125
1620
|
}
|
|
126
1621
|
function buildProjectGraph(projectPath, files) {
|
|
127
|
-
const absPath =
|
|
1622
|
+
const absPath = resolve2(projectPath);
|
|
128
1623
|
const tsFiles = files.filter((f) => TS_EXTENSIONS.has(f.extension)).map((f) => f.path);
|
|
129
|
-
|
|
1624
|
+
const polyglotFiles = files.filter((f) => !TS_EXTENSIONS.has(f.extension)).map((f) => ({ relativePath: f.relativePath, absolutePath: f.path }));
|
|
1625
|
+
const polyglotEdges = parseAllPolyglotImports(polyglotFiles, absPath);
|
|
1626
|
+
if (tsFiles.length === 0 && polyglotEdges.length === 0) {
|
|
130
1627
|
return emptyGraph(files);
|
|
131
1628
|
}
|
|
132
1629
|
let project;
|
|
@@ -138,7 +1635,7 @@ function buildProjectGraph(projectPath, files) {
|
|
|
138
1635
|
const edges = [];
|
|
139
1636
|
const nodeSet = /* @__PURE__ */ new Set();
|
|
140
1637
|
for (const sourceFile of project.getSourceFiles()) {
|
|
141
|
-
const fromRel =
|
|
1638
|
+
const fromRel = relative2(absPath, sourceFile.getFilePath());
|
|
142
1639
|
if (fromRel.startsWith("..") || fromRel.includes("node_modules")) continue;
|
|
143
1640
|
nodeSet.add(fromRel);
|
|
144
1641
|
for (const imp of sourceFile.getImportDeclarations()) {
|
|
@@ -160,6 +1657,11 @@ function buildProjectGraph(projectPath, files) {
|
|
|
160
1657
|
}
|
|
161
1658
|
}
|
|
162
1659
|
}
|
|
1660
|
+
for (const edge of polyglotEdges) {
|
|
1661
|
+
nodeSet.add(edge.from);
|
|
1662
|
+
nodeSet.add(edge.to);
|
|
1663
|
+
edges.push(edge);
|
|
1664
|
+
}
|
|
163
1665
|
const nodes = Array.from(nodeSet);
|
|
164
1666
|
const importedByCount = /* @__PURE__ */ new Map();
|
|
165
1667
|
const importCount = /* @__PURE__ */ new Map();
|
|
@@ -192,6 +1694,7 @@ function buildProjectGraph(projectPath, files) {
|
|
|
192
1694
|
const orphans = Array.from(allFileNodes).filter((n) => !connectedNodes.has(n));
|
|
193
1695
|
const clusters = detectClusters(nodes, edges, files);
|
|
194
1696
|
enrichComplexity(project, absPath, files);
|
|
1697
|
+
enrichPolyglotComplexity(files);
|
|
195
1698
|
return { nodes, edges, hubs, leaves, orphans, clusters };
|
|
196
1699
|
}
|
|
197
1700
|
var UnionFind = class {
|
|
@@ -284,7 +1787,7 @@ function commonPrefix(paths) {
|
|
|
284
1787
|
function enrichComplexity(project, absPath, files) {
|
|
285
1788
|
const fileMap = new Map(files.map((f) => [f.relativePath, f]));
|
|
286
1789
|
for (const sourceFile of project.getSourceFiles()) {
|
|
287
|
-
const relPath =
|
|
1790
|
+
const relPath = relative2(absPath, sourceFile.getFilePath());
|
|
288
1791
|
if (relPath.startsWith("..") || relPath.includes("node_modules")) continue;
|
|
289
1792
|
const file = fileMap.get(relPath);
|
|
290
1793
|
if (!file) continue;
|
|
@@ -335,22 +1838,34 @@ function calculateCyclomaticComplexity(node) {
|
|
|
335
1838
|
});
|
|
336
1839
|
return complexity;
|
|
337
1840
|
}
|
|
1841
|
+
/**
 * Fill in `complexity` for every non-TS/JS file whose language is recognised.
 *
 * The file is read from disk and scored with estimateComplexity; the stored
 * value is never below 1. Files that cannot be read or scored are left
 * untouched (best effort).
 *
 * @param {{path: string, relativePath: string, extension: string, complexity?: number}[]} files - Mutated in place.
 */
function enrichPolyglotComplexity(files) {
  const polyglot = files.filter((f) => !TS_EXTENSIONS.has(f.extension));
  for (const entry of polyglot) {
    const language = detectLanguage(entry.relativePath);
    if (language) {
      try {
        const text = readFileSync2(entry.path, "utf-8");
        const score = estimateComplexity(text, language);
        entry.complexity = Math.max(1, score);
      } catch {
      }
    }
  }
}
|
|
338
1853
|
function resolveImport(sourceFile, moduleSpecifier, projectRoot) {
|
|
339
1854
|
if (!moduleSpecifier.startsWith(".")) return null;
|
|
340
|
-
const sourceDir =
|
|
341
|
-
const basePath =
|
|
1855
|
+
const sourceDir = dirname2(sourceFile.getFilePath());
|
|
1856
|
+
const basePath = resolve2(sourceDir, moduleSpecifier);
|
|
342
1857
|
const extensions = [".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js", "/index.jsx"];
|
|
343
1858
|
for (const ext of extensions) {
|
|
344
1859
|
const candidate = basePath.endsWith(ext) ? basePath : basePath + ext;
|
|
345
|
-
if (
|
|
346
|
-
const rel =
|
|
1860
|
+
if (existsSync2(candidate)) {
|
|
1861
|
+
const rel = relative2(projectRoot, candidate);
|
|
347
1862
|
if (!rel.startsWith("..")) return rel;
|
|
348
1863
|
}
|
|
349
1864
|
}
|
|
350
1865
|
if (moduleSpecifier.endsWith(".js")) {
|
|
351
1866
|
const tsPath = basePath.replace(/\.js$/, ".ts");
|
|
352
|
-
if (
|
|
353
|
-
const rel =
|
|
1867
|
+
if (existsSync2(tsPath)) {
|
|
1868
|
+
const rel = relative2(projectRoot, tsPath);
|
|
354
1869
|
if (!rel.startsWith("..")) return rel;
|
|
355
1870
|
}
|
|
356
1871
|
}
|
|
@@ -486,11 +2001,15 @@ function computeTypeProviderUsage(files, graph) {
|
|
|
486
2001
|
|
|
487
2002
|
// src/engine/analyzer.ts
|
|
488
2003
|
function matchesPattern(filename, patterns) {
|
|
2004
|
+
const lower = filename.toLowerCase();
|
|
489
2005
|
for (const pattern of patterns) {
|
|
490
2006
|
if (pattern.startsWith("*.")) {
|
|
491
2007
|
const ext = pattern.slice(1);
|
|
492
2008
|
if (filename.endsWith(ext)) return true;
|
|
493
|
-
} else if (
|
|
2009
|
+
} else if (pattern.endsWith("*")) {
|
|
2010
|
+
const prefix = pattern.slice(0, -1).toLowerCase();
|
|
2011
|
+
if (lower.startsWith(prefix)) return true;
|
|
2012
|
+
} else if (lower === pattern.toLowerCase()) {
|
|
494
2013
|
return true;
|
|
495
2014
|
}
|
|
496
2015
|
}
|
|
@@ -510,7 +2029,7 @@ async function walkProject(rootPath, options) {
|
|
|
510
2029
|
}
|
|
511
2030
|
const promises = [];
|
|
512
2031
|
for (const entry of entries) {
|
|
513
|
-
const fullPath =
|
|
2032
|
+
const fullPath = join3(dir, entry.name);
|
|
514
2033
|
if (entry.isDirectory()) {
|
|
515
2034
|
if (!ignoreDirSet.has(entry.name) && !entry.name.startsWith(".")) {
|
|
516
2035
|
promises.push(walk(fullPath, depth + 1));
|
|
@@ -531,7 +2050,7 @@ async function walkProject(rootPath, options) {
|
|
|
531
2050
|
}
|
|
532
2051
|
results.push({
|
|
533
2052
|
path: fullPath,
|
|
534
|
-
relativePath:
|
|
2053
|
+
relativePath: relative3(rootPath, fullPath),
|
|
535
2054
|
extension: ext,
|
|
536
2055
|
size: fileStat.size,
|
|
537
2056
|
lastModified: fileStat.mtime,
|
|
@@ -580,7 +2099,7 @@ function detectStack(files) {
|
|
|
580
2099
|
return stack;
|
|
581
2100
|
}
|
|
582
2101
|
async function analyzeProject(projectPath, config) {
|
|
583
|
-
const absPath =
|
|
2102
|
+
const absPath = resolve3(projectPath);
|
|
584
2103
|
const projectName = basename2(absPath);
|
|
585
2104
|
const mergedConfig = mergeConfig(DEFAULT_CONFIG, config);
|
|
586
2105
|
const allExtensions = [
|
|
@@ -714,13 +2233,90 @@ function mergeConfig(base, overrides) {
|
|
|
714
2233
|
};
|
|
715
2234
|
}
|
|
716
2235
|
|
|
717
|
-
// src/engine/
|
|
718
|
-
import {
|
|
2236
|
+
// src/engine/learner.ts
|
|
2237
|
+
import { readFile as readFile3, writeFile, mkdir } from "fs/promises";
|
|
2238
|
+
import { join as join4 } from "path";
|
|
2239
|
+
// Directory, joined onto the project path by loadLearner, that holds the learner model file.
var MODEL_DIR = ".cto";
|
|
2240
|
+
// File name of the persisted learner model inside MODEL_DIR.
var MODEL_FILE = "learner.json";
|
|
2241
|
+
// Minimum sample size before learner statistics are used: getLearnerBoosts compares it
// against both model.totalSelections and each pattern's alpha + beta.
var MIN_OBSERVATIONS = 3;
|
|
2242
|
+
/**
 * Loads the persisted learner model for a project.
 *
 * Reads `<projectPath>/<MODEL_DIR>/<MODEL_FILE>` and returns the parsed JSON
 * when its `version` field is 2. A missing or unreadable file, invalid JSON,
 * or any other version yields a fresh empty model instead.
 *
 * @param {string} projectPath
 * @returns {Promise<object>} the stored model, or the result of createEmptyModel()
 */
async function loadLearner(projectPath) {
  const location = join4(projectPath, MODEL_DIR, MODEL_FILE);
  try {
    const stored = JSON.parse(await readFile3(location, "utf-8"));
    if (stored.version === 2) {
      return stored;
    }
  } catch {
    // Fall through to the empty model on any read or parse failure.
  }
  return createEmptyModel();
}
|
|
2252
|
+
/**
 * Turns learned selection statistics into per-file score boosts.
 *
 * For each file, the pattern from extractPattern() is looked up first in the
 * task-specific statistics and then in the global ones. The boost is the
 * Wilson score lower bound (z = 1.96) of the selection rate alpha / (alpha + beta),
 * rescaled from [0, 1] to [-1, 1]. Boosts with magnitude of at most 0.05 are dropped.
 *
 * @param {{totalSelections: number, patterns: object, taskPatterns: object}} model
 * @param {string} taskType key into model.taskPatterns
 * @param {string[]} files file paths to evaluate
 * @returns {Array<{filePath: string, boost: number, confidence: number, reason: string}>}
 *   boosts ordered by descending absolute value; empty when the model has
 *   fewer than MIN_OBSERVATIONS total selections
 */
function getLearnerBoosts(model, taskType, files) {
  if (model.totalSelections < MIN_OBSERVATIONS) return [];
  const z2 = 1.96;
  const perTask = model.taskPatterns[taskType] ?? {};
  const results = [];
  for (const filePath of files) {
    const pattern = extractPattern(filePath);
    const taskStats = perTask[pattern];
    const stats = taskStats ?? model.patterns[pattern];
    if (!stats) continue;
    const total = stats.alpha + stats.beta;
    if (total < MIN_OBSERVATIONS) continue;
    const p = stats.alpha / total;
    // Wilson lower bound: (center - spread) / denominator.
    const denominator = 1 + z2 * z2 / total;
    const center = p + z2 * z2 / (2 * total);
    const spread = z2 * Math.sqrt((p * (1 - p) + z2 * z2 / (4 * total)) / total);
    const lower = (center - spread) / denominator;
    const boost = (lower - 0.5) * 2;
    // Written as a negated ">" so a NaN boost is skipped, exactly as before.
    if (!(Math.abs(boost) > 0.05)) continue;
    const percent = Math.round(p * 100);
    const reason = taskStats
      ? `${pattern} selected ${percent}% of the time for ${taskType} tasks`
      : `${pattern} selected ${percent}% of the time globally`;
    results.push({
      filePath,
      boost,
      // Confidence saturates at 1 once 20 observations exist.
      confidence: Math.min(1, total / 20),
      reason
    });
  }
  return results.sort((a, b) => Math.abs(b.boost) - Math.abs(a.boost));
}
|
|
2283
|
+
/**
 * Builds a blank version-2 learner model: no recorded patterns, no
 * task-specific patterns, zero selections, stamped with the current time.
 *
 * @returns {{version: number, updatedAt: string, patterns: object, taskPatterns: object, totalSelections: number}}
 */
function createEmptyModel() {
  const updatedAt = new Date().toISOString();
  return { version: 2, updatedAt, patterns: {}, taskPatterns: {}, totalSelections: 0 };
}
|
|
2292
|
+
/**
 * Maps a file path to the coarse pattern key under which learner statistics
 * are aggregated.
 *
 * Rules, first match wins:
 *   1. `*.test.*` / `*.spec.*` files -> `tests/**\/*.test<ext>` when any path
 *      segment is `tests`, `test` or `__tests__`, otherwise `*.test<ext>`
 *      (likewise for `.spec`).
 *   2. File name containing "config", "configuration" or "settings" -> `config/*<ext>`.
 *   3. `.d.ts` files, or a `types` / `typings` path segment -> `types/*<ext>`.
 *   4. First directory that is not `src`, `lib` or `app` -> `<dir>/*<ext>`.
 *   5. Otherwise `*<ext>`.
 *
 * Backslashes are treated as separators so Windows-style relative paths
 * produce the same keys as POSIX ones, instead of collapsing to `*<ext>`.
 *
 * @param {string} filePath relative file path using `/` or `\` separators
 * @returns {string} pattern key
 */
function extractPattern(filePath) {
  const parts = filePath.replace(/\\/g, "/").split("/");
  const fileName = parts[parts.length - 1];
  // Extension is everything after the last dot, or empty for dot-less names.
  const ext = fileName.includes(".") ? `.${fileName.split(".").pop()}` : "";

  const isTest = fileName.includes(".test.");
  if (isTest || fileName.includes(".spec.")) {
    const testExt = (isTest ? ".test" : ".spec") + ext;
    const inTestDir = parts.includes("tests") || parts.includes("test") || parts.includes("__tests__");
    return inTestDir ? `tests/**/*${testExt}` : `*${testExt}`;
  }

  const lowerName = fileName.toLowerCase();
  if (["config", "configuration", "settings"].some((c) => lowerName.includes(c))) {
    return `config/*${ext}`;
  }

  if (fileName.endsWith(".d.ts") || parts.includes("types") || parts.includes("typings")) {
    return `types/*${ext}`;
  }

  // Conventional source roots carry no signal; key on the first other directory.
  const meaningfulDirs = parts.slice(0, -1).filter((d) => d !== "src" && d !== "lib" && d !== "app");
  if (meaningfulDirs.length > 0) {
    return `${meaningfulDirs[0]}/*${ext}`;
  }
  return `*${ext}`;
}
|
|
719
2315
|
|
|
720
2316
|
// src/govern/secrets.ts
|
|
721
|
-
import { readFile as
|
|
722
|
-
import { readFileSync, existsSync as
|
|
723
|
-
import { resolve as
|
|
2317
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
2318
|
+
import { readFileSync as readFileSync3, existsSync as existsSync3, mkdirSync, writeFileSync } from "fs";
|
|
2319
|
+
import { resolve as resolve4, relative as relative4, join as join5, dirname as dirname4 } from "path";
|
|
724
2320
|
import { createHash as createHash2 } from "crypto";
|
|
725
2321
|
var BUILTIN_PATTERNS = [
|
|
726
2322
|
// API Keys
|
|
@@ -847,8 +2443,8 @@ function scanContentForSecrets(content, filePath, customPatterns = [], extraPiiS
|
|
|
847
2443
|
}
|
|
848
2444
|
async function scanFileForSecrets(filePath, projectPath, customPatterns = []) {
|
|
849
2445
|
try {
|
|
850
|
-
const content = await
|
|
851
|
-
const relPath =
|
|
2446
|
+
const content = await readFile4(filePath, "utf-8");
|
|
2447
|
+
const relPath = relative4(resolve4(projectPath), resolve4(filePath));
|
|
852
2448
|
return scanContentForSecrets(content, relPath, customPatterns);
|
|
853
2449
|
} catch {
|
|
854
2450
|
return [];
|
|
@@ -941,8 +2537,42 @@ function deduplicateFindings(findings) {
|
|
|
941
2537
|
});
|
|
942
2538
|
}
|
|
943
2539
|
|
|
2540
|
+
// src/interact/router.ts
|
|
2541
|
+
// Keyword lists per task type. Declaration order matters: classifyTask keeps
// the first type that reaches the highest score.
var TASK_KEYWORDS = {
  debug: ["debug", "fix", "bug", "error", "issue", "broken", "crash", "failing", "wrong"],
  review: ["review", "check", "assess", "evaluate", "audit", "inspect", "critique"],
  refactor: ["refactor", "restructure", "reorganize", "clean up", "simplify", "extract", "move"],
  test: ["test", "spec", "coverage", "unit test", "integration test", "e2e"],
  docs: ["document", "docs", "readme", "jsdoc", "comment", "explain"],
  feature: ["add", "implement", "create", "build", "new", "feature", "endpoint"],
  architecture: ["architecture", "design", "system", "structure", "migrate", "pattern"],
  "simple-edit": ["rename", "typo", "update", "change", "modify", "tweak", "adjust"]
};

/**
 * Classifies a free-text task description into one of the TASK_KEYWORDS types.
 *
 * Each type scores one point per keyword found as a case-insensitive substring
 * of the description. The highest score wins, ties go to the type declared
 * first, and a description matching nothing is classified as "simple-edit".
 *
 * @param {string} taskDescription
 * @returns {string} a key of TASK_KEYWORDS
 */
function classifyTask(taskDescription) {
  const text = taskDescription.toLowerCase();
  let winner = "simple-edit";
  let topScore = 0;
  for (const type of Object.keys(TASK_KEYWORDS)) {
    const hits = TASK_KEYWORDS[type].filter((keyword) => text.includes(keyword)).length;
    if (hits > topScore) {
      topScore = hits;
      winner = type;
    }
  }
  return winner;
}
|
|
2567
|
+
|
|
2568
|
+
// src/engine/context-pipeline.ts
|
|
2569
|
+
import { readFileSync as readFileSync6 } from "fs";
|
|
2570
|
+
|
|
2571
|
+
// src/engine/selector.ts
|
|
2572
|
+
import { createHash as createHash3 } from "crypto";
|
|
2573
|
+
|
|
944
2574
|
// src/engine/pruner.ts
|
|
945
|
-
import { readFile as
|
|
2575
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
946
2576
|
// TypeScript/JavaScript extensions (no leading dot) known to the pruner.
// NOTE(review): presumably selects the TypeScript-aware pruning path in pruneFile — confirm there.
var TS_EXTENSIONS2 = /* @__PURE__ */ new Set(["ts", "tsx", "js", "jsx", "mts", "mjs"]);
|
|
947
2577
|
async function pruneFile(file, level) {
|
|
948
2578
|
if (level === "excluded") {
|
|
@@ -961,7 +2591,7 @@ async function pruneFile(file, level) {
|
|
|
961
2591
|
async function pruneTypeScript(file, level) {
|
|
962
2592
|
let content;
|
|
963
2593
|
try {
|
|
964
|
-
content = await
|
|
2594
|
+
content = await readFile5(file.path, "utf-8");
|
|
965
2595
|
} catch {
|
|
966
2596
|
return emptyResult(file, level);
|
|
967
2597
|
}
|
|
@@ -1255,7 +2885,7 @@ function extractClassOutline(lines, start) {
|
|
|
1255
2885
|
async function pruneGeneric(file, level) {
|
|
1256
2886
|
let content;
|
|
1257
2887
|
try {
|
|
1258
|
-
content = await
|
|
2888
|
+
content = await readFile5(file.path, "utf-8");
|
|
1259
2889
|
} catch {
|
|
1260
2890
|
return emptyResult(file, level);
|
|
1261
2891
|
}
|
|
@@ -1290,7 +2920,7 @@ function pruneGenericFromContent(file, content, level) {
|
|
|
1290
2920
|
async function fullContent(file) {
|
|
1291
2921
|
let content = "";
|
|
1292
2922
|
try {
|
|
1293
|
-
content = await
|
|
2923
|
+
content = await readFile5(file.path, "utf-8");
|
|
1294
2924
|
} catch {
|
|
1295
2925
|
}
|
|
1296
2926
|
return {
|
|
@@ -1442,19 +3072,8 @@ async function selectContext(input) {
|
|
|
1442
3072
|
for (const s of input.semanticScores ?? []) semanticMap.set(s.filePath, s.score);
|
|
1443
3073
|
const learnerMap = /* @__PURE__ */ new Map();
|
|
1444
3074
|
for (const b of input.learnerBoosts ?? []) learnerMap.set(b.filePath, b.boost);
|
|
1445
|
-
|
|
1446
|
-
if (targetPaths.length
|
|
1447
|
-
const sorted = [...semanticMap.entries()].sort((a, b) => b[1] - a[1]);
|
|
1448
|
-
const threshold = 0.5;
|
|
1449
|
-
targetPaths = sorted.filter(([, score]) => score >= threshold).slice(0, 10).map(([path]) => path);
|
|
1450
|
-
if (targetPaths.length > 0) {
|
|
1451
|
-
decisions.push({
|
|
1452
|
-
file: targetPaths.join(", "),
|
|
1453
|
-
action: "include-full",
|
|
1454
|
-
reason: `Top ${targetPaths.length} file(s) identified via semantic matching (score \u2265 ${threshold})`
|
|
1455
|
-
});
|
|
1456
|
-
}
|
|
1457
|
-
} else if (targetPaths.length > 0) {
|
|
3075
|
+
const targetPaths = identifyTargetFiles(task, analysis.files);
|
|
3076
|
+
if (targetPaths.length > 0) {
|
|
1458
3077
|
decisions.push({
|
|
1459
3078
|
file: targetPaths.join(", "),
|
|
1460
3079
|
action: "include-full",
|
|
@@ -1486,7 +3105,7 @@ async function selectContext(input) {
|
|
|
1486
3105
|
}
|
|
1487
3106
|
const { mustInclude, mustExclude } = applyPolicies(analysis.files, policies);
|
|
1488
3107
|
const candidateSet = /* @__PURE__ */ new Set([...expandedPaths, ...mustInclude]);
|
|
1489
|
-
if (targetPaths.length === 0) {
|
|
3108
|
+
if (semanticMap.size > 0 || targetPaths.length === 0) {
|
|
1490
3109
|
for (const f of analysis.files) {
|
|
1491
3110
|
candidateSet.add(f.relativePath);
|
|
1492
3111
|
}
|
|
@@ -1525,22 +3144,34 @@ async function selectContext(input) {
|
|
|
1525
3144
|
const riskNorm = file.riskScore / maxRisk;
|
|
1526
3145
|
const semantic = semanticMap.get(file.relativePath) ?? 0;
|
|
1527
3146
|
const learner = ((learnerMap.get(file.relativePath) ?? 0) + 1) / 2;
|
|
1528
|
-
return
|
|
3147
|
+
return semantic * 0.55 + riskNorm * 0.25 + learner * 0.2;
|
|
1529
3148
|
}
|
|
3149
|
+
const targetSet = new Set(targetPaths);
|
|
1530
3150
|
const candidates = Array.from(candidateSet).map((p) => allFileMap.get(p)).filter((f) => f !== void 0).sort((a, b) => {
|
|
1531
|
-
const
|
|
1532
|
-
const
|
|
1533
|
-
|
|
1534
|
-
const aIsMust = mustInclude.has(a.relativePath) ? 0 : 1;
|
|
1535
|
-
const bIsMust = mustInclude.has(b.relativePath) ? 0 : 1;
|
|
1536
|
-
if (aIsMust !== bIsMust) return aIsMust - bIsMust;
|
|
1537
|
-
return compositeScore(b) - compositeScore(a);
|
|
3151
|
+
const aBonus = (targetSet.has(a.relativePath) ? 0.3 : 0) + (mustInclude.has(a.relativePath) ? 0.15 : 0);
|
|
3152
|
+
const bBonus = (targetSet.has(b.relativePath) ? 0.3 : 0) + (mustInclude.has(b.relativePath) ? 0.15 : 0);
|
|
3153
|
+
return compositeScore(b) + bBonus - (compositeScore(a) + aBonus);
|
|
1538
3154
|
});
|
|
1539
3155
|
const selectedFiles = [];
|
|
1540
3156
|
let usedTokens = 0;
|
|
3157
|
+
const hasSemanticSignal = semanticMap.size > 0;
|
|
3158
|
+
const maxSemanticScore = hasSemanticSignal ? Math.max(...Array.from(semanticMap.values())) : 0;
|
|
3159
|
+
const semanticFloor = maxSemanticScore * 0.1;
|
|
1541
3160
|
for (const file of candidates) {
|
|
1542
|
-
const isTarget =
|
|
3161
|
+
const isTarget = targetSet.has(file.relativePath);
|
|
1543
3162
|
const isMustInclude = mustInclude.has(file.relativePath);
|
|
3163
|
+
if (hasSemanticSignal && !isTarget && !isMustInclude) {
|
|
3164
|
+
const semScore = semanticMap.get(file.relativePath) ?? 0;
|
|
3165
|
+
const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
|
|
3166
|
+
if (semScore < semanticFloor && lrnBoost <= 0) {
|
|
3167
|
+
decisions.push({
|
|
3168
|
+
file: file.relativePath,
|
|
3169
|
+
action: "exclude",
|
|
3170
|
+
reason: `Skipped: semantic score ${semScore.toFixed(3)} below floor ${semanticFloor.toFixed(3)}`
|
|
3171
|
+
});
|
|
3172
|
+
continue;
|
|
3173
|
+
}
|
|
3174
|
+
}
|
|
1544
3175
|
const defaultLevel = isTarget ? "full" : getPruneLevelForRisk(file.riskScore);
|
|
1545
3176
|
const levels = getCascadeLevels(defaultLevel);
|
|
1546
3177
|
let included = false;
|
|
@@ -1664,370 +3295,2105 @@ function buildReason(file, level, isTarget, isMustInclude) {
|
|
|
1664
3295
|
return `Low relevance (risk ${file.riskScore}) \u2014 ${levelStr}`;
|
|
1665
3296
|
}
|
|
1666
3297
|
|
|
1667
|
-
// src/engine/
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
"
|
|
1674
|
-
"
|
|
1675
|
-
"
|
|
1676
|
-
"
|
|
1677
|
-
"
|
|
1678
|
-
"
|
|
1679
|
-
"
|
|
1680
|
-
"
|
|
1681
|
-
"
|
|
1682
|
-
"
|
|
3298
|
+
// src/engine/context-pipeline.ts
|
|
3299
|
+
init_tfidf();
|
|
3300
|
+
|
|
3301
|
+
// src/engine/ast-tokenizer.ts
|
|
3302
|
+
init_tfidf();
|
|
3303
|
+
// Lower-cased annotation / decorator name -> extra search terms (already in
// stemmed form, e.g. "servic", "entiti") describing the architectural layer
// the annotation implies.
//
// The table has a null prototype because it is indexed with names scraped
// from arbitrary source text: with a plain object literal, a tag such as
// `@constructor` would resolve to an inherited Object.prototype member
// instead of `undefined`.
var ANNOTATION_LAYER_MAP = Object.assign(/* @__PURE__ */ Object.create(null), {
  "repository": ["repositori", "dao", "store", "persist", "databas"],
  "service": ["servic", "usecas", "busi", "logic"],
  "controller": ["control", "endpoint", "api", "rest", "handler"],
  "restcontroller": ["control", "endpoint", "api", "rest", "handler"],
  "component": ["compon", "bean", "inject"],
  "entity": ["entiti", "model", "domain", "persist"],
  "configuration": ["config", "setup", "inject", "wire"],
  "bean": ["config", "inject", "wire", "bean"],
  "autowired": ["inject", "wire", "depend"],
  "inject": ["inject", "wire", "depend"],
  "provides": ["inject", "wire", "depend", "config"],
  "singleton": ["singleton", "scope", "lifecycl"],
  "test": ["test", "spec", "assert", "mock"],
  "override": ["overrid", "inherit", "polymorph"],
  "transactional": ["transact", "databas", "commit", "rollback"],
  "cacheable": ["cach", "ttl", "evict", "invalidat"],
  "async": ["async", "concurr", "thread", "parallel"],
  "eventlistener": ["event", "listen", "handler", "subscrib"],
  "scheduled": ["schedul", "cron", "timer", "job"],
  "slf4j": ["log", "metric", "observ"],
  "data": ["model", "entiti", "dto", "data"],
  "getter": ["model", "entiti", "dto", "accessor"],
  "setter": ["model", "entiti", "dto", "mutator"],
  "builder": ["build", "pattern", "fluent"],
  "value": ["model", "entiti", "dto", "immut"]
});
|
|
3330
|
+
/**
 * Extracts structural identifiers (type names, function names, annotations,
 * parent types, package) from source text, choosing the extractor by the
 * file's lower-cased extension.
 *
 * @param {string} content source text
 * @param {string} filePath path whose last dot-separated segment picks the language
 * @returns {{classNames: string[], methodNames: string[], annotations: string[], parents: string[], packageName: (string|null), language: string}}
 *   the extractor's result, or an empty structure with language "unknown"
 */
function extractStructuralTokens(content, filePath) {
  const extension = filePath.split(".").pop()?.toLowerCase() ?? "";
  const language = detectLanguage2(extension);
  if (language === "java") return extractJava(content);
  if (language === "python") return extractPython(content);
  if (language === "go") return extractGo(content);
  if (language === "typescript") return extractTypeScript(content);
  return {
    classNames: [],
    methodNames: [],
    annotations: [],
    parents: [],
    packageName: null,
    language: "unknown"
  };
}
|
|
3346
|
+
/**
 * Maps a lower-cased file extension (no leading dot) to the language family
 * used to pick a structural extractor.
 *
 * All JavaScript and TypeScript flavours share the "typescript" family. This
 * includes the module-specific extensions (`mts`, `cts`, `mjs`, `cjs`), so
 * those files are no longer reported as "unknown".
 *
 * @param {string} ext
 * @returns {"java"|"python"|"go"|"typescript"|"unknown"}
 */
function detectLanguage2(ext) {
  switch (ext) {
    case "java":
      return "java";
    case "py":
      return "python";
    case "go":
      return "go";
    case "ts":
    case "tsx":
    case "mts":
    case "cts":
    case "js":
    case "jsx":
    case "mjs":
    case "cjs":
      return "typescript";
    default:
      return "unknown";
  }
}
|
|
3363
|
+
/**
 * Regex-based structural scan of Java source (heuristic, not a parser).
 *
 * Collects the package name, every `@name` occurrence (lower-cased, skipping
 * `override` and names of two characters or fewer), class/interface/enum
 * names with their `extends` / `implements` types, and method names that
 * follow a modifier and return type. A few ubiquitous method names are ignored.
 *
 * @param {string} content Java source text
 * @returns {{classNames: string[], methodNames: string[], annotations: string[], parents: string[], packageName: (string|null), language: "java"}}
 */
function extractJava(content) {
  const packageDecl = /^package\s+([\w.]+)\s*;/m.exec(content);
  const packageName = packageDecl ? packageDecl[1] : null;

  const annotations = [];
  for (const [, rawName] of content.matchAll(/@(\w+)/g)) {
    const ann = rawName.toLowerCase();
    if (ann === "override" || ann.length <= 2) continue;
    annotations.push(ann);
  }

  const classNames = [];
  const parents = [];
  const typePattern = /(?:public|private|protected|abstract|final|static)?\s*(?:class|interface|enum)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
  for (const [, typeName, superType, interfaceList] of content.matchAll(typePattern)) {
    classNames.push(typeName);
    if (superType) parents.push(superType);
    if (interfaceList) {
      const interfaces = interfaceList.split(",").map((item) => item.trim()).filter(Boolean);
      parents.push(...interfaces);
    }
  }

  const ignoredMethods = new Set(["equals", "hashCode", "toString", "main", "get", "set"]);
  const methodNames = [];
  const methodPattern = /(?:public|private|protected|static|abstract|final|synchronized|default)\s+(?:<[\w\s,?]+>\s+)?(?:[\w<>\[\]?,\s]+)\s+(\w+)\s*\(/g;
  for (const [, methodName] of content.matchAll(methodPattern)) {
    if (!ignoredMethods.has(methodName)) methodNames.push(methodName);
  }

  return { classNames, methodNames, annotations, parents, packageName, language: "java" };
}
|
|
3401
|
+
/**
 * Regex-based structural scan of Python source (heuristic, not a parser).
 *
 * Collects class names and their base classes, decorator names (lower-cased)
 * and function names. Dunder functions are skipped except `__init__`, and
 * leading/trailing underscores are stripped from the recorded names.
 *
 * Base-class handling:
 *   - keyword arguments such as `metaclass=ABCMeta` are not recorded as parents;
 *   - commas nested inside brackets (`Generic[K, V]`) do not split a base, so
 *     fragments like `V]` are never recorded;
 *   - subscripts are dropped (`Generic[T]` -> `Generic`) and `object` is ignored.
 *
 * @param {string} content Python source text
 * @returns {{classNames: string[], methodNames: string[], annotations: string[], parents: string[], packageName: null, language: "python"}}
 */
function extractPython(content) {
  // Splits a base-class list on commas that are not nested inside [] or ().
  const splitTopLevel = (list) => {
    const items = [];
    let depth = 0;
    let current = "";
    for (const ch of list) {
      if (ch === "[" || ch === "(") {
        depth++;
      } else if (ch === "]" || ch === ")") {
        depth = Math.max(0, depth - 1);
      }
      if (ch === "," && depth === 0) {
        items.push(current);
        current = "";
      } else {
        current += ch;
      }
    }
    items.push(current);
    return items;
  };

  const classNames = [];
  const parents = [];
  for (const [, className, baseList] of content.matchAll(/^\s*class\s+(\w+)(?:\(([^)]+)\))?/gm)) {
    classNames.push(className);
    if (!baseList) continue;
    for (const base of splitTopLevel(baseList)) {
      const baseName = base.trim().split("[")[0];
      // "=" marks a keyword argument (e.g. metaclass=...), not a base class.
      if (!baseName || baseName === "object" || baseName.includes("=")) continue;
      parents.push(baseName);
    }
  }

  const annotations = [];
  for (const [, decorator] of content.matchAll(/^\s*@(\w+)/gm)) {
    annotations.push(decorator.toLowerCase());
  }

  const methodNames = [];
  for (const [, name] of content.matchAll(/^\s*(?:async\s+)?def\s+(\w+)/gm)) {
    if (!name.startsWith("__") || name === "__init__") {
      methodNames.push(name.replace(/^_+|_+$/g, ""));
    }
  }

  return { classNames, methodNames, annotations, parents, packageName: null, language: "python" };
}
|
|
3432
|
+
/**
 * Regex-based structural scan of Go source (heuristic, not a parser).
 *
 * Collects the package name, `type X struct|interface` names, and every
 * `func` name. For methods, the receiver type is appended to `parents` once
 * per method, so a type with several methods appears several times.
 *
 * @param {string} content Go source text
 * @returns {{classNames: string[], methodNames: string[], annotations: string[], parents: string[], packageName: (string|null), language: "go"}}
 */
function extractGo(content) {
  const packageDecl = /^package\s+(\w+)/m.exec(content);

  const classNames = Array.from(
    content.matchAll(/type\s+(\w+)\s+(?:struct|interface)/g),
    (typeDecl) => typeDecl[1]
  );

  const methodNames = [];
  const parents = [];
  for (const [, receiverType, funcName] of content.matchAll(/func\s+(?:\(\w+\s+\*?(\w+)\)\s+)?(\w+)\s*\(/g)) {
    methodNames.push(funcName);
    if (receiverType) parents.push(receiverType);
  }

  return {
    classNames,
    methodNames,
    annotations: [],
    parents,
    packageName: packageDecl ? packageDecl[1] : null,
    language: "go"
  };
}
|
|
3453
|
+
/**
 * Regex-based structural scan of TypeScript/JavaScript source (heuristic,
 * not a parser).
 *
 * Collects class and interface names with their `extends` / `implements`
 * types, every `@name` occurrence (lower-cased), and names declared with the
 * `function` keyword. Class methods and arrow functions are not captured.
 *
 * @param {string} content source text
 * @returns {{classNames: string[], methodNames: string[], annotations: string[], parents: string[], packageName: null, language: "typescript"}}
 */
function extractTypeScript(content) {
  const classNames = [];
  const parents = [];
  const typePattern = /(?:export\s+)?(?:abstract\s+)?(?:class|interface)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
  for (const [, typeName, superType, interfaceList] of content.matchAll(typePattern)) {
    classNames.push(typeName);
    if (superType) parents.push(superType);
    if (interfaceList) {
      const interfaces = interfaceList.split(",").map((item) => item.trim()).filter(Boolean);
      parents.push(...interfaces);
    }
  }

  const annotations = Array.from(content.matchAll(/@(\w+)/g), (decorator) => decorator[1].toLowerCase());
  const methodNames = Array.from(
    content.matchAll(/(?:export\s+)?(?:async\s+)?function\s+(\w+)/g),
    (declaration) => declaration[1]
  );

  return { classNames, methodNames, annotations, parents, packageName: null, language: "typescript" };
}
|
|
3482
|
+
/**
 * Prepends a line of structural keywords to a file's content so that type
 * and function names weigh more when the text is tokenized.
 *
 * camelCase identifiers are split into lower-cased words. Class names are
 * repeated three times, method names twice, parent types once, and each
 * annotation contributes its ANNOTATION_LAYER_MAP terms.
 *
 * Annotation names come straight from the scanned file, so only own entries
 * of ANNOTATION_LAYER_MAP are honoured. Without that check a JSDoc tag such
 * as `@constructor` resolves to an inherited Object.prototype member and the
 * spread throws a TypeError.
 *
 * @param {string} content source text
 * @param {string} filePath used to select the language extractor
 * @returns {string} `"<keywords>\n<content>"`, or `content` unchanged when
 *   nothing structural was found
 */
function augmentContentWithStructure(content, filePath) {
  const struct = extractStructuralTokens(content, filePath);
  // "UserService" -> "user service"
  const toWords = (identifier) => identifier.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
  const augmentParts = [];
  for (const name of struct.classNames) {
    const words = toWords(name);
    augmentParts.push(words, words, words);
  }
  for (const name of struct.methodNames) {
    const words = toWords(name);
    augmentParts.push(words, words);
  }
  for (const parent of struct.parents) {
    augmentParts.push(toWords(parent));
  }
  for (const ann of struct.annotations) {
    if (!Object.prototype.hasOwnProperty.call(ANNOTATION_LAYER_MAP, ann)) continue;
    augmentParts.push(...ANNOTATION_LAYER_MAP[ann]);
  }
  if (augmentParts.length === 0) return content;
  return `${augmentParts.join(" ")}\n${content}`;
}
|
|
3506
|
+
|
|
3507
|
+
// src/engine/index-cache.ts
|
|
3508
|
+
init_tfidf();
|
|
3509
|
+
import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, statSync } from "fs";
|
|
3510
|
+
import { join as join6 } from "path";
|
|
3511
|
+
// Schema version written into the index cache; loadCache discards any cache file whose version differs.
var CACHE_VERSION = 2;
|
|
3512
|
+
// Directory, joined onto the project path by buildIndexCached, that holds the index cache file.
var CACHE_DIR = ".cto";
|
|
3513
|
+
// File name of the persisted index cache inside CACHE_DIR.
var CACHE_FILE = "index-cache.json";
|
|
3514
|
+
/**
 * Builds the term index for a project, reusing cached per-file term counts
 * for files whose modification time is unchanged.
 *
 * For each file: stat `absolutePath` (files that cannot be stat'ed are
 * skipped). If a cache entry with the same mtime exists, reuse it. Otherwise
 * tokenize `file.content`, or the file read from disk when `content` is
 * undefined, and record fresh counts. The refreshed cache is written back
 * and the index is rebuilt from it.
 *
 * Term counts are accumulated in a Map and converted to an object at the
 * end. Counting directly on a plain object would read inherited
 * Object.prototype members for terms such as "constructor" and produce a
 * garbage string instead of a number.
 *
 * @param {string} projectPath project root; the cache lives under CACHE_DIR inside it
 * @param {Array<{relativePath: string, absolutePath: string, content?: string}>} files
 * @returns {{index: object, stats: {totalFiles: number, updatedFiles: number, removedFiles: number, cachedFiles: number, cacheHit: boolean, buildTimeMs: number}}}
 */
function buildIndexCached(projectPath, files) {
  const startTime = Date.now();
  const cachePath = join6(projectPath, CACHE_DIR, CACHE_FILE);
  const existing = loadCache(cachePath);
  const cachedFiles = existing?.files ?? {};
  const newCachedFiles = {};
  let updatedFiles = 0;
  let cachedCount = 0;

  // Cache entries whose file is no longer part of the project.
  const currentPaths = new Set(files.map((f) => f.relativePath));
  const removedFiles = existing
    ? Object.keys(cachedFiles).filter((path) => !currentPaths.has(path)).length
    : 0;

  for (const file of files) {
    let currentMtime;
    try {
      currentMtime = statSync(file.absolutePath).mtimeMs;
    } catch {
      continue; // file vanished or is inaccessible: leave it out of the index
    }

    const cached = cachedFiles[file.relativePath];
    if (cached && cached.mtime === currentMtime) {
      newCachedFiles[file.relativePath] = cached;
      cachedCount++;
      continue;
    }

    let content = file.content;
    if (content === void 0) {
      try {
        content = readFileSync4(file.absolutePath, "utf-8");
      } catch {
        continue;
      }
    }

    const terms = tokenize(content);
    const counts = /* @__PURE__ */ new Map();
    for (const term of terms) {
      counts.set(term, (counts.get(term) ?? 0) + 1);
    }
    newCachedFiles[file.relativePath] = {
      mtime: currentMtime,
      terms: Object.fromEntries(counts),
      length: terms.length
    };
    updatedFiles++;
  }

  const index = rebuildIndex(newCachedFiles);
  saveCache(cachePath, newCachedFiles);
  const stats = {
    totalFiles: Object.keys(newCachedFiles).length,
    updatedFiles,
    removedFiles,
    cachedFiles: cachedCount,
    cacheHit: existing !== null,
    buildTimeMs: Date.now() - startTime
  };
  return { index, stats };
}
|
|
3578
|
+
/**
 * Reads the index cache from disk.
 *
 * @param {string} cachePath path of the cache JSON file
 * @returns {object|null} the parsed cache when the file exists, parses as
 *   JSON, carries the current CACHE_VERSION and has an object-valued `files`
 *   field; otherwise null (any read or parse error included)
 */
function loadCache(cachePath) {
  let snapshot = null;
  try {
    if (existsSync4(cachePath)) {
      const parsed = JSON.parse(readFileSync4(cachePath, "utf-8"));
      const versionMatches = parsed.version === CACHE_VERSION;
      const hasFiles = Boolean(parsed.files) && typeof parsed.files === "object";
      if (versionMatches && hasFiles) {
        snapshot = parsed;
      }
    }
  } catch {
    snapshot = null;
  }
  return snapshot;
}
|
|
3590
|
+
/**
 * Persists the per-file term cache as JSON, creating the parent directory
 * when needed. Failures are ignored on purpose: the cache is an optimisation
 * and must never break indexing.
 *
 * The parent directory is derived with the bundle's `path.dirname` binding
 * (`dirname4`) rather than by searching for "/". The old approach produced an
 * empty directory name for backslash-separated or separator-less paths, which
 * made the save fail silently every time.
 *
 * @param {string} cachePath destination file
 * @param {object} files map of relative path -> { mtime, terms, length }
 */
function saveCache(cachePath, files) {
  try {
    // With `recursive: true`, mkdir is a no-op for an existing directory.
    mkdirSync2(dirname4(cachePath), { recursive: true });
    const data = {
      version: CACHE_VERSION,
      builtAt: (/* @__PURE__ */ new Date()).toISOString(),
      files
    };
    writeFileSync2(cachePath, JSON.stringify(data));
  } catch {
    // Best-effort: an unwritable cache only costs a full rebuild next time.
  }
}
|
|
3605
|
+
/**
 * Rebuilds the in-memory term index from cached per-file term counts.
 *
 * @param {Object<string, {terms: Object<string, number>, length: number}>} cachedFiles
 * @returns {{documents: Map, idf: Map, docFreq: Map, avgDocLength: number, totalDocs: number}}
 *   `documents` maps path -> { terms: Map<term, count>, length }; `docFreq`
 *   counts the documents containing each term; `idf` holds
 *   ln((N - df + 0.5) / (df + 0.5) + 1) per term; `avgDocLength` is 1 when
 *   there are no documents
 */
function rebuildIndex(cachedFiles) {
  const documents = new Map();
  const docFreq = new Map();
  let totalLength = 0;

  for (const path of Object.keys(cachedFiles)) {
    const entry = cachedFiles[path];
    const terms = new Map(Object.entries(entry.terms));
    documents.set(path, { terms, length: entry.length });
    totalLength += entry.length;
    // Each distinct term in this document counts once toward document frequency.
    for (const term of terms.keys()) {
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    }
  }

  const totalDocs = documents.size;
  const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;

  const idf = new Map();
  docFreq.forEach((df, term) => {
    idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
  });

  return { documents, idf, docFreq, avgDocLength, totalDocs };
}
|
|
3628
|
+
|
|
3629
|
+
// src/engine/reranker.ts
|
|
3630
|
+
init_tfidf();
|
|
3631
|
+
// Relative weight of each reranker signal, as reported in rerank telemetry.
var WEIGHTS = {
  termCoverage: 0.35, // What fraction of query terms does the file match?
  termSpecificity: 0.25, // Are matched terms rare or common?
  bigramProximity: 0.15, // Do query terms appear near each other?
  dependencySignal: 0.1, // Is this file connected to a top match?
  pathRelevance: 0.15 // Does the file path match query terms?
};
|
|
3643
|
+
// Reported as gateConfig.absoluteFloor in rerank telemetry.
// NOTE(review): presumably the minimum rerank score a candidate must reach — confirm against the gating code.
var ABSOLUTE_FLOOR = 0.18;
|
|
3644
|
+
// Reported as gateConfig.elbowDropRatio in rerank telemetry.
// NOTE(review): presumably the relative score drop treated as the cut-off "elbow" — confirm against the gating code.
var ELBOW_DROP_RATIO = 0.35;
|
|
3645
|
+
// Reported as gateConfig.minTermCoverage in rerank telemetry.
// NOTE(review): presumably the minimum fraction of query terms a file must match — confirm against the gating code.
var MIN_TERM_COVERAGE = 0.3;
|
|
3646
|
+
/**
 * Re-scores retrieval candidates with five weighted signals (term coverage,
 * term specificity, bigram proximity, dependency signal, path relevance),
 * runs them through `applyQualityGate`, and reports telemetry.
 *
 * @param {object} input
 * @param {string} input.task - Free-text query; tokenized with `tokenize`.
 * @param {Array<{filePath: string, score: number}>} input.candidates - First-stage
 *   matches; `score` is carried through unchanged as `bm25Score`.
 * @param {{idf: Map<string, number>, documents: Map<string, {terms: Map<string, number>}>}} input.index
 * @param {Map<string, string>} input.fileContents - File path -> raw content.
 * @param {Map<string, string[]>} input.dependencies - File path -> paths it depends on.
 * @param {string[]} input.allFilePaths - Paths eligible for the path-relevance signal.
 * @returns {{files: Array, filtered: Array, qualityThreshold: number, telemetry: object}}
 *   Candidates that have no entry in `index.documents` are skipped and appear in
 *   neither `files` nor `filtered`.
 */
function rerank(input) {
  const startTime = Date.now();
  const { task, candidates, index, fileContents, dependencies, allFilePaths } = input;
  const signalNames = ["termCoverage", "termSpecificity", "bigramProximity", "dependencySignal", "pathRelevance"];

  const gateConfig = () => ({
    absoluteFloor: ABSOLUTE_FLOOR,
    elbowDropRatio: ELBOW_DROP_RATIO,
    minTermCoverage: MIN_TERM_COVERAGE
  });

  // Single definition of the weighted sum; used for the first pass and for the
  // second pass after the dependency signal is known.
  const weightedScore = (signals) => signals.termCoverage * WEIGHTS.termCoverage + signals.termSpecificity * WEIGHTS.termSpecificity + signals.bigramProximity * WEIGHTS.bigramProximity + signals.dependencySignal * WEIGHTS.dependencySignal + signals.pathRelevance * WEIGHTS.pathRelevance;

  // Result returned when there is nothing to rank (no candidates or no query terms).
  const emptyResult = () => {
    const signalStats = {};
    for (const name of signalNames) {
      signalStats[name] = { min: 0, max: 0, mean: 0, median: 0 };
    }
    return {
      files: [],
      filtered: [],
      qualityThreshold: 0,
      telemetry: {
        candidatesIn: candidates.length,
        candidatesOut: 0,
        candidatesFiltered: 0,
        durationMs: Date.now() - startTime,
        weights: { ...WEIGHTS },
        gateConfig: gateConfig(),
        signalStats,
        filterReasons: {},
        scoreDistribution: [0, 0, 0, 0, 0],
        queryTermCount: 0,
        relevanceConeSize: 0
      }
    };
  };

  if (candidates.length === 0) return emptyResult();
  const uniqueQueryTerms = [...new Set(tokenize(task))];
  if (uniqueQueryTerms.length === 0) return emptyResult();

  // Loop-invariant lookups, built once instead of once per candidate.
  const queryTermSet = new Set(uniqueQueryTerms);
  const queryTermIdfs = new Map();
  for (const term of uniqueQueryTerms) {
    queryTermIdfs.set(term, index.idf.get(term) ?? 0);
  }
  const maxIdf = Math.max(1, ...queryTermIdfs.values());
  // Only paths listed in allFilePaths earn path relevance; tokenizing lazily avoids
  // processing every project path when only the candidates are ever looked up.
  // NOTE(review): assumes tokenize() is a pure function - confirm.
  const knownPaths = new Set(allFilePaths);

  const scored = [];
  for (const candidate of candidates) {
    const doc = index.documents.get(candidate.filePath);
    if (!doc) continue;

    const matchedQueryTerms = uniqueQueryTerms.filter((term) => (doc.terms.get(term) ?? 0) > 0);
    const termCoverage = matchedQueryTerms.length / uniqueQueryTerms.length;

    // Specificity = mean IDF of the matched terms, normalised by the largest query IDF.
    let specificitySum = 0;
    let specificityMax = 0;
    for (const term of matchedQueryTerms) {
      specificitySum += queryTermIdfs.get(term) ?? 0;
      specificityMax += maxIdf;
    }
    const termSpecificity = specificityMax > 0 ? specificitySum / specificityMax : 0;

    const content = fileContents.get(candidate.filePath) ?? "";
    const bigramProximity = computeBigramProximity(content, uniqueQueryTerms);

    let pathHits = 0;
    if (knownPaths.has(candidate.filePath)) {
      const pathTerms = new Set(tokenize(candidate.filePath.replace(/[/\\.]/g, " ")));
      for (const pt of pathTerms) {
        if (queryTermSet.has(pt)) pathHits++;
      }
    }
    const pathRelevance = Math.min(1, pathHits / Math.max(1, uniqueQueryTerms.length) * 2);

    // dependencySignal is filled in below, once the top matches are known.
    const signals = { termCoverage, termSpecificity, bigramProximity, dependencySignal: 0, pathRelevance };
    scored.push({
      filePath: candidate.filePath,
      score: weightedScore(signals),
      bm25Score: candidate.score,
      signals
    });
  }

  // Relevance cone: the five best first-pass files plus their direct dependencies
  // and direct dependents.
  const topByScore = [...scored].sort((a, b) => b.score - a.score).slice(0, 5);
  const relevanceCone = new Set();
  for (const top of topByScore) {
    relevanceCone.add(top.filePath);
    for (const dep of dependencies.get(top.filePath) ?? []) relevanceCone.add(dep);
    for (const [from, tos] of dependencies) {
      if (tos.includes(top.filePath)) relevanceCone.add(from);
    }
  }
  for (const item of scored) {
    item.signals.dependencySignal = relevanceCone.has(item.filePath) ? 1 : 0;
    item.score = weightedScore(item.signals);
  }
  scored.sort((a, b) => b.score - a.score);

  const { passed, filtered, threshold } = applyQualityGate(scored);

  // Count filter reasons with the parenthesised per-file numbers stripped.
  const filterReasons = {};
  for (const f of filtered) {
    const reason = f.reason.replace(/\([^)]+\)/g, "").trim();
    filterReasons[reason] = (filterReasons[reason] ?? 0) + 1;
  }

  const allScores = scored.map((s) => s.score).sort((a, b) => a - b);
  const signalStats = {};
  for (const name of signalNames) {
    const vals = scored.map((s) => s.signals[name]).sort((a, b) => a - b);
    signalStats[name] = {
      min: vals[0] ?? 0,
      max: vals[vals.length - 1] ?? 0,
      mean: vals.length > 0 ? vals.reduce((a, b) => a + b, 0) / vals.length : 0,
      // Element at index floor(n / 2): the upper middle value for even n.
      median: vals.length > 0 ? vals[Math.floor(vals.length / 2)] : 0
    };
  }

  const telemetry = {
    candidatesIn: candidates.length,
    candidatesOut: passed.length,
    candidatesFiltered: filtered.length,
    durationMs: Date.now() - startTime,
    weights: { ...WEIGHTS },
    gateConfig: gateConfig(),
    signalStats,
    filterReasons,
    // Min, quartile positions and max of the final scores.
    scoreDistribution: [
      allScores[0] ?? 0,
      allScores[Math.floor(allScores.length * 0.25)] ?? 0,
      allScores[Math.floor(allScores.length * 0.5)] ?? 0,
      allScores[Math.floor(allScores.length * 0.75)] ?? 0,
      allScores[allScores.length - 1] ?? 0
    ],
    queryTermCount: uniqueQueryTerms.length,
    relevanceConeSize: relevanceCone.size
  };

  return {
    files: passed,
    filtered,
    qualityThreshold: threshold,
    telemetry
  };
}
|
|
3788
|
+
/**
 * Scores how close together the query terms occur in a file.
 *
 * For every pair of query terms that both occur in `content`, the smallest
 * token distance `d` between them contributes `max(0, 1 - d / 20)`; the result
 * is the mean contribution over those pairs.
 *
 * @param {string} content - Raw file content; tokenized with `tokenize`.
 * @param {string[]} queryTerms - Query terms to look for.
 * @returns {number} Value in [0, 1]; 0 when fewer than two terms are given, the
 *   content is empty, or no pair of terms co-occurs.
 */
function computeBigramProximity(content, queryTerms) {
  if (queryTerms.length < 2 || !content) return 0;

  // Set membership keeps the scan linear in the number of content tokens.
  const wanted = new Set(queryTerms);
  const termPositions = new Map();
  const contentTokens = tokenize(content);
  for (let i = 0; i < contentTokens.length; i++) {
    const token = contentTokens[i];
    if (!wanted.has(token)) continue;
    const positions = termPositions.get(token);
    if (positions) {
      positions.push(i);
    } else {
      termPositions.set(token, [i]);
    }
  }

  // Position lists are ascending (filled in scan order), so the closest pair is
  // found with a two-pointer merge rather than comparing every combination.
  const smallestGap = (posA, posB) => {
    let best = Infinity;
    let a = 0;
    let b = 0;
    while (a < posA.length && b < posB.length) {
      best = Math.min(best, Math.abs(posA[a] - posB[b]));
      if (posA[a] < posB[b]) {
        a++;
      } else {
        b++;
      }
    }
    return best;
  };

  let totalScore = 0;
  let pairCount = 0;
  for (let i = 0; i < queryTerms.length; i++) {
    const posA = termPositions.get(queryTerms[i]);
    if (!posA) continue;
    for (let j = i + 1; j < queryTerms.length; j++) {
      const posB = termPositions.get(queryTerms[j]);
      if (!posB) continue;
      const minDist = smallestGap(posA, posB);
      if (minDist < Infinity) {
        totalScore += Math.max(0, 1 - minDist / 20);
        pairCount++;
      }
    }
  }
  return pairCount > 0 ? totalScore / pairCount : 0;
}
|
|
3821
|
+
/**
 * Splits ranked candidates into those that pass the quality gate and those
 * that are rejected, recording a reason for each rejection.
 *
 * Rejection rules, checked in this order:
 *   1. score below ABSOLUTE_FLOOR;
 *   2. term coverage below MIN_TERM_COVERAGE;
 *   3. ranked at or after the elbow AND score below half of the top score.
 *
 * The elbow is the rank with the largest relative drop from its predecessor,
 * provided that drop is at least ELBOW_DROP_RATIO; it is only searched for when
 * there are three or more candidates.
 *
 * @param {Array<{filePath: string, score: number, signals: {termCoverage: number}}>} scored
 *   Candidates in the order they should be ranked (the caller sorts by score descending).
 * @returns {{passed: Array, filtered: Array<{filePath: string, score: number, reason: string}>, threshold: number}}
 *   `threshold` is the larger of ABSOLUTE_FLOOR and the score at the elbow.
 */
function applyQualityGate(scored) {
  const passed = [];
  const filtered = [];
  const total = scored.length;
  if (total === 0) {
    return { passed, filtered, threshold: 0 };
  }

  // Locate the elbow; `total` means "no elbow found".
  let elbowIndex = total;
  if (total >= 3) {
    let steepest = 0;
    scored.forEach((entry, rank) => {
      if (rank === 0) return;
      const previous = scored[rank - 1].score;
      if (!(previous > 0)) return;
      const relativeDrop = (previous - entry.score) / previous;
      if (relativeDrop > steepest && relativeDrop >= ELBOW_DROP_RATIO) {
        steepest = relativeDrop;
        elbowIndex = rank;
      }
    });
  }

  const elbowScore = elbowIndex < total ? scored[elbowIndex].score : 0;
  const threshold = Math.max(ABSOLUTE_FLOOR, elbowScore);

  // Returns the rejection reason for a candidate, or null when it passes.
  const rejectionReason = (item, rank) => {
    if (item.score < ABSOLUTE_FLOOR) {
      return `Below absolute floor (${item.score.toFixed(3)} < ${ABSOLUTE_FLOOR})`;
    }
    if (item.signals.termCoverage < MIN_TERM_COVERAGE) {
      return `Low term coverage (${(item.signals.termCoverage * 100).toFixed(0)}% < ${MIN_TERM_COVERAGE * 100}%)`;
    }
    if (rank >= elbowIndex && item.score < scored[0].score * 0.5) {
      return `Below elbow cutoff (rank ${rank + 1}, score ${item.score.toFixed(3)})`;
    }
    return null;
  };

  scored.forEach((item, rank) => {
    const reason = rejectionReason(item, rank);
    if (reason === null) {
      passed.push(item);
    } else {
      filtered.push({ filePath: item.filePath, score: item.score, reason });
    }
  });

  return { passed, filtered, threshold };
}
|
|
3862
|
+
|
|
3863
|
+
// src/engine/call-graph.ts
|
|
3864
|
+
/**
 * Maps a file path to the language family used by the call-graph extractors.
 *
 * The extension is taken from the final path segment only, so a file without
 * an extension (for example a root-level binary literally named `go`) is not
 * mistaken for source code.
 *
 * @param {string} filePath - Path using `/` or `\` separators.
 * @returns {"java" | "ts" | "python" | "go" | null} `"ts"` covers
 *   ts/tsx/js/jsx/mts/mjs; `null` means the file is not analysed.
 */
function getLanguage(filePath) {
  const lastSeparator = Math.max(filePath.lastIndexOf("/"), filePath.lastIndexOf("\\"));
  const baseName = filePath.slice(lastSeparator + 1);
  const dot = baseName.lastIndexOf(".");
  if (dot === -1) return null;
  const ext = baseName.slice(dot + 1).toLowerCase();
  switch (ext) {
    case "java":
      return "java";
    case "ts":
    case "tsx":
    case "js":
    case "jsx":
    case "mts":
    case "mjs":
      return "ts";
    case "py":
      return "python";
    case "go":
      return "go";
    default:
      return null;
  }
}
|
|
3872
|
+
/**
 * Heuristically extracts method definitions from Java source using regular
 * expressions (no parsing).
 *
 * Every definition is attributed to the first `public`/`abstract` class or
 * interface found in the file. Matches are discarded when the captured name is
 * the class name (constructor), a control keyword, or when the text preceding
 * the name on the same line contains `return`, `new`, `throw` or `else`. Those
 * last are statements such as `return helper(` rather than declarations.
 *
 * @param {string} content - Java source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className: (string|undefined), filePath: string, isExported: boolean}>}
 *   `isExported` is true when the matched text contains the `public` modifier.
 */
function extractJavaDefinitions(content, filePath) {
  const defs = [];
  const className = content.match(/(?:public|abstract)\s+(?:class|interface)\s+(\w+)/)?.[1];
  const controlKeywords = new Set(["if", "for", "while", "switch", "catch", "return"]);
  const statementPrefix = /\b(?:return|new|throw|else)\b/;
  const methodRegex = /(?:public|protected|private|static|\s)+\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm;
  let match;
  while ((match = methodRegex.exec(content)) !== null) {
    const name = match[1];
    if (name === className || controlKeywords.has(name)) continue;
    // Look only at the line that holds the name: the match can start on an
    // earlier line because the type portion of the pattern accepts whitespace.
    const beforeName = match[0].slice(0, match[0].lastIndexOf(name));
    const sameLinePrefix = beforeName.slice(beforeName.lastIndexOf("\n") + 1);
    if (statementPrefix.test(sameLinePrefix)) continue;
    defs.push({
      name,
      className,
      filePath,
      isExported: /public\s/.test(match[0])
    });
  }
  return defs;
}
|
|
3892
|
+
/**
 * Heuristically extracts function, class-method and exported arrow-function
 * definitions from TypeScript/JavaScript source using regular expressions.
 *
 * Results are grouped by kind: all `function` declarations first, then the
 * method-like matches found after each `class` header, then
 * `export const name = (` arrows.
 *
 * NOTE(review): the method scan does not stop at the closing brace of the
 * class; it runs to the end of the file and only breaks once a recorded match
 * starts more than 10,000 characters after the class header.
 *
 * @param {string} content - Source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className?: string, filePath: string, isExported: boolean}>}
 */
function extractTsDefinitions(content, filePath) {
  const defs = [];
  // Names that look like `name(...) {` but are statements, not methods.
  // `switch` is included so this agrees with the Java extractor's skip list.
  const notMethods = new Set(["constructor", "if", "for", "while", "switch", "catch", "return", "function", "class"]);
  let match;

  const funcRegex = /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(/gm;
  while ((match = funcRegex.exec(content)) !== null) {
    defs.push({ name: match[1], filePath, isExported: match[0].startsWith("export") });
  }

  const classRegex = /(?:export\s+)?class\s+(\w+)/gm;
  while ((match = classRegex.exec(content)) !== null) {
    const className = match[1];
    const isExportedClass = match[0].startsWith("export");
    const classStart = match.index + match[0].length;
    const methodInClassRegex = /(?:async\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*\w[\w<>\[\]|,\s]*\s*)?{/gm;
    // Start scanning right after the class header.
    methodInClassRegex.lastIndex = classStart;
    let methodMatch;
    while ((methodMatch = methodInClassRegex.exec(content)) !== null) {
      const name = methodMatch[1];
      if (notMethods.has(name)) continue;
      defs.push({ name, className, filePath, isExported: isExportedClass });
      if (methodMatch.index - classStart > 1e4) break;
    }
  }

  const arrowRegex = /export\s+const\s+(\w+)\s*=\s*(?:async\s+)?\(/gm;
  while ((match = arrowRegex.exec(content)) !== null) {
    defs.push({ name: match[1], filePath, isExported: true });
  }
  return defs;
}
|
|
3921
|
+
/**
 * Heuristically extracts function and method definitions from Python source
 * using regular expressions.
 *
 * - Names starting with `_` are skipped, except `__init__`, which is reported
 *   under the name of its class.
 * - An indented `def` is attributed to the closest preceding top-level `class`;
 *   a `def` at column 0 is a module-level function.
 * - The indent is measured on the `def` line itself (spaces/tabs only), so blank
 *   lines before a top-level `def` no longer make it look like a method.
 * - `async def` is recognised as well.
 *
 * @param {string} content - Python source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className: (string|undefined), filePath: string, isExported: boolean}>}
 */
function extractPythonDefinitions(content, filePath) {
  const defs = [];

  // Collect top-level class headers once instead of rescanning the prefix of the
  // file for every function.
  const classHeaders = [];
  const classRegex = /^class\s+(\w+)/gm;
  let match;
  while ((match = classRegex.exec(content)) !== null) {
    classHeaders.push({ index: match.index, name: match[1] });
  }

  let cursor = 0;
  let enclosingClass;
  const funcRegex = /^([ \t]*)(?:async[ \t]+)?def\s+(\w+)\s*\(/gm;
  while ((match = funcRegex.exec(content)) !== null) {
    const indent = match[1];
    const name = match[2];
    if (name.startsWith("_") && name !== "__init__") continue;
    // Matches arrive in ascending position, so the cursor only ever moves forward.
    while (cursor < classHeaders.length && classHeaders[cursor].index < match.index) {
      enclosingClass = classHeaders[cursor].name;
      cursor++;
    }
    const isMethod = indent.length > 0 && enclosingClass !== undefined;
    const className = isMethod ? enclosingClass : void 0;
    defs.push({
      name: name === "__init__" ? className ?? name : name,
      className,
      filePath,
      isExported: !name.startsWith("_")
    });
  }
  return defs;
}
|
|
3944
|
+
/**
 * Heuristically extracts top-level functions and receiver methods from Go
 * source using regular expressions. Functions are listed first, then methods.
 *
 * A name counts as exported only when it starts with an uppercase letter, so
 * names beginning with `_` are reported as unexported.
 *
 * @param {string} content - Go source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className?: string, filePath: string, isExported: boolean}>}
 *   For methods, `className` is the receiver type without a leading `*`.
 */
function extractGoDefinitions(content, filePath) {
  const defs = [];
  const isExportedName = (name) => /^\p{Lu}/u.test(name);

  for (const [, name] of content.matchAll(/^func\s+(\w+)\s*\(/gm)) {
    defs.push({ name, filePath, isExported: isExportedName(name) });
  }
  for (const [, receiverType, name] of content.matchAll(/^func\s+\(\s*\w+\s+\*?(\w+)\s*\)\s+(\w+)\s*\(/gm)) {
    defs.push({ name, className: receiverType, filePath, isExported: isExportedName(name) });
  }
  return defs;
}
|
|
3967
|
+
/**
 * Collects `receiver.method(` call sites from Java source.
 *
 * Two regex passes run over the whole text and their results are concatenated:
 * receivers starting with a lowercase letter first, then receivers starting
 * with an uppercase letter. A call is dropped when its receiver or its method
 * name is on the matching ignore list.
 *
 * @param {string} content - Java source text.
 * @param {string} filePath - Recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 */
function extractJavaCalls(content, filePath) {
  const lowerReceiverSkips = new Set([
    "System", "LOG", "LOGGER", "logger", "log", "this", "super", "String", "Integer", "Long", "Boolean",
    "Double", "Float", "Math", "Arrays", "Collections", "Objects", "Optional", "List", "Map", "Set", "Stream"
  ]);
  const lowerMethodSkips = new Set([
    "toString", "hashCode", "equals", "getClass", "wait", "notify", "length", "size", "isEmpty", "get", "set",
    "add", "remove", "contains", "put", "stream", "map", "filter", "collect", "orElse", "orElseGet",
    "orElseThrow", "isPresent", "ifPresent", "of", "valueOf", "format", "println", "append", "build",
    "builder", "thenReturn", "when", "verify", "mock", "given"
  ]);
  const upperReceiverSkips = new Set([
    "System", "Math", "Arrays", "Collections", "Objects", "Optional", "String", "Integer", "Long", "Boolean",
    "Double", "Float", "LoggerFactory", "Logger", "Assert", "Mockito", "Assertions", "ResponseEntity", "HttpStatus"
  ]);
  const upperMethodSkips = new Set([
    "of", "valueOf", "format", "parse", "toString", "getLogger", "builder", "newBuilder", "create", "getInstance"
  ]);

  // One pass: every match of `pattern` whose receiver and method are not ignored.
  const scan = (pattern, receiverSkips, methodSkips) => {
    const found = [];
    for (const [, receiverName, methodName] of content.matchAll(pattern)) {
      if (receiverSkips.has(receiverName) || methodSkips.has(methodName)) continue;
      found.push({ callerFile: filePath, receiverName, methodName });
    }
    return found;
  };

  return [
    ...scan(/(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm, lowerReceiverSkips, lowerMethodSkips),
    ...scan(/(?<!\w)([A-Z]\w+)\.([a-z]\w+)\s*\(/gm, upperReceiverSkips, upperMethodSkips)
  ];
}
|
|
4079
|
+
/**
 * Collects `receiver.method(` call sites from TypeScript/JavaScript source.
 *
 * Only receivers and methods that start with a lowercase letter and are at
 * least two characters long can match the pattern. A call is dropped when its
 * receiver or its method name is on the ignore list.
 *
 * @param {string} content - Source text.
 * @param {string} filePath - Recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 */
function extractTsCalls(content, filePath) {
  const receiverSkips = new Set([
    "console", "process", "Math", "JSON", "Promise", "Object", "Array", "String", "Number", "Date", "Error",
    "RegExp", "Buffer", "this", "super", "window", "document", "expect", "describe", "it", "test", "vi", "jest"
  ]);
  const methodSkips = new Set([
    "toString", "valueOf", "hasOwnProperty", "length", "push", "pop", "shift", "unshift", "slice", "splice",
    "map", "filter", "reduce", "forEach", "find", "findIndex", "some", "every", "includes", "indexOf", "join",
    "split", "replace", "match", "trim", "toLowerCase", "toUpperCase", "startsWith", "endsWith", "keys",
    "values", "entries", "has", "get", "set", "delete", "add", "size", "then", "catch", "finally", "resolve",
    "reject", "stringify", "parse", "log", "warn", "error", "info", "debug"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm)) {
    const ignored = receiverSkips.has(receiverName) || methodSkips.has(methodName);
    if (!ignored) {
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  }
  return calls;
}
|
|
4167
|
+
/**
 * Collects `receiver.method(` call sites from Python source.
 *
 * The pattern allows an optional leading `self.`, so `self.repo.save(` is
 * recorded with receiver `repo`. A call is dropped when its receiver or its
 * method name is on the ignore list.
 *
 * @param {string} content - Python source text.
 * @param {string} filePath - Recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 */
function extractPythonCalls(content, filePath) {
  const receiverSkips = new Set([
    "self", "cls", "os", "sys", "json", "logging", "print", "str", "int", "float", "list", "dict", "set",
    "tuple", "super", "type", "isinstance", "len", "range", "enumerate"
  ]);
  const methodSkips = new Set([
    "append", "extend", "insert", "remove", "pop", "clear", "get", "keys", "values", "items", "update",
    "format", "join", "split", "strip", "replace", "lower", "upper", "startswith", "endswith", "encode", "decode"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)(?:self\.)?([a-z_]\w+)\.([a-z_]\w+)\s*\(/gm)) {
    const ignored = receiverSkips.has(receiverName) || methodSkips.has(methodName);
    if (!ignored) {
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  }
  return calls;
}
|
|
4224
|
+
/**
 * Collects `receiver.Method(` call sites from Go source.
 *
 * The pattern only matches a lowercase-initial receiver followed by an
 * uppercase-initial method name. Calls whose receiver is on the ignore list
 * are dropped.
 *
 * @param {string} content - Go source text.
 * @param {string} filePath - Recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 */
function extractGoCalls(content, filePath) {
  const receiverSkips = new Set([
    "fmt", "log", "os", "io", "strings", "strconv", "bytes", "context", "errors", "sync", "time", "math",
    "sort", "http", "json", "testing", "reflect"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)([a-z]\w+)\.([A-Z]\w+)\s*\(/gm)) {
    if (!receiverSkips.has(receiverName)) {
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  }
  return calls;
}
|
|
4254
|
+
/**
 * Builds a map from identifiers used in a Java file to the project file that
 * defines their type.
 *
 * For every `import a.b.Name;` whose last segment equals the base name of a
 * project file (with `.java` removed), both `Name` and `name` (first letter
 * lowercased) are mapped to that file. Afterwards every `private`/`protected`
 * field whose declared type is already mapped gets its field name mapped to
 * the same file.
 *
 * @param {string} content - Java source text of the importing file.
 * @param {string[]} allFiles - Project file paths using `/` separators.
 * @returns {Map<string, string>} Identifier -> file path.
 */
function buildJavaImportMap(content, allFiles) {
  const importMap = new Map();

  // Base-name index, built on first use so files without imports pay nothing.
  // The first file with a given base name wins, matching a front-to-back search.
  let fileByBaseName;
  const resolveClassFile = (className) => {
    if (!fileByBaseName) {
      fileByBaseName = new Map();
      for (const f of allFiles) {
        const baseName = f.split("/").pop()?.replace(".java", "") ?? "";
        if (!fileByBaseName.has(baseName)) fileByBaseName.set(baseName, f);
      }
    }
    return fileByBaseName.get(className);
  };

  const importRegex = /^import\s+(?:static\s+)?[\w.]+\.(\w+)\s*;/gm;
  let match;
  while ((match = importRegex.exec(content)) !== null) {
    const className = match[1];
    const targetFile = resolveClassFile(className);
    if (targetFile) {
      importMap.set(className, targetFile);
      // Conventional variable name for an instance of the class.
      const varName = className.charAt(0).toLowerCase() + className.slice(1);
      importMap.set(varName, targetFile);
    }
  }

  const fieldRegex = /(?:private|protected)\s+(?:final\s+)?(\w+)\s+(\w+)\s*[;=]/gm;
  while ((match = fieldRegex.exec(content)) !== null) {
    const typeName = match[1];
    const fieldName = match[2];
    const existing = importMap.get(typeName);
    if (existing) {
      importMap.set(fieldName, existing);
    }
  }
  return importMap;
}
|
|
4281
|
+
/**
 * Builds a map from imported identifiers in a TypeScript/JavaScript file to
 * the project file they are imported from.
 *
 * Handles `import { a, b as c } from "x"` and `import Default from "x"`.
 * Specifiers are resolved by suffix matching against `allFiles`:
 *   - every leading `./` and `../` segment is removed, not just the first;
 *   - a source extension on the specifier (`./foo.js`) is ignored;
 *   - a directory specifier matches its `index` file with any source extension;
 *   - the suffix must start at a path-segment boundary, so `./auth` does not
 *     match `oauth.ts`.
 * The importing file's own location is not taken into account; the first
 * matching entry of `allFiles` wins.
 *
 * @param {string} content - Source text of the importing file.
 * @param {string[]} allFiles - Project file paths.
 * @returns {Map<string, string>} Local identifier -> file path.
 */
function buildTsImportMap(content, allFiles) {
  const importMap = new Map();
  const sourceExt = /\.(ts|tsx|js|jsx|mts|mjs)$/;

  // True when `path` ends with `suffix` and the suffix begins a path segment.
  const endsAtBoundary = (path, suffix) => {
    if (!path.endsWith(suffix)) return false;
    const cut = path.length - suffix.length;
    return cut === 0 || path[cut - 1] === "/" || path[cut - 1] === "\\";
  };

  const importRegex = /import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]/gm;
  for (const match of content.matchAll(importRegex)) {
    const [, namedImports, defaultImport, modulePath] = match;
    const target = modulePath.replace(/^(?:\.\.?\/)+/, "").replace(/\/+$/, "").replace(sourceExt, "");
    // "./", "../", "." and ".." carry no file name to match against.
    if (target === "" || target === "." || target === "..") continue;

    const indexTarget = `${target}/index`;
    const targetFile = allFiles.find((f) => {
      const stripped = f.replace(sourceExt, "");
      return endsAtBoundary(stripped, target) || endsAtBoundary(stripped, indexTarget);
    });
    if (!targetFile) continue;

    if (namedImports) {
      for (const part of namedImports.split(",")) {
        // Keep the local alias for `x as y`; drop an inline `type` modifier.
        const localName = part.split(" as ").pop().trim().replace(/^type\s+/, "");
        if (localName) importMap.set(localName, targetFile);
      }
    }
    if (defaultImport) {
      importMap.set(defaultImport, targetFile);
    }
  }
  return importMap;
}
|
|
4307
|
+
/**
 * Builds a map from names imported with `from module import ...` in a Python
 * file to the project file that provides the module.
 *
 * - Leading dots of relative imports are dropped and the remaining dotted path
 *   is matched as `<path>.py` or `<path>/__init__.py` at the end of a project
 *   path, starting at a path-segment boundary (`utils` does not match
 *   `myutils.py`). The first matching entry of `allFiles` wins.
 * - Parenthesised (multi-line) import lists and trailing `#` comments are handled.
 * - Each imported name (or its `as` alias) is mapped, plus a snake_case variant
 *   when that differs from the name.
 * - `from . import x` (no module path) and non-identifier names such as `*`
 *   produce no entries.
 *
 * @param {string} content - Python source text of the importing file.
 * @param {string[]} allFiles - Project file paths.
 * @returns {Map<string, string>} Imported name -> file path.
 */
function buildPythonImportMap(content, allFiles) {
  const importMap = new Map();
  const identifier = /^[\p{L}_][\p{L}\p{N}_]*$/u;

  // True when `path` ends with `suffix` and the suffix begins a path segment.
  const endsAtBoundary = (path, suffix) => {
    if (!path.endsWith(suffix)) return false;
    const cut = path.length - suffix.length;
    return cut === 0 || path[cut - 1] === "/" || path[cut - 1] === "\\";
  };

  const fromRegex = /^from\s+([\w.]+)\s+import\s+(\([^)]*\)|[^\r\n]+)/gm;
  for (const match of content.matchAll(fromRegex)) {
    const modulePath = match[1].replace(/^\.+/, "").replace(/\./g, "/");
    if (!modulePath) continue;

    const moduleFile = `${modulePath}.py`;
    const packageInit = `${modulePath}/__init__.py`;
    const targetFile = allFiles.find((f) => endsAtBoundary(f, moduleFile) || endsAtBoundary(f, packageInit));
    if (!targetFile) continue;

    const names = match[2]
      .replace(/#[^\r\n]*/g, "") // strip comments
      .replace(/[()]/g, "") // strip the grouping parentheses
      .split(",")
      .map((part) => part.trim().split(/\s+as\s+/).pop().trim())
      .filter((name) => identifier.test(name));

    for (const name of names) {
      importMap.set(name, targetFile);
      // Likely variable name for an instance, e.g. UserService -> user_service.
      const snakeName = name.replace(/([A-Z])/g, "_$1").toLowerCase().replace(/^_/, "");
      if (snakeName !== name) importMap.set(snakeName, targetFile);
    }
  }
  return importMap;
}
|
|
4325
|
+
/**
 * Builds a file-level call graph for a set of source files.
 *
 * Pass 1 extracts definitions and call sites per file with the
 * language-specific extractors. Pass 2 resolves every call site to a target
 * file, trying in order:
 *   1. the receiver name in the file's import map;
 *   2. a definition registered as `<receiver>.<method>`;
 *   3. the receiver name with its first letter uppercased in the import map;
 *   4. the method name, when exactly one definition has that name and it lives
 *      in another file.
 * Calls that resolve to the calling file itself are ignored and each
 * (from, to) pair yields at most one edge.
 *
 * @param {Array<{relativePath: string, content: string}>} files
 * @returns {{definitions: Array, calls: Array, edges: Array<{from: string, to: string, type: "call"}>}}
 */
function buildCallGraph(files) {
  const allPaths = files.map((f) => f.relativePath);
  // Language per file, computed once and reused by both passes.
  const languages = allPaths.map((p) => getLanguage(p));
  const allDefinitions = [];
  const allCalls = [];
  // Calls grouped by calling file so pass 2 does not rescan every call per file.
  const callsByFile = new Map();

  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    const lang = languages[i];
    if (!lang) continue;
    let defs;
    let calls;
    switch (lang) {
      case "java":
        defs = extractJavaDefinitions(file.content, file.relativePath);
        calls = extractJavaCalls(file.content, file.relativePath);
        break;
      case "ts":
        defs = extractTsDefinitions(file.content, file.relativePath);
        calls = extractTsCalls(file.content, file.relativePath);
        break;
      case "python":
        defs = extractPythonDefinitions(file.content, file.relativePath);
        calls = extractPythonCalls(file.content, file.relativePath);
        break;
      case "go":
        defs = extractGoDefinitions(file.content, file.relativePath);
        calls = extractGoCalls(file.content, file.relativePath);
        break;
      default:
        continue;
    }
    // Plain loops instead of push(...array): spreading a very large array into
    // call arguments can exceed the engine's argument limit.
    for (const def of defs) allDefinitions.push(def);
    for (const call of calls) {
      allCalls.push(call);
      const bucket = callsByFile.get(call.callerFile);
      if (bucket) {
        bucket.push(call);
      } else {
        callsByFile.set(call.callerFile, [call]);
      }
    }
  }

  const defByMethod = new Map();
  const defByQualified = new Map();
  for (const def of allDefinitions) {
    const sameName = defByMethod.get(def.name);
    if (sameName) {
      sameName.push(def);
    } else {
      defByMethod.set(def.name, [def]);
    }
    // For duplicate qualified names the last definition wins.
    if (def.className) {
      defByQualified.set(`${def.className}.${def.name}`, def);
    }
  }

  const edges = [];
  const edgeSet = new Set();
  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    const lang = languages[i];
    if (!lang) continue;
    let importMap;
    switch (lang) {
      case "java":
        importMap = buildJavaImportMap(file.content, allPaths);
        break;
      case "ts":
        importMap = buildTsImportMap(file.content, allPaths);
        break;
      case "python":
        importMap = buildPythonImportMap(file.content, allPaths);
        break;
      default:
        // No import-map builder is called for the remaining language(s).
        importMap = new Map();
    }

    for (const call of callsByFile.get(file.relativePath) ?? []) {
      let targetFile = importMap.get(call.receiverName);
      if (!targetFile) {
        const qualDef = defByQualified.get(`${call.receiverName}.${call.methodName}`);
        if (qualDef) targetFile = qualDef.filePath;
      }
      if (!targetFile) {
        const capitalized = call.receiverName.charAt(0).toUpperCase() + call.receiverName.slice(1);
        targetFile = importMap.get(capitalized);
      }
      if (!targetFile) {
        const candidates = defByMethod.get(call.methodName);
        if (candidates && candidates.length === 1 && candidates[0].filePath !== file.relativePath) {
          targetFile = candidates[0].filePath;
        }
      }
      if (!targetFile || targetFile === file.relativePath) continue;

      const key = `${file.relativePath}\u2192${targetFile}`;
      if (!edgeSet.has(key)) {
        edgeSet.add(key);
        edges.push({ from: file.relativePath, to: targetFile, type: "call" });
      }
    }
  }
  return { definitions: allDefinitions, calls: allCalls, edges };
}
|
|
4415
|
+
/**
 * Raises the score of files that are connected by call edges to the
 * highest-ranked matches.
 *
 * For each of the first `topK` entries of `matches` (using their original
 * scores), with `boost = score * boostFactor`:
 *   - up to 5 files it calls receive `boost`;
 *   - up to 5 files that call it receive `boost * 0.7`.
 * Files not already among the matches are added with the boost as their score.
 * Each boosted entry gets a marker appended to `matchedTerms` (at most once
 * per marker).
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Expected in rank order; the input objects are copied, not modified.
 * @param {Array<{from: string, to: string, type: string}>} callEdges - Only edges
 *   with `type === "call"` are used.
 * @param {number} [topK=10] - How many leading matches act as boost sources.
 * @param {number} [boostFactor=0.3] - Fraction of a source's score passed on.
 * @returns {Array} New array sorted by score descending, or `matches` itself
 *   when either input is empty.
 */
function boostByCallGraph(matches, callEdges, topK = 10, boostFactor = 0.3) {
  if (matches.length === 0 || callEdges.length === 0) return matches;

  const maxBoostPerParent = 5;
  const calleeTag = "[call-graph:called-by-match]";
  const callerTag = "[call-graph:calls-match]";

  // Working copies keyed by path, so the caller's objects stay untouched.
  const boosted = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  // Adjacency lists in both directions, restricted to call edges.
  const callsTo = new Map();
  const calledBy = new Map();
  const link = (adjacency, key, value) => {
    const list = adjacency.get(key);
    if (list) {
      list.push(value);
    } else {
      adjacency.set(key, [value]);
    }
  };
  callEdges
    .filter((edge) => edge.type === "call")
    .forEach((edge) => {
      link(callsTo, edge.from, edge.to);
      link(calledBy, edge.to, edge.from);
    });

  // Adds `amount` to a file's score, creating the entry when it is new.
  const applyBoost = (filePath, amount, tag) => {
    const entry = boosted.get(filePath);
    if (!entry) {
      boosted.set(filePath, { filePath, score: amount, matchedTerms: [tag] });
      return;
    }
    entry.score += amount;
    if (!entry.matchedTerms.includes(tag)) {
      entry.matchedTerms.push(tag);
    }
  };

  for (const parent of matches.slice(0, topK)) {
    const boost = parent.score * boostFactor;
    const callees = (callsTo.get(parent.filePath) ?? []).slice(0, maxBoostPerParent);
    for (const target of callees) {
      applyBoost(target, boost, calleeTag);
    }
    const callers = (calledBy.get(parent.filePath) ?? []).slice(0, maxBoostPerParent);
    for (const caller of callers) {
      applyBoost(caller, boost * 0.7, callerTag);
    }
  }

  const ranked = Array.from(boosted.values());
  ranked.sort((a, b) => b.score - a.score);
  return ranked;
}
|
|
4472
|
+
|
|
4473
|
+
// src/engine/git-relevance.ts
|
|
4474
|
+
import { execSync } from "child_process";
|
|
4475
|
+
/**
 * Build a file co-change matrix from recent git history.
 *
 * Runs `git log` in `projectPath`, groups the changed file names per commit,
 * and scores each pair of files that changed together with Jaccard similarity
 * (coCommits / (commitsA + commitsB - coCommits)).
 *
 * @param {string} projectPath - Directory used as the working directory for git.
 * @param {number} [maxCommits=500] - Value passed to `git log -n`.
 * @param {number} [minCoChanges=2] - Pairs seen together fewer times are dropped.
 * @returns {{entries: Map<string, Array<{fileA: string, fileB: string, coCommits: number, similarity: number}>>,
 *            fileCommitCounts: Map<string, number>, totalCommits: number}}
 *   `entries` maps each file to its partners, sorted by descending similarity.
 *   An empty matrix is returned when git fails or yields no commits.
 */
function buildCoChangeMatrix(projectPath, maxCommits = 500, minCoChanges = 2) {
  const emptyMatrix = {
    entries: /* @__PURE__ */ new Map(),
    fileCommitCounts: /* @__PURE__ */ new Map(),
    totalCommits: 0
  };
  // The limit is interpolated into a shell command line, so it must be a plain
  // integer: anything else could inject shell syntax or make git reject `-n`.
  const commitLimit = Math.trunc(Number(maxCommits));
  if (!Number.isFinite(commitLimit)) return emptyMatrix;
  let gitOutput;
  try {
    gitOutput = execSync(
      `git log --no-merges --diff-filter=ACMR --name-only --format="---COMMIT---" -n ${commitLimit}`,
      { cwd: projectPath, encoding: "utf-8", maxBuffer: 10 * 1024 * 1024, timeout: 15e3 }
    );
  } catch {
    // Best effort: any git failure simply yields no co-change signal.
    return emptyMatrix;
  }
  // Split the log into one file list per commit; the marker line starts a new commit.
  const commits = [];
  let currentFiles = [];
  for (const line of gitOutput.split("\n")) {
    const trimmed = line.trim();
    if (trimmed === "---COMMIT---") {
      if (currentFiles.length > 0) {
        commits.push(currentFiles);
      }
      currentFiles = [];
    } else if (trimmed.length > 0) {
      currentFiles.push(trimmed);
    }
  }
  if (currentFiles.length > 0) {
    commits.push(currentFiles);
  }
  if (commits.length === 0) return emptyMatrix;
  const fileCommitCounts = /* @__PURE__ */ new Map();
  const coChangeCounts = /* @__PURE__ */ new Map();
  for (const files of commits) {
    const unique = [...new Set(files)];
    for (const file of unique) {
      fileCommitCounts.set(file, (fileCommitCounts.get(file) ?? 0) + 1);
    }
    // Only the first 20 files of a commit are paired, which bounds the
    // quadratic pair generation for very large commits.
    const capped = unique.slice(0, 20);
    for (let i = 0; i < capped.length; i++) {
      for (let j = i + 1; j < capped.length; j++) {
        // Order the pair so (a, b) and (b, a) share one key.
        const [a, b] = capped[i] < capped[j] ? [capped[i], capped[j]] : [capped[j], capped[i]];
        const key = `${a}\0${b}`;
        coChangeCounts.set(key, (coChangeCounts.get(key) ?? 0) + 1);
      }
    }
  }
  const entries = /* @__PURE__ */ new Map();
  for (const [key, coCommits] of coChangeCounts) {
    if (coCommits < minCoChanges) continue;
    const [fileA, fileB] = key.split("\0");
    const commitsA = fileCommitCounts.get(fileA) ?? 0;
    const commitsB = fileCommitCounts.get(fileB) ?? 0;
    const union = commitsA + commitsB - coCommits;
    const similarity = union > 0 ? coCommits / union : 0;
    const entry = { fileA, fileB, coCommits, similarity };
    const listA = entries.get(fileA) ?? [];
    listA.push(entry);
    entries.set(fileA, listA);
    // Mirror the entry so a lookup by either file sees the other as `fileB`.
    const listB = entries.get(fileB) ?? [];
    listB.push({ ...entry, fileA: fileB, fileB: fileA });
    entries.set(fileB, listB);
  }
  for (const [, list] of entries) {
    list.sort((a, b) => b.similarity - a.similarity);
  }
  return { entries, fileCommitCounts, totalCommits: commits.length };
}
|
|
4544
|
+
/**
 * Raise the score of files that historically change together with the
 * strongest matches.
 *
 * For each of the first `topK` matches, up to five co-change partners (taken
 * in list order, stopping at the first partner below `minSimilarity`) receive
 * `parent.score * boostFactor * similarity`. Partners not yet in the result
 * are added with that boost as their score.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 * @param {{entries: Map<string, Array<{fileB: string, similarity: number}>>}} coChangeMatrix
 * @param {number} [topK=10]
 * @param {number} [boostFactor=0.25]
 * @param {number} [minSimilarity=0.15]
 * @returns {Array} `matches` itself when there is nothing to do, otherwise a
 *   new array of copied entries sorted by descending score.
 */
function boostByGitCoChange(matches, coChangeMatrix, topK = 10, boostFactor = 0.25, minSimilarity = 0.15) {
  if (matches.length === 0 || coChangeMatrix.entries.size === 0) return matches;
  const tag = "[git-cochange]";
  const partnersPerParent = 5;
  // Work on copies so the caller's match objects are never mutated.
  const byPath = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );
  for (const parent of matches.slice(0, topK)) {
    const partners = coChangeMatrix.entries.get(parent.filePath) ?? [];
    for (const { fileB, similarity } of partners.slice(0, partnersPerParent)) {
      // Stop at the first partner below the threshold; later ones are not examined.
      if (similarity < minSimilarity) break;
      const bonus = parent.score * boostFactor * similarity;
      const hit = byPath.get(fileB);
      if (!hit) {
        byPath.set(fileB, { filePath: fileB, score: bonus, matchedTerms: [tag] });
        continue;
      }
      hit.score += bonus;
      if (!hit.matchedTerms.includes(tag)) {
        hit.matchedTerms.push(tag);
      }
    }
  }
  return [...byPath.values()].sort((a, b) => b.score - a.score);
}
|
|
4577
|
+
|
|
4578
|
+
// src/engine/multi-hop.ts
|
|
4579
|
+
init_tfidf();
|
|
4580
|
+
// Default tuning for multiHopQuery; the caller's `config` is spread over these values.
var DEFAULT_CONFIG2 = {
  // Number of expansion rounds run after the initial query (hop 0).
  maxHops: 2,
  // How many of the best-scoring files seed each hop.
  topKPerHop: 5,
  // Contributions found at hop h are multiplied by decayFactor ** h.
  decayFactor: 0.5,
  // A hop contribution is kept only if it reaches minScoreThreshold * decayFactor ** h.
  minScoreThreshold: 0.2
};
|
|
4586
|
+
/**
 * Iteratively expand a text query through the import and call graphs.
 *
 * Hop 0 is a plain `query(index, task, 50)`. Each later hop takes the best
 * files of the previous hop as seeds, collects their not-yet-explored
 * neighbours (imports, callees, callers), re-queries the index with the task
 * plus identifiers extracted from the seed files, and adds decayed scores for
 * the neighbours and for new text matches.
 *
 * @param index - Search index handed to `query`.
 * @param {string} task - Natural-language task description.
 * @param {Map<string, string[]>} deps - Import dependencies per file.
 * @param {Array<{from: string, to: string}>} callEdges - Call-graph edges.
 * @param {Map<string, string>} fileContents - File contents keyed by path.
 * @param {object} [config] - Overrides for DEFAULT_CONFIG2.
 * @returns {{matches: Array<{filePath: string, score: number, matchedTerms: string[]}>,
 *            hops: Array<object>, totalFilesExplored: number}}
 */
function multiHopQuery(index, task, deps, callEdges, fileContents, config = {}) {
  const cfg = { ...DEFAULT_CONFIG2, ...config };
  const hops = [];
  // Forward and reverse adjacency of the call graph.
  const callsTo = new Map();
  const calledBy = new Map();
  for (const edge of callEdges) {
    const fwd = callsTo.get(edge.from) ?? new Set();
    fwd.add(edge.to);
    callsTo.set(edge.from, fwd);
    const rev = calledBy.get(edge.to) ?? new Set();
    rev.add(edge.from);
    calledBy.set(edge.to, rev);
  }
  const aggregateScores = new Map();
  const aggregateTerms = new Map();
  const explored = new Set();
  const initialResults = query(index, task, 50);
  for (const m of initialResults) {
    aggregateScores.set(m.filePath, m.score);
    aggregateTerms.set(m.filePath, new Set(m.matchedTerms));
    explored.add(m.filePath);
  }
  hops.push({
    hop: 0,
    seedFiles: [],
    newFiles: initialResults.slice(0, cfg.topKPerHop).map((m) => m.filePath),
    expandedTerms: tokenize(task)
  });
  let currentSeeds = initialResults.slice(0, cfg.topKPerHop);
  for (let hop = 1; hop <= cfg.maxHops; hop++) {
    if (currentSeeds.length === 0) break;
    const seedFiles = currentSeeds.map((m) => m.filePath);
    const newFiles = [];
    const expandedTerms = [];
    // Unexplored graph neighbours of the seeds.
    const connectedFiles = new Set();
    for (const seed of seedFiles) {
      const importDeps = deps.get(seed) ?? [];
      for (const dep of importDeps) {
        if (!explored.has(dep)) connectedFiles.add(dep);
      }
      const calls = callsTo.get(seed) ?? new Set();
      for (const called of calls) {
        if (!explored.has(called)) connectedFiles.add(called);
      }
      const callers = calledBy.get(seed) ?? new Set();
      for (const caller of callers) {
        if (!explored.has(caller)) connectedFiles.add(caller);
      }
    }
    // Identifiers from the seed files widen the text query for this hop.
    for (const seed of seedFiles) {
      const content = fileContents.get(seed);
      if (!content) continue;
      const identifiers = extractKeyIdentifiers(content, seed);
      expandedTerms.push(...identifiers);
    }
    const decayMultiplier = Math.pow(cfg.decayFactor, hop);
    const uniqueExpandedTerms = [...new Set(expandedTerms)];
    const expandedQuery = task + " " + uniqueExpandedTerms.slice(0, 10).join(" ");
    const expandedResults = query(index, expandedQuery, 30);
    // Index the expanded results once instead of scanning them for every
    // connected file; the first result per path wins, as a linear find would.
    const expandedByPath = new Map();
    for (const r of expandedResults) {
      if (!expandedByPath.has(r.filePath)) expandedByPath.set(r.filePath, r);
    }
    for (const connected of connectedFiles) {
      const expandedMatch = expandedByPath.get(connected);
      // Flat credit for being a graph neighbour, plus any text-match score.
      const graphScore = 0.3;
      const bm25Score = expandedMatch?.score ?? 0;
      const hopScore = (graphScore + bm25Score) * decayMultiplier;
      if (hopScore >= cfg.minScoreThreshold * decayMultiplier) {
        const existing = aggregateScores.get(connected) ?? 0;
        aggregateScores.set(connected, existing + hopScore);
        const terms = aggregateTerms.get(connected) ?? new Set();
        terms.add(`[hop-${hop}]`);
        if (expandedMatch) {
          for (const t of expandedMatch.matchedTerms) terms.add(t);
        }
        aggregateTerms.set(connected, terms);
        if (!explored.has(connected)) {
          newFiles.push(connected);
          explored.add(connected);
        }
      }
    }
    // Text-only matches that are not graph neighbours count at half weight.
    for (const r of expandedResults) {
      if (!explored.has(r.filePath)) {
        const hopScore = r.score * decayMultiplier * 0.5;
        if (hopScore >= cfg.minScoreThreshold * decayMultiplier) {
          const existing = aggregateScores.get(r.filePath) ?? 0;
          aggregateScores.set(r.filePath, existing + hopScore);
          const terms = aggregateTerms.get(r.filePath) ?? new Set();
          terms.add(`[hop-${hop}-bm25]`);
          for (const t of r.matchedTerms) terms.add(t);
          aggregateTerms.set(r.filePath, terms);
          newFiles.push(r.filePath);
          explored.add(r.filePath);
        }
      }
    }
    hops.push({ hop, seedFiles, newFiles, expandedTerms: uniqueExpandedTerms.slice(0, 20) });
    // The best newly discovered files seed the next hop.
    const newScored = newFiles
      .map((f) => ({ filePath: f, score: aggregateScores.get(f) ?? 0 }))
      .sort((a, b) => b.score - a.score)
      .slice(0, cfg.topKPerHop);
    currentSeeds = newScored.map((s) => ({
      filePath: s.filePath,
      score: s.score,
      matchedTerms: [...aggregateTerms.get(s.filePath) ?? []]
    }));
  }
  const matches = [];
  for (const [filePath, score] of aggregateScores) {
    const terms = aggregateTerms.get(filePath) ?? new Set();
    matches.push({ filePath, score, matchedTerms: [...terms] });
  }
  matches.sort((a, b) => b.score - a.score);
  return {
    matches,
    hops,
    totalFilesExplored: explored.size
  };
}
|
|
4700
|
+
/**
 * Pull declaration names out of a source file and split them into words
 * usable as extra search terms.
 *
 * Supported extensions: java (classes, interfaces, public/protected methods),
 * ts/tsx/js/jsx (exported declarations), py (top-of-line class/def), and
 * go (capitalised funcs and types). Other extensions yield an empty list.
 *
 * @param {string} content - File contents.
 * @param {string} filePath - Path; only the extension is inspected.
 * @returns {string[]} At most 30 unique words of length >= 3 that are not in
 *   NOISE_IDENTIFIERS.
 */
function extractKeyIdentifiers(content, filePath) {
  const identifiers = [];
  const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
  // Every pattern captures the declared name in group 1.
  const collect = (pattern, splitName) => {
    for (const match of content.matchAll(pattern)) {
      identifiers.push(...splitName(match[1]));
    }
  };
  if (ext === "java") {
    collect(/(?:class|interface)\s+(\w+)/g, splitCamelCase);
    collect(/(?:public|protected)\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm, splitCamelCase);
  } else if (["ts", "tsx", "js", "jsx"].includes(ext)) {
    // Optional modifiers cover `export default class X`, `export async function x`
    // and `export abstract class X`; `let`, `var` and `enum` exports are included too.
    collect(
      /export\s+(?:default\s+)?(?:abstract\s+)?(?:async\s+)?(?:class|function|const|let|var|interface|type|enum)\s+(\w+)/g,
      splitCamelCase
    );
  } else if (ext === "py") {
    collect(/^(?:class|def)\s+(\w+)/gm, splitSnakeCase);
  } else if (ext === "go") {
    // The optional group skips a method receiver such as `(r *Repo)`.
    collect(/^func\s+(?:\([^)]+\)\s+)?([A-Z]\w+)/gm, splitCamelCase);
    collect(/^type\s+([A-Z]\w+)/gm, splitCamelCase);
  }
  return [...new Set(identifiers)]
    .filter((id) => id.length >= 3 && !NOISE_IDENTIFIERS.has(id.toLowerCase()))
    .slice(0, 30);
}
|
|
4740
|
+
/**
 * Break a camelCase / PascalCase identifier into lower-case words.
 * Acronym runs stay together ("HTTPServer" -> "http", "server"); words
 * shorter than three characters are dropped.
 *
 * @param {string} name
 * @returns {string[]}
 */
function splitCamelCase(name) {
  const spaced = name
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
  const words = [];
  for (const piece of spaced.toLowerCase().split(/\s+/)) {
    if (piece.length >= 3) {
      words.push(piece);
    }
  }
  return words;
}
|
|
4743
|
+
/**
 * Split a snake_case identifier on underscores, keeping only the segments
 * that are at least three characters long. Case is left untouched.
 *
 * @param {string} name
 * @returns {string[]}
 */
function splitSnakeCase(name) {
  const segments = [];
  for (const segment of name.split("_")) {
    if (segment.length >= 3) {
      segments.push(segment);
    }
  }
  return segments;
}
|
|
4746
|
+
// Lower-case words that extractKeyIdentifiers discards: short accessor verbs,
// filler words, language keywords and primitive type names, and generic
// architecture nouns.
var NOISE_IDENTIFIERS = /* @__PURE__ */ new Set([
  "get", "set", "has", "add", "put", "new", "run", "map",
  "for", "the", "and", "not", "with", "from", "this", "that",
  "test", "spec", "mock",
  "void", "null", "true", "false", "string", "number", "boolean", "int",
  "impl", "default", "abstract", "base", "main", "init", "setup",
  "util", "utils", "helper", "helpers", "common", "config",
  "model", "entity", "service", "repository", "controller", "handler",
  "interface", "type", "class", "function", "const", "return", "import", "export",
  "private", "public", "protected", "static", "final", "override",
  "async", "await"
]);
|
|
4810
|
+
|
|
4811
|
+
// src/engine/query-intent.ts
|
|
4812
|
+
// Ordered [pattern, action] pairs; the first pattern that matches decides the
// action, so earlier rows take precedence over later ones.
var ACTION_PATTERNS = [
  [/\b(fix|bug|debug|repair|resolve|broken|crash|error|issue|wrong)\b/i, "fix"],
  [/\b(add|implement|create|build|new|feature|introduce|wire)\b/i, "add"],
  [/\b(refactor|restructure|clean|extract|split|move|rename|simplify)\b/i, "refactor"],
  [/\b(trace|follow|understand|find|where|how|flow|path|chain)\b/i, "trace"],
  [/\b(test|spec|coverage|assert|mock|verify)\b/i, "test"],
  [/\b(doc|document|describe|explain|readme|comment)\b/i, "docs"],
  [/\b(remove|delete|deprecate|drop|kill|eliminate)\b/i, "remove"],
  [/\b(optimize|performance|speed|fast|slow|latency|efficient)\b/i, "optimize"]
];

/**
 * Classify a task description by the first action keyword family it contains.
 *
 * @param {string} task
 * @returns {string} One of the action labels in ACTION_PATTERNS, or "unknown".
 */
function detectAction(task) {
  const firstHit = ACTION_PATTERNS.find(([pattern]) => pattern.test(task));
  return firstHit ? firstHit[1] : "unknown";
}
|
|
4828
|
+
// [keywords, layer] pairs. A layer is reported when any of its keywords occurs
// as a substring of the lower-cased task ("invalidat" is a deliberate prefix).
var LAYER_KEYWORDS = [
  [["controller", "endpoint", "handler", "router", "route", "api", "rest", "entrypoint"], "endpoint"],
  [["usecase", "use case", "use-case", "interactor", "application service"], "usecase"],
  [["service", "domain service", "business logic"], "service"],
  [["repository", "repo", "dao", "data access", "persistence", "database", "db", "store"], "repository"],
  [["cache", "redis", "memcached", "caching", "ttl", "invalidat"], "cache"],
  [["client", "http client", "api client", "rest client", "feign", "retrofit"], "client"],
  [["model", "entity", "dto", "domain object", "value object", "pojo"], "model"],
  [["config", "configuration", "injector", "module", "bean", "provider", "dependency injection"], "config"],
  [["queue", "kafka", "rabbit", "sqs", "event", "listener", "consumer", "producer", "message"], "queue"],
  [["middleware", "interceptor", "filter", "guard", "pipe"], "middleware"]
];

/**
 * List the architectural layers a task description mentions.
 *
 * @param {string} task
 * @returns {string[]} Layer names in LAYER_KEYWORDS order, without duplicates.
 */
function detectLayers(task) {
  const haystack = task.toLowerCase();
  const found = new Set();
  for (const [keywords, layer] of LAYER_KEYWORDS) {
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) {
        found.add(layer);
        break;
      }
    }
  }
  return [...found];
}
|
|
4850
|
+
// English function words ignored when extracting entities and qualifiers
// from a task description.
var STOP_WORDS2 = /* @__PURE__ */ new Set([
  "the", "a", "an",
  "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did",
  "will", "would", "shall", "should", "may", "might", "must", "can", "could", "need",
  "not", "and", "but", "or", "nor", "for", "yet", "so",
  "in", "on", "at", "to", "from", "by", "with", "about",
  "between", "through", "during", "before", "after", "above", "below",
  "up", "down", "out", "off", "over", "under",
  "again", "further", "then", "once", "here", "there",
  "when", "where", "why", "how",
  "all", "each", "every", "both", "few", "more", "most", "other", "some", "such",
  "no", "only", "own", "same", "than", "too", "very", "just", "because",
  "this", "that", "these", "those", "it", "its", "of", "if"
]);
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
4946
|
+
// Words that describe what the user wants done rather than what it is done
// to; extractEntities leaves them out of the entity list.
var ACTION_WORDS = /* @__PURE__ */ new Set([
  "fix", "add", "create", "build", "implement", "refactor",
  "trace", "follow", "find",
  "update", "modify", "change", "remove", "delete",
  "debug", "test", "check", "verify", "validate",
  "handle", "process", "resolve", "repair",
  "optimize", "improve", "speed", "clean",
  "bug", "error", "issue", "problem",
  "flow", "path", "chain"
]);
|
|
4982
|
+
// Domain operation verbs. extractOperations reports them; extractEntities
// excludes them from the entity list.
var OPERATION_WORDS = /* @__PURE__ */ new Set([
  "create", "read", "update", "delete",
  "save", "load", "fetch", "retrieve", "store", "persist", "insert", "remove",
  "invalidate", "validate", "parse", "transform", "convert",
  "render", "display",
  "send", "receive", "publish", "subscribe", "emit", "listen",
  "authenticate", "authorize", "encrypt", "decrypt", "hash",
  "serialize", "deserialize", "encode", "decode", "compress",
  "replicate", "sync", "migrate",
  "export", "import", "upload", "download",
  "search", "index", "query", "filter", "sort"
]);
|
|
5031
|
+
/**
 * Extract the candidate subject words of a task description.
 *
 * The task is lower-cased, characters other than letters, digits, whitespace
 * and hyphens become spaces, and a word is kept when it has at least three
 * characters and is not a stop word, action word, operation word, or layer keyword.
 *
 * @param {string} task
 * @returns {string[]} Unique entities in order of first appearance.
 */
function extractEntities(task) {
  const tokens = task.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/);
  const isLayerKeyword = (word) => LAYER_KEYWORDS.some(([keywords]) => keywords.includes(word));
  const kept = tokens.filter(
    (word) =>
      word.length >= 3 &&
      !STOP_WORDS2.has(word) &&
      !ACTION_WORDS.has(word) &&
      !OPERATION_WORDS.has(word) &&
      !isLayerKeyword(word)
  );
  return [...new Set(kept)];
}
|
|
1813
|
-
function
|
|
1814
|
-
const
|
|
1815
|
-
|
|
1816
|
-
const
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
}
|
|
1820
|
-
const results = [];
|
|
1821
|
-
const k1 = 1.5;
|
|
1822
|
-
const b = 0.75;
|
|
1823
|
-
for (const [filePath, doc] of index.documents) {
|
|
1824
|
-
let score = 0;
|
|
1825
|
-
const matchedTerms = [];
|
|
1826
|
-
for (const [qTerm, qCount] of querySet) {
|
|
1827
|
-
const tf = doc.terms.get(qTerm) ?? 0;
|
|
1828
|
-
if (tf === 0) continue;
|
|
1829
|
-
const termIdf = index.idf.get(qTerm) ?? 0;
|
|
1830
|
-
if (termIdf <= 0) continue;
|
|
1831
|
-
const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
|
|
1832
|
-
score += termIdf * tfNorm * qCount;
|
|
1833
|
-
matchedTerms.push(qTerm);
|
|
1834
|
-
}
|
|
1835
|
-
if (score > 0) {
|
|
1836
|
-
results.push({ filePath, score, matchedTerms });
|
|
5045
|
+
/**
 * Find the operation verbs mentioned in a task description.
 *
 * Two passes feed one de-duplicated list: every whitespace-separated word
 * that is in OPERATION_WORDS, then the word following "on", "after",
 * "before" or "during" when that word is in OPERATION_WORDS.
 *
 * @param {string} task
 * @returns {string[]} Unique operations in order of discovery.
 */
function extractOperations(task) {
  const lowered = task.toLowerCase();
  const found = new Set();
  for (const word of lowered.replace(/[^a-z0-9\s-]/g, " ").split(/\s+/)) {
    if (OPERATION_WORDS.has(word)) {
      found.add(word);
    }
  }
  const phrases = lowered.match(/\b(on|after|before|during)\s+(\w+)/g) ?? [];
  for (const phrase of phrases) {
    // phrase is "<preposition> <word>"; the second piece is the candidate verb.
    const [, verb] = phrase.split(/\s+/);
    if (OPERATION_WORDS.has(verb)) {
      found.add(verb);
    }
  }
  return [...found];
}
|
|
1845
|
-
function
|
|
1846
|
-
const
|
|
1847
|
-
const
|
|
1848
|
-
|
|
1849
|
-
const
|
|
1850
|
-
|
|
1851
|
-
if (
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
5064
|
+
/**
 * Extract the phrases that follow a scoping preposition
 * ("on", "for", "in", "via", "from", "through") in a task description.
 *
 * Up to two words are captured after the preposition. Stop words are removed
 * from the captured words individually, so "in the cache" yields "cache"
 * rather than "the cache"; a capture made only of stop words is dropped.
 *
 * @param {string} task
 * @returns {string[]} Unique lower-cased qualifiers in order of appearance.
 */
function extractQualifiers(task) {
  const qualifiers = new Set();
  const phrases = task.match(/\b(on|for|in|via|from|through)\s+(\w+(?:\s+\w+)?)/gi) ?? [];
  for (const phrase of phrases) {
    // Drop the leading preposition, then filter stop words word by word;
    // checking the joined phrase would never match a two-word capture.
    const words = phrase
      .toLowerCase()
      .split(/\s+/)
      .slice(1)
      .filter((word) => !STOP_WORDS2.has(word));
    const qualifier = words.join(" ");
    if (qualifier.length >= 2) {
      qualifiers.add(qualifier);
    }
  }
  return [...qualifiers];
}
|
|
1860
|
-
function
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
if (w.endsWith("ful") && w.length > 5) return w.slice(0, -3);
|
|
1875
|
-
if (w.endsWith("ess") && w.length > 5) return w.slice(0, -3);
|
|
1876
|
-
if (w.endsWith("ity") && w.length > 5) return w.slice(0, -3);
|
|
1877
|
-
if (w.endsWith("ive") && w.length > 5) return w.slice(0, -3);
|
|
1878
|
-
if (w.endsWith("ed") && w.length > 4) return w.slice(0, -2);
|
|
1879
|
-
if (w.endsWith("er") && w.length > 4) return w.slice(0, -2);
|
|
1880
|
-
if (w.endsWith("ly") && w.length > 4) return w.slice(0, -2);
|
|
1881
|
-
if (w.endsWith("al") && w.length > 4) return w.slice(0, -2);
|
|
1882
|
-
if (w.endsWith("s") && !w.endsWith("ss") && w.length > 3) return w.slice(0, -1);
|
|
1883
|
-
return w;
|
|
5080
|
+
/**
 * Decompose a task description into a structured intent.
 *
 * @param {string} task
 * @returns {{original: string, action: string, entities: string[], operations: string[],
 *            layers: string[], qualifiers: string[], confidence: number}}
 *   `confidence` is the fraction of the four signals (a known action, any
 *   entity, any operation, any layer) that were detected; qualifiers do not
 *   contribute to it.
 */
function parseQueryIntent(task) {
  const action = detectAction(task);
  const entities = extractEntities(task);
  const operations = extractOperations(task);
  const layers = detectLayers(task);
  const qualifiers = extractQualifiers(task);
  const signalsPresent = [
    action !== "unknown",
    entities.length > 0,
    operations.length > 0,
    layers.length > 0
  ];
  let detected = 0;
  for (const present of signalsPresent) {
    if (present) detected += 1;
  }
  return {
    original: task,
    action,
    entities,
    operations,
    layers,
    qualifiers,
    confidence: detected / signalsPresent.length
  };
}
|
|
1885
|
-
function
|
|
1886
|
-
const
|
|
1887
|
-
const
|
|
1888
|
-
|
|
1889
|
-
boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
|
|
5095
|
+
/**
 * Turn a parsed intent into a query string whose term repetition encodes
 * weight: entities appear three times, operations twice, layers and
 * qualifiers once. Falls back to the original task text when the intent
 * carries no terms at all.
 *
 * @param {{original: string, entities: string[], operations: string[],
 *          layers: string[], qualifiers: string[]}} intent
 * @returns {string}
 */
function buildWeightedQuery(intent) {
  const weighted = [
    ...intent.entities.flatMap((entity) => [entity, entity, entity]),
    ...intent.operations.flatMap((operation) => [operation, operation]),
    ...intent.layers,
    ...intent.qualifiers
  ];
  return weighted.length > 0 ? weighted.join(" ") : intent.original;
}
|
|
5112
|
+
|
|
5113
|
+
// src/engine/embeddings.ts
|
|
5114
|
+
/**
 * Build a cosine-similarity search over TF-IDF vectors derived from an
 * existing term index.
 *
 * Each document becomes an L2-normalised vector of tf * idf weights over the
 * index vocabulary. Vectors are stored sparsely (vocabulary position ->
 * weight), so memory grows with the number of distinct terms per document
 * rather than with documents x vocabulary size.
 *
 * @param {{idf: Map<string, number>, documents: Map<string, {terms: Map<string, number>}>}} index
 * @returns {{backend: string, dimensions: number, documentCount: number,
 *            query: (text: string, topK: number) => Array<{filePath: string, score: number}>}}
 */
function buildTfIdfEmbeddingIndex(index) {
  const allTerms = [...index.idf.keys()];
  const termToIdx = new Map(allTerms.map((t, i) => [t, i]));
  const dimensions = allTerms.length;

  // Convert term -> count pairs into a sparse unit vector. Weights are rounded
  // with Math.fround at the same points a Float32Array would round them, which
  // keeps scores identical to a dense float32 representation.
  function toUnitVector(termCounts) {
    const weighted = [];
    let sumSquares = 0;
    for (const [term, count] of termCounts) {
      const idx = termToIdx.get(term);
      if (idx === void 0) continue; // term outside the vocabulary
      const weight = count * (index.idf.get(term) ?? 0);
      sumSquares += weight * weight;
      weighted.push([idx, Math.fround(weight)]);
    }
    const norm = Math.sqrt(sumSquares);
    const vector = new Map();
    for (const [idx, weight] of weighted) {
      vector.set(idx, norm > 0 ? Math.fround(weight / norm) : weight);
    }
    return vector;
  }

  const docVectors = new Map();
  for (const [filePath, doc] of index.documents) {
    docVectors.set(filePath, toUnitVector(doc.terms));
  }

  function queryFn(text, topK) {
    const termCounts = new Map();
    for (const t of tokenizeForEmbedding(text)) {
      termCounts.set(t, (termCounts.get(t) ?? 0) + 1);
    }
    const queryVector = toUnitVector(termCounts);
    const results = [];
    for (const [filePath, docVector] of docVectors) {
      // Only the query's own terms can contribute to the dot product.
      let dot = 0;
      for (const [idx, queryWeight] of queryVector) {
        dot += queryWeight * (docVector.get(idx) ?? 0);
      }
      if (dot > 0) {
        results.push({ filePath, score: dot });
      }
    }
    return results.sort((a, b) => b.score - a.score).slice(0, topK);
  }

  return {
    backend: "tfidf-cosine",
    dimensions,
    documentCount: docVectors.size,
    query: queryFn
  };
}
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
var MIN_OBSERVATIONS = 3;
|
|
1920
|
-
async function loadLearner(projectPath) {
|
|
1921
|
-
const modelPath = join4(projectPath, MODEL_DIR, MODEL_FILE);
|
|
1922
|
-
try {
|
|
1923
|
-
const raw = await readFile5(modelPath, "utf-8");
|
|
1924
|
-
const parsed = JSON.parse(raw);
|
|
1925
|
-
if (parsed.version === 2) return parsed;
|
|
1926
|
-
} catch {
|
|
5184
|
+
/**
 * Fuse two ranked result lists with weighted Reciprocal Rank Fusion.
 *
 * An entry at zero-based position `rank` in a list contributes
 * `weight / (k + rank + 1)` to the score of its `filePath`; contributions for
 * the same path are summed across (and within) both lists.
 *
 * @param {Array<{filePath: string}>} bm25Results - First ranked list, best first.
 * @param {Array<{filePath: string}>} embeddingResults - Second ranked list, best first.
 * @param {number} [k=60] - Rank-smoothing constant added to every rank.
 * @param {number} [bm25Weight=0.6] - Weight applied to the first list.
 * @param {number} [embeddingWeight=0.4] - Weight applied to the second list.
 * @returns {Array<{filePath: string, score: number}>} Fused entries sorted by descending score.
 */
function reciprocalRankFusion(bm25Results, embeddingResults, k = 60, bm25Weight = 0.6, embeddingWeight = 0.4) {
  const fused = new Map();
  const accumulate = (ranked, weight) => {
    ranked.forEach((entry, rank) => {
      const contribution = weight / (k + rank + 1);
      const soFar = fused.get(entry.filePath) ?? 0;
      fused.set(entry.filePath, soFar + contribution);
    });
  };
  // The first list is folded in before the second so insertion order (and
  // therefore the order of equal scores after sorting) is unchanged.
  accumulate(bm25Results, bm25Weight);
  accumulate(embeddingResults, embeddingWeight);
  const merged = Array.from(fused, ([filePath, score]) => ({ filePath, score }));
  merged.sort((left, right) => right.score - left.score);
  return merged;
}
|
|
1930
|
-
function
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
5198
|
+
/**
 * Split free text into lowercase alphanumeric tokens for the TF-IDF query.
 *
 * camelCase boundaries are broken apart *before* lowercasing; doing it after
 * (as the previous implementation did) leaves no uppercase letters for the
 * boundary regex to find, so identifiers such as `getUserName` stayed glued
 * together as a single token.
 *
 * @param {string} text - Raw query or document text.
 * @returns {string[]} Tokens of length >= 2 containing only [a-z0-9].
 */
function tokenizeForEmbedding(text) {
  return text
    .replace(/([a-z])([A-Z])/g, "$1 $2") // "getUser" -> "get User"
    .toLowerCase()
    .replace(/[^a-z0-9]/g, " ") // every other character is a separator
    .split(/\s+/)
    .filter((t) => t.length >= 2); // also drops the empty strings produced by split
}
|
|
5201
|
+
|
|
5202
|
+
// src/engine/context-pipeline.ts
|
|
5203
|
+
// Basename patterns for files that should never take part in ranking.
//
// Repository meta documents (CHANGELOG, LICENSE, ...) are matched as a whole
// name: the bare word, optionally followed by a "-"/"_" suffix, optionally
// ending in a prose extension. A plain prefix match such as /^security/i would
// also discard real source files like "SecurityConfig.java" or "security.ts".
var RANKING_NOISE_PATTERNS = [
  ...["changelog", "license", "contributing", "code_of_conduct", "authors", "security"].map(
    (name) => new RegExp(`^${name}(?:[-_][\\w-]*)?(?:(?:\\.[\\w-]+)*\\.(?:md|markdown|txt|rst|adoc))?$`, "i")
  ),
  /^codeowners$/i,
  // Dependency lock files.
  /\.lock$/,
  /^package-lock\.json$/,
  /^yarn\.lock$/,
  /^pnpm-lock\.yaml$/,
  /^Gemfile\.lock$/
];
/**
 * Report whether a file is ranking noise, judged by its basename only.
 *
 * @param {string} filePath - Project path; both "/" and "\" are accepted as separators.
 * @returns {boolean} true when the basename matches one of RANKING_NOISE_PATTERNS.
 */
function isRankingNoise(filePath) {
  const basename4 = filePath.split(/[/\\]/).pop() ?? filePath;
  return RANKING_NOISE_PATTERNS.some((re) => re.test(basename4));
}
|
|
5221
|
+
/**
 * Score multiplier for a file based on its kind (test / doc / config) and the
 * classified task type. 1 is neutral, values below 1 demote, values above 1
 * promote.
 *
 * Test detection covers `test/`, `tests/` and `__tests__/` directories at any
 * depth — including the project root, which relative paths reach without a
 * leading separator — plus the `.test.`, `.spec.` and `_test.` name markers.
 *
 * @param {string} filePath - File path (matching is case-insensitive).
 * @param {string} taskType - Task classification, e.g. "debug", "test", "docs", "feature", "refactor".
 * @returns {number} Multiplier to apply to the file's relevance score.
 */
function fileTypePenalty(filePath, taskType) {
  // Everything is matched against the lowercased path, so no /i flag is needed.
  const lower = filePath.toLowerCase();
  const isTest = /(?:^|[/\\])(?:tests?|__tests__)[/\\]|\.test\.|\.spec\.|_test\./.test(lower);
  const isDoc = /\.md$|\.txt$|\.rst$|^docs[/\\]/.test(lower);
  const isConfig = /\.xml$|\.yml$|\.yaml$|\.properties$|\.gradle$/.test(lower);
  // Within each task type the checks are ordered; the first matching kind wins.
  if (taskType === "debug") {
    if (isTest) return 0.4;
    if (isDoc) return 0.2;
    if (isConfig) return 0.6;
  } else if (taskType === "test") {
    if (isTest) return 1.2;
    if (isDoc) return 0.3;
  } else if (taskType === "docs") {
    if (isDoc) return 1.2;
    if (isTest) return 0.3;
  } else if (taskType === "feature" || taskType === "refactor") {
    if (isTest) return 0.5;
    if (isDoc) return 0.4;
  }
  return 1;
}
|
|
1961
|
-
function
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
5242
|
+
/**
 * Heuristically decide whether a task description is a "complex" query.
 *
 * Returns true as soon as one of these holds, checked in order:
 *   1. the text contains a chain/flow indicator ("when", "via", "trace", "propagate", ...);
 *   2. it mentions at least two distinct architectural layers;
 *   3. it has 10 or more words longer than two characters;
 *   4. it uses three or more connecting prepositions.
 *
 * @param {string} task - Free-text task description.
 * @returns {boolean} true when the query looks complex.
 */
function detectComplexQuery(task) {
  const lower = task.toLowerCase();
  // `propagat\w*` is a stem: without the `\w*` the trailing `\b` can never
  // match inside "propagate"/"propagation".
  const chainIndicators = /\b(when|after|then|through|from .+ to|via|chain|flow|trace|path|propagat\w*|cascade|invalidat\w+ on|calls?|invokes?)\b/;
  if (chainIndicators.test(lower)) return true;
  // Each entry is one layer with its aliases. Aliases share an entry so a
  // single mention of "repository" (which contains "repo") counts once.
  const layers = [
    ["controller"],
    ["endpoint"],
    ["router"],
    ["handler"],
    ["service"],
    ["usecase", "use case"],
    ["repository", "repo"],
    ["cache"],
    ["database"],
    ["queue"],
    ["client"],
    ["adapter"],
    ["gateway"],
    ["interceptor"],
    ["middleware"],
    ["listener"],
    ["consumer"],
    ["producer"],
    ["publisher"],
    ["subscriber"]
  ];
  const layerCount = layers.filter((aliases) => aliases.some((alias) => lower.includes(alias))).length;
  if (layerCount >= 2) return true;
  const words = lower.split(/\s+/).filter((w) => w.length > 2);
  if (words.length >= 10) return true;
  const entityConnectors = lower.match(/\b(on|for|in|from|to|with|after|before|during)\b/g);
  return entityConnectors !== null && entityConnectors.length >= 3;
}
|
|
1970
|
-
function
|
|
1971
|
-
const
|
|
1972
|
-
const
|
|
1973
|
-
const
|
|
1974
|
-
|
|
1975
|
-
|
|
1976
|
-
|
|
1977
|
-
|
|
5278
|
+
/**
 * Run the shared context-selection pipeline for a task.
 *
 * Steps, in order: classify the task and build the weighted query; read every
 * analysed file; build (or load) the search index; run the TF-IDF embedding
 * query and the lexical query (multi-hop for complex queries); fuse both with
 * reciprocal rank fusion; drop ranking noise; apply the path / layer / import /
 * call-graph / git co-change boosts; rerank; apply file-type multipliers; load
 * learner boosts; and finally call selectContext.
 *
 * @param {object} input
 * @param {string} input.projectPath - Project root passed to the index cache, git co-change and learner helpers.
 * @param {string} input.task - Free-text task description.
 * @param {object} input.analysis - Project analysis; `files` (path, relativePath, imports) and `graph.edges` are read here.
 * @param {number} [input.budget=5e4] - Budget forwarded to selectContext.
 * @param {Array} [input.siblingRepos] - When non-empty, sibling repositories are queried as well.
 * @returns {Promise<object>} `{ selection, taskType, fileContentMap, semanticMap, learnerMap, queryIntent, multiRepo, indexCacheStats }`;
 *   `multiRepo` is undefined when no sibling repositories were given.
 */
async function runContextPipeline(input) {
  const { projectPath, task, analysis, budget = 5e4 } = input;
  const taskType = classifyTask(task);
  const queryIntent = parseQueryIntent(task);
  const weightedQuery = buildWeightedQuery(queryIntent);
  const fileContentMap = new Map();
  const fileContents = [];
  for (const file of analysis.files) {
    try {
      const content = readFileSync6(file.path, "utf-8");
      fileContentMap.set(file.relativePath, content);
      fileContents.push({ relativePath: file.relativePath, content });
    } catch {
      // Best effort: an unreadable file stays in the list with empty content
      // and is left out of fileContentMap.
      fileContents.push({ relativePath: file.relativePath, content: "" });
    }
  }
  const indexFiles = analysis.files.map((f) => {
    const raw = fileContentMap.get(f.relativePath);
    // Empty or unread files are indexed with undefined content.
    const augmented = raw ? augmentContentWithStructure(raw, f.relativePath) : void 0;
    return {
      relativePath: f.relativePath,
      absolutePath: f.path,
      content: augmented
    };
  });
  const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);
  const fileCount = analysis.files.length;
  // 15% of the file count, clamped to the range [20, 100].
  const adaptiveTopK = Math.min(Math.max(20, Math.round(fileCount * 0.15)), 100);
  const allFilePaths = analysis.files.map((f) => f.relativePath);
  const depMap = new Map();
  for (const file of analysis.files) {
    depMap.set(file.relativePath, file.imports);
  }
  const callGraph = buildCallGraph(
    fileContents.filter((f) => f.content.length > 0)
  );
  const callEdges = [...analysis.graph.edges.filter((e) => e.type === "call"), ...callGraph.edges];
  const isComplexQuery = detectComplexQuery(task);
  const embeddingIndex = buildTfIdfEmbeddingIndex(index);
  const embeddingResults = embeddingIndex.query(weightedQuery, adaptiveTopK);
  let bm25Matches;
  if (isComplexQuery) {
    const hopResult = multiHopQuery(index, weightedQuery, depMap, callEdges, fileContentMap, {
      maxHops: 2,
      topKPerHop: 5,
      decayFactor: 0.5,
      minScoreThreshold: 0.15
    });
    bm25Matches = hopResult.matches.slice(0, adaptiveTopK);
  } else {
    bm25Matches = query(index, weightedQuery, adaptiveTopK);
  }
  const fusedResults = reciprocalRankFusion(bm25Matches, embeddingResults, 60, 0.6, 0.4);
  // Look up lexical matches by path once instead of scanning bm25Matches for
  // every fused result. Only the first occurrence of a path is kept, which is
  // what Array.prototype.find would have returned.
  const bm25ByPath = new Map();
  for (const match of bm25Matches) {
    if (!bm25ByPath.has(match.filePath)) bm25ByPath.set(match.filePath, match);
  }
  const rawMatches = fusedResults.slice(0, adaptiveTopK).map((r) => {
    const bm25Match = bm25ByPath.get(r.filePath);
    return {
      filePath: r.filePath,
      score: r.score,
      // Results found only by the embedding query carry a marker term.
      matchedTerms: bm25Match?.matchedTerms ?? ["[embedding-only]"]
    };
  });
  const semanticMatches = rawMatches.filter((m) => !isRankingNoise(m.filePath));
  const pathBoosted = boostByPath(semanticMatches, allFilePaths, task);
  const layerBoosted = boostByLayer(pathBoosted, allFilePaths, task);
  const importBoosted = boostByImports(layerBoosted, depMap, 10, 0.4);
  const callBoosted = boostByCallGraph(importBoosted, callEdges, 10, 0.3);
  const coChangeMatrix = buildCoChangeMatrix(projectPath, 500, 2);
  const boostedMatches = boostByGitCoChange(callBoosted, coChangeMatrix, 10, 0.25, 0.15);
  const rerankResult = rerank({
    task,
    candidates: boostedMatches,
    index,
    fileContents: fileContentMap,
    dependencies: depMap,
    allFilePaths: analysis.files.map((f) => f.relativePath)
  });
  // Files returned by the reranker get a 1.5x score; the rest keep their score.
  const rerankerApproved = new Set(rerankResult.files.map((rf) => rf.filePath));
  const rerankedMatches = boostedMatches.map((m) => ({
    filePath: m.filePath,
    score: rerankerApproved.has(m.filePath) ? m.score * 1.5 : m.score,
    matchedTerms: [...m.matchedTerms]
  }));
  for (const m of rerankedMatches) {
    m.score *= fileTypePenalty(m.filePath, taskType);
  }
  const learner = await loadLearner(projectPath);
  const learnerBoosts = getLearnerBoosts(
    learner,
    taskType,
    analysis.files.map((f) => f.relativePath)
  );
  const semanticScores = rerankedMatches.map((m) => ({ filePath: m.filePath, score: m.score }));
  const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
  const selection = await selectContext({
    task,
    analysis,
    budget,
    semanticScores,
    learnerBoosts: learnerBoostInputs
  });
  const semanticMap = new Map(rerankedMatches.map((m) => [m.filePath, m]));
  const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));
  let multiRepo;
  if (input.siblingRepos && input.siblingRepos.length > 0) {
    // The multi-repo module is initialised lazily, only when it is needed.
    const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
    multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
  }
  return { selection, taskType, fileContentMap, semanticMap, learnerMap, queryIntent, multiRepo, indexCacheStats };
}
|
|
2021
5387
|
|
|
2022
5388
|
// src/mcp/index.ts
|
|
2023
5389
|
// MCP server instance; the tool handlers in this module are registered on it
// through server.tool(...).
// NOTE(review): the version string is hard-coded — confirm it is meant to
// track the published package version.
var server = new McpServer({ name: "cto", version: "6.1.0" });
// Single-entry memo used by getAnalysis: the most recent analysis result and
// the absolute project path it was computed for.
var lastAnalysis = null;
var lastProjectPath = null;
|
|
2029
5395
|
async function getAnalysis(projectPath) {
|
|
2030
|
-
const absPath =
|
|
5396
|
+
const absPath = resolve6(projectPath);
|
|
2031
5397
|
if (lastAnalysis && lastProjectPath === absPath) return lastAnalysis;
|
|
2032
5398
|
lastAnalysis = await analyzeProject(absPath);
|
|
2033
5399
|
lastProjectPath = absPath;
|
|
@@ -2048,35 +5414,10 @@ server.tool(
|
|
|
2048
5414
|
},
|
|
2049
5415
|
async ({ projectPath, task, budget, includeContents }) => {
|
|
2050
5416
|
try {
|
|
2051
|
-
const absPath =
|
|
5417
|
+
const absPath = resolve6(projectPath);
|
|
2052
5418
|
const analysis = await getAnalysis(absPath);
|
|
2053
5419
|
const tokenBudget = budget ?? 5e4;
|
|
2054
|
-
const taskType =
|
|
2055
|
-
const fileContents = [];
|
|
2056
|
-
for (const file of analysis.files) {
|
|
2057
|
-
try {
|
|
2058
|
-
const content = readFileSync2(file.path, "utf-8");
|
|
2059
|
-
fileContents.push({ relativePath: file.relativePath, content });
|
|
2060
|
-
} catch {
|
|
2061
|
-
fileContents.push({ relativePath: file.relativePath, content: "" });
|
|
2062
|
-
}
|
|
2063
|
-
}
|
|
2064
|
-
const index = buildIndex(fileContents);
|
|
2065
|
-
const semanticMatches = query(index, task, 50);
|
|
2066
|
-
const boosted = boostByPath(semanticMatches, analysis.files.map((f) => f.relativePath), task);
|
|
2067
|
-
const semanticMap = new Map(boosted.map((m) => [m.filePath, m]));
|
|
2068
|
-
const learner = await loadLearner(absPath);
|
|
2069
|
-
const learnerBoosts = getLearnerBoosts(learner, taskType, analysis.files.map((f) => f.relativePath));
|
|
2070
|
-
const learnerMap = new Map(learnerBoosts.map((b) => [b.filePath, b]));
|
|
2071
|
-
const semanticScores = boosted.map((m) => ({ filePath: m.filePath, score: m.score }));
|
|
2072
|
-
const learnerBoostInputs = learnerBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
|
|
2073
|
-
const selection = await selectContext({
|
|
2074
|
-
task,
|
|
2075
|
-
analysis,
|
|
2076
|
-
budget: tokenBudget,
|
|
2077
|
-
semanticScores,
|
|
2078
|
-
learnerBoosts: learnerBoostInputs
|
|
2079
|
-
});
|
|
5420
|
+
const { selection, taskType, fileContentMap, semanticMap, learnerMap } = await runContextPipeline({ projectPath: absPath, task, analysis, budget: tokenBudget });
|
|
2080
5421
|
const files = selection.files.map((f) => {
|
|
2081
5422
|
const sem = semanticMap.get(f.relativePath);
|
|
2082
5423
|
const lb = learnerMap.get(f.relativePath);
|
|
@@ -2090,11 +5431,11 @@ server.tool(
|
|
|
2090
5431
|
learnerBoost: lb?.boost ?? 0
|
|
2091
5432
|
};
|
|
2092
5433
|
if (includeContents) {
|
|
2093
|
-
|
|
2094
|
-
entry.content = sanitizeContent(fc?.content ?? "");
|
|
5434
|
+
entry.content = sanitizeContent(fileContentMap.get(f.relativePath) ?? "");
|
|
2095
5435
|
}
|
|
2096
5436
|
return entry;
|
|
2097
5437
|
});
|
|
5438
|
+
const learner = await loadLearner(absPath);
|
|
2098
5439
|
const result = {
|
|
2099
5440
|
task,
|
|
2100
5441
|
taskType,
|
|
@@ -2121,7 +5462,7 @@ server.tool(
|
|
|
2121
5462
|
},
|
|
2122
5463
|
async ({ projectPath, files }) => {
|
|
2123
5464
|
try {
|
|
2124
|
-
const absPath =
|
|
5465
|
+
const absPath = resolve6(projectPath);
|
|
2125
5466
|
const analysis = await getAnalysis(absPath);
|
|
2126
5467
|
const filePaths = files ?? analysis.files.map((f) => f.path);
|
|
2127
5468
|
const findings = await scanProjectForSecrets(absPath, filePaths);
|
|
@@ -2156,42 +5497,21 @@ server.tool(
|
|
|
2156
5497
|
},
|
|
2157
5498
|
async ({ projectPath, task, filePath }) => {
|
|
2158
5499
|
try {
|
|
2159
|
-
const absPath =
|
|
5500
|
+
const absPath = resolve6(projectPath);
|
|
2160
5501
|
const analysis = await getAnalysis(absPath);
|
|
2161
5502
|
const taskType = classifyTask(task);
|
|
2162
5503
|
const file = analysis.files.find((f) => f.relativePath === filePath);
|
|
2163
5504
|
if (!file) {
|
|
2164
5505
|
return { content: [{ type: "text", text: `File not found in project: ${filePath}` }] };
|
|
2165
5506
|
}
|
|
2166
|
-
const
|
|
2167
|
-
for (const f of analysis.files) {
|
|
2168
|
-
try {
|
|
2169
|
-
fileContents.push({ relativePath: f.relativePath, content: readFileSync2(f.path, "utf-8") });
|
|
2170
|
-
} catch {
|
|
2171
|
-
fileContents.push({ relativePath: f.relativePath, content: "" });
|
|
2172
|
-
}
|
|
2173
|
-
}
|
|
2174
|
-
const idx = buildIndex(fileContents);
|
|
2175
|
-
const semMatches = query(idx, task, 50);
|
|
2176
|
-
const boosted = boostByPath(semMatches, analysis.files.map((f) => f.relativePath), task);
|
|
2177
|
-
const semanticScores = boosted.map((m) => ({ filePath: m.filePath, score: m.score }));
|
|
2178
|
-
const learner = await loadLearner(absPath);
|
|
2179
|
-
const allBoosts = getLearnerBoosts(learner, taskType, analysis.files.map((f) => f.relativePath));
|
|
2180
|
-
const learnerBoostInputs = allBoosts.map((b) => ({ filePath: b.filePath, boost: b.boost }));
|
|
2181
|
-
const selection = await selectContext({
|
|
2182
|
-
task,
|
|
2183
|
-
analysis,
|
|
2184
|
-
budget: 5e4,
|
|
2185
|
-
semanticScores,
|
|
2186
|
-
learnerBoosts: learnerBoostInputs
|
|
2187
|
-
});
|
|
5507
|
+
const { selection, semanticMap, learnerMap } = await runContextPipeline({ projectPath: absPath, task, analysis, budget: 5e4 });
|
|
2188
5508
|
const isSelected = selection.files.some((f) => f.relativePath === filePath);
|
|
2189
5509
|
const selectionEntry = selection.files.find((f) => f.relativePath === filePath);
|
|
2190
|
-
const semEntry =
|
|
5510
|
+
const semEntry = semanticMap.get(filePath);
|
|
2191
5511
|
const semanticScore = semEntry?.score ?? 0;
|
|
2192
5512
|
const semanticTerms = semEntry?.matchedTerms ?? [];
|
|
2193
|
-
const
|
|
2194
|
-
const learnerBoost =
|
|
5513
|
+
const lbEntry = learnerMap.get(filePath);
|
|
5514
|
+
const learnerBoost = lbEntry ?? null;
|
|
2195
5515
|
const importedBy = file.importedBy;
|
|
2196
5516
|
const imports = file.imports;
|
|
2197
5517
|
const explanation = {
|