cto-ai-cli 7.1.0 → 8.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -56
- package/dist/cli/index.js +2018 -34
- package/dist/engine/index.d.ts +826 -3
- package/dist/engine/index.js +3078 -133
- package/dist/mcp/index.js +1978 -34
- package/package.json +1 -1
package/dist/mcp/index.js
CHANGED
|
@@ -9,6 +9,116 @@ var __export = (target, all) => {
|
|
|
9
9
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
10
|
};
|
|
11
11
|
|
|
12
|
+
// src/engine/synonyms.ts
|
|
13
|
+
/**
 * Fill BIDIRECTIONAL_INDEX from SYNONYM_MAP so that every word of a synonym
 * group (the canonical key and each listed synonym) maps to a Set of the
 * other words in that group.
 *
 * Buckets are shared across groups: a word that appears in several groups
 * accumulates the members of all of them. The canonical key is also added to
 * its own bucket, after its synonyms.
 */
function buildBidirectionalIndex() {
  // Return the bucket for `word`, creating and registering it on first use.
  const bucketFor = (word) => {
    let bucket = BIDIRECTIONAL_INDEX.get(word);
    if (bucket === undefined) {
      bucket = new Set();
      BIDIRECTIONAL_INDEX.set(word, bucket);
    }
    return bucket;
  };

  Object.keys(SYNONYM_MAP).forEach((head) => {
    const group = SYNONYM_MAP[head];

    // Canonical word: all synonyms first, then the canonical word itself.
    const headBucket = bucketFor(head);
    group.forEach((member) => headBucket.add(member));
    headBucket.add(head);

    // Each synonym: the canonical word first, then its sibling synonyms.
    group.forEach((member) => {
      const memberBucket = bucketFor(member);
      memberBucket.add(head);
      group
        .filter((peer) => peer !== member)
        .forEach((peer) => memberBucket.add(peer));
    });
  });
}
|
|
35
|
+
/**
 * Expand a single term into itself plus every related word recorded in
 * BIDIRECTIONAL_INDEX.
 *
 * @param {string} term - Term to expand; it is lower-cased and trimmed first.
 * @returns {string[]} The normalized term first, followed by its related
 *   words in index order, with no duplicates. A term that is not in the index
 *   yields a one-element array.
 */
function expandTerm(term) {
  const normalized = term.toLowerCase().trim();
  const related = BIDIRECTIONAL_INDEX.get(normalized);
  if (!related) return [normalized];
  // The bucket of a canonical key contains the key itself, so without
  // de-duplication the term would be listed twice and callers that sum
  // per-entry weights would count it twice.
  return [...new Set([normalized, ...related])];
}
|
|
41
|
+
// Module state for src/engine/synonyms.ts. Both bindings stay undefined until
// init_synonyms() has run.
//   SYNONYM_MAP         - canonical word -> list of related words (the table below).
//   BIDIRECTIONAL_INDEX - Map filled by buildBidirectionalIndex(): word -> Set of related words.
var SYNONYM_MAP, BIDIRECTIONAL_INDEX;
// Initializer for the synonyms module: assigns the table, creates an empty
// index Map, then builds the index from the table.
// NOTE(review): __esm is defined outside this view; presumably the bundler's
// run-once lazy-init wrapper - confirm.
// NOTE(review): keys and values here are whole words ("database", "service"),
// while LAYER_MAP in src/engine/tfidf.ts uses stem-like tokens ("databas",
// "servic"). Confirm that the terms passed to expandTerm() arrive in the
// unstemmed form, otherwise many entries can never match.
var init_synonyms = __esm({
  "src/engine/synonyms.ts"() {
    "use strict";
    // Some words appear in several groups (e.g. "store" under database, cache,
    // state and write; "token" under auth and secret). Their index buckets
    // therefore merge the members of every group they belong to.
    SYNONYM_MAP = {
      // Authentication & Authorization
      "auth": ["authentication", "authorize", "login", "signin", "session", "jwt", "token", "oauth", "sso", "identity", "credential"],
      "permission": ["authorization", "access", "role", "acl", "rbac", "policy", "grant"],
      // Database & Storage
      "database": ["db", "repository", "store", "storage", "persistence", "orm", "sql", "query", "prisma", "sequelize", "typeorm", "mongo", "postgres", "mysql"],
      "cache": ["redis", "memcached", "ttl", "invalidation", "memoize", "store"],
      "migration": ["schema", "upgrade", "version", "evolution"],
      // API & Networking
      "api": ["endpoint", "route", "handler", "controller", "rest", "graphql", "rpc", "service"],
      "request": ["req", "http", "call", "fetch", "axios"],
      "response": ["res", "reply", "result", "output"],
      "middleware": ["interceptor", "filter", "plugin", "hook"],
      "gateway": ["proxy", "router", "load-balancer", "reverse-proxy"],
      // Frontend & UI
      "component": ["widget", "element", "view", "template"],
      "state": ["store", "redux", "zustand", "context", "model"],
      "render": ["paint", "draw", "display", "show"],
      "style": ["css", "theme", "design", "layout", "tailwind"],
      // Testing & Quality
      "test": ["spec", "suite", "case", "assertion", "mock", "stub", "fixture", "vitest", "jest", "mocha"],
      "validate": ["verify", "check", "assert", "ensure", "sanitize"],
      "error": ["exception", "failure", "bug", "issue", "crash"],
      // Performance & Optimization
      "optimize": ["performance", "speed", "fast", "efficient", "improve", "enhance"],
      "latency": ["delay", "lag", "slowness", "response-time"],
      "throughput": ["capacity", "volume", "rate", "bandwidth"],
      // Data & Collections
      "dataset": ["data", "record", "row", "entry", "item", "collection"],
      "empty": ["null", "blank", "missing", "absent", "none", "zero"],
      // Data Processing
      "parse": ["decode", "deserialize", "extract", "read"],
      "serialize": ["encode", "stringify", "format", "marshal"],
      "transform": ["map", "convert", "translate", "process"],
      "filter": ["select", "where", "match", "find"],
      // Configuration & Setup
      "config": ["configuration", "setting", "option", "preference", "env", "environment"],
      "init": ["initialize", "setup", "bootstrap", "start", "create"],
      "deploy": ["deployment", "release", "publish", "ship", "launch"],
      // Logging & Monitoring
      "log": ["logger", "logging", "trace", "debug", "info", "warn", "error"],
      "metric": ["measurement", "stat", "telemetry", "analytics", "tracking"],
      "monitor": ["observe", "watch", "track", "alert"],
      // Security
      "secret": ["credential", "key", "password", "token", "apikey", "sensitive"],
      "encrypt": ["cipher", "encode", "hash", "crypto"],
      "sanitize": ["escape", "clean", "validate", "filter"],
      // File System & I/O
      "file": ["document", "asset", "resource", "path"],
      "read": ["load", "fetch", "get", "retrieve"],
      "write": ["save", "persist", "store", "put"],
      "delete": ["remove", "unlink", "destroy", "drop"],
      // Async & Concurrency
      "async": ["asynchronous", "promise", "await", "concurrent", "parallel"],
      "queue": ["buffer", "backlog", "pending", "deferred"],
      "lock": ["mutex", "semaphore", "synchronize", "atomic"],
      // Architecture & Patterns
      "service": ["microservice", "api", "backend", "server", "daemon"],
      "client": ["consumer", "frontend", "user", "caller"],
      "event": ["message", "signal", "notification", "trigger"],
      "stream": ["flow", "pipe", "channel", "observable"],
      // Business Logic
      "user": ["account", "profile", "member", "customer"],
      "order": ["purchase", "transaction", "checkout", "cart"],
      "payment": ["billing", "invoice", "charge", "stripe", "paypal"],
      "notification": ["alert", "message", "email", "push", "sms"],
      // DevOps & Infrastructure
      "docker": ["container", "image", "dockerfile", "compose"],
      "kubernetes": ["k8s", "cluster", "pod", "deployment", "helm"],
      "ci": ["continuous-integration", "pipeline", "build", "github-actions", "jenkins"],
      "cd": ["continuous-deployment", "release", "deploy", "rollout"]
    };
    // The index must exist before buildBidirectionalIndex() writes into it.
    BIDIRECTIONAL_INDEX = /* @__PURE__ */ new Map();
    buildBidirectionalIndex();
  }
});
|
|
121
|
+
|
|
12
122
|
// src/engine/tfidf.ts
|
|
13
123
|
function buildIndex(files) {
|
|
14
124
|
const documents = /* @__PURE__ */ new Map();
|
|
@@ -32,15 +142,29 @@ function buildIndex(files) {
|
|
|
32
142
|
let totalLength = 0;
|
|
33
143
|
for (const doc of documents.values()) totalLength += doc.length;
|
|
34
144
|
const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
|
|
35
|
-
return { documents, idf, avgDocLength, totalDocs };
|
|
145
|
+
return { documents, idf, docFreq, avgDocLength, totalDocs };
|
|
36
146
|
}
|
|
37
|
-
function query(index, taskDescription, maxResults = 50) {
|
|
147
|
+
function query(index, taskDescription, maxResults = 50, expandSynonyms = true) {
|
|
38
148
|
const queryTerms = tokenize(taskDescription);
|
|
39
149
|
if (queryTerms.length === 0) return [];
|
|
40
150
|
const querySet = /* @__PURE__ */ new Map();
|
|
41
151
|
for (const term of queryTerms) {
|
|
42
152
|
querySet.set(term, (querySet.get(term) ?? 0) + 1);
|
|
43
153
|
}
|
|
154
|
+
if (expandSynonyms) {
|
|
155
|
+
const expandedSet = /* @__PURE__ */ new Map();
|
|
156
|
+
for (const [term, count] of querySet) {
|
|
157
|
+
const synonyms = expandTerm(term);
|
|
158
|
+
for (const syn of synonyms) {
|
|
159
|
+
const weight = syn === term ? count : count * 0.7;
|
|
160
|
+
expandedSet.set(syn, (expandedSet.get(syn) ?? 0) + weight);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
querySet.clear();
|
|
164
|
+
for (const [term, weight] of expandedSet) {
|
|
165
|
+
querySet.set(term, weight);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
44
168
|
const results = [];
|
|
45
169
|
const k1 = 1.5;
|
|
46
170
|
const b = 0.75;
|
|
@@ -52,8 +176,11 @@ function query(index, taskDescription, maxResults = 50) {
|
|
|
52
176
|
if (tf === 0) continue;
|
|
53
177
|
const termIdf = index.idf.get(qTerm) ?? 0;
|
|
54
178
|
if (termIdf <= 0) continue;
|
|
179
|
+
const df = index.docFreq.get(qTerm) ?? 0;
|
|
180
|
+
const dfRatio = index.totalDocs > 0 ? df / index.totalDocs : 0;
|
|
181
|
+
const domainDamp = dfRatio > 0.5 ? (1 - dfRatio) * (1 - dfRatio) : 1;
|
|
55
182
|
const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
|
|
56
|
-
score += termIdf * tfNorm * qCount;
|
|
183
|
+
score += termIdf * tfNorm * qCount * domainDamp;
|
|
57
184
|
matchedTerms.push(qTerm);
|
|
58
185
|
}
|
|
59
186
|
if (score > 0) {
|
|
@@ -116,32 +243,147 @@ function boostByPath(matches, allFiles, taskDescription) {
|
|
|
116
243
|
for (const m of matches) {
|
|
117
244
|
boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
|
|
118
245
|
}
|
|
246
|
+
const pathTermDocFreq = /* @__PURE__ */ new Map();
|
|
247
|
+
const allPathTokenSets = /* @__PURE__ */ new Map();
|
|
119
248
|
for (const filePath of allFiles) {
|
|
120
|
-
const
|
|
121
|
-
|
|
122
|
-
|
|
249
|
+
const tokens = new Set(tokenize(filePath.replace(/[/\\.]/g, " ")));
|
|
250
|
+
allPathTokenSets.set(filePath, tokens);
|
|
251
|
+
for (const t of tokens) {
|
|
252
|
+
pathTermDocFreq.set(t, (pathTermDocFreq.get(t) ?? 0) + 1);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
const N = allFiles.length;
|
|
256
|
+
function pathIdf(term) {
|
|
257
|
+
const df = pathTermDocFreq.get(term) ?? 0;
|
|
258
|
+
if (df === 0) return 0;
|
|
259
|
+
return Math.log((N + 1) / (df + 1));
|
|
260
|
+
}
|
|
261
|
+
for (const filePath of allFiles) {
|
|
262
|
+
const parts = filePath.replace(/\\/g, "/").split("/");
|
|
263
|
+
const fileName = parts.pop() ?? "";
|
|
264
|
+
const dirSegments = parts;
|
|
265
|
+
const dirTerms = tokenize(dirSegments.join(" ").replace(/[/\\.]/g, " "));
|
|
266
|
+
const fileTerms = tokenize(fileName.replace(/[.\-_]/g, " "));
|
|
267
|
+
const dirMatches = dirTerms.filter((t) => queryTerms.has(t));
|
|
268
|
+
const fileMatches = fileTerms.filter((t) => queryTerms.has(t));
|
|
269
|
+
const allPathMatches = [.../* @__PURE__ */ new Set([...dirMatches, ...fileMatches])];
|
|
270
|
+
if (allPathMatches.length > 0) {
|
|
271
|
+
const uniqueDirMatches = [...new Set(dirMatches)];
|
|
272
|
+
const uniqueFileMatches = [...new Set(fileMatches)].filter((t) => !uniqueDirMatches.includes(t));
|
|
273
|
+
const maxIdf = Math.log(N + 1);
|
|
274
|
+
let pathBoost = 0;
|
|
275
|
+
for (const t of uniqueDirMatches) {
|
|
276
|
+
pathBoost += 0.4 * (pathIdf(t) / maxIdf);
|
|
277
|
+
}
|
|
278
|
+
for (const t of uniqueFileMatches) {
|
|
279
|
+
pathBoost += 0.25 * (pathIdf(t) / maxIdf);
|
|
280
|
+
}
|
|
123
281
|
const existing = boosted.get(filePath);
|
|
124
|
-
const pathBoost = pathMatches.length * 0.3;
|
|
125
282
|
if (existing) {
|
|
126
|
-
existing.score =
|
|
127
|
-
for (const t of
|
|
283
|
+
existing.score = existing.score + pathBoost;
|
|
284
|
+
for (const t of allPathMatches) {
|
|
128
285
|
if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
|
|
129
286
|
}
|
|
130
287
|
} else {
|
|
131
288
|
boosted.set(filePath, {
|
|
132
289
|
filePath,
|
|
133
|
-
score:
|
|
134
|
-
matchedTerms:
|
|
290
|
+
score: pathBoost,
|
|
291
|
+
matchedTerms: allPathMatches
|
|
135
292
|
});
|
|
136
293
|
}
|
|
137
294
|
}
|
|
138
295
|
}
|
|
139
296
|
return [...boosted.values()].sort((a, b) => b.score - a.score);
|
|
140
297
|
}
|
|
141
|
-
|
|
298
|
+
/**
 * Raise the score of files whose path contains a term that LAYER_MAP
 * associates with one of the query terms.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Current matches. Entries are copied before being modified.
 * @param {Iterable<string>} allFiles - Every candidate file path.
 * @param {string} taskDescription - Free-text query; tokenized with tokenize().
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   `matches` itself when no query term has a LAYER_MAP entry; otherwise a new
 *   array sorted by descending score. Files that were not in `matches` but hit
 *   a layer term are added with the boost as their score and a
 *   `[layer:<term>]` marker as their only matched term.
 */
function boostByLayer(matches, allFiles, taskDescription) {
  const queryTerms = tokenize(taskDescription);
  const targetDirTerms = /* @__PURE__ */ new Set();
  for (const term of queryTerms) {
    // Query tokens come from free text. A plain `LAYER_MAP[term]` would also
    // resolve inherited members (e.g. "constructor"), yielding a non-iterable
    // value and a TypeError below, so only own entries are accepted.
    if (!Object.prototype.hasOwnProperty.call(LAYER_MAP, term)) continue;
    for (const l of LAYER_MAP[term]) targetDirTerms.add(l);
  }
  if (targetDirTerms.size === 0) return matches;
  const boosted = /* @__PURE__ */ new Map();
  for (const m of matches) {
    boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
  }
  for (const filePath of allFiles) {
    // The whole path (directories and file name) is tokenized.
    const dirTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
    const layerHits = dirTerms.filter((t) => targetDirTerms.has(t));
    if (layerHits.length > 0) {
      // 0.2 per hit, capped at 0.5.
      const layerBoost = Math.min(0.5, layerHits.length * 0.2);
      const existing = boosted.get(filePath);
      if (existing) {
        existing.score = existing.score + layerBoost;
      } else {
        boosted.set(filePath, {
          filePath,
          score: layerBoost,
          matchedTerms: [`[layer:${layerHits[0]}]`]
        });
      }
    }
  }
  return [...boosted.values()].sort((a, b) => b.score - a.score);
}
|
|
331
|
+
/**
 * Spread part of the score of the leading matches to the files they import
 * and to the files that import them.
 *
 * For each of the first `topK` entries of `matches`, the boost is
 * `score * boostFactor`. At most five imports receive the full boost and at
 * most five importers receive 70% of it; in both cases the candidates with the
 * highest current score are served first.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Matches, expected in ranking order. Entries are copied, not mutated.
 * @param {Map<string, Iterable<string>>} dependencies - file -> files it imports.
 * @param {number} [topK=10] - How many leading matches act as boost sources.
 * @param {number} [boostFactor=0.4] - Fraction of a source's score passed on.
 * @returns {Array} `matches` itself when either input is empty; otherwise a
 *   new array sorted by descending score.
 */
function boostByImports(matches, dependencies, topK = 10, boostFactor = 0.4) {
  if (matches.length === 0 || dependencies.size === 0) return matches;

  const NEIGHBOUR_LIMIT = 5;
  const boosted = new Map(
    matches.map((entry) => [entry.filePath, { ...entry, matchedTerms: [...entry.matchedTerms] }])
  );

  // Invert the dependency map: imported file -> files importing it.
  const reverseDeps = new Map();
  dependencies.forEach((targets, source) => {
    for (const target of targets) {
      if (!reverseDeps.has(target)) reverseDeps.set(target, []);
      reverseDeps.get(target).push(source);
    }
  });

  // Scores are read at call time, so boosts already applied influence later picks.
  const currentScore = (path) => boosted.get(path)?.score ?? 0;
  const strongestFirst = (paths) =>
    [...paths].sort((x, y) => currentScore(y) - currentScore(x)).slice(0, NEIGHBOUR_LIMIT);

  matches.slice(0, topK).forEach((source) => {
    const share = source.score * boostFactor;

    const imported = strongestFirst(dependencies.get(source.filePath) ?? []);
    imported.forEach((path) => {
      applyImportBoost(boosted, path, share, source.filePath, "imported-by");
    });

    // Ranked only after the imports above have been boosted.
    const importing = strongestFirst(reverseDeps.get(source.filePath) ?? []);
    importing.forEach((path) => {
      applyImportBoost(boosted, path, share * 0.7, source.filePath, "imports");
    });
  });

  return Array.from(boosted.values()).sort((left, right) => right.score - left.score);
}
|
|
370
|
+
/**
 * Add `boost` to the entry for `filePath` in `boosted`, creating the entry if
 * it does not exist yet.
 *
 * @param {Map<string, {filePath: string, score: number, matchedTerms: string[]}>} boosted
 *   Working result map; mutated in place.
 * @param {string} filePath - File that receives the boost.
 * @param {number} boost - Amount added to the score.
 * @param {string} parentPath - File the boost originates from; only its base
 *   name is used, in the marker of a newly created entry.
 * @param {string} relation - Label placed in the marker, e.g. "imports".
 * @returns {void}
 */
function applyImportBoost(boosted, filePath, boost, parentPath, relation) {
  const existing = boosted.get(filePath);
  if (existing) {
    // Existing entries keep their matched terms; only the score changes.
    existing.score = existing.score + boost;
  } else {
    // Normalise backslashes before taking the base name, as boostByPath does,
    // so Windows-style paths do not end up whole inside the marker.
    const parentName = parentPath.replace(/\\/g, "/").split("/").pop();
    boosted.set(filePath, {
      filePath,
      score: boost,
      matchedTerms: [`[${relation}:${parentName}]`]
    });
  }
}
|
|
382
|
+
var STOP_WORDS, TERM_FAMILIES, LAYER_MAP;
|
|
142
383
|
var init_tfidf = __esm({
|
|
143
384
|
"src/engine/tfidf.ts"() {
|
|
144
385
|
"use strict";
|
|
386
|
+
init_synonyms();
|
|
145
387
|
STOP_WORDS = /* @__PURE__ */ new Set([
|
|
146
388
|
// Language keywords
|
|
147
389
|
"import",
|
|
@@ -285,6 +527,29 @@ var init_tfidf = __esm({
|
|
|
285
527
|
["encryp", "encrypt"],
|
|
286
528
|
["decryp", "encrypt"]
|
|
287
529
|
];
|
|
530
|
+
LAYER_MAP = {
|
|
531
|
+
// Query terms → directory segments that should be boosted
|
|
532
|
+
"endpoint": ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
|
|
533
|
+
"api": ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
|
|
534
|
+
"controller": ["endpoint", "controller", "handler", "route", "router"],
|
|
535
|
+
"repositori": ["repositori", "dao", "store", "persist"],
|
|
536
|
+
"databas": ["repositori", "dao", "store", "persist", "migrat"],
|
|
537
|
+
"storag": ["repositori", "dao", "store", "persist"],
|
|
538
|
+
"cach": ["cach", "redis", "memcach", "store"],
|
|
539
|
+
"servic": ["servic", "usecas", "core"],
|
|
540
|
+
"usecas": ["usecas", "servic", "core"],
|
|
541
|
+
"config": ["config", "inject", "setup", "bootstrap"],
|
|
542
|
+
"inject": ["config", "inject", "setup"],
|
|
543
|
+
"depend": ["config", "inject", "setup"],
|
|
544
|
+
"event": ["event", "listen", "handler", "subscrib"],
|
|
545
|
+
"error": ["error", "except", "handler", "fault"],
|
|
546
|
+
"except": ["except", "error", "handler", "fault"],
|
|
547
|
+
"model": ["model", "entiti", "dto", "domain", "schema"],
|
|
548
|
+
"entiti": ["entiti", "model", "dto", "domain"],
|
|
549
|
+
"metric": ["metric", "monitor", "observ", "telemetri"],
|
|
550
|
+
"test": ["test", "spec", "mock", "fixtur"],
|
|
551
|
+
"migrat": ["migrat", "schema", "databas"]
|
|
552
|
+
};
|
|
288
553
|
}
|
|
289
554
|
});
|
|
290
555
|
|
|
@@ -536,7 +801,19 @@ var DEFAULT_CONFIG = {
|
|
|
536
801
|
},
|
|
537
802
|
ignore: {
|
|
538
803
|
dirs: ["node_modules", "dist", "build", ".git", "coverage", "__pycache__", ".next", "vendor", ".cto"],
|
|
539
|
-
patterns: [
|
|
804
|
+
patterns: [
|
|
805
|
+
"*.min.js",
|
|
806
|
+
"*.map",
|
|
807
|
+
"*.lock",
|
|
808
|
+
"*.generated.*",
|
|
809
|
+
"CHANGELOG*",
|
|
810
|
+
"LICENSE*",
|
|
811
|
+
"CONTRIBUTING*",
|
|
812
|
+
"CODE_OF_CONDUCT*",
|
|
813
|
+
"AUTHORS*",
|
|
814
|
+
"CODEOWNERS",
|
|
815
|
+
"SECURITY*"
|
|
816
|
+
]
|
|
540
817
|
},
|
|
541
818
|
maxDepth: 20
|
|
542
819
|
},
|
|
@@ -1724,11 +2001,15 @@ function computeTypeProviderUsage(files, graph) {
|
|
|
1724
2001
|
|
|
1725
2002
|
// src/engine/analyzer.ts
|
|
1726
2003
|
function matchesPattern(filename, patterns) {
|
|
2004
|
+
const lower = filename.toLowerCase();
|
|
1727
2005
|
for (const pattern of patterns) {
|
|
1728
2006
|
if (pattern.startsWith("*.")) {
|
|
1729
2007
|
const ext = pattern.slice(1);
|
|
1730
2008
|
if (filename.endsWith(ext)) return true;
|
|
1731
|
-
} else if (
|
|
2009
|
+
} else if (pattern.endsWith("*")) {
|
|
2010
|
+
const prefix = pattern.slice(0, -1).toLowerCase();
|
|
2011
|
+
if (lower.startsWith(prefix)) return true;
|
|
2012
|
+
} else if (lower === pattern.toLowerCase()) {
|
|
1732
2013
|
return true;
|
|
1733
2014
|
}
|
|
1734
2015
|
}
|
|
@@ -2874,17 +3155,19 @@ async function selectContext(input) {
|
|
|
2874
3155
|
const selectedFiles = [];
|
|
2875
3156
|
let usedTokens = 0;
|
|
2876
3157
|
const hasSemanticSignal = semanticMap.size > 0;
|
|
3158
|
+
const maxSemanticScore = hasSemanticSignal ? Math.max(...Array.from(semanticMap.values())) : 0;
|
|
3159
|
+
const semanticFloor = maxSemanticScore * 0.1;
|
|
2877
3160
|
for (const file of candidates) {
|
|
2878
3161
|
const isTarget = targetSet.has(file.relativePath);
|
|
2879
3162
|
const isMustInclude = mustInclude.has(file.relativePath);
|
|
2880
3163
|
if (hasSemanticSignal && !isTarget && !isMustInclude) {
|
|
2881
3164
|
const semScore = semanticMap.get(file.relativePath) ?? 0;
|
|
2882
3165
|
const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
|
|
2883
|
-
if (semScore
|
|
3166
|
+
if (semScore < semanticFloor && lrnBoost <= 0) {
|
|
2884
3167
|
decisions.push({
|
|
2885
3168
|
file: file.relativePath,
|
|
2886
3169
|
action: "exclude",
|
|
2887
|
-
reason:
|
|
3170
|
+
reason: `Skipped: semantic score ${semScore.toFixed(3)} below floor ${semanticFloor.toFixed(3)}`
|
|
2888
3171
|
});
|
|
2889
3172
|
continue;
|
|
2890
3173
|
}
|
|
@@ -3015,6 +3298,212 @@ function buildReason(file, level, isTarget, isMustInclude) {
|
|
|
3015
3298
|
// src/engine/context-pipeline.ts
|
|
3016
3299
|
init_tfidf();
|
|
3017
3300
|
|
|
3301
|
+
// src/engine/ast-tokenizer.ts
|
|
3302
|
+
init_tfidf();
|
|
3303
|
+
// Annotation / decorator name -> extra terms that augmentContentWithStructure()
// prepends to a file's content when that annotation is found in the file.
// Keys are lower-case because the extractors lower-case annotation names
// before the lookup. The values look like stemmed tokens ("repositori",
// "servic").
// NOTE(review): presumably the values match the output of tokenize() - confirm.
// The "override" key is never reached for Java files, because extractJava()
// drops "override" annotations; the Python and TypeScript extractors do not.
var ANNOTATION_LAYER_MAP = {
  "repository": ["repositori", "dao", "store", "persist", "databas"],
  "service": ["servic", "usecas", "busi", "logic"],
  "controller": ["control", "endpoint", "api", "rest", "handler"],
  "restcontroller": ["control", "endpoint", "api", "rest", "handler"],
  "component": ["compon", "bean", "inject"],
  "entity": ["entiti", "model", "domain", "persist"],
  "configuration": ["config", "setup", "inject", "wire"],
  "bean": ["config", "inject", "wire", "bean"],
  "autowired": ["inject", "wire", "depend"],
  "inject": ["inject", "wire", "depend"],
  "provides": ["inject", "wire", "depend", "config"],
  "singleton": ["singleton", "scope", "lifecycl"],
  "test": ["test", "spec", "assert", "mock"],
  "override": ["overrid", "inherit", "polymorph"],
  "transactional": ["transact", "databas", "commit", "rollback"],
  "cacheable": ["cach", "ttl", "evict", "invalidat"],
  "async": ["async", "concurr", "thread", "parallel"],
  "eventlistener": ["event", "listen", "handler", "subscrib"],
  "scheduled": ["schedul", "cron", "timer", "job"],
  "slf4j": ["log", "metric", "observ"],
  "data": ["model", "entiti", "dto", "data"],
  "getter": ["model", "entiti", "dto", "accessor"],
  "setter": ["model", "entiti", "dto", "mutator"],
  "builder": ["build", "pattern", "fluent"],
  "value": ["model", "entiti", "dto", "immut"]
};
|
|
3330
|
+
/**
 * Extract class names, method names, annotations and parent types from a
 * source file, choosing the extractor from the file extension.
 *
 * @param {string} content - File content.
 * @param {string} filePath - Path whose text after the last "." selects the language.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: (string|null), language: string}}
 *   The extractor's result, or an all-empty record with language "unknown"
 *   when the extension is not recognised.
 */
function extractStructuralTokens(content, filePath) {
  const suffix = filePath.split(".").pop()?.toLowerCase() ?? "";
  const language = detectLanguage2(suffix);

  if (language === "java") return extractJava(content);
  if (language === "python") return extractPython(content);
  if (language === "go") return extractGo(content);
  if (language === "typescript") return extractTypeScript(content);

  return {
    classNames: [],
    methodNames: [],
    annotations: [],
    parents: [],
    packageName: null,
    language: "unknown"
  };
}
|
|
3346
|
+
/**
 * Map a lower-case file extension (without the dot) to the language family
 * used by the structural extractors.
 *
 * @param {string} ext - File extension, e.g. "ts".
 * @returns {"java"|"python"|"go"|"typescript"|"unknown"} JavaScript and
 *   TypeScript extensions (ts, tsx, js, jsx) all map to "typescript".
 */
function detectLanguage2(ext) {
  if (ext === "java") return "java";
  if (ext === "py") return "python";
  if (ext === "go") return "go";
  if (["ts", "tsx", "js", "jsx"].includes(ext)) return "typescript";
  return "unknown";
}
|
|
3363
|
+
/**
 * Regex-based extraction of structural names from Java source text.
 *
 * @param {string} content - Java source.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: (string|null), language: "java"}}
 *   - packageName: from the first `package x.y;` line, else null.
 *   - annotations: lower-cased `@Name` occurrences, excluding "override" and
 *     names of two characters or fewer.
 *   - classNames / parents: class, interface and enum names with their
 *     `extends` and `implements` targets.
 *   - methodNames: names that follow a modifier and a return type, excluding
 *     equals, hashCode, toString, main, get and set.
 */
function extractJava(content) {
  const packageDecl = /^package\s+([\w.]+)\s*;/m.exec(content);
  const packageName = packageDecl ? packageDecl[1] : null;

  const annotations = Array.from(content.matchAll(/@(\w+)/g), (hit) => hit[1].toLowerCase())
    .filter((tag) => tag !== "override" && tag.length > 2);

  const classNames = [];
  const parents = [];
  const typePattern = /(?:public|private|protected|abstract|final|static)?\s*(?:class|interface|enum)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
  for (const [, typeName, superType, interfaceList] of content.matchAll(typePattern)) {
    classNames.push(typeName);
    if (superType) parents.push(superType);
    if (interfaceList) {
      const interfaces = interfaceList
        .split(",")
        .map((piece) => piece.trim())
        .filter((piece) => piece);
      parents.push(...interfaces);
    }
  }

  // Boilerplate method names that are not recorded.
  const ignoredMethods = ["equals", "hashCode", "toString", "main", "get", "set"];
  const methodPattern = /(?:public|private|protected|static|abstract|final|synchronized|default)\s+(?:<[\w\s,?]+>\s+)?(?:[\w<>\[\]?,\s]+)\s+(\w+)\s*\(/g;
  const methodNames = Array.from(content.matchAll(methodPattern), (hit) => hit[1])
    .filter((methodName) => !ignoredMethods.includes(methodName));

  return { classNames, methodNames, annotations, parents, packageName, language: "java" };
}
|
|
3401
|
+
/**
 * Regex-based extraction of structural names from Python source text.
 *
 * @param {string} content - Python source.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: null, language: "python"}}
 *   - classNames / parents: `class Name(Base, ...)` declarations; a base is cut
 *     at its first "[" and a literal "object" base is dropped.
 *   - annotations: lower-cased decorator names found at the start of a line.
 *   - methodNames: `def` / `async def` names with leading and trailing
 *     underscores stripped. Names starting with "__" are skipped, except
 *     "__init__", which is recorded as "init".
 */
function extractPython(content) {
  const classNames = [];
  const parents = [];
  for (const [, className, baseList] of content.matchAll(/^\s*class\s+(\w+)(?:\(([^)]+)\))?/gm)) {
    classNames.push(className);
    if (!baseList) continue;
    baseList.split(",").forEach((rawBase) => {
      const base = rawBase.trim().split("[")[0];
      if (base && base !== "object") parents.push(base);
    });
  }

  const annotations = Array.from(content.matchAll(/^\s*@(\w+)/gm), (hit) => hit[1].toLowerCase());

  const methodNames = [];
  for (const [, defName] of content.matchAll(/^\s*(?:async\s+)?def\s+(\w+)/gm)) {
    const isSkippedDunder = defName.startsWith("__") && defName !== "__init__";
    if (isSkippedDunder) continue;
    methodNames.push(defName.replace(/^_+|_+$/g, ""));
  }

  return { classNames, methodNames, annotations, parents, packageName: null, language: "python" };
}
|
|
3432
|
+
/**
 * Regex-based extraction of structural names from Go source text.
 *
 * @param {string} content - Go source.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: (string|null), language: "go"}}
 *   - packageName: from the first `package name` line, else null.
 *   - classNames: names declared as `type X struct` or `type X interface`.
 *   - methodNames: every `func` name, with or without a receiver.
 *   - parents: the receiver type of each method, once per method, so
 *     duplicates are possible.
 *   - annotations: always empty.
 */
function extractGo(content) {
  const packageDecl = /^package\s+(\w+)/m.exec(content);

  const classNames = Array.from(
    content.matchAll(/type\s+(\w+)\s+(?:struct|interface)/g),
    (hit) => hit[1]
  );

  const methodNames = [];
  const parents = [];
  for (const [, receiverType, funcName] of content.matchAll(/func\s+(?:\(\w+\s+\*?(\w+)\)\s+)?(\w+)\s*\(/g)) {
    methodNames.push(funcName);
    if (receiverType) parents.push(receiverType);
  }

  return {
    classNames,
    methodNames,
    annotations: [],
    parents,
    packageName: packageDecl ? packageDecl[1] : null,
    language: "go"
  };
}
|
|
3453
|
+
/**
 * Regex-based extraction of structural names from TypeScript or JavaScript
 * source text.
 *
 * @param {string} content - Source text.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: null, language: "typescript"}}
 *   - classNames / parents: class and interface names with their `extends`
 *     and `implements` targets.
 *   - annotations: every lower-cased `@name` occurrence anywhere in the text.
 *   - methodNames: names of `function` declarations only.
 */
function extractTypeScript(content) {
  const classNames = [];
  const parents = [];
  const typePattern = /(?:export\s+)?(?:abstract\s+)?(?:class|interface)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
  for (const [, typeName, superType, interfaceList] of content.matchAll(typePattern)) {
    classNames.push(typeName);
    if (superType) parents.push(superType);
    if (interfaceList) {
      const interfaces = interfaceList
        .split(",")
        .map((piece) => piece.trim())
        .filter((piece) => piece);
      parents.push(...interfaces);
    }
  }

  const annotations = Array.from(content.matchAll(/@(\w+)/g), (hit) => hit[1].toLowerCase());

  const methodNames = Array.from(
    content.matchAll(/(?:export\s+)?(?:async\s+)?function\s+(\w+)/g),
    (hit) => hit[1]
  );

  return { classNames, methodNames, annotations, parents, packageName: null, language: "typescript" };
}
|
|
3482
|
+
/**
 * Prepend a line of structural keywords to a file's content so that class
 * names, method names, parent types and annotation-derived terms carry extra
 * weight when the text is indexed.
 *
 * Weighting by repetition: class names are emitted three times, method names
 * twice, parent names once. Each annotation with an entry in
 * ANNOTATION_LAYER_MAP contributes that entry's terms once.
 *
 * @param {string} content - Original file content.
 * @param {string} filePath - Path passed on to extractStructuralTokens().
 * @returns {string} `content` unchanged when nothing was extracted; otherwise
 *   the keywords joined by spaces, a newline, then `content`.
 */
function augmentContentWithStructure(content, filePath) {
  const struct = extractStructuralTokens(content, filePath);
  // Split camelCase at lower-to-upper boundaries and lower-case the result.
  const toWords = (identifier) => identifier.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
  const augmentParts = [];
  for (const name of struct.classNames) {
    const words = toWords(name);
    augmentParts.push(words, words, words);
  }
  for (const name of struct.methodNames) {
    const words = toWords(name);
    augmentParts.push(words, words);
  }
  for (const parent of struct.parents) {
    augmentParts.push(toWords(parent));
  }
  for (const ann of struct.annotations) {
    // Annotation names are scraped from file text (a JSDoc "@constructor" tag
    // is enough). A plain `ANNOTATION_LAYER_MAP[ann]` would resolve inherited
    // members to a non-iterable value and the spread below would throw, so
    // only own entries are accepted.
    if (!Object.prototype.hasOwnProperty.call(ANNOTATION_LAYER_MAP, ann)) continue;
    augmentParts.push(...ANNOTATION_LAYER_MAP[ann]);
  }
  if (augmentParts.length === 0) return content;
  return augmentParts.join(" ") + "\n" + content;
}
|
|
3506
|
+
|
|
3018
3507
|
// src/engine/index-cache.ts
|
|
3019
3508
|
init_tfidf();
|
|
3020
3509
|
import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, existsSync as existsSync4, mkdirSync as mkdirSync2, statSync } from "fs";
|
|
@@ -3134,7 +3623,7 @@ function rebuildIndex(cachedFiles) {
|
|
|
3134
3623
|
for (const [term, df] of docFreq) {
|
|
3135
3624
|
idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
|
|
3136
3625
|
}
|
|
3137
|
-
return { documents, idf, avgDocLength, totalDocs };
|
|
3626
|
+
return { documents, idf, docFreq, avgDocLength, totalDocs };
|
|
3138
3627
|
}
|
|
3139
3628
|
|
|
3140
3629
|
// src/engine/reranker.ts
|
|
@@ -3371,10 +3860,1426 @@ function applyQualityGate(scored) {
|
|
|
3371
3860
|
return { passed, filtered, threshold };
|
|
3372
3861
|
}
|
|
3373
3862
|
|
|
3863
|
+
// src/engine/call-graph.ts
|
|
3864
|
+
/**
 * Classify a file for call-graph extraction by the text after its last ".".
 *
 * @param {string} filePath - File path; the extension is compared lower-cased.
 * @returns {"java"|"ts"|"python"|"go"|null} "ts" covers ts, tsx, js, jsx, mts
 *   and mjs; null is returned for any other extension.
 */
function getLanguage(filePath) {
  const suffix = filePath.split(".").pop()?.toLowerCase() ?? "";
  switch (suffix) {
    case "java":
      return "java";
    case "ts":
    case "tsx":
    case "js":
    case "jsx":
    case "mts":
    case "mjs":
      return "ts";
    case "py":
      return "python";
    case "go":
      return "go";
    default:
      return null;
  }
}
|
|
3872
|
+
/**
 * Collect method definitions from Java source text using regular expressions.
 *
 * @param {string} content - Java source.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className: (string|undefined), filePath: string,
 *   isExported: boolean}>} One entry per matched method. `className` is the
 *   first `public` or `abstract` class or interface found in the file
 *   (undefined when there is none) and is attached to every entry.
 *   `isExported` is true when the matched text contains "public" followed by
 *   whitespace.
 */
function extractJavaDefinitions(content, filePath) {
  const defs = [];
  const classMatch = content.match(/(?:public|abstract)\s+(?:class|interface)\s+(\w+)/);
  const className = classMatch?.[1];
  const methodRegex = /(?:public|protected|private|static|\s)+\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm;
  let match;
  while ((match = methodRegex.exec(content)) !== null) {
    const name = match[1];
    // Skip constructors (same name as the class) and control-flow keywords
    // that the loose pattern can pick up as if they were method names.
    if (name === className || name === "if" || name === "for" || name === "while" || name === "switch" || name === "catch" || name === "return") continue;
    const isPublic = /public\s/.test(match[0]);
    defs.push({
      name,
      className,
      filePath,
      isExported: isPublic
    });
  }
  return defs;
}
|
|
3892
|
+
/**
 * Heuristically extract function, class-method and exported arrow-function
 * definitions from TypeScript/JavaScript source text.
 *
 * Results are appended in three passes: `function` declarations, then methods
 * found after each `class` header, then `export const name = (...)` arrows.
 * Method scanning starts right after the class name and is regex based; it
 * does not track the closing brace of the class, it only stops once a match
 * lies more than 10,000 characters past the class header (or at end of file).
 *
 * @param {string} content - Source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className?: string, filePath: string, isExported: boolean}>}
 */
function extractTsDefinitions(content, filePath) {
  const defs = [];

  for (const fn of content.matchAll(/(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(/gm)) {
    defs.push({ name: fn[1], filePath, isExported: fn[0].startsWith("export") });
  }

  // Words that look like `name(...) {` but are not method names. `switch` is
  // included so that `switch (x) {` inside a method body is not recorded.
  const nonMethodNames = new Set([
    "constructor",
    "if",
    "for",
    "while",
    "switch",
    "catch",
    "return",
    "function",
    "class"
  ]);
  for (const cls of content.matchAll(/(?:export\s+)?class\s+(\w+)/gm)) {
    const className = cls[1];
    const isExportedClass = cls[0].startsWith("export");
    const classStart = cls.index + cls[0].length;
    // Fresh stateful regex per class so lastIndex can be positioned at the header.
    const methodInClassRegex = /(?:async\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*\w[\w<>\[\]|,\s]*\s*)?{/gm;
    methodInClassRegex.lastIndex = classStart;
    let methodMatch;
    while ((methodMatch = methodInClassRegex.exec(content)) !== null) {
      const name = methodMatch[1];
      if (nonMethodNames.has(name)) continue;
      defs.push({ name, className, filePath, isExported: isExportedClass });
      // Bound the scan: stop once we are far beyond the class header.
      if (methodMatch.index - classStart > 1e4) break;
    }
  }

  for (const arrow of content.matchAll(/export\s+const\s+(\w+)\s*=\s*(?:async\s+)?\(/gm)) {
    defs.push({ name: arrow[1], filePath, isExported: true });
  }
  return defs;
}
|
|
3921
|
+
/**
 * Heuristically extract function and method definitions from Python source.
 *
 * The source is scanned once for `class` and `def` / `async def` lines.
 * A column-0 `class` opens a class scope; a column-0 `def` closes it. An
 * indented `def` is reported as a method of the currently open class, if any.
 * Names starting with "_" are skipped, except `__init__`, which is reported
 * under the class name (or as `__init__` when no class is open).
 *
 * Indentation is matched with spaces/tabs only, so a blank line in front of a
 * top-level function is never mistaken for indentation.
 *
 * @param {string} content - Python source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className: (string|undefined), filePath: string, isExported: boolean}>}
 */
function extractPythonDefinitions(content, filePath) {
  const defs = [];
  const declRegex = /^([ \t]*)(?:(class)\s+(\w+)|(?:async[ \t]+)?def\s+(\w+)\s*\()/gm;
  // Class opened by the most recent column-0 declaration (undefined if that was a def).
  let openClass;
  for (const decl of content.matchAll(declRegex)) {
    const isTopLevel = decl[1].length === 0;
    if (decl[2]) {
      // Only column-0 classes are tracked; nested classes are ignored.
      if (isTopLevel) openClass = decl[3];
      continue;
    }
    const name = decl[4];
    if (isTopLevel) openClass = undefined;
    if (name.startsWith("_") && name !== "__init__") continue;
    const className = isTopLevel ? undefined : openClass;
    defs.push({
      name: name === "__init__" ? className ?? name : name,
      className,
      filePath,
      isExported: !name.startsWith("_")
    });
  }
  return defs;
}
|
|
3944
|
+
/**
 * Heuristically extract top-level function and method definitions from Go
 * source text. Functions are listed first, followed by methods.
 *
 * A definition counts as exported only when its name starts with an
 * upper-case letter; names starting with "_" or a digit are not exported.
 * An optional type-parameter list (`func Map[T any](...)`) and generic
 * receivers (`func (s *Stack[T]) Push(...)`) are accepted.
 *
 * @param {string} content - Go source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className?: string, filePath: string, isExported: boolean}>}
 *   `className` holds the receiver type for methods and is absent for functions.
 */
function extractGoDefinitions(content, filePath) {
  // True only for a cased character in its upper-case form.
  const startsUpperCase = (name) => {
    const first = name[0];
    return first === first.toUpperCase() && first !== first.toLowerCase();
  };
  const defs = [];
  for (const fn of content.matchAll(/^func\s+(\w+)\s*(?:\[[^\]]*\])?\s*\(/gm)) {
    const name = fn[1];
    defs.push({ name, filePath, isExported: startsUpperCase(name) });
  }
  for (const method of content.matchAll(/^func\s+\(\s*\w+\s+\*?(\w+)(?:\[[^\]]*\])?\s*\)\s+(\w+)\s*\(/gm)) {
    const [, receiverType, name] = method;
    defs.push({
      name,
      className: receiverType,
      filePath,
      isExported: startsUpperCase(name)
    });
  }
  return defs;
}
|
|
3967
|
+
/**
 * Collect `receiver.method(` call sites from Java source text.
 *
 * Two regex passes are made over the whole text: first receivers that start
 * with a lower-case letter (variables/fields), then receivers that start with
 * an upper-case letter (static-style calls). Calls whose receiver or method
 * appears in the corresponding skip list are dropped.
 *
 * @param {string} content - Java source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   All lower-case-receiver calls in source order, followed by all
 *   upper-case-receiver calls in source order.
 */
function extractJavaCalls(content, filePath) {
  const instanceReceiversToSkip = new Set([
    "System", "LOG", "LOGGER", "logger", "log", "this", "super",
    "String", "Integer", "Long", "Boolean", "Double", "Float",
    "Math", "Arrays", "Collections", "Objects", "Optional",
    "List", "Map", "Set", "Stream"
  ]);
  const instanceMethodsToSkip = new Set([
    "toString", "hashCode", "equals", "getClass", "wait", "notify",
    "length", "size", "isEmpty", "get", "set", "add", "remove", "contains", "put",
    "stream", "map", "filter", "collect",
    "orElse", "orElseGet", "orElseThrow", "isPresent", "ifPresent",
    "of", "valueOf", "format", "println", "append", "build", "builder",
    "thenReturn", "when", "verify", "mock", "given"
  ]);
  const staticReceiversToSkip = new Set([
    "System", "Math", "Arrays", "Collections", "Objects", "Optional",
    "String", "Integer", "Long", "Boolean", "Double", "Float",
    "LoggerFactory", "Logger", "Assert", "Mockito", "Assertions",
    "ResponseEntity", "HttpStatus"
  ]);
  const staticMethodsToSkip = new Set([
    "of", "valueOf", "format", "parse", "toString", "getLogger",
    "builder", "newBuilder", "create", "getInstance"
  ]);

  const calls = [];
  // Run one regex pass and keep every call not excluded by the skip sets.
  const scan = (pattern, skipReceivers, skipMethods) => {
    for (const [, receiverName, methodName] of content.matchAll(pattern)) {
      if (skipReceivers.has(receiverName)) continue;
      if (skipMethods.has(methodName)) continue;
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  };
  scan(/(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm, instanceReceiversToSkip, instanceMethodsToSkip);
  scan(/(?<!\w)([A-Z]\w+)\.([a-z]\w+)\s*\(/gm, staticReceiversToSkip, staticMethodsToSkip);
  return calls;
}
|
|
4079
|
+
/**
 * Collect `receiver.method(` call sites from TypeScript/JavaScript source.
 *
 * Only receivers and methods that start with a lower-case letter and are at
 * least two characters long are matched. Calls on listed globals/test helpers
 * and calls to listed built-in method names are dropped.
 *
 * @param {string} content - Source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Remaining calls in source order.
 */
function extractTsCalls(content, filePath) {
  const receiversToSkip = new Set([
    "console", "process", "Math", "JSON", "Promise", "Object", "Array",
    "String", "Number", "Date", "Error", "RegExp", "Buffer",
    "this", "super", "window", "document",
    "expect", "describe", "it", "test", "vi", "jest"
  ]);
  const methodsToSkip = new Set([
    "toString", "valueOf", "hasOwnProperty", "length",
    "push", "pop", "shift", "unshift", "slice", "splice",
    "map", "filter", "reduce", "forEach", "find", "findIndex", "some", "every",
    "includes", "indexOf", "join", "split", "replace", "match", "trim",
    "toLowerCase", "toUpperCase", "startsWith", "endsWith",
    "keys", "values", "entries", "has", "get", "set", "delete", "add", "size",
    "then", "catch", "finally", "resolve", "reject",
    "stringify", "parse", "log", "warn", "error", "info", "debug"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm)) {
    const isNoise = receiversToSkip.has(receiverName) || methodsToSkip.has(methodName);
    if (!isNoise) {
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  }
  return calls;
}
|
|
4167
|
+
/**
 * Collect `receiver.method(` call sites from Python source text.
 *
 * A leading `self.` in front of the receiver is skipped by the pattern, so
 * `self.repo.save(` is reported with receiver `repo`. Calls on listed
 * built-in/module receivers and calls to listed container/string method
 * names are dropped.
 *
 * @param {string} content - Python source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Remaining calls in source order.
 */
function extractPythonCalls(content, filePath) {
  const receiversToSkip = new Set([
    "self", "cls", "os", "sys", "json", "logging", "print",
    "str", "int", "float", "list", "dict", "set", "tuple",
    "super", "type", "isinstance", "len", "range", "enumerate"
  ]);
  const methodsToSkip = new Set([
    "append", "extend", "insert", "remove", "pop", "clear",
    "get", "keys", "values", "items", "update",
    "format", "join", "split", "strip", "replace", "lower", "upper",
    "startswith", "endswith", "encode", "decode"
  ]);

  const calls = [];
  const callPattern = /(?<!\w)(?:self\.)?([a-z_]\w+)\.([a-z_]\w+)\s*\(/gm;
  for (const [, receiverName, methodName] of content.matchAll(callPattern)) {
    const isNoise = receiversToSkip.has(receiverName) || methodsToSkip.has(methodName);
    if (!isNoise) {
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  }
  return calls;
}
|
|
4224
|
+
/**
 * Collect `receiver.Method(` call sites from Go source text.
 *
 * The pattern requires a lower-case-initial receiver and an upper-case-initial
 * method name. Calls whose receiver is one of the listed package names are
 * dropped.
 *
 * @param {string} content - Go source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Remaining calls in source order.
 */
function extractGoCalls(content, filePath) {
  const packagesToSkip = new Set([
    "fmt", "log", "os", "io", "strings", "strconv", "bytes", "context",
    "errors", "sync", "time", "math", "sort", "http", "json", "testing", "reflect"
  ]);
  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)([a-z]\w+)\.([A-Z]\w+)\s*\(/gm)) {
    if (!packagesToSkip.has(receiverName)) {
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  }
  return calls;
}
|
|
4254
|
+
/**
 * Build a lookup from identifiers used in a Java file to the project file
 * that declares the imported class.
 *
 * For every `import a.b.Name;` whose simple name equals the base name of a
 * `.java` file in `allFiles`, three kinds of keys are registered:
 *   - the class name itself (`UserService`),
 *   - its lower-camel form (`userService`), a conventional variable name,
 *   - the name of any `private`/`protected` field declared with that type.
 *
 * When several files share a base name, the first one in `allFiles` wins.
 *
 * @param {string} content - Java source text of the importing file.
 * @param {string[]} allFiles - Project file paths using "/" separators.
 * @returns {Map<string, string>} Identifier -> target file path.
 */
function buildJavaImportMap(content, allFiles) {
  // Index .java files by simple class name once instead of scanning per import.
  const javaSuffix = ".java";
  const fileByClassName = new Map();
  for (const file of allFiles) {
    if (!file.endsWith(javaSuffix)) continue;
    const simpleName = file.slice(file.lastIndexOf("/") + 1, -javaSuffix.length);
    if (!fileByClassName.has(simpleName)) fileByClassName.set(simpleName, file);
  }

  const importMap = new Map();
  for (const [, className] of content.matchAll(/^import\s+(?:static\s+)?[\w.]+\.(\w+)\s*;/gm)) {
    const targetFile = fileByClassName.get(className);
    if (!targetFile) continue;
    importMap.set(className, targetFile);
    importMap.set(className.charAt(0).toLowerCase() + className.slice(1), targetFile);
  }

  // Fields typed with an imported class resolve to the same file.
  for (const [, typeName, fieldName] of content.matchAll(/(?:private|protected)\s+(?:final\s+)?(\w+)\s+(\w+)\s*[;=]/gm)) {
    const targetFile = importMap.get(typeName);
    if (targetFile) importMap.set(fieldName, targetFile);
  }
  return importMap;
}
|
|
4281
|
+
/**
 * Build a lookup from locally bound import names in a TS/JS file to the
 * project file the import specifier most plausibly refers to.
 *
 * Handles named imports (`{ a, b as c, type T }`), default imports,
 * namespace imports (`* as ns`) and the `import type` form. The importing
 * file's own location is not known here, so a specifier is resolved by
 * suffix: all leading `./` and `../` segments and a known source extension
 * are removed, and the remainder must equal a project file's path (minus its
 * extension), or its `<dir>/index` form, on a whole-path-segment boundary.
 * The first matching entry of `allFiles` wins.
 *
 * @param {string} content - Source text of the importing file.
 * @param {string[]} allFiles - Project file paths using "/" separators.
 * @returns {Map<string, string>} Local binding name -> target file path.
 */
function buildTsImportMap(content, allFiles) {
  const sourceExt = /\.(ts|tsx|js|jsx|mts|mjs)$/;
  const candidates = allFiles.map((file) => ({ file, stem: file.replace(sourceExt, "") }));
  // Suffix match that only succeeds on a "/" boundary ("user" must not hit "superuser").
  const endsWithPath = (stem, suffix) => stem === suffix || stem.endsWith("/" + suffix);

  const importMap = new Map();
  const importRegex = /import\s+(?:type\s+)?(?:\{([^}]+)\}|\*\s+as\s+(\w+)|(\w+))\s+from\s+['"]([^'"]+)['"]/gm;
  for (const match of content.matchAll(importRegex)) {
    const [, namedImports, namespaceImport, defaultImport, modulePath] = match;
    const target = modulePath.replace(/^(?:\.{1,2}\/)+/, "").replace(sourceExt, "");
    // "./" and similar leave nothing to match on; an empty suffix would match every file.
    if (target === "") continue;
    const resolved = candidates.find(
      ({ stem }) => endsWithPath(stem, target) || endsWithPath(stem, target + "/index")
    );
    if (!resolved) continue;

    if (namedImports) {
      for (const specifier of namedImports.split(",")) {
        // Keep the local alias of `a as b` and drop an inline `type` modifier.
        const localName = specifier.trim().split(" as ").pop().trim().replace(/^type\s+/, "");
        if (localName) importMap.set(localName, resolved.file);
      }
    }
    if (namespaceImport) importMap.set(namespaceImport, resolved.file);
    if (defaultImport) importMap.set(defaultImport, resolved.file);
  }
  return importMap;
}
|
|
4307
|
+
/**
 * Build a lookup from names imported via `from module import ...` in a Python
 * file to the project file that most plausibly provides the module.
 *
 * The dotted module path (leading relative-import dots removed) is converted
 * to a file path and matched against `allFiles` as `<path>.py` or
 * `<path>/__init__.py`, on a whole-path-segment boundary. The first matching
 * file wins. Both single-line and parenthesised import lists are supported;
 * trailing `#` comments are ignored and anything that is not a plain
 * identifier (for example `*`) is skipped.
 *
 * For every imported name the snake_case form is registered as well when it
 * differs (`UserService` -> `user_service`).
 *
 * @param {string} content - Python source text of the importing file.
 * @param {string[]} allFiles - Project file paths using "/" separators.
 * @returns {Map<string, string>} Imported name -> target file path.
 */
function buildPythonImportMap(content, allFiles) {
  // Suffix match that only succeeds on a "/" boundary ("models.py" must not hit "old_models.py").
  const isPathSuffix = (file, suffix) => file === suffix || file.endsWith("/" + suffix);

  const importMap = new Map();
  const fromRegex = /^from\s+([\w.]+)\s+import\s+(?:\(([^)]*)\)|([^\n(]+))/gm;
  for (const match of content.matchAll(fromRegex)) {
    const modulePath = match[1].replace(/^\.+/, "").replace(/\./g, "/");
    // `from . import x` names no module path that could be matched by suffix.
    if (modulePath === "") continue;
    const targetFile = allFiles.find(
      (f) => isPathSuffix(f, modulePath + ".py") || isPathSuffix(f, modulePath + "/__init__.py")
    );
    if (!targetFile) continue;

    const importList = (match[2] ?? match[3]).replace(/#.*$/gm, "");
    for (const item of importList.split(",")) {
      const name = item.trim().split(/\s+as\s+/).pop().trim();
      if (!/^[A-Za-z_]\w*$/.test(name)) continue;
      importMap.set(name, targetFile);
      const snakeName = name.replace(/([A-Z])/g, "_$1").toLowerCase().replace(/^_/, "");
      if (snakeName !== name) importMap.set(snakeName, targetFile);
    }
  }
  return importMap;
}
|
|
4325
|
+
/**
 * Build a file-level call graph for a set of source files.
 *
 * Pass 1 runs the per-language definition and call extractors on every file
 * whose language `getLanguage` recognises. Pass 2 resolves each call to a
 * target file, trying in order:
 *   1. the caller's import map, keyed by the receiver name;
 *   2. a known `Receiver.method` definition;
 *   3. the import map, keyed by the capitalised receiver name;
 *   4. a method name that is defined exactly once, in a different file.
 * Go files have no import-map builder and use an empty map. Self-edges are
 * dropped and each (from, to) pair is emitted once.
 *
 * @param {Array<{relativePath: string, content: string}>} files - Files to analyse.
 * @returns {{definitions: Array<object>, calls: Array<object>, edges: Array<{from: string, to: string, type: "call"}>}}
 */
function buildCallGraph(files) {
  const extractors = {
    java: { defs: extractJavaDefinitions, calls: extractJavaCalls },
    ts: { defs: extractTsDefinitions, calls: extractTsCalls },
    python: { defs: extractPythonDefinitions, calls: extractPythonCalls },
    go: { defs: extractGoDefinitions, calls: extractGoCalls }
  };
  const importMapBuilders = {
    java: buildJavaImportMap,
    ts: buildTsImportMap,
    python: buildPythonImportMap
  };

  const allPaths = files.map((f) => f.relativePath);
  const allDefinitions = [];
  const allCalls = [];
  // Calls grouped by caller so pass 2 does not rescan every call for every file.
  const callsByFile = new Map();

  for (const file of files) {
    const lang = getLanguage(file.relativePath);
    if (!lang) continue;
    const defs = extractors[lang].defs(file.content, file.relativePath);
    const calls = extractors[lang].calls(file.content, file.relativePath);
    allDefinitions.push(...defs);
    allCalls.push(...calls);
    const grouped = callsByFile.get(file.relativePath) ?? [];
    grouped.push(...calls);
    callsByFile.set(file.relativePath, grouped);
  }

  const defByMethod = new Map();
  const defByQualified = new Map();
  for (const def of allDefinitions) {
    const sameName = defByMethod.get(def.name) ?? [];
    sameName.push(def);
    defByMethod.set(def.name, sameName);
    // Later definitions with the same qualified name overwrite earlier ones.
    if (def.className) defByQualified.set(`${def.className}.${def.name}`, def);
  }

  const edges = [];
  const edgeSet = new Set();
  for (const file of files) {
    const lang = getLanguage(file.relativePath);
    if (!lang) continue;
    const buildImportMap = importMapBuilders[lang];
    const importMap = buildImportMap ? buildImportMap(file.content, allPaths) : new Map();

    for (const call of callsByFile.get(file.relativePath) ?? []) {
      let targetFile = importMap.get(call.receiverName);
      if (!targetFile) {
        const qualDef = defByQualified.get(`${call.receiverName}.${call.methodName}`);
        if (qualDef) targetFile = qualDef.filePath;
      }
      if (!targetFile) {
        const capitalized = call.receiverName.charAt(0).toUpperCase() + call.receiverName.slice(1);
        targetFile = importMap.get(capitalized);
      }
      if (!targetFile) {
        const candidates = defByMethod.get(call.methodName);
        if (candidates && candidates.length === 1 && candidates[0].filePath !== file.relativePath) {
          targetFile = candidates[0].filePath;
        }
      }
      if (!targetFile || targetFile === file.relativePath) continue;

      const key = `${file.relativePath}\u2192${targetFile}`;
      if (edgeSet.has(key)) continue;
      edgeSet.add(key);
      edges.push({ from: file.relativePath, to: targetFile, type: "call" });
    }
  }
  return { definitions: allDefinitions, calls: allCalls, edges };
}
|
|
4415
|
+
/**
 * Raise the score of files that are call-graph neighbours of the best matches.
 *
 * For each of the first `topK` entries of `matches` (the parent), up to five
 * files it calls receive `parent.score * boostFactor`, and up to five files
 * that call it receive 70% of that amount. Files not yet in the result are
 * added with the boost as their score. Every boosted file is tagged in
 * `matchedTerms` with the reason. Input objects are copied, not mutated.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Scored matches; the first `topK` entries are used as parents.
 * @param {Array<{from: string, to: string, type: string}>} callEdges
 *   Graph edges; only those with `type === "call"` are used.
 * @param {number} [topK=10] - Number of leading matches that spread a boost.
 * @param {number} [boostFactor=0.3] - Fraction of the parent score passed on.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   New list sorted by descending score; `matches` itself is returned when
 *   either input is empty.
 */
function boostByCallGraph(matches, callEdges, topK = 10, boostFactor = 0.3) {
  if (matches.length === 0 || callEdges.length === 0) return matches;

  const CALLED_BY_MATCH = "[call-graph:called-by-match]";
  const CALLS_MATCH = "[call-graph:calls-match]";
  const neighbourLimit = 5;

  const resultByPath = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  const callees = new Map();
  const callers = new Map();
  const link = (adjacency, key, value) => {
    const list = adjacency.get(key) ?? [];
    list.push(value);
    adjacency.set(key, list);
  };
  for (const { from, to, type } of callEdges) {
    if (type !== "call") continue;
    link(callees, from, to);
    link(callers, to, from);
  }

  // Add `amount` to an existing entry (tagging it once) or create a new one.
  const applyBoost = (filePath, amount, tag) => {
    const entry = resultByPath.get(filePath);
    if (!entry) {
      resultByPath.set(filePath, { filePath, score: amount, matchedTerms: [tag] });
      return;
    }
    entry.score += amount;
    if (!entry.matchedTerms.includes(tag)) entry.matchedTerms.push(tag);
  };

  for (const parent of matches.slice(0, topK)) {
    const boost = parent.score * boostFactor;
    for (const callee of (callees.get(parent.filePath) ?? []).slice(0, neighbourLimit)) {
      applyBoost(callee, boost, CALLED_BY_MATCH);
    }
    for (const caller of (callers.get(parent.filePath) ?? []).slice(0, neighbourLimit)) {
      applyBoost(caller, boost * 0.7, CALLS_MATCH);
    }
  }

  return [...resultByPath.values()].sort((a, b) => b.score - a.score);
}
|
|
4472
|
+
|
|
4473
|
+
// src/engine/git-relevance.ts
|
|
4474
|
+
import { execSync } from "child_process";
|
|
4475
|
+
/**
 * Build a co-change matrix from recent git history: which files tend to be
 * modified in the same commit.
 *
 * Runs `git log` in `projectPath` (non-merge commits; added/copied/modified/
 * renamed files) and, for every pair of files changed together at least
 * `minCoChanges` times, records a similarity of
 * `coCommits / (commitsA + commitsB - coCommits)` (Jaccard index over the
 * commits touching each file). Per-file commit counts use every file of a
 * commit, while pairs are only formed among the first 20 files of a commit.
 *
 * If git cannot be run (not a repository, git missing, timeout, output too
 * large) an empty matrix is returned.
 *
 * @param {string} projectPath - Directory in which to run git.
 * @param {number} [maxCommits=500] - Number of commits to inspect. Coerced to
 *   an integer before being placed on the command line; a value that is not
 *   numeric falls back to 500.
 * @param {number} [minCoChanges=2] - Minimum shared commits for a pair to be kept.
 * @returns {{entries: Map<string, Array<{fileA: string, fileB: string, coCommits: number, similarity: number}>>,
 *            fileCommitCounts: Map<string, number>, totalCommits: number}}
 *   `entries` maps each file to its partners (as `fileB`), sorted by
 *   descending similarity.
 */
function buildCoChangeMatrix(projectPath, maxCommits = 500, minCoChanges = 2) {
  const emptyMatrix = {
    entries: /* @__PURE__ */ new Map(),
    fileCommitCounts: /* @__PURE__ */ new Map(),
    totalCommits: 0
  };

  // The limit is interpolated into a shell command, so only a plain integer may pass.
  const requestedLimit = Math.trunc(Number(maxCommits));
  const commitLimit = Number.isFinite(requestedLimit) ? requestedLimit : 500;

  let gitOutput;
  try {
    // core.quotepath=off keeps non-ASCII file names verbatim instead of octal-escaped.
    gitOutput = execSync(
      `git -c core.quotepath=off log --no-merges --diff-filter=ACMR --name-only --format="---COMMIT---" -n ${commitLimit}`,
      { cwd: projectPath, encoding: "utf-8", maxBuffer: 10 * 1024 * 1024, timeout: 15e3 }
    );
  } catch {
    // Best effort: co-change data is optional, so any git failure yields no data.
    return emptyMatrix;
  }

  // Split the log into per-commit file lists; commits without files are dropped.
  const commits = [];
  let currentFiles = [];
  for (const line of gitOutput.split("\n")) {
    const trimmed = line.trim();
    if (trimmed === "---COMMIT---") {
      if (currentFiles.length > 0) commits.push(currentFiles);
      currentFiles = [];
    } else if (trimmed.length > 0) {
      currentFiles.push(trimmed);
    }
  }
  if (currentFiles.length > 0) commits.push(currentFiles);
  if (commits.length === 0) return emptyMatrix;

  const fileCommitCounts = /* @__PURE__ */ new Map();
  const coChangeCounts = /* @__PURE__ */ new Map();
  for (const files of commits) {
    const unique = [...new Set(files)];
    for (const file of unique) {
      fileCommitCounts.set(file, (fileCommitCounts.get(file) ?? 0) + 1);
    }
    // Cap pair generation so very large commits do not create a quadratic number of pairs.
    const capped = unique.slice(0, 20);
    for (let i = 0; i < capped.length; i++) {
      for (let j = i + 1; j < capped.length; j++) {
        // Order the pair so (a, b) and (b, a) share one key; NUL separates the two paths.
        const [a, b] = capped[i] < capped[j] ? [capped[i], capped[j]] : [capped[j], capped[i]];
        const key = `${a}\0${b}`;
        coChangeCounts.set(key, (coChangeCounts.get(key) ?? 0) + 1);
      }
    }
  }

  const entries = /* @__PURE__ */ new Map();
  for (const [key, coCommits] of coChangeCounts) {
    if (coCommits < minCoChanges) continue;
    const [fileA, fileB] = key.split("\0");
    const commitsA = fileCommitCounts.get(fileA) ?? 0;
    const commitsB = fileCommitCounts.get(fileB) ?? 0;
    const union = commitsA + commitsB - coCommits;
    const similarity = union > 0 ? coCommits / union : 0;
    const entry = { fileA, fileB, coCommits, similarity };
    const listA = entries.get(fileA) ?? [];
    listA.push(entry);
    entries.set(fileA, listA);
    // Mirror the entry so each file sees its partner in `fileB`.
    const listB = entries.get(fileB) ?? [];
    listB.push({ ...entry, fileA: fileB, fileB: fileA });
    entries.set(fileB, listB);
  }
  for (const [, list] of entries) {
    list.sort((a, b) => b.similarity - a.similarity);
  }
  return { entries, fileCommitCounts, totalCommits: commits.length };
}
|
|
4544
|
+
/**
 * Raise the score of files that historically change together with the best
 * matches.
 *
 * For each of the first `topK` entries of `matches` (the parent), its
 * co-change partners are walked in stored order. Walking stops after five
 * partners or at the first partner whose similarity is below
 * `minSimilarity`. Each visited partner gains
 * `parent.score * boostFactor * similarity`; partners not yet in the result
 * are added with that boost as their score. Boosted files are tagged with
 * "[git-cochange]". Input objects are copied, not mutated.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 * @param {{entries: Map<string, Array<{fileB: string, similarity: number}>>}} coChangeMatrix
 * @param {number} [topK=10] - Number of leading matches that spread a boost.
 * @param {number} [boostFactor=0.25] - Fraction of the parent score passed on.
 * @param {number} [minSimilarity=0.15] - Similarity cut-off.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   New list sorted by descending score; `matches` itself is returned when
 *   there is nothing to do.
 */
function boostByGitCoChange(matches, coChangeMatrix, topK = 10, boostFactor = 0.25, minSimilarity = 0.15) {
  if (matches.length === 0 || coChangeMatrix.entries.size === 0) return matches;

  const TAG = "[git-cochange]";
  const partnerLimit = 5;
  const resultByPath = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  for (const parent of matches.slice(0, topK)) {
    const partners = coChangeMatrix.entries.get(parent.filePath) ?? [];
    for (let i = 0; i < partners.length && i < partnerLimit; i++) {
      const { fileB, similarity } = partners[i];
      if (similarity < minSimilarity) break;
      const boost = parent.score * boostFactor * similarity;
      const entry = resultByPath.get(fileB);
      if (!entry) {
        resultByPath.set(fileB, { filePath: fileB, score: boost, matchedTerms: [TAG] });
        continue;
      }
      entry.score += boost;
      if (!entry.matchedTerms.includes(TAG)) entry.matchedTerms.push(TAG);
    }
  }

  return [...resultByPath.values()].sort((a, b) => b.score - a.score);
}
|
|
4577
|
+
|
|
4578
|
+
// src/engine/multi-hop.ts
|
|
4579
|
+
init_tfidf();
|
|
4580
|
+
// Defaults for multiHopQuery; any key can be overridden through its `config` argument.
var DEFAULT_CONFIG2 = {
  // Number of expansion rounds after the initial query.
  maxHops: 2,
  // Number of best files used as seeds for the next round.
  topKPerHop: 5,
  // Per-hop score multiplier base: hop n scores are scaled by decayFactor ** n.
  decayFactor: 0.5,
  // Minimum (pre-decay) score a hop result needs to be kept.
  minScoreThreshold: 0.2
};
/**
 * Iteratively widen a search: start from a plain index query, then for each
 * hop follow import and call edges from the best files found so far and
 * re-query with identifiers taken from those files.
 *
 * Hop 0 runs `query(index, task, 50)` and records every hit. Each later hop:
 *   1. gathers unexplored files linked to a seed via `deps` or a call edge in
 *      either direction;
 *   2. extends the task text with up to ten identifiers extracted from the
 *      seeds and runs `query` again (limit 30);
 *   3. scores linked files as `(0.3 + expanded query score) * decay` and other
 *      unexplored expanded-query hits as `score * decay * 0.5`, keeping those
 *      at or above `minScoreThreshold * decay`;
 *   4. uses the `topKPerHop` best newly found files as seeds for the next hop.
 *
 * @param {object} index - Search index passed through to `query`.
 * @param {string} task - Task/query text.
 * @param {Map<string, Iterable<string>>} deps - File -> files it depends on.
 * @param {Array<{from: string, to: string}>} callEdges - Call-graph edges.
 * @param {Map<string, string>} fileContents - File -> source text.
 * @param {Partial<typeof DEFAULT_CONFIG2>} [config={}] - Overrides for the defaults.
 * @returns {{matches: Array<{filePath: string, score: number, matchedTerms: string[]}>,
 *            hops: Array<{hop: number, seedFiles: string[], newFiles: string[], expandedTerms: string[]}>,
 *            totalFilesExplored: number}}
 *   `matches` is sorted by descending aggregate score.
 */
function multiHopQuery(index, task, deps, callEdges, fileContents, config = {}) {
  const settings = { ...DEFAULT_CONFIG2, ...config };
  const hops = [];

  // Call-graph adjacency in both directions.
  const calleesOf = /* @__PURE__ */ new Map();
  const callersOf = /* @__PURE__ */ new Map();
  const connect = (adjacency, key, value) => {
    const neighbours = adjacency.get(key) ?? /* @__PURE__ */ new Set();
    neighbours.add(value);
    adjacency.set(key, neighbours);
  };
  for (const edge of callEdges) {
    connect(calleesOf, edge.from, edge.to);
    connect(callersOf, edge.to, edge.from);
  }

  const scoreByFile = /* @__PURE__ */ new Map();
  const termsByFile = /* @__PURE__ */ new Map();
  const explored = /* @__PURE__ */ new Set();

  const seedResults = query(index, task, 50);
  for (const hit of seedResults) {
    scoreByFile.set(hit.filePath, hit.score);
    termsByFile.set(hit.filePath, new Set(hit.matchedTerms));
    explored.add(hit.filePath);
  }
  hops.push({
    hop: 0,
    seedFiles: [],
    newFiles: seedResults.slice(0, settings.topKPerHop).map((hit) => hit.filePath),
    expandedTerms: tokenize(task)
  });

  let frontier = seedResults.slice(0, settings.topKPerHop);
  for (let hop = 1; hop <= settings.maxHops; hop++) {
    if (frontier.length === 0) break;
    const seedFiles = frontier.map((seed) => seed.filePath);
    const newFiles = [];

    // Unexplored neighbours of the seeds: imports, callees, then callers.
    const connectedFiles = /* @__PURE__ */ new Set();
    for (const seed of seedFiles) {
      const neighbours = [
        ...(deps.get(seed) ?? []),
        ...(calleesOf.get(seed) ?? []),
        ...(callersOf.get(seed) ?? [])
      ];
      for (const neighbour of neighbours) {
        if (!explored.has(neighbour)) connectedFiles.add(neighbour);
      }
    }

    // Identifiers from the seed files widen the query text for this hop.
    const harvestedTerms = [];
    for (const seed of seedFiles) {
      const seedContent = fileContents.get(seed);
      if (!seedContent) continue;
      harvestedTerms.push(...extractKeyIdentifiers(seedContent, seed));
    }
    const decayMultiplier = Math.pow(settings.decayFactor, hop);
    const keepThreshold = settings.minScoreThreshold * decayMultiplier;
    const uniqueExpandedTerms = [...new Set(harvestedTerms)];
    const expandedQuery = task + " " + uniqueExpandedTerms.slice(0, 10).join(" ");
    const expandedResults = query(index, expandedQuery, 30);

    // Accumulate a hop score and its provenance tags for one file.
    const record = (filePath, hopScore, tag, extraTerms) => {
      scoreByFile.set(filePath, (scoreByFile.get(filePath) ?? 0) + hopScore);
      const terms = termsByFile.get(filePath) ?? /* @__PURE__ */ new Set();
      terms.add(tag);
      for (const term of extraTerms) terms.add(term);
      termsByFile.set(filePath, terms);
    };

    for (const connected of connectedFiles) {
      const expandedMatch = expandedResults.find((r) => r.filePath === connected);
      const graphScore = 0.3;
      const bm25Score = expandedMatch?.score ?? 0;
      const hopScore = (graphScore + bm25Score) * decayMultiplier;
      if (hopScore < keepThreshold) continue;
      record(connected, hopScore, `[hop-${hop}]`, expandedMatch ? expandedMatch.matchedTerms : []);
      if (!explored.has(connected)) {
        newFiles.push(connected);
        explored.add(connected);
      }
    }

    for (const result of expandedResults) {
      if (explored.has(result.filePath)) continue;
      const hopScore = result.score * decayMultiplier * 0.5;
      if (hopScore < keepThreshold) continue;
      record(result.filePath, hopScore, `[hop-${hop}-bm25]`, result.matchedTerms);
      newFiles.push(result.filePath);
      explored.add(result.filePath);
    }

    hops.push({ hop, seedFiles, newFiles, expandedTerms: uniqueExpandedTerms.slice(0, 20) });

    frontier = newFiles
      .map((filePath) => ({ filePath, score: scoreByFile.get(filePath) ?? 0 }))
      .sort((a, b) => b.score - a.score)
      .slice(0, settings.topKPerHop)
      .map((seed) => ({
        filePath: seed.filePath,
        score: seed.score,
        matchedTerms: [...termsByFile.get(seed.filePath) ?? []]
      }));
  }

  const matches = Array.from(scoreByFile, ([filePath, score]) => ({
    filePath,
    score,
    matchedTerms: [...termsByFile.get(filePath) ?? /* @__PURE__ */ new Set()]
  }));
  matches.sort((a, b) => b.score - a.score);
  return {
    matches,
    hops,
    totalFilesExplored: explored.size
  };
}
|
|
4700
|
+
/**
 * Extracts declared names from a source file and breaks them into word
 * fragments.
 *
 * The file extension (text after the last "." in `filePath`, lowercased)
 * selects which declaration patterns are applied:
 *   - java:          class/interface names and public/protected method names
 *   - ts/tsx/js/jsx: exported class/function/const/interface/type names
 *   - py:            line-leading `class` / `def` names (split on "_")
 *   - go:            capitalized line-leading `func` and `type` names
 * Any other extension yields an empty list.
 *
 * @param {string} content  Full text of the file.
 * @param {string} filePath Path used only to determine the extension.
 * @returns {string[]} Up to 30 unique fragments, each at least 3 characters
 *   long and not present (case-insensitively) in NOISE_IDENTIFIERS.
 */
function extractKeyIdentifiers(content, filePath) {
  const extension = filePath.split(".").pop()?.toLowerCase() ?? "";
  const fragments = [];

  // Runs `pattern` over the whole file and feeds capture group 1 (the declared
  // name) of every hit through `splitter`.
  const collect = (pattern, splitter) => {
    for (const hit of content.matchAll(pattern)) {
      fragments.push(...splitter(hit[1]));
    }
  };

  switch (extension) {
    case "java":
      collect(/(?:class|interface)\s+(\w+)/g, splitCamelCase);
      collect(/(?:public|protected)\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm, splitCamelCase);
      break;
    case "ts":
    case "tsx":
    case "js":
    case "jsx":
      collect(/export\s+(?:class|function|const|interface|type)\s+(\w+)/g, splitCamelCase);
      break;
    case "py":
      collect(/^(?:class|def)\s+(\w+)/gm, splitSnakeCase);
      break;
    case "go":
      collect(/^func\s+(?:\([^)]+\)\s+)?([A-Z]\w+)/gm, splitCamelCase);
      collect(/^type\s+([A-Z]\w+)/gm, splitCamelCase);
      break;
    default:
      break;
  }

  const unique = [...new Set(fragments)];
  const meaningful = unique.filter(
    (id) => id.length >= 3 && !NOISE_IDENTIFIERS.has(id.toLowerCase())
  );
  return meaningful.slice(0, 30);
}
|
|
4740
|
+
/**
 * Breaks a camelCase / PascalCase identifier into lowercase words.
 *
 * A space is inserted at every lower→upper transition and between a run of
 * capitals and a following capitalized word (so "XMLHttp" becomes
 * "XML Http"). The result is lowercased and split on whitespace; words
 * shorter than 3 characters are discarded.
 *
 * @param {string} name Identifier to split.
 * @returns {string[]} Lowercase words of length >= 3, in original order.
 */
function splitCamelCase(name) {
  const lowerToUpper = /([a-z])([A-Z])/g;
  const acronymToWord = /([A-Z]+)([A-Z][a-z])/g;
  const spaced = name.replace(lowerToUpper, "$1 $2").replace(acronymToWord, "$1 $2");
  const words = [];
  for (const word of spaced.toLowerCase().split(/\s+/)) {
    if (word.length >= 3) {
      words.push(word);
    }
  }
  return words;
}
|
|
4743
|
+
/**
 * Splits an identifier on underscores and keeps the segments that are at
 * least 3 characters long. Case is left untouched.
 *
 * @param {string} name Identifier to split.
 * @returns {string[]} Segments of length >= 3, in original order.
 */
function splitSnakeCase(name) {
  const segments = [];
  for (const segment of name.split("_")) {
    if (segment.length >= 3) {
      segments.push(segment);
    }
  }
  return segments;
}
|
|
4746
|
+
// Lowercase name fragments that extractKeyIdentifiers() drops from its result.
var NOISE_IDENTIFIERS = /* @__PURE__ */ new Set([
  // short verbs and filler words
  "get", "set", "has", "add", "put", "new", "run", "map",
  "for", "the", "and", "not", "with", "from", "this", "that",
  // testing vocabulary
  "test", "spec", "mock",
  // literals and primitive type names
  "void", "null", "true", "false", "string", "number", "boolean", "int",
  // generic structural / architectural names
  "impl", "default", "abstract", "base", "main", "init", "setup",
  "util", "utils", "helper", "helpers", "common", "config",
  "model", "entity", "service", "repository", "controller", "handler",
  // language keywords and modifiers
  "interface", "type", "class", "function", "const", "return",
  "import", "export", "private", "public", "protected", "static",
  "final", "override", "async", "await"
]);
|
|
4810
|
+
|
|
4811
|
+
// src/engine/query-intent.ts
|
|
4812
|
+
// Ordered [pattern, action] pairs consulted by detectAction(); the first
// pattern that matches the task text decides the action, so earlier entries
// take priority over later ones.
var ACTION_PATTERNS = Object.entries({
  fix: /\b(fix|bug|debug|repair|resolve|broken|crash|error|issue|wrong)\b/i,
  add: /\b(add|implement|create|build|new|feature|introduce|wire)\b/i,
  refactor: /\b(refactor|restructure|clean|extract|split|move|rename|simplify)\b/i,
  trace: /\b(trace|follow|understand|find|where|how|flow|path|chain)\b/i,
  test: /\b(test|spec|coverage|assert|mock|verify)\b/i,
  docs: /\b(doc|document|describe|explain|readme|comment)\b/i,
  remove: /\b(remove|delete|deprecate|drop|kill|eliminate)\b/i,
  optimize: /\b(optimize|performance|speed|fast|slow|latency|efficient)\b/i
}).map(([action, pattern]) => [pattern, action]);
|
|
4822
|
+
/**
 * Classifies a task description by the first entry of ACTION_PATTERNS whose
 * pattern matches it.
 *
 * @param {string} task Free-text task description.
 * @returns {string} The action paired with the first matching pattern, or
 *   "unknown" when no pattern matches.
 */
function detectAction(task) {
  const firstHit = ACTION_PATTERNS.find(([pattern]) => pattern.test(task));
  return firstHit === undefined ? "unknown" : firstHit[1];
}
|
|
4828
|
+
// Ordered [keywords, layer] pairs. detectLayers() reports a layer when any of
// its keywords occurs as a substring of the lowercased task; extractEntities()
// uses the keyword lists to exclude layer vocabulary from entities.
var LAYER_KEYWORDS = Object.entries({
  endpoint: ["controller", "endpoint", "handler", "router", "route", "api", "rest", "entrypoint"],
  usecase: ["usecase", "use case", "use-case", "interactor", "application service"],
  service: ["service", "domain service", "business logic"],
  repository: ["repository", "repo", "dao", "data access", "persistence", "database", "db", "store"],
  cache: ["cache", "redis", "memcached", "caching", "ttl", "invalidat"],
  client: ["client", "http client", "api client", "rest client", "feign", "retrofit"],
  model: ["model", "entity", "dto", "domain object", "value object", "pojo"],
  config: ["config", "configuration", "injector", "module", "bean", "provider", "dependency injection"],
  queue: ["queue", "kafka", "rabbit", "sqs", "event", "listener", "consumer", "producer", "message"],
  middleware: ["middleware", "interceptor", "filter", "guard", "pipe"]
}).map(([layer, keywords]) => [keywords, layer]);
|
|
4840
|
+
/**
 * Lists the layers whose keywords appear in a task description.
 *
 * Matching is a plain substring test against the lowercased task, so a
 * keyword also matches inside longer words.
 *
 * @param {string} task Free-text task description.
 * @returns {string[]} Distinct layer names, in LAYER_KEYWORDS order.
 */
function detectLayers(task) {
  const haystack = task.toLowerCase();
  const matched = LAYER_KEYWORDS
    .filter(([keywords]) => keywords.some((keyword) => haystack.includes(keyword)))
    .map(([, layer]) => layer);
  return [...new Set(matched)];
}
|
|
4850
|
+
// English function words ignored by extractEntities() and extractQualifiers().
// Each word is listed once; Set membership and iteration order are the same
// as with the longer one-word-per-line listing.
var STOP_WORDS2 = /* @__PURE__ */ new Set([
  // articles, "to be", auxiliaries and modals
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did",
  "will", "would", "shall", "should", "may", "might", "must", "can", "could", "need",
  // negation and conjunctions
  "not", "and", "but", "or", "nor", "for", "yet", "so",
  // prepositions and direction words
  "in", "on", "at", "to", "from", "by", "with", "about", "between", "through",
  "during", "before", "after", "above", "below", "up", "down", "out", "off",
  "over", "under",
  // adverbs and question words
  "again", "further", "then", "once", "here", "there", "when", "where", "why", "how",
  // quantifiers and determiners
  "all", "each", "every", "both", "few", "more", "most", "other", "some", "such",
  "no", "only", "own", "same", "than", "too", "very", "just", "because",
  // pronouns and leftovers
  "this", "that", "these", "those", "it", "its", "of", "if"
]);
|
|
4946
|
+
// Task verbs and problem nouns that extractEntities() does not treat as
// entities.
var ACTION_WORDS = /* @__PURE__ */ new Set([
  "fix", "add", "create", "build", "implement", "refactor",
  "trace", "follow", "find",
  "update", "modify", "change", "remove", "delete",
  "debug", "test", "check", "verify", "validate",
  "handle", "process", "resolve", "repair",
  "optimize", "improve", "speed", "clean",
  "bug", "error", "issue", "problem",
  "flow", "path", "chain"
]);
|
|
4982
|
+
// Verbs that extractOperations() reports as operations and that
// extractEntities() excludes from entities.
var OPERATION_WORDS = /* @__PURE__ */ new Set([
  // CRUD and persistence
  "create", "read", "update", "delete", "save", "load", "fetch", "retrieve",
  "store", "persist", "insert", "remove",
  // validation and transformation
  "invalidate", "validate", "parse", "transform", "convert", "render", "display",
  // messaging
  "send", "receive", "publish", "subscribe", "emit", "listen",
  // security
  "authenticate", "authorize", "encrypt", "decrypt", "hash",
  // encoding
  "serialize", "deserialize", "encode", "decode", "compress",
  // data movement
  "replicate", "sync", "migrate", "export", "import", "upload", "download",
  // lookup
  "search", "index", "query", "filter", "sort"
]);
|
|
5031
|
+
/**
 * Picks the words of a task that are left once generic vocabulary is
 * removed.
 *
 * The task is lowercased, every character other than a-z, 0-9, whitespace
 * and "-" is replaced by a space, and the text is split on whitespace. A
 * word is dropped when it is shorter than 3 characters, is in STOP_WORDS2,
 * ACTION_WORDS or OPERATION_WORDS, or equals one of the LAYER_KEYWORDS
 * keywords.
 *
 * @param {string} task Free-text task description.
 * @returns {string[]} Distinct remaining words, in first-seen order.
 */
function extractEntities(task) {
  const tokens = task.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/);
  const isLayerKeyword = (token) =>
    LAYER_KEYWORDS.some(([keywords]) => keywords.includes(token));
  const kept = tokens.filter(
    (token) =>
      token.length >= 3 &&
      !STOP_WORDS2.has(token) &&
      !ACTION_WORDS.has(token) &&
      !OPERATION_WORDS.has(token) &&
      !isLayerKeyword(token)
  );
  return [...new Set(kept)];
}
|
|
5045
|
+
/**
 * Collects the OPERATION_WORDS mentioned in a task.
 *
 * Two passes over the lowercased task are combined:
 *   1. whole words (after replacing everything except a-z, 0-9, whitespace
 *      and "-" with spaces) that are operation words;
 *   2. the word directly following "on", "after", "before" or "during",
 *      when it is an operation word. This pass uses `\w+`, so it also sees
 *      the leading part of a hyphenated word.
 *
 * @param {string} task Free-text task description.
 * @returns {string[]} Distinct operations, pass 1 results first.
 */
function extractOperations(task) {
  const directHits = task
    .toLowerCase()
    .replace(/[^a-z0-9\s-]/g, " ")
    .split(/\s+/)
    .filter((word) => OPERATION_WORDS.has(word));

  const triggerPhrases = task.toLowerCase().match(/\b(on|after|before|during)\s+(\w+)/g) ?? [];
  const triggeredHits = triggerPhrases
    .map((phrase) => phrase.split(/\s+/))
    .filter((parts) => parts.length >= 2 && OPERATION_WORDS.has(parts[1]))
    .map((parts) => parts[1]);

  return [...new Set([...directHits, ...triggeredHits])];
}
|
|
5064
|
+
/**
 * Extracts the one- or two-word phrases that follow "on", "for", "in",
 * "via", "from" or "through" (case-insensitive) in a task.
 *
 * Each phrase is lowercased with its leading preposition removed. A phrase
 * is kept when it is at least 2 characters long and is not, as a whole, an
 * entry of STOP_WORDS2.
 *
 * @param {string} task Free-text task description.
 * @returns {string[]} Distinct qualifier phrases, in first-seen order.
 */
function extractQualifiers(task) {
  const phrases = task.match(/\b(on|for|in|via|from|through)\s+(\w+(?:\s+\w+)?)/gi) ?? [];
  const kept = new Set();
  for (const phrase of phrases) {
    const [, ...tail] = phrase.split(/\s+/);
    if (tail.length === 0) {
      continue;
    }
    const qualifier = tail.join(" ").toLowerCase();
    if (qualifier.length >= 2 && !STOP_WORDS2.has(qualifier)) {
      kept.add(qualifier);
    }
  }
  return [...kept];
}
|
|
5080
|
+
/**
 * Builds a structured description of a task by running the individual
 * detectors over it.
 *
 * `confidence` is the fraction of these four signals that are present: a
 * known action, at least one entity, at least one operation, at least one
 * layer. Qualifiers do not contribute to it.
 *
 * @param {string} task Free-text task description.
 * @returns {{original: string, action: string, entities: string[],
 *   operations: string[], layers: string[], qualifiers: string[],
 *   confidence: number}}
 */
function parseQueryIntent(task) {
  const intent = {
    original: task,
    action: detectAction(task),
    entities: extractEntities(task),
    operations: extractOperations(task),
    layers: detectLayers(task),
    qualifiers: extractQualifiers(task)
  };
  const signalPresent = [
    intent.action !== "unknown",
    intent.entities.length > 0,
    intent.operations.length > 0,
    intent.layers.length > 0
  ];
  intent.confidence = signalPresent.filter(Boolean).length / signalPresent.length;
  return intent;
}
|
|
5095
|
+
/**
 * Turns a parsed intent into a query string in which repetition expresses
 * weight: every entity appears three times, every operation twice, every
 * layer and qualifier once, in that order, joined by single spaces.
 *
 * @param {{original: string, entities: string[], operations: string[],
 *   layers: string[], qualifiers: string[]}} intent
 * @returns {string} The weighted query, or `intent.original` when the intent
 *   carries no entities, operations, layers or qualifiers.
 */
function buildWeightedQuery(intent) {
  const terms = [
    ...intent.entities.flatMap((entity) => [entity, entity, entity]),
    ...intent.operations.flatMap((operation) => [operation, operation]),
    ...intent.layers,
    ...intent.qualifiers
  ];
  return terms.length === 0 ? intent.original : terms.join(" ");
}
|
|
5112
|
+
|
|
5113
|
+
// src/engine/embeddings.ts
|
|
5114
|
+
/**
 * Builds a TF-IDF cosine-similarity index over the documents of a term
 * index.
 *
 * As used here, `index.idf` is a Map from term to IDF weight and
 * `index.documents` yields `[filePath, doc]` pairs where `doc.terms` yields
 * `[term, termFrequency]` pairs. Every IDF key is one dimension.
 *
 * Vectors are stored sparsely (dimension index -> unit-normalized weight),
 * so memory grows with the number of distinct terms per document rather
 * than with documents x vocabulary. Weights are kept in double precision.
 *
 * @param {{idf: Map<string, number>, documents: Iterable<[string, {terms: Iterable<[string, number]>}]>}} index
 * @returns {{backend: string, dimensions: number, documentCount: number,
 *   query: (text: string, topK: number) => {filePath: string, score: number}[]}}
 *   `query` tokenizes `text` with tokenizeForEmbedding() and returns the
 *   `topK` documents with a positive cosine score, best first.
 */
function buildTfIdfEmbeddingIndex(index) {
  const termToIdx = new Map();
  for (const term of index.idf.keys()) {
    termToIdx.set(term, termToIdx.size);
  }
  const dimensions = termToIdx.size;

  // Converts [term, count] pairs into a unit-length sparse TF-IDF vector.
  // Terms outside the vocabulary are ignored; an all-zero vector is returned
  // unnormalized.
  const toUnitVector = (termCounts) => {
    const vector = new Map();
    let sumOfSquares = 0;
    for (const [term, count] of termCounts) {
      const idx = termToIdx.get(term);
      if (idx === undefined) continue;
      const weight = count * (index.idf.get(term) ?? 0);
      vector.set(idx, weight);
      sumOfSquares += weight * weight;
    }
    const norm = Math.sqrt(sumOfSquares);
    if (norm > 0) {
      for (const [idx, weight] of vector) {
        vector.set(idx, weight / norm);
      }
    }
    return vector;
  };

  const docVectors = new Map();
  for (const [filePath, doc] of index.documents) {
    docVectors.set(filePath, toUnitVector(doc.terms));
  }

  function queryFn(text, topK) {
    const termCounts = new Map();
    for (const term of tokenizeForEmbedding(text)) {
      termCounts.set(term, (termCounts.get(term) ?? 0) + 1);
    }
    const queryVector = toUnitVector(termCounts);

    const results = [];
    for (const [filePath, docVector] of docVectors) {
      // Only dimensions present in the query can contribute to the dot product.
      let dot = 0;
      for (const [idx, queryWeight] of queryVector) {
        const docWeight = docVector.get(idx);
        if (docWeight !== undefined) {
          dot += queryWeight * docWeight;
        }
      }
      if (dot > 0) {
        results.push({ filePath, score: dot });
      }
    }
    return results.sort((a, b) => b.score - a.score).slice(0, topK);
  }

  return {
    backend: "tfidf-cosine",
    dimensions,
    documentCount: docVectors.size,
    query: queryFn
  };
}
|
|
5184
|
+
/**
 * Merges two ranked result lists with weighted reciprocal rank fusion.
 *
 * An item at zero-based position `i` of a list contributes
 * `weight / (k + i + 1)` to the score of its `filePath`; contributions from
 * both lists are summed. Only positions matter, not the input scores.
 *
 * @param {{filePath: string}[]} bm25Results      Ranked best-first.
 * @param {{filePath: string}[]} embeddingResults Ranked best-first.
 * @param {number} [k=60]               Rank damping constant.
 * @param {number} [bm25Weight=0.6]     Weight of the first list.
 * @param {number} [embeddingWeight=0.4] Weight of the second list.
 * @returns {{filePath: string, score: number}[]} Fused results, highest
 *   score first.
 */
function reciprocalRankFusion(bm25Results, embeddingResults, k = 60, bm25Weight = 0.6, embeddingWeight = 0.4) {
  const fused = new Map();
  const accumulate = (ranked, weight) => {
    for (const [rank, item] of ranked.entries()) {
      const soFar = fused.get(item.filePath) ?? 0;
      fused.set(item.filePath, soFar + weight / (k + rank + 1));
    }
  };
  accumulate(bm25Results, bm25Weight);
  accumulate(embeddingResults, embeddingWeight);
  return Array.from(fused, ([filePath, score]) => ({ filePath, score })).sort(
    (left, right) => right.score - left.score
  );
}
|
|
5198
|
+
/**
 * Tokenizes free text into lowercase alphanumeric terms.
 *
 * camelCase boundaries are split first, while the case information still
 * exists; lowercasing before that step would leave the boundary pattern
 * with nothing to match. Every character other than a-z and 0-9 then acts
 * as a separator, and tokens shorter than 2 characters are dropped.
 *
 * @param {string} text Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenizeForEmbedding(text) {
  return text
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .toLowerCase()
    .replace(/[^a-z0-9]/g, " ")
    .split(/\s+/)
    .filter((token) => token.length >= 2);
}
|
|
5201
|
+
|
|
3374
5202
|
// src/engine/context-pipeline.ts
|
|
5203
|
+
// Basename patterns for files that isRankingNoise() removes from ranking:
// repository housekeeping documents and dependency lock files.
//
// The housekeeping names match only when the file has no extension or ends
// in a documentation extension (e.g. LICENSE, LICENSE-MIT, CHANGELOG-1.2.md,
// SECURITY.md). A bare prefix test would also discard source files such as
// SecurityConfig.java or LicenseService.ts.
var RANKING_NOISE_PATTERNS = [
  ...["changelog", "license", "contributing", "code_of_conduct", "authors", "security"].map(
    (stem) => new RegExp(`^${stem}(?:[\\w.-]*\\.(?:md|markdown|txt|rst|adoc)|[\\w-]*)$`, "i")
  ),
  /^codeowners$/i,
  /\.lock$/,
  /^package-lock\.json$/,
  /^yarn\.lock$/,
  /^pnpm-lock\.yaml$/,
  /^Gemfile\.lock$/
];
|
|
5217
|
+
/**
 * Tells whether a file should be excluded from ranking because its basename
 * matches one of RANKING_NOISE_PATTERNS.
 *
 * The basename is the text after the last "/" or "\" so that paths using
 * either separator are handled the same way.
 *
 * @param {string} filePath File path.
 * @returns {boolean} True when the basename matches a noise pattern.
 */
function isRankingNoise(filePath) {
  const segments = filePath.split(/[\\/]/);
  const fileName = segments[segments.length - 1];
  return RANKING_NOISE_PATTERNS.some((pattern) => pattern.test(fileName));
}
|
|
5221
|
+
/**
 * Returns a score multiplier for a file based on what kind of file it is and
 * what kind of task is being served.
 *
 * File kinds are derived from the lowercased path:
 *   - test:   inside a `test/`, `tests/` or `__tests__/` directory (at the
 *             start of the path or below it), or a name containing `.test.`,
 *             `.spec.` or `_test.`
 *   - doc:    ends in .md/.txt/.rst, or lives under a leading `docs/`
 *   - config: ends in .xml/.yml/.yaml/.properties/.gradle
 *
 * @param {string} filePath File path; "/" and "\" separators are accepted.
 * @param {string} taskType Task type; "debug", "test", "docs", "feature" and
 *   "refactor" have specific multipliers.
 * @returns {number} Multiplier to apply to the file's score; 1 when no rule
 *   applies.
 */
function fileTypePenalty(filePath, taskType) {
  const lower = filePath.toLowerCase();
  // `(?:^|sep)` lets a test directory at the very start of a relative path
  // count as well.
  const isTest = /(?:^|[/\\])(?:tests?|__tests__)[/\\]|\.test\.|\.spec\.|_test\./.test(lower);
  const isDoc = /\.md$|\.txt$|\.rst$|^docs[/\\]/.test(lower);
  const isConfig = /\.xml$|\.yml$|\.yaml$|\.properties$|\.gradle$/.test(lower);

  switch (taskType) {
    case "debug":
      if (isTest) return 0.4;
      if (isDoc) return 0.2;
      if (isConfig) return 0.6;
      break;
    case "test":
      if (isTest) return 1.2;
      if (isDoc) return 0.3;
      break;
    case "docs":
      // Doc check comes first here so a document inside a test directory is
      // still boosted.
      if (isDoc) return 1.2;
      if (isTest) return 0.3;
      break;
    case "feature":
    case "refactor":
      if (isTest) return 0.5;
      if (isDoc) return 0.4;
      break;
    default:
      break;
  }
  return 1;
}
|
|
5242
|
+
/**
 * Heuristically decides whether a task spans several parts of a codebase.
 *
 * A task is considered complex when any of these holds for its lowercased
 * text:
 *   1. it contains a chain/flow indicator ("when", "after", "via", "trace",
 *      "propagate…", "cascade…", "calls", …);
 *   2. it mentions at least two distinct architectural layers;
 *   3. it has 10 or more words longer than 2 characters;
 *   4. it contains 3 or more connecting prepositions.
 *
 * @param {string} task Free-text task description.
 * @returns {boolean} True when the task looks complex.
 */
function detectComplexQuery(task) {
  const lower = task.toLowerCase();

  // Stems carry `\w*` so that inflected forms ("propagate", "propagation",
  // "cascading") can reach the closing word boundary.
  const chainIndicators = /\b(when|after|then|through|from .+ to|via|chain|flow|trace|path|propagat\w*|cascad\w*|invalidat\w+ on|calls?|invokes?)\b/;
  if (chainIndicators.test(lower)) return true;

  // Each inner list names ONE layer; aliases are grouped so that a single
  // mention of "repository" (which contains "repo") is counted once.
  const layerAliases = [
    ["controller"],
    ["endpoint"],
    ["router"],
    ["handler"],
    ["service"],
    ["usecase", "use case"],
    ["repository", "repo"],
    ["cache"],
    ["database"],
    ["queue"],
    ["client"],
    ["adapter"],
    ["gateway"],
    ["interceptor"],
    ["middleware"],
    ["listener"],
    ["consumer"],
    ["producer"],
    ["publisher"],
    ["subscriber"]
  ];
  const layerCount = layerAliases.filter((aliases) =>
    aliases.some((alias) => lower.includes(alias))
  ).length;
  if (layerCount >= 2) return true;

  const words = lower.split(/\s+/).filter((w) => w.length > 2);
  if (words.length >= 10) return true;

  const entityConnectors = lower.match(/\b(on|for|in|from|to|with|after|before|during)\b/g);
  return entityConnectors !== null && entityConnectors.length >= 3;
}
|
|
3375
5278
|
async function runContextPipeline(input) {
|
|
3376
5279
|
const { projectPath, task, analysis, budget = 5e4 } = input;
|
|
3377
5280
|
const taskType = classifyTask(task);
|
|
5281
|
+
const queryIntent = parseQueryIntent(task);
|
|
5282
|
+
const weightedQuery = buildWeightedQuery(queryIntent);
|
|
3378
5283
|
const fileContentMap = /* @__PURE__ */ new Map();
|
|
3379
5284
|
const fileContents = [];
|
|
3380
5285
|
for (const file of analysis.files) {
|
|
@@ -3386,22 +5291,58 @@ async function runContextPipeline(input) {
|
|
|
3386
5291
|
fileContents.push({ relativePath: file.relativePath, content: "" });
|
|
3387
5292
|
}
|
|
3388
5293
|
}
|
|
3389
|
-
const indexFiles = analysis.files.map((f) =>
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
5294
|
+
const indexFiles = analysis.files.map((f) => {
|
|
5295
|
+
const raw = fileContentMap.get(f.relativePath);
|
|
5296
|
+
const augmented = raw ? augmentContentWithStructure(raw, f.relativePath) : void 0;
|
|
5297
|
+
return {
|
|
5298
|
+
relativePath: f.relativePath,
|
|
5299
|
+
absolutePath: f.path,
|
|
5300
|
+
content: augmented
|
|
5301
|
+
};
|
|
5302
|
+
});
|
|
3394
5303
|
const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);
|
|
3395
|
-
const
|
|
3396
|
-
const
|
|
3397
|
-
|
|
3398
|
-
analysis.files.map((f) => f.relativePath),
|
|
3399
|
-
task
|
|
3400
|
-
);
|
|
5304
|
+
const fileCount = analysis.files.length;
|
|
5305
|
+
const adaptiveTopK = Math.min(Math.max(20, Math.round(fileCount * 0.15)), 100);
|
|
5306
|
+
const allFilePaths = analysis.files.map((f) => f.relativePath);
|
|
3401
5307
|
const depMap = /* @__PURE__ */ new Map();
|
|
3402
5308
|
for (const file of analysis.files) {
|
|
3403
5309
|
depMap.set(file.relativePath, file.imports);
|
|
3404
5310
|
}
|
|
5311
|
+
const callGraph = buildCallGraph(
|
|
5312
|
+
fileContents.filter((f) => f.content.length > 0)
|
|
5313
|
+
);
|
|
5314
|
+
const callEdges = [...analysis.graph.edges.filter((e) => e.type === "call"), ...callGraph.edges];
|
|
5315
|
+
const isComplexQuery = detectComplexQuery(task);
|
|
5316
|
+
const embeddingIndex = buildTfIdfEmbeddingIndex(index);
|
|
5317
|
+
const embeddingResults = embeddingIndex.query(weightedQuery, adaptiveTopK);
|
|
5318
|
+
let bm25Matches;
|
|
5319
|
+
if (isComplexQuery) {
|
|
5320
|
+
const hopResult = multiHopQuery(index, weightedQuery, depMap, callEdges, fileContentMap, {
|
|
5321
|
+
maxHops: 2,
|
|
5322
|
+
topKPerHop: 5,
|
|
5323
|
+
decayFactor: 0.5,
|
|
5324
|
+
minScoreThreshold: 0.15
|
|
5325
|
+
});
|
|
5326
|
+
bm25Matches = hopResult.matches.slice(0, adaptiveTopK);
|
|
5327
|
+
} else {
|
|
5328
|
+
bm25Matches = query(index, weightedQuery, adaptiveTopK);
|
|
5329
|
+
}
|
|
5330
|
+
const fusedResults = reciprocalRankFusion(bm25Matches, embeddingResults, 60, 0.6, 0.4);
|
|
5331
|
+
const rawMatches = fusedResults.slice(0, adaptiveTopK).map((r) => {
|
|
5332
|
+
const bm25Match = bm25Matches.find((m) => m.filePath === r.filePath);
|
|
5333
|
+
return {
|
|
5334
|
+
filePath: r.filePath,
|
|
5335
|
+
score: r.score,
|
|
5336
|
+
matchedTerms: bm25Match?.matchedTerms ?? ["[embedding-only]"]
|
|
5337
|
+
};
|
|
5338
|
+
});
|
|
5339
|
+
const semanticMatches = rawMatches.filter((m) => !isRankingNoise(m.filePath));
|
|
5340
|
+
const pathBoosted = boostByPath(semanticMatches, allFilePaths, task);
|
|
5341
|
+
const layerBoosted = boostByLayer(pathBoosted, allFilePaths, task);
|
|
5342
|
+
const importBoosted = boostByImports(layerBoosted, depMap, 10, 0.4);
|
|
5343
|
+
const callBoosted = boostByCallGraph(importBoosted, callEdges, 10, 0.3);
|
|
5344
|
+
const coChangeMatrix = buildCoChangeMatrix(projectPath, 500, 2);
|
|
5345
|
+
const boostedMatches = boostByGitCoChange(callBoosted, coChangeMatrix, 10, 0.25, 0.15);
|
|
3405
5346
|
const rerankResult = rerank({
|
|
3406
5347
|
task,
|
|
3407
5348
|
candidates: boostedMatches,
|
|
@@ -3410,12 +5351,15 @@ async function runContextPipeline(input) {
|
|
|
3410
5351
|
dependencies: depMap,
|
|
3411
5352
|
allFilePaths: analysis.files.map((f) => f.relativePath)
|
|
3412
5353
|
});
|
|
3413
|
-
const
|
|
3414
|
-
|
|
3415
|
-
|
|
3416
|
-
|
|
3417
|
-
matchedTerms:
|
|
5354
|
+
const rerankerApproved = new Set(rerankResult.files.map((rf) => rf.filePath));
|
|
5355
|
+
const rerankedMatches = boostedMatches.map((m) => ({
|
|
5356
|
+
filePath: m.filePath,
|
|
5357
|
+
score: rerankerApproved.has(m.filePath) ? m.score * 1.5 : m.score,
|
|
5358
|
+
matchedTerms: [...m.matchedTerms]
|
|
3418
5359
|
}));
|
|
5360
|
+
for (const m of rerankedMatches) {
|
|
5361
|
+
m.score *= fileTypePenalty(m.filePath, taskType);
|
|
5362
|
+
}
|
|
3419
5363
|
const learner = await loadLearner(projectPath);
|
|
3420
5364
|
const learnerBoosts = getLearnerBoosts(
|
|
3421
5365
|
learner,
|
|
@@ -3438,7 +5382,7 @@ async function runContextPipeline(input) {
|
|
|
3438
5382
|
const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
|
|
3439
5383
|
multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
|
|
3440
5384
|
}
|
|
3441
|
-
return { selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats };
|
|
5385
|
+
return { selection, taskType, fileContentMap, semanticMap, learnerMap, queryIntent, multiRepo, indexCacheStats };
|
|
3442
5386
|
}
|
|
3443
5387
|
|
|
3444
5388
|
// src/mcp/index.ts
|