cto-ai-cli 7.1.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -56
- package/dist/cli/index.js +2018 -34
- package/dist/engine/index.d.ts +826 -3
- package/dist/engine/index.js +3078 -133
- package/dist/mcp/index.js +1978 -34
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -40,7 +40,19 @@ var init_config = __esm({
|
|
|
40
40
|
},
|
|
41
41
|
ignore: {
|
|
42
42
|
dirs: ["node_modules", "dist", "build", ".git", "coverage", "__pycache__", ".next", "vendor", ".cto"],
|
|
43
|
-
patterns: [
|
|
43
|
+
patterns: [
|
|
44
|
+
"*.min.js",
|
|
45
|
+
"*.map",
|
|
46
|
+
"*.lock",
|
|
47
|
+
"*.generated.*",
|
|
48
|
+
"CHANGELOG*",
|
|
49
|
+
"LICENSE*",
|
|
50
|
+
"CONTRIBUTING*",
|
|
51
|
+
"CODE_OF_CONDUCT*",
|
|
52
|
+
"AUTHORS*",
|
|
53
|
+
"CODEOWNERS",
|
|
54
|
+
"SECURITY*"
|
|
55
|
+
]
|
|
44
56
|
},
|
|
45
57
|
maxDepth: 20
|
|
46
58
|
},
|
|
@@ -1256,11 +1268,15 @@ import { readFile as readFile2, readdir, stat as stat2 } from "fs/promises";
|
|
|
1256
1268
|
import { join as join3, extname, relative as relative3, resolve as resolve3, basename as basename2 } from "path";
|
|
1257
1269
|
import { createHash } from "crypto";
|
|
1258
1270
|
function matchesPattern(filename, patterns) {
|
|
1271
|
+
const lower = filename.toLowerCase();
|
|
1259
1272
|
for (const pattern of patterns) {
|
|
1260
1273
|
if (pattern.startsWith("*.")) {
|
|
1261
1274
|
const ext = pattern.slice(1);
|
|
1262
1275
|
if (filename.endsWith(ext)) return true;
|
|
1263
|
-
} else if (
|
|
1276
|
+
} else if (pattern.endsWith("*")) {
|
|
1277
|
+
const prefix = pattern.slice(0, -1).toLowerCase();
|
|
1278
|
+
if (lower.startsWith(prefix)) return true;
|
|
1279
|
+
} else if (lower === pattern.toLowerCase()) {
|
|
1264
1280
|
return true;
|
|
1265
1281
|
}
|
|
1266
1282
|
}
|
|
@@ -2840,17 +2856,19 @@ async function selectContext(input) {
|
|
|
2840
2856
|
const selectedFiles = [];
|
|
2841
2857
|
let usedTokens = 0;
|
|
2842
2858
|
const hasSemanticSignal = semanticMap.size > 0;
|
|
2859
|
+
const maxSemanticScore = hasSemanticSignal ? Math.max(...Array.from(semanticMap.values())) : 0;
|
|
2860
|
+
const semanticFloor = maxSemanticScore * 0.1;
|
|
2843
2861
|
for (const file of candidates) {
|
|
2844
2862
|
const isTarget = targetSet.has(file.relativePath);
|
|
2845
2863
|
const isMustInclude = mustInclude.has(file.relativePath);
|
|
2846
2864
|
if (hasSemanticSignal && !isTarget && !isMustInclude) {
|
|
2847
2865
|
const semScore = semanticMap.get(file.relativePath) ?? 0;
|
|
2848
2866
|
const lrnBoost = learnerMap.get(file.relativePath) ?? 0;
|
|
2849
|
-
if (semScore
|
|
2867
|
+
if (semScore < semanticFloor && lrnBoost <= 0) {
|
|
2850
2868
|
decisions.push({
|
|
2851
2869
|
file: file.relativePath,
|
|
2852
2870
|
action: "exclude",
|
|
2853
|
-
reason:
|
|
2871
|
+
reason: `Skipped: semantic score ${semScore.toFixed(3)} below floor ${semanticFloor.toFixed(3)}`
|
|
2854
2872
|
});
|
|
2855
2873
|
continue;
|
|
2856
2874
|
}
|
|
@@ -2988,6 +3006,116 @@ var init_selector = __esm({
|
|
|
2988
3006
|
}
|
|
2989
3007
|
});
|
|
2990
3008
|
|
|
3009
|
+
// src/engine/synonyms.ts
|
|
3010
|
+
/**
 * Populates BIDIRECTIONAL_INDEX from SYNONYM_MAP so that every word of a
 * synonym group maps to the other words of that group.
 *
 * For each group the canonical key maps to all of its synonyms plus itself,
 * and each synonym maps to the canonical key plus its sibling synonyms.
 * A word that appears in several groups accumulates the union of all of them.
 */
function buildBidirectionalIndex() {
  // Fetch the Set registered for a word, creating it on first use.
  const bucketFor = (word) => {
    if (!BIDIRECTIONAL_INDEX.has(word)) {
      BIDIRECTIONAL_INDEX.set(word, /* @__PURE__ */ new Set());
    }
    return BIDIRECTIONAL_INDEX.get(word);
  };

  for (const [head, group] of Object.entries(SYNONYM_MAP)) {
    const headBucket = bucketFor(head);
    for (const member of group) {
      headBucket.add(member);
    }
    headBucket.add(head);

    for (const member of group) {
      const memberBucket = bucketFor(member);
      memberBucket.add(head);
      for (const peer of group) {
        if (peer === member) continue;
        memberBucket.add(peer);
      }
    }
  }
}
|
|
3032
|
+
/**
 * Expands a term into itself plus every related word recorded in
 * BIDIRECTIONAL_INDEX.
 *
 * @param {string} term - Word to expand; it is lower-cased and trimmed first.
 * @returns {string[]} The normalized term first, followed by its related
 *   words, with no duplicates. An unknown term yields a one-element array.
 */
function expandTerm(term) {
  const normalized = term.toLowerCase().trim();
  const related = BIDIRECTIONAL_INDEX.get(normalized);
  if (!related) return [normalized];
  // The index stores a canonical term inside its own set, so the related
  // words can contain `normalized` again. De-duplicate so a caller that sums
  // weights per returned entry does not count the original term twice.
  return [...new Set([normalized, ...related])];
}
|
|
3038
|
+
// Module-level slots for the synonym data. Both are assigned inside
// init_synonyms below, not at declaration time.
var SYNONYM_MAP, BIDIRECTIONAL_INDEX;
// Initializer for src/engine/synonyms.ts.
// NOTE(review): __esm is defined outside this view — presumably the bundler's
// lazy, run-once module wrapper; confirm before relying on initialization order.
var init_synonyms = __esm({
  "src/engine/synonyms.ts"() {
    "use strict";
    // Canonical term -> list of related words. All entries are lowercase.
    // Some words appear in several groups (e.g. "store", "token", "filter"),
    // and some canonical keys also appear as synonyms of another key
    // (e.g. "error" under "log", "api" under "service").
    SYNONYM_MAP = {
      // Authentication & Authorization
      "auth": ["authentication", "authorize", "login", "signin", "session", "jwt", "token", "oauth", "sso", "identity", "credential"],
      "permission": ["authorization", "access", "role", "acl", "rbac", "policy", "grant"],
      // Database & Storage
      "database": ["db", "repository", "store", "storage", "persistence", "orm", "sql", "query", "prisma", "sequelize", "typeorm", "mongo", "postgres", "mysql"],
      "cache": ["redis", "memcached", "ttl", "invalidation", "memoize", "store"],
      "migration": ["schema", "upgrade", "version", "evolution"],
      // API & Networking
      "api": ["endpoint", "route", "handler", "controller", "rest", "graphql", "rpc", "service"],
      "request": ["req", "http", "call", "fetch", "axios"],
      "response": ["res", "reply", "result", "output"],
      "middleware": ["interceptor", "filter", "plugin", "hook"],
      "gateway": ["proxy", "router", "load-balancer", "reverse-proxy"],
      // Frontend & UI
      "component": ["widget", "element", "view", "template"],
      "state": ["store", "redux", "zustand", "context", "model"],
      "render": ["paint", "draw", "display", "show"],
      "style": ["css", "theme", "design", "layout", "tailwind"],
      // Testing & Quality
      "test": ["spec", "suite", "case", "assertion", "mock", "stub", "fixture", "vitest", "jest", "mocha"],
      "validate": ["verify", "check", "assert", "ensure", "sanitize"],
      "error": ["exception", "failure", "bug", "issue", "crash"],
      // Performance & Optimization
      "optimize": ["performance", "speed", "fast", "efficient", "improve", "enhance"],
      "latency": ["delay", "lag", "slowness", "response-time"],
      "throughput": ["capacity", "volume", "rate", "bandwidth"],
      // Data & Collections
      "dataset": ["data", "record", "row", "entry", "item", "collection"],
      "empty": ["null", "blank", "missing", "absent", "none", "zero"],
      // Data Processing
      "parse": ["decode", "deserialize", "extract", "read"],
      "serialize": ["encode", "stringify", "format", "marshal"],
      "transform": ["map", "convert", "translate", "process"],
      "filter": ["select", "where", "match", "find"],
      // Configuration & Setup
      "config": ["configuration", "setting", "option", "preference", "env", "environment"],
      "init": ["initialize", "setup", "bootstrap", "start", "create"],
      "deploy": ["deployment", "release", "publish", "ship", "launch"],
      // Logging & Monitoring
      "log": ["logger", "logging", "trace", "debug", "info", "warn", "error"],
      "metric": ["measurement", "stat", "telemetry", "analytics", "tracking"],
      "monitor": ["observe", "watch", "track", "alert"],
      // Security
      "secret": ["credential", "key", "password", "token", "apikey", "sensitive"],
      "encrypt": ["cipher", "encode", "hash", "crypto"],
      "sanitize": ["escape", "clean", "validate", "filter"],
      // File System & I/O
      "file": ["document", "asset", "resource", "path"],
      "read": ["load", "fetch", "get", "retrieve"],
      "write": ["save", "persist", "store", "put"],
      "delete": ["remove", "unlink", "destroy", "drop"],
      // Async & Concurrency
      "async": ["asynchronous", "promise", "await", "concurrent", "parallel"],
      "queue": ["buffer", "backlog", "pending", "deferred"],
      "lock": ["mutex", "semaphore", "synchronize", "atomic"],
      // Architecture & Patterns
      "service": ["microservice", "api", "backend", "server", "daemon"],
      "client": ["consumer", "frontend", "user", "caller"],
      "event": ["message", "signal", "notification", "trigger"],
      "stream": ["flow", "pipe", "channel", "observable"],
      // Business Logic
      "user": ["account", "profile", "member", "customer"],
      "order": ["purchase", "transaction", "checkout", "cart"],
      "payment": ["billing", "invoice", "charge", "stripe", "paypal"],
      "notification": ["alert", "message", "email", "push", "sms"],
      // DevOps & Infrastructure
      "docker": ["container", "image", "dockerfile", "compose"],
      "kubernetes": ["k8s", "cluster", "pod", "deployment", "helm"],
      "ci": ["continuous-integration", "pipeline", "build", "github-actions", "jenkins"],
      "cd": ["continuous-deployment", "release", "deploy", "rollout"]
    };
    // Start from an empty index, then derive it from SYNONYM_MAP right away.
    BIDIRECTIONAL_INDEX = /* @__PURE__ */ new Map();
    buildBidirectionalIndex();
  }
});
|
|
3118
|
+
|
|
2991
3119
|
// src/engine/tfidf.ts
|
|
2992
3120
|
function buildIndex(files) {
|
|
2993
3121
|
const documents = /* @__PURE__ */ new Map();
|
|
@@ -3011,15 +3139,29 @@ function buildIndex(files) {
|
|
|
3011
3139
|
let totalLength = 0;
|
|
3012
3140
|
for (const doc of documents.values()) totalLength += doc.length;
|
|
3013
3141
|
const avgDocLength = totalDocs > 0 ? totalLength / totalDocs : 1;
|
|
3014
|
-
return { documents, idf, avgDocLength, totalDocs };
|
|
3142
|
+
return { documents, idf, docFreq, avgDocLength, totalDocs };
|
|
3015
3143
|
}
|
|
3016
|
-
function query(index, taskDescription, maxResults = 50) {
|
|
3144
|
+
function query(index, taskDescription, maxResults = 50, expandSynonyms = true) {
|
|
3017
3145
|
const queryTerms = tokenize(taskDescription);
|
|
3018
3146
|
if (queryTerms.length === 0) return [];
|
|
3019
3147
|
const querySet = /* @__PURE__ */ new Map();
|
|
3020
3148
|
for (const term of queryTerms) {
|
|
3021
3149
|
querySet.set(term, (querySet.get(term) ?? 0) + 1);
|
|
3022
3150
|
}
|
|
3151
|
+
if (expandSynonyms) {
|
|
3152
|
+
const expandedSet = /* @__PURE__ */ new Map();
|
|
3153
|
+
for (const [term, count] of querySet) {
|
|
3154
|
+
const synonyms = expandTerm(term);
|
|
3155
|
+
for (const syn of synonyms) {
|
|
3156
|
+
const weight = syn === term ? count : count * 0.7;
|
|
3157
|
+
expandedSet.set(syn, (expandedSet.get(syn) ?? 0) + weight);
|
|
3158
|
+
}
|
|
3159
|
+
}
|
|
3160
|
+
querySet.clear();
|
|
3161
|
+
for (const [term, weight] of expandedSet) {
|
|
3162
|
+
querySet.set(term, weight);
|
|
3163
|
+
}
|
|
3164
|
+
}
|
|
3023
3165
|
const results = [];
|
|
3024
3166
|
const k1 = 1.5;
|
|
3025
3167
|
const b = 0.75;
|
|
@@ -3031,8 +3173,11 @@ function query(index, taskDescription, maxResults = 50) {
|
|
|
3031
3173
|
if (tf === 0) continue;
|
|
3032
3174
|
const termIdf = index.idf.get(qTerm) ?? 0;
|
|
3033
3175
|
if (termIdf <= 0) continue;
|
|
3176
|
+
const df = index.docFreq.get(qTerm) ?? 0;
|
|
3177
|
+
const dfRatio = index.totalDocs > 0 ? df / index.totalDocs : 0;
|
|
3178
|
+
const domainDamp = dfRatio > 0.5 ? (1 - dfRatio) * (1 - dfRatio) : 1;
|
|
3034
3179
|
const tfNorm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc.length / index.avgDocLength));
|
|
3035
|
-
score += termIdf * tfNorm * qCount;
|
|
3180
|
+
score += termIdf * tfNorm * qCount * domainDamp;
|
|
3036
3181
|
matchedTerms.push(qTerm);
|
|
3037
3182
|
}
|
|
3038
3183
|
if (score > 0) {
|
|
@@ -3095,32 +3240,147 @@ function boostByPath(matches, allFiles, taskDescription) {
|
|
|
3095
3240
|
for (const m of matches) {
|
|
3096
3241
|
boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
|
|
3097
3242
|
}
|
|
3243
|
+
const pathTermDocFreq = /* @__PURE__ */ new Map();
|
|
3244
|
+
const allPathTokenSets = /* @__PURE__ */ new Map();
|
|
3098
3245
|
for (const filePath of allFiles) {
|
|
3099
|
-
const
|
|
3100
|
-
|
|
3101
|
-
|
|
3246
|
+
const tokens = new Set(tokenize(filePath.replace(/[/\\.]/g, " ")));
|
|
3247
|
+
allPathTokenSets.set(filePath, tokens);
|
|
3248
|
+
for (const t of tokens) {
|
|
3249
|
+
pathTermDocFreq.set(t, (pathTermDocFreq.get(t) ?? 0) + 1);
|
|
3250
|
+
}
|
|
3251
|
+
}
|
|
3252
|
+
const N = allFiles.length;
|
|
3253
|
+
function pathIdf(term) {
|
|
3254
|
+
const df = pathTermDocFreq.get(term) ?? 0;
|
|
3255
|
+
if (df === 0) return 0;
|
|
3256
|
+
return Math.log((N + 1) / (df + 1));
|
|
3257
|
+
}
|
|
3258
|
+
for (const filePath of allFiles) {
|
|
3259
|
+
const parts = filePath.replace(/\\/g, "/").split("/");
|
|
3260
|
+
const fileName = parts.pop() ?? "";
|
|
3261
|
+
const dirSegments = parts;
|
|
3262
|
+
const dirTerms = tokenize(dirSegments.join(" ").replace(/[/\\.]/g, " "));
|
|
3263
|
+
const fileTerms = tokenize(fileName.replace(/[.\-_]/g, " "));
|
|
3264
|
+
const dirMatches = dirTerms.filter((t) => queryTerms.has(t));
|
|
3265
|
+
const fileMatches = fileTerms.filter((t) => queryTerms.has(t));
|
|
3266
|
+
const allPathMatches = [.../* @__PURE__ */ new Set([...dirMatches, ...fileMatches])];
|
|
3267
|
+
if (allPathMatches.length > 0) {
|
|
3268
|
+
const uniqueDirMatches = [...new Set(dirMatches)];
|
|
3269
|
+
const uniqueFileMatches = [...new Set(fileMatches)].filter((t) => !uniqueDirMatches.includes(t));
|
|
3270
|
+
const maxIdf = Math.log(N + 1);
|
|
3271
|
+
let pathBoost = 0;
|
|
3272
|
+
for (const t of uniqueDirMatches) {
|
|
3273
|
+
pathBoost += 0.4 * (pathIdf(t) / maxIdf);
|
|
3274
|
+
}
|
|
3275
|
+
for (const t of uniqueFileMatches) {
|
|
3276
|
+
pathBoost += 0.25 * (pathIdf(t) / maxIdf);
|
|
3277
|
+
}
|
|
3102
3278
|
const existing = boosted.get(filePath);
|
|
3103
|
-
const pathBoost = pathMatches.length * 0.3;
|
|
3104
3279
|
if (existing) {
|
|
3105
|
-
existing.score =
|
|
3106
|
-
for (const t of
|
|
3280
|
+
existing.score = existing.score + pathBoost;
|
|
3281
|
+
for (const t of allPathMatches) {
|
|
3107
3282
|
if (!existing.matchedTerms.includes(t)) existing.matchedTerms.push(t);
|
|
3108
3283
|
}
|
|
3109
3284
|
} else {
|
|
3110
3285
|
boosted.set(filePath, {
|
|
3111
3286
|
filePath,
|
|
3112
|
-
score:
|
|
3113
|
-
matchedTerms:
|
|
3287
|
+
score: pathBoost,
|
|
3288
|
+
matchedTerms: allPathMatches
|
|
3289
|
+
});
|
|
3290
|
+
}
|
|
3291
|
+
}
|
|
3292
|
+
}
|
|
3293
|
+
return [...boosted.values()].sort((a, b) => b.score - a.score);
|
|
3294
|
+
}
|
|
3295
|
+
/**
 * Adds a score bonus to files whose path contains a directory/layer term
 * associated (via LAYER_MAP) with a term of the task description.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Existing matches. Entries are copied before being adjusted.
 * @param {Iterable<string>} allFiles - Every candidate file path.
 * @param {string} taskDescription - Free-text task; tokenized with tokenize().
 * @returns {Array} `matches` itself when no query term has a LAYER_MAP entry;
 *   otherwise a new array sorted by descending score, which may include files
 *   that were not in `matches`.
 */
function boostByLayer(matches, allFiles, taskDescription) {
  const queryTerms = tokenize(taskDescription);
  const targetDirTerms = /* @__PURE__ */ new Set();
  for (const term of queryTerms) {
    // Own-property check: LAYER_MAP is a plain object, so a query token such
    // as "constructor" would otherwise resolve to an inherited, non-iterable
    // value and make the loop below throw.
    if (!Object.prototype.hasOwnProperty.call(LAYER_MAP, term)) continue;
    for (const l of LAYER_MAP[term]) targetDirTerms.add(l);
  }
  if (targetDirTerms.size === 0) return matches;

  const boosted = /* @__PURE__ */ new Map();
  for (const m of matches) {
    boosted.set(m.filePath, { ...m, matchedTerms: [...m.matchedTerms] });
  }

  for (const filePath of allFiles) {
    // Path separators and dots become spaces so each segment is tokenized.
    const dirTerms = tokenize(filePath.replace(/[/\\.]/g, " "));
    const layerHits = dirTerms.filter((t) => targetDirTerms.has(t));
    if (layerHits.length === 0) continue;

    // 0.2 per hit, capped at 0.5.
    const layerBoost = Math.min(0.5, layerHits.length * 0.2);
    const existing = boosted.get(filePath);
    if (existing) {
      existing.score = existing.score + layerBoost;
    } else {
      boosted.set(filePath, {
        filePath,
        score: layerBoost,
        matchedTerms: [`[layer:${layerHits[0]}]`]
      });
    }
  }
  return [...boosted.values()].sort((a, b) => b.score - a.score);
}
|
|
3120
|
-
|
|
3328
|
+
/**
 * Spreads part of the score of the leading matches onto the files they
 * import and onto the files that import them.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Ranked matches; only the first `topK` act as boost sources.
 * @param {Map<string, Iterable<string>>} dependencies - file -> files it imports.
 * @param {number} [topK=10] - Number of leading matches used as sources.
 * @param {number} [boostFactor=0.4] - Fraction of a source's score passed to
 *   each of its imports; importers receive 70% of that amount.
 * @returns {Array} `matches` itself when either input is empty, otherwise a
 *   new array sorted by descending score.
 */
function boostByImports(matches, dependencies, topK = 10, boostFactor = 0.4) {
  if (matches.length === 0 || dependencies.size === 0) return matches;

  const boosted = /* @__PURE__ */ new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  // Invert the graph: imported file -> list of files importing it.
  const reverseDeps = /* @__PURE__ */ new Map();
  for (const [importer, imported] of dependencies) {
    for (const target of imported) {
      if (!reverseDeps.has(target)) reverseDeps.set(target, []);
      reverseDeps.get(target).push(importer);
    }
  }

  const maxDepsPerParent = 5;
  const currentScore = (path) => boosted.get(path)?.score ?? 0;
  // Highest-scoring neighbours first (scores as of the moment of the call),
  // limited to maxDepsPerParent entries.
  const strongest = (paths) =>
    [...paths].sort((a, b) => currentScore(b) - currentScore(a)).slice(0, maxDepsPerParent);

  for (const source of matches.slice(0, topK)) {
    const share = source.score * boostFactor;

    for (const dep of strongest(dependencies.get(source.filePath) ?? [])) {
      applyImportBoost(boosted, dep, share, source.filePath, "imported-by");
    }
    for (const importer of strongest(reverseDeps.get(source.filePath) ?? [])) {
      applyImportBoost(boosted, importer, share * 0.7, source.filePath, "imports");
    }
  }

  return [...boosted.values()].sort((a, b) => b.score - a.score);
}
|
|
3367
|
+
/**
 * Adds `boost` to the entry for `filePath` in `boosted`, creating the entry
 * when the file has no score yet.
 *
 * @param {Map<string, {filePath: string, score: number, matchedTerms: string[]}>} boosted
 *   Score table; mutated in place.
 * @param {string} filePath - File receiving the boost.
 * @param {number} boost - Amount added to the score.
 * @param {string} parentPath - File the boost originates from; only its base
 *   name is used, in the label of a newly created entry.
 * @param {string} relation - Label prefix describing the relationship.
 * @returns {void}
 */
function applyImportBoost(boosted, filePath, boost, parentPath, relation) {
  const existing = boosted.get(filePath);
  if (existing) {
    existing.score = existing.score + boost;
    return;
  }
  // Normalize Windows separators before taking the base name; splitting on
  // "/" alone would put the whole backslash path into the label.
  const parentName = parentPath.replace(/\\/g, "/").split("/").pop();
  boosted.set(filePath, {
    filePath,
    score: boost,
    matchedTerms: [`[${relation}:${parentName}]`]
  });
}
|
|
3379
|
+
var STOP_WORDS, TERM_FAMILIES, LAYER_MAP;
|
|
3121
3380
|
var init_tfidf = __esm({
|
|
3122
3381
|
"src/engine/tfidf.ts"() {
|
|
3123
3382
|
"use strict";
|
|
3383
|
+
init_synonyms();
|
|
3124
3384
|
STOP_WORDS = /* @__PURE__ */ new Set([
|
|
3125
3385
|
// Language keywords
|
|
3126
3386
|
"import",
|
|
@@ -3264,6 +3524,241 @@ var init_tfidf = __esm({
|
|
|
3264
3524
|
["encryp", "encrypt"],
|
|
3265
3525
|
["decryp", "encrypt"]
|
|
3266
3526
|
];
|
|
3527
|
+
LAYER_MAP = {
|
|
3528
|
+
// Query terms → directory segments that should be boosted
|
|
3529
|
+
"endpoint": ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
|
|
3530
|
+
"api": ["endpoint", "controller", "handler", "route", "router", "api", "rest"],
|
|
3531
|
+
"controller": ["endpoint", "controller", "handler", "route", "router"],
|
|
3532
|
+
"repositori": ["repositori", "dao", "store", "persist"],
|
|
3533
|
+
"databas": ["repositori", "dao", "store", "persist", "migrat"],
|
|
3534
|
+
"storag": ["repositori", "dao", "store", "persist"],
|
|
3535
|
+
"cach": ["cach", "redis", "memcach", "store"],
|
|
3536
|
+
"servic": ["servic", "usecas", "core"],
|
|
3537
|
+
"usecas": ["usecas", "servic", "core"],
|
|
3538
|
+
"config": ["config", "inject", "setup", "bootstrap"],
|
|
3539
|
+
"inject": ["config", "inject", "setup"],
|
|
3540
|
+
"depend": ["config", "inject", "setup"],
|
|
3541
|
+
"event": ["event", "listen", "handler", "subscrib"],
|
|
3542
|
+
"error": ["error", "except", "handler", "fault"],
|
|
3543
|
+
"except": ["except", "error", "handler", "fault"],
|
|
3544
|
+
"model": ["model", "entiti", "dto", "domain", "schema"],
|
|
3545
|
+
"entiti": ["entiti", "model", "dto", "domain"],
|
|
3546
|
+
"metric": ["metric", "monitor", "observ", "telemetri"],
|
|
3547
|
+
"test": ["test", "spec", "mock", "fixtur"],
|
|
3548
|
+
"migrat": ["migrat", "schema", "databas"]
|
|
3549
|
+
};
|
|
3550
|
+
}
|
|
3551
|
+
});
|
|
3552
|
+
|
|
3553
|
+
// src/engine/ast-tokenizer.ts
|
|
3554
|
+
/**
 * Picks the language-specific extractor for a file, based on its extension,
 * and returns the structural tokens found in `content`.
 *
 * @param {string} content - Source text of the file.
 * @param {string} filePath - Path of the file; the text after the last "." is
 *   lower-cased and passed to detectLanguage2.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: (string|null), language: string}}
 *   An empty result tagged "unknown" when the language is not recognised.
 */
function extractStructuralTokens(content, filePath) {
  const extension = filePath.split(".").pop()?.toLowerCase() ?? "";
  const language = detectLanguage2(extension);

  if (language === "java") return extractJava(content);
  if (language === "python") return extractPython(content);
  if (language === "go") return extractGo(content);
  if (language === "typescript") return extractTypeScript(content);

  return {
    classNames: [],
    methodNames: [],
    annotations: [],
    parents: [],
    packageName: null,
    language: "unknown"
  };
}
|
|
3570
|
+
/**
 * Maps a lower-case file extension (without the dot) to the extractor family
 * used for structural tokenization.
 *
 * @param {string} ext - File extension, e.g. "java" or "tsx".
 * @returns {string} "java", "python", "go", "typescript" (for ts, tsx, js and
 *   jsx) or "unknown" for anything else.
 */
function detectLanguage2(ext) {
  const families = /* @__PURE__ */ new Map([
    ["java", "java"],
    ["py", "python"],
    ["go", "go"],
    ["ts", "typescript"],
    ["tsx", "typescript"],
    ["js", "typescript"],
    ["jsx", "typescript"]
  ]);
  return families.get(ext) ?? "unknown";
}
|
|
3587
|
+
/**
 * Regex-based extraction of structural names from Java source text.
 *
 * @param {string} content - Java source.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: (string|null), language: "java"}}
 *   - packageName: name from the first `package ...;` line, or null.
 *   - annotations: lower-cased `@Name` tokens, excluding "override" and names
 *     of two characters or fewer.
 *   - classNames / parents: class, interface and enum names plus the types
 *     named after `extends` / `implements`.
 *   - methodNames: names preceded by a modifier and a return type, minus a
 *     fixed list of common boilerplate method names.
 */
function extractJava(content) {
  const packageName = content.match(/^package\s+([\w.]+)\s*;/m)?.[1] ?? null;

  const annotations = [...content.matchAll(/@(\w+)/g)]
    .map((hit) => hit[1].toLowerCase())
    .filter((ann) => ann !== "override" && ann.length > 2);

  const classNames = [];
  const parents = [];
  const typePattern = /(?:public|private|protected|abstract|final|static)?\s*(?:class|interface|enum)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
  for (const [, typeName, superType, interfaceList] of content.matchAll(typePattern)) {
    classNames.push(typeName);
    if (superType) parents.push(superType);
    if (interfaceList) {
      const implemented = interfaceList.split(",").map((part) => part.trim()).filter((part) => part);
      parents.push(...implemented);
    }
  }

  const boilerplate = ["equals", "hashCode", "toString", "main", "get", "set"];
  const methodPattern = /(?:public|private|protected|static|abstract|final|synchronized|default)\s+(?:<[\w\s,?]+>\s+)?(?:[\w<>\[\]?,\s]+)\s+(\w+)\s*\(/g;
  const methodNames = [...content.matchAll(methodPattern)]
    .map((hit) => hit[1])
    .filter((name) => !boilerplate.includes(name));

  return { classNames, methodNames, annotations, parents, packageName, language: "java" };
}
|
|
3625
|
+
/**
 * Regex-based extraction of structural names from Python source text.
 *
 * @param {string} content - Python source.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: null, language: "python"}}
 *   - classNames / parents: names from `class X(A, B)` statements. Subscripts
 *     (`Generic[K, V]`) are reduced to the bare name, and keyword arguments
 *     (`metaclass=...`) and `object` are not reported as parents.
 *   - annotations: lower-cased decorator names at the start of a line.
 *   - methodNames: `def` / `async def` names with leading and trailing
 *     underscores removed. Dunder methods other than `__init__` are skipped,
 *     as are names that are empty after stripping (e.g. `def _(...)`).
 */
function extractPython(content) {
  const classNames = [];
  const methodNames = [];
  const annotations = [];
  const parents = [];

  for (const [, className, baseList] of content.matchAll(/^\s*class\s+(\w+)(?:\(([^)]+)\))?/gm)) {
    classNames.push(className);
    if (!baseList) continue;

    // Remove subscript groups innermost-first so the commas inside
    // `Generic[K, V]` are not mistaken for base-class separators.
    let bases = baseList;
    let previous;
    do {
      previous = bases;
      bases = bases.replace(/\[[^\[\]]*\]/g, "");
    } while (bases !== previous);

    for (const base of bases.split(",")) {
      // split("[") still guards against an unbalanced leftover bracket.
      const trimmed = base.trim().split("[")[0];
      // `metaclass=Meta` and other keyword arguments are not base classes.
      if (trimmed.includes("=")) continue;
      if (trimmed && trimmed !== "object") parents.push(trimmed);
    }
  }

  for (const [, decorator] of content.matchAll(/^\s*@(\w+)/gm)) {
    annotations.push(decorator.toLowerCase());
  }

  for (const [, name] of content.matchAll(/^\s*(?:async\s+)?def\s+(\w+)/gm)) {
    if (name.startsWith("__") && name !== "__init__") continue;
    const bare = name.replace(/^_+|_+$/g, "");
    // `def _(...)` (common with singledispatch) strips down to nothing.
    if (bare) methodNames.push(bare);
  }

  return { classNames, methodNames, annotations, parents, packageName: null, language: "python" };
}
|
|
3656
|
+
/**
 * Regex-based extraction of structural names from Go source text.
 *
 * @param {string} content - Go source.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: (string|null), language: "go"}}
 *   - packageName: name from the first `package` line, or null.
 *   - classNames: names declared as `type X struct` or `type X interface`.
 *   - methodNames: every named `func`; for methods, the receiver type is also
 *     appended to `parents` (once per method).
 *   - annotations: always empty.
 */
function extractGo(content) {
  const packageName = content.match(/^package\s+(\w+)/m)?.[1] ?? null;

  const classNames = Array.from(
    content.matchAll(/type\s+(\w+)\s+(?:struct|interface)/g),
    (hit) => hit[1]
  );

  const methodNames = [];
  const parents = [];
  for (const [, receiverType, funcName] of content.matchAll(/func\s+(?:\(\w+\s+\*?(\w+)\)\s+)?(\w+)\s*\(/g)) {
    methodNames.push(funcName);
    if (receiverType) parents.push(receiverType);
  }

  return { classNames, methodNames, annotations: [], parents, packageName, language: "go" };
}
|
|
3677
|
+
/**
 * Regex-based extraction of structural names from TypeScript/JavaScript
 * source text.
 *
 * @param {string} content - Source text.
 * @returns {{classNames: string[], methodNames: string[], annotations: string[],
 *   parents: string[], packageName: null, language: "typescript"}}
 *   - classNames / parents: class and interface names plus the types named
 *     after `extends` / `implements`.
 *   - annotations: every `@word` token, lower-cased.
 *   - methodNames: names of `function` declarations (optionally exported
 *     and/or async).
 */
function extractTypeScript(content) {
  const classNames = [];
  const parents = [];
  const typePattern = /(?:export\s+)?(?:abstract\s+)?(?:class|interface)\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w\s,]+))?/g;
  for (const [, typeName, superType, interfaceList] of content.matchAll(typePattern)) {
    classNames.push(typeName);
    if (superType) parents.push(superType);
    if (!interfaceList) continue;
    for (const piece of interfaceList.split(",")) {
      const cleaned = piece.trim();
      if (cleaned) parents.push(cleaned);
    }
  }

  const annotations = Array.from(content.matchAll(/@(\w+)/g), (hit) => hit[1].toLowerCase());

  const methodNames = Array.from(
    content.matchAll(/(?:export\s+)?(?:async\s+)?function\s+(\w+)/g),
    (hit) => hit[1]
  );

  return { classNames, methodNames, annotations, parents, packageName: null, language: "typescript" };
}
|
|
3706
|
+
/**
 * Prepends a line of structural keywords (derived from class names, method
 * names, parent types and annotations) to a file's content.
 *
 * Repetition acts as weighting: each class name is emitted three times, each
 * method name twice, each parent once. camelCase boundaries are split into
 * separate lower-case words. Annotations contribute the terms listed for
 * them in ANNOTATION_LAYER_MAP, if any.
 *
 * @param {string} content - Original file content.
 * @param {string} filePath - File path, forwarded to extractStructuralTokens.
 * @returns {string} `content` unchanged when nothing was extracted; otherwise
 *   the keywords joined by spaces, a newline, then `content`.
 */
function augmentContentWithStructure(content, filePath) {
  const struct = extractStructuralTokens(content, filePath);
  const splitCamel = (name) => name.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
  const augmentParts = [];

  for (const name of struct.classNames) {
    const words = splitCamel(name);
    augmentParts.push(words, words, words);
  }
  for (const name of struct.methodNames) {
    const words = splitCamel(name);
    augmentParts.push(words, words);
  }
  for (const parent of struct.parents) {
    augmentParts.push(splitCamel(parent));
  }
  for (const ann of struct.annotations) {
    // Own-property check: annotation names come straight from source text,
    // and a JSDoc tag such as `@constructor` would otherwise resolve to an
    // inherited, non-iterable value and make the spread below throw.
    if (!Object.prototype.hasOwnProperty.call(ANNOTATION_LAYER_MAP, ann)) continue;
    const layerTerms = ANNOTATION_LAYER_MAP[ann];
    if (layerTerms) {
      augmentParts.push(...layerTerms);
    }
  }

  if (augmentParts.length === 0) return content;
  return augmentParts.join(" ") + "\n" + content;
}
|
|
3730
|
+
// Module-level slot for the annotation table; assigned inside
// init_ast_tokenizer below, not at declaration time.
var ANNOTATION_LAYER_MAP;
// Initializer for src/engine/ast-tokenizer.ts.
// NOTE(review): __esm is defined outside this view — presumably the bundler's
// lazy, run-once module wrapper; confirm before relying on initialization order.
var init_ast_tokenizer = __esm({
  "src/engine/ast-tokenizer.ts"() {
    "use strict";
    // Run the tfidf module initializer before this module's own setup.
    init_tfidf();
    // Lower-case annotation/decorator name -> extra index terms that
    // augmentContentWithStructure prepends to a file's content.
    // NOTE(review): the values look like stemmed tokens ("repositori",
    // "servic"), presumably matching tokenize() output — verify against the
    // tokenizer.
    ANNOTATION_LAYER_MAP = {
      "repository": ["repositori", "dao", "store", "persist", "databas"],
      "service": ["servic", "usecas", "busi", "logic"],
      "controller": ["control", "endpoint", "api", "rest", "handler"],
      "restcontroller": ["control", "endpoint", "api", "rest", "handler"],
      "component": ["compon", "bean", "inject"],
      "entity": ["entiti", "model", "domain", "persist"],
      "configuration": ["config", "setup", "inject", "wire"],
      "bean": ["config", "inject", "wire", "bean"],
      "autowired": ["inject", "wire", "depend"],
      "inject": ["inject", "wire", "depend"],
      "provides": ["inject", "wire", "depend", "config"],
      "singleton": ["singleton", "scope", "lifecycl"],
      "test": ["test", "spec", "assert", "mock"],
      // extractJava drops "override" annotations, so this entry can only be
      // reached through the other extractors.
      "override": ["overrid", "inherit", "polymorph"],
      "transactional": ["transact", "databas", "commit", "rollback"],
      "cacheable": ["cach", "ttl", "evict", "invalidat"],
      "async": ["async", "concurr", "thread", "parallel"],
      "eventlistener": ["event", "listen", "handler", "subscrib"],
      "scheduled": ["schedul", "cron", "timer", "job"],
      "slf4j": ["log", "metric", "observ"],
      "data": ["model", "entiti", "dto", "data"],
      "getter": ["model", "entiti", "dto", "accessor"],
      "setter": ["model", "entiti", "dto", "mutator"],
      "builder": ["build", "pattern", "fluent"],
      "value": ["model", "entiti", "dto", "immut"]
    };
  }
});
|
|
3269
3764
|
|
|
@@ -3382,7 +3877,7 @@ function rebuildIndex(cachedFiles) {
|
|
|
3382
3877
|
for (const [term, df] of docFreq) {
|
|
3383
3878
|
idf.set(term, Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1));
|
|
3384
3879
|
}
|
|
3385
|
-
return { documents, idf, avgDocLength, totalDocs };
|
|
3880
|
+
return { documents, idf, docFreq, avgDocLength, totalDocs };
|
|
3386
3881
|
}
|
|
3387
3882
|
var CACHE_VERSION, CACHE_DIR, CACHE_FILE;
|
|
3388
3883
|
var init_index_cache = __esm({
|
|
@@ -3905,6 +4400,1372 @@ var init_router = __esm({
|
|
|
3905
4400
|
}
|
|
3906
4401
|
});
|
|
3907
4402
|
|
|
4403
|
+
// src/engine/call-graph.ts
|
|
4404
|
+
/**
 * Classifies a file by its extension for call-graph extraction.
 *
 * @param {string} filePath - File path; the text after the last "." is
 *   compared case-insensitively.
 * @returns {("java"|"ts"|"python"|"go"|null)} "ts" covers ts, tsx, js, jsx,
 *   mts and mjs; null means the extension is not supported.
 */
function getLanguage(filePath) {
  const suffix = filePath.split(".").pop()?.toLowerCase() ?? "";
  switch (suffix) {
    case "java":
      return "java";
    case "ts":
    case "tsx":
    case "js":
    case "jsx":
    case "mts":
    case "mjs":
      return "ts";
    case "py":
      return "python";
    case "go":
      return "go";
    default:
      return null;
  }
}
|
|
4412
|
+
/**
 * Regex-based scan of Java source for method definitions.
 *
 * @param {string} content - Java source text.
 * @param {string} filePath - Path recorded on every returned definition.
 * @returns {Array<{name: string, className: (string|undefined), filePath: string, isExported: boolean}>}
 *   One entry per detected method. `className` is the first
 *   `public`/`abstract` class or interface found in the file (undefined when
 *   there is none); `isExported` is true when the matched text contains the
 *   `public` modifier.
 */
function extractJavaDefinitions(content, filePath) {
  const defs = [];
  const className = content.match(/(?:public|abstract)\s+(?:class|interface)\s+(\w+)/)?.[1];
  // Control-flow keywords that the method pattern can capture as a "name".
  const skipped = /* @__PURE__ */ new Set(["if", "for", "while", "switch", "catch", "return"]);
  const methodRegex = /(?:public|protected|private|static|\s)+\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm;

  for (const match of content.matchAll(methodRegex)) {
    const name = match[1];
    // A name equal to the class name is a constructor, not a method.
    if (name === className || skipped.has(name)) continue;
    defs.push({
      name,
      className,
      filePath,
      isExported: /public\s/.test(match[0])
    });
  }
  return defs;
}
|
|
4432
|
+
/**
 * Extract function, class-method and exported arrow-function definitions from
 * TypeScript/JavaScript source using regular expressions.
 *
 * Results are appended in three passes: `function` declarations, then
 * method-like patterns following each `class`, then `export const x = (`.
 *
 * @param {string} content - Source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className?: string, filePath: string, isExported: boolean}>}
 *   Detected definitions; methods carry the `className` they were found after
 *   and inherit the class's exported flag.
 */
function extractTsDefinitions(content, filePath) {
  // Words that look like `name(...) {` but are not methods. `switch` is
  // included so that `switch (x) {` is not reported as a method, matching the
  // keyword filter used for Java.
  const nonMethodNames = new Set([
    "constructor",
    "if",
    "for",
    "while",
    "switch",
    "catch",
    "return",
    "function",
    "class"
  ]);
  const defs = [];
  let match;

  const funcRegex = /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(/gm;
  while ((match = funcRegex.exec(content)) !== null) {
    defs.push({ name: match[1], filePath, isExported: match[0].startsWith("export") });
  }

  const classRegex = /(?:export\s+)?class\s+(\w+)/gm;
  while ((match = classRegex.exec(content)) !== null) {
    const className = match[1];
    const isExportedClass = match[0].startsWith("export");
    const classStart = match.index + match[0].length;
    // A fresh regex per class: `lastIndex` is stateful on /g patterns.
    const methodInClassRegex = /(?:async\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*\w[\w<>\[\]|,\s]*\s*)?{/gm;
    methodInClassRegex.lastIndex = classStart;
    let methodMatch;
    while ((methodMatch = methodInClassRegex.exec(content)) !== null) {
      const name = methodMatch[1];
      if (nonMethodNames.has(name)) continue;
      defs.push({ name, className, filePath, isExported: isExportedClass });
      // The scan is not bounded by the class's closing brace; stop once a
      // recorded match lies more than 10,000 characters past the class header.
      if (methodMatch.index - classStart > 1e4) break;
    }
  }

  const arrowRegex = /export\s+const\s+(\w+)\s*=\s*(?:async\s+)?\(/gm;
  while ((match = arrowRegex.exec(content)) !== null) {
    defs.push({ name: match[1], filePath, isExported: true });
  }
  return defs;
}
|
|
4461
|
+
/**
 * Extract function and method definitions from Python source.
 *
 * The source is scanned once for unindented `class` headers and `def`
 * statements. A `def` that is indented with spaces or tabs and appears after
 * a class header is attributed to the most recent class. Names starting with
 * `_` are skipped, except `__init__`, which is reported under its class name
 * when one is known.
 *
 * Indentation is measured on the `def` line only, so blank lines before a
 * top-level function no longer make it look like a method of the previous
 * class.
 *
 * @param {string} content - Python source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className: (string|undefined), filePath: string, isExported: boolean}>}
 *   Detected definitions in source order.
 */
function extractPythonDefinitions(content, filePath) {
  const defs = [];
  // Group 1: class name. Groups 2/3: indentation and name of a `def`.
  const lineRegex = /^(?:class\s+(\w+)|([ \t]*)def\s+(\w+)\s*\()/gm;
  let currentClass;
  let match;
  while ((match = lineRegex.exec(content)) !== null) {
    if (match[1] !== undefined) {
      currentClass = match[1];
      continue;
    }
    const indent = match[2];
    const name = match[3];
    if (name.startsWith("_") && name !== "__init__") continue;
    const className = indent.length > 0 ? currentClass : undefined;
    defs.push({
      name: name === "__init__" ? className ?? name : name,
      className,
      filePath,
      isExported: !name.startsWith("_")
    });
  }
  return defs;
}
|
|
4484
|
+
/**
 * Extract top-level functions and receiver methods from Go source.
 *
 * Plain functions are listed first, followed by methods; a method carries its
 * receiver type (without a leading `*`) as `className`. A definition is
 * exported only when its name starts with an upper-case letter, so names such
 * as `_helper` are reported as unexported.
 *
 * @param {string} content - Go source text.
 * @param {string} filePath - Path recorded on every definition.
 * @returns {Array<{name: string, className?: string, filePath: string, isExported: boolean}>}
 *   Detected definitions.
 */
function extractGoDefinitions(content, filePath) {
  // `\w` only captures ASCII word characters, so an ASCII range is sufficient.
  const startsUpperCase = (identifier) => /^[A-Z]/.test(identifier);
  const defs = [];
  let match;

  const funcRegex = /^func\s+(\w+)\s*\(/gm;
  while ((match = funcRegex.exec(content)) !== null) {
    const name = match[1];
    defs.push({ name, filePath, isExported: startsUpperCase(name) });
  }

  const methodRegex = /^func\s+\(\s*\w+\s+\*?(\w+)\s*\)\s+(\w+)\s*\(/gm;
  while ((match = methodRegex.exec(content)) !== null) {
    const [, receiverType, name] = match;
    defs.push({
      name,
      className: receiverType,
      filePath,
      isExported: startsUpperCase(name)
    });
  }
  return defs;
}
|
|
4507
|
+
/**
 * Collect `receiver.method(` call sites from Java source.
 *
 * Two passes are made and their results concatenated: first receivers that
 * start with a lower-case letter, then receivers that start with an
 * upper-case letter. Each pass has its own lists of receiver and method names
 * that are ignored.
 *
 * @param {string} content - Java source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Call sites in match order (lower-case receivers first).
 */
function extractJavaCalls(content, filePath) {
  const calls = [];

  // Runs one pattern over the source and records every call that survives
  // both ignore lists.
  const scan = (pattern, ignoredReceivers, ignoredMethods) => {
    for (const [, receiverName, methodName] of content.matchAll(pattern)) {
      if (ignoredReceivers.has(receiverName) || ignoredMethods.has(methodName)) continue;
      calls.push({ callerFile: filePath, receiverName, methodName });
    }
  };

  scan(
    /(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm,
    new Set([
      "System", "LOG", "LOGGER", "logger", "log", "this", "super",
      "String", "Integer", "Long", "Boolean", "Double", "Float",
      "Math", "Arrays", "Collections", "Objects", "Optional",
      "List", "Map", "Set", "Stream"
    ]),
    new Set([
      "toString", "hashCode", "equals", "getClass", "wait", "notify",
      "length", "size", "isEmpty", "get", "set", "add", "remove",
      "contains", "put", "stream", "map", "filter", "collect",
      "orElse", "orElseGet", "orElseThrow", "isPresent", "ifPresent",
      "of", "valueOf", "format", "println", "append", "build", "builder",
      "thenReturn", "when", "verify", "mock", "given"
    ])
  );

  scan(
    /(?<!\w)([A-Z]\w+)\.([a-z]\w+)\s*\(/gm,
    new Set([
      "System", "Math", "Arrays", "Collections", "Objects", "Optional",
      "String", "Integer", "Long", "Boolean", "Double", "Float",
      "LoggerFactory", "Logger", "Assert", "Mockito", "Assertions",
      "ResponseEntity", "HttpStatus"
    ]),
    new Set([
      "of", "valueOf", "format", "parse", "toString", "getLogger",
      "builder", "newBuilder", "create", "getInstance"
    ])
  );

  return calls;
}
|
|
4619
|
+
/**
 * Collect `receiver.method(` call sites from TypeScript/JavaScript source.
 *
 * Only receivers and methods that start with a lower-case letter are matched.
 * Calls whose receiver or method name appears in the ignore lists below are
 * dropped.
 *
 * @param {string} content - Source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Call sites in source order.
 */
function extractTsCalls(content, filePath) {
  const ignoredReceivers = new Set([
    "console", "process", "Math", "JSON", "Promise", "Object", "Array",
    "String", "Number", "Date", "Error", "RegExp", "Buffer",
    "this", "super", "window", "document",
    "expect", "describe", "it", "test", "vi", "jest"
  ]);
  const ignoredMethods = new Set([
    "toString", "valueOf", "hasOwnProperty", "length",
    "push", "pop", "shift", "unshift", "slice", "splice",
    "map", "filter", "reduce", "forEach", "find", "findIndex",
    "some", "every", "includes", "indexOf", "join", "split",
    "replace", "match", "trim", "toLowerCase", "toUpperCase",
    "startsWith", "endsWith", "keys", "values", "entries",
    "has", "get", "set", "delete", "add", "size",
    "then", "catch", "finally", "resolve", "reject",
    "stringify", "parse", "log", "warn", "error", "info", "debug"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)([a-z]\w+)\.([a-z]\w+)\s*\(/gm)) {
    if (ignoredReceivers.has(receiverName) || ignoredMethods.has(methodName)) continue;
    calls.push({ callerFile: filePath, receiverName, methodName });
  }
  return calls;
}
|
|
4707
|
+
/**
 * Collect `receiver.method(` call sites from Python source.
 *
 * An optional leading `self.` is consumed by the pattern, so
 * `self.repo.save(` is recorded with receiver `repo`. Calls whose receiver or
 * method name appears in the ignore lists below are dropped.
 *
 * @param {string} content - Python source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Call sites in source order.
 */
function extractPythonCalls(content, filePath) {
  const ignoredReceivers = new Set([
    "self", "cls", "os", "sys", "json", "logging", "print",
    "str", "int", "float", "list", "dict", "set", "tuple",
    "super", "type", "isinstance", "len", "range", "enumerate"
  ]);
  const ignoredMethods = new Set([
    "append", "extend", "insert", "remove", "pop", "clear",
    "get", "keys", "values", "items", "update",
    "format", "join", "split", "strip", "replace",
    "lower", "upper", "startswith", "endswith", "encode", "decode"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)(?:self\.)?([a-z_]\w+)\.([a-z_]\w+)\s*\(/gm)) {
    if (ignoredReceivers.has(receiverName) || ignoredMethods.has(methodName)) continue;
    calls.push({ callerFile: filePath, receiverName, methodName });
  }
  return calls;
}
|
|
4764
|
+
/**
 * Collect `receiver.Method(` call sites from Go source.
 *
 * Only lower-case receivers calling upper-case (capitalised) names are
 * matched. Receivers that appear in the package ignore list below are dropped.
 *
 * @param {string} content - Go source text.
 * @param {string} filePath - Path recorded as `callerFile` on every call.
 * @returns {Array<{callerFile: string, receiverName: string, methodName: string}>}
 *   Call sites in source order.
 */
function extractGoCalls(content, filePath) {
  const ignoredReceivers = new Set([
    "fmt", "log", "os", "io", "strings", "strconv", "bytes", "context",
    "errors", "sync", "time", "math", "sort", "http", "json",
    "testing", "reflect"
  ]);

  const calls = [];
  for (const [, receiverName, methodName] of content.matchAll(/(?<!\w)([a-z]\w+)\.([A-Z]\w+)\s*\(/gm)) {
    if (ignoredReceivers.has(receiverName)) continue;
    calls.push({ callerFile: filePath, receiverName, methodName });
  }
  return calls;
}
|
|
4794
|
+
/**
 * Build a lookup from identifiers used in a Java file to the project file
 * that defines the imported class.
 *
 * For every `import a.b.C;` whose simple name `C` matches a `C.java` file in
 * `allFiles`, the map receives `C` and the lower-camel variant `c`. Then, for
 * every `private`/`protected` field whose declared type is already in the
 * map, the field name is mapped to the same file.
 *
 * Only files whose name ends in `.java` are considered, and the suffix is
 * removed from the end of the name rather than from its first occurrence.
 *
 * @param {string} content - Java source text of the importing file.
 * @param {string[]} allFiles - Project file paths using `/` separators.
 * @returns {Map<string, string>} Identifier to file path.
 */
function buildJavaImportMap(content, allFiles) {
  // Index once by simple class name; the first path wins, as with a
  // front-to-back search of `allFiles`.
  const fileByClassName = new Map();
  for (const file of allFiles) {
    if (!file.endsWith(".java")) continue;
    const simpleName = (file.split("/").pop() ?? "").slice(0, -".java".length);
    if (!fileByClassName.has(simpleName)) fileByClassName.set(simpleName, file);
  }

  const importMap = new Map();
  let match;

  const importRegex = /^import\s+(?:static\s+)?[\w.]+\.(\w+)\s*;/gm;
  while ((match = importRegex.exec(content)) !== null) {
    const className = match[1];
    const targetFile = fileByClassName.get(className);
    if (!targetFile) continue;
    importMap.set(className, targetFile);
    // Also map the conventional instance name (`UserService` -> `userService`).
    importMap.set(className.charAt(0).toLowerCase() + className.slice(1), targetFile);
  }

  const fieldRegex = /(?:private|protected)\s+(?:final\s+)?(\w+)\s+(\w+)\s*[;=]/gm;
  while ((match = fieldRegex.exec(content)) !== null) {
    const [, typeName, fieldName] = match;
    const declaredIn = importMap.get(typeName);
    if (declaredIn) importMap.set(fieldName, declaredIn);
  }
  return importMap;
}
|
|
4821
|
+
/**
 * Build a lookup from imported identifiers in a TypeScript/JavaScript file to
 * the project file they are imported from.
 *
 * Handles `import { a, b as c } from "x"` and `import d from "x"`. The module
 * specifier is resolved by suffix against `allFiles`, because the importing
 * file's own location is not available here:
 *   - every leading `./` and `../` segment is removed (not just the first);
 *   - a trailing source extension on the specifier (`./x.js`) is ignored;
 *   - the remainder must match a whole trailing path (so `./user` does not
 *     match `superuser.ts`), either as a file or as a directory `index` file.
 *
 * @param {string} content - Source text of the importing file.
 * @param {string[]} allFiles - Project file paths using `/` separators.
 * @returns {Map<string, string>} Local identifier to file path; for
 *   `a as b` the local name `b` is used.
 */
function buildTsImportMap(content, allFiles) {
  const sourceExt = /\.(ts|tsx|js|jsx|mts|mjs)$/;
  // True when `path` is exactly `suffix` or ends with `/suffix`.
  const endsAtBoundary = (path, suffix) => path === suffix || path.endsWith(`/${suffix}`);

  const importMap = new Map();
  const importRegex = /import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]/gm;
  let match;
  while ((match = importRegex.exec(content)) !== null) {
    const [, namedImports, defaultImport, modulePath] = match;
    const target = modulePath
      .replace(/^(?:\.{1,2}\/)+/, "")
      .replace(sourceExt, "")
      .replace(/\/+$/, "");
    // Specifiers such as "./" or "../" cannot be resolved without knowing the
    // importing file's directory.
    if (!target) continue;

    const targetFile = allFiles.find((file) => {
      const stripped = file.replace(sourceExt, "");
      return endsAtBoundary(stripped, target) || endsAtBoundary(stripped, `${target}/index`);
    });
    if (!targetFile) continue;

    if (namedImports) {
      for (const specifier of namedImports.split(",")) {
        const localName = specifier.split(" as ").pop().trim();
        if (localName) importMap.set(localName, targetFile);
      }
    }
    if (defaultImport) {
      importMap.set(defaultImport, targetFile);
    }
  }
  return importMap;
}
|
|
4847
|
+
/**
 * Build a lookup from names imported via `from module import ...` in a Python
 * file to the project file that provides the module.
 *
 * The dotted module path is turned into a relative path and matched against
 * `allFiles` as `<path>.py` or `<path>/__init__.py`. The match must cover a
 * whole trailing path, so `utils` does not resolve to `myutils.py`. Leading
 * dots of relative imports are dropped before matching.
 *
 * Each imported name (after `as` aliasing) is mapped, together with a
 * snake_case variant when that differs (`UserModel` -> `user_model`).
 * Parentheses and trailing `#` comments on the import line are ignored; names
 * on continuation lines of a multi-line parenthesised import are not seen.
 *
 * @param {string} content - Python source text of the importing file.
 * @param {string[]} allFiles - Project file paths using `/` separators.
 * @returns {Map<string, string>} Imported name to file path.
 */
function buildPythonImportMap(content, allFiles) {
  // True when `path` is exactly `suffix` or ends with `/suffix`.
  const endsAtBoundary = (path, suffix) => path === suffix || path.endsWith(`/${suffix}`);

  const importMap = new Map();
  const fromRegex = /^from\s+([\w.]+)\s+import\s+(.+)$/gm;
  let match;
  while ((match = fromRegex.exec(content)) !== null) {
    const modulePath = match[1].replace(/\./g, "/").replace(/^\/+/, "");
    // `from . import x` leaves nothing to resolve against.
    if (!modulePath) continue;

    const targetFile = allFiles.find(
      (file) => endsAtBoundary(file, `${modulePath}.py`) || endsAtBoundary(file, `${modulePath}/__init__.py`)
    );
    if (!targetFile) continue;

    const names = match[2]
      .replace(/#.*$/, "")
      .replace(/[()]/g, "")
      .split(",")
      .map((part) => part.trim().split(" as ").pop().trim())
      .filter((name) => name.length > 0);

    for (const name of names) {
      importMap.set(name, targetFile);
      const snakeName = name.replace(/([A-Z])/g, "_$1").toLowerCase().replace(/^_/, "");
      if (snakeName !== name) importMap.set(snakeName, targetFile);
    }
  }
  return importMap;
}
|
|
4865
|
+
/**
 * Build a file-level call graph for the given source files.
 *
 * Pass 1 runs the language-specific definition and call extractors on every
 * file whose language is recognised. Pass 2 resolves each call to a target
 * file, trying in order:
 *   1. the receiver name in the file's import map;
 *   2. a definition registered as `Receiver.method`;
 *   3. the capitalised receiver name in the import map;
 *   4. a method name that has exactly one definition, located in another file.
 * A resolved call to a different file yields one `call` edge per
 * (caller file, target file) pair.
 *
 * Calls are grouped per file while they are collected, so pass 2 does not
 * rescan the full call list for every file, and extractor results are
 * appended element by element rather than spread into a single call.
 *
 * @param {Array<{relativePath: string, content: string}>} files - Files to analyse.
 * @returns {{definitions: Array<object>, calls: Array<object>, edges: Array<{from: string, to: string, type: "call"}>}}
 *   All extracted definitions and calls plus the de-duplicated edges.
 */
function buildCallGraph(files) {
  const allPaths = files.map((f) => f.relativePath);
  const allDefinitions = [];
  const allCalls = [];
  const callsByFile = new Map();

  for (const file of files) {
    const lang = getLanguage(file.relativePath);
    if (!lang) continue;
    let defs;
    let calls;
    switch (lang) {
      case "java":
        defs = extractJavaDefinitions(file.content, file.relativePath);
        calls = extractJavaCalls(file.content, file.relativePath);
        break;
      case "ts":
        defs = extractTsDefinitions(file.content, file.relativePath);
        calls = extractTsCalls(file.content, file.relativePath);
        break;
      case "python":
        defs = extractPythonDefinitions(file.content, file.relativePath);
        calls = extractPythonCalls(file.content, file.relativePath);
        break;
      case "go":
        defs = extractGoDefinitions(file.content, file.relativePath);
        calls = extractGoCalls(file.content, file.relativePath);
        break;
    }
    for (const def of defs) allDefinitions.push(def);
    let bucket = callsByFile.get(file.relativePath);
    if (!bucket) {
      bucket = [];
      callsByFile.set(file.relativePath, bucket);
    }
    for (const call of calls) {
      allCalls.push(call);
      bucket.push(call);
    }
  }

  const defByMethod = new Map();
  const defByQualified = new Map();
  for (const def of allDefinitions) {
    const sameName = defByMethod.get(def.name);
    if (sameName) sameName.push(def);
    else defByMethod.set(def.name, [def]);
    // On duplicate qualified names the last definition wins.
    if (def.className) defByQualified.set(`${def.className}.${def.name}`, def);
  }

  const edges = [];
  const edgeSet = new Set();
  for (const file of files) {
    const lang = getLanguage(file.relativePath);
    if (!lang) continue;
    let importMap;
    switch (lang) {
      case "java":
        importMap = buildJavaImportMap(file.content, allPaths);
        break;
      case "ts":
        importMap = buildTsImportMap(file.content, allPaths);
        break;
      case "python":
        importMap = buildPythonImportMap(file.content, allPaths);
        break;
      default:
        // No import map is built for the remaining language (Go).
        importMap = new Map();
    }

    for (const call of callsByFile.get(file.relativePath) ?? []) {
      let targetFile = importMap.get(call.receiverName);
      if (!targetFile) {
        const qualDef = defByQualified.get(`${call.receiverName}.${call.methodName}`);
        if (qualDef) targetFile = qualDef.filePath;
      }
      if (!targetFile) {
        const capitalized = call.receiverName.charAt(0).toUpperCase() + call.receiverName.slice(1);
        targetFile = importMap.get(capitalized);
      }
      if (!targetFile) {
        const candidates = defByMethod.get(call.methodName);
        if (candidates && candidates.length === 1 && candidates[0].filePath !== file.relativePath) {
          targetFile = candidates[0].filePath;
        }
      }
      if (targetFile && targetFile !== file.relativePath) {
        const key = `${file.relativePath}\u2192${targetFile}`;
        if (!edgeSet.has(key)) {
          edgeSet.add(key);
          edges.push({ from: file.relativePath, to: targetFile, type: "call" });
        }
      }
    }
  }
  return { definitions: allDefinitions, calls: allCalls, edges };
}
|
|
4955
|
+
/**
 * Re-rank search matches by spreading part of each top match's score along
 * call-graph edges.
 *
 * For each of the first `topK` matches (using their original scores), up to
 * five files it calls receive `score * boostFactor`, and up to five files
 * that call it receive 70% of that amount. Files not yet in the result are
 * added with the boost as their score. The input array and its items are not
 * modified; edges whose `type` is not `"call"` are ignored.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Matches; the first `topK` entries are used as boost sources.
 * @param {Array<{from: string, to: string, type: string}>} callEdges - Call-graph edges.
 * @param {number} [topK=10] - Number of leading matches that propagate a boost.
 * @param {number} [boostFactor=0.3] - Fraction of a parent's score passed on.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   New array sorted by descending score, or `matches` itself when either
 *   input is empty.
 */
function boostByCallGraph(matches, callEdges, topK = 10, boostFactor = 0.3) {
  if (matches.length === 0 || callEdges.length === 0) return matches;

  const calleeTag = "[call-graph:called-by-match]";
  const callerTag = "[call-graph:calls-match]";
  const perParentLimit = 5;

  // Work on copies so the caller's match objects stay untouched.
  const ranked = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  const outgoing = new Map();
  const incoming = new Map();
  const link = (adjacency, key, value) => {
    const list = adjacency.get(key);
    if (list) list.push(value);
    else adjacency.set(key, [value]);
  };
  for (const { from, to, type } of callEdges) {
    if (type !== "call") continue;
    link(outgoing, from, to);
    link(incoming, to, from);
  }

  // Adds `amount` to an existing entry (tagging it once) or creates a new one.
  const bump = (filePath, amount, tag) => {
    const entry = ranked.get(filePath);
    if (!entry) {
      ranked.set(filePath, { filePath, score: amount, matchedTerms: [tag] });
      return;
    }
    entry.score += amount;
    if (!entry.matchedTerms.includes(tag)) entry.matchedTerms.push(tag);
  };

  for (const parent of matches.slice(0, topK)) {
    const boost = parent.score * boostFactor;
    const callees = (outgoing.get(parent.filePath) ?? []).slice(0, perParentLimit);
    for (const callee of callees) bump(callee, boost, calleeTag);
    const callers = (incoming.get(parent.filePath) ?? []).slice(0, perParentLimit);
    for (const caller of callers) bump(caller, boost * 0.7, callerTag);
  }

  return [...ranked.values()].sort((a, b) => b.score - a.score);
}
|
|
5012
|
+
// Bundler-generated initializer for src/engine/call-graph.ts; the module body
// is empty apart from the strict-mode directive.
var init_call_graph = __esm({
  "src/engine/call-graph.ts"() {
    "use strict";
  }
});
|
|
5017
|
+
|
|
5018
|
+
// src/engine/git-relevance.ts
|
|
5019
|
+
import { execSync } from "child_process";
|
|
5020
|
+
/**
 * Build a co-change matrix from recent git history: for every pair of files
 * that were modified together in at least `minCoChanges` commits, record how
 * often they changed together and a Jaccard similarity
 * (`coCommits / (commitsA + commitsB - coCommits)`).
 *
 * Git is invoked through a shell command, so `maxCommits` is coerced to a
 * number and must be a non-negative integer; any other value returns an empty
 * matrix without running git. `core.quotePath=false` is passed so that
 * non-ASCII file names are printed as-is instead of as quoted octal escapes.
 * Any git failure (not a repository, timeout, git missing) also yields an
 * empty matrix; history is treated as an optional signal.
 *
 * @param {string} projectPath - Working directory in which git is run.
 * @param {number} [maxCommits=500] - Maximum number of commits to read.
 * @param {number} [minCoChanges=2] - Minimum shared commits for a pair to be kept.
 * @returns {{entries: Map<string, Array<{fileA: string, fileB: string, coCommits: number, similarity: number}>>,
 *   fileCommitCounts: Map<string, number>, totalCommits: number}}
 *   `entries` maps each file to its partners (as `fileB`), sorted by
 *   descending similarity; `totalCommits` counts commits that listed files.
 */
function buildCoChangeMatrix(projectPath, maxCommits = 500, minCoChanges = 2) {
  const emptyMatrix = {
    entries: new Map(),
    fileCommitCounts: new Map(),
    totalCommits: 0
  };

  // The value is interpolated into a shell command below; only a plain
  // non-negative integer may reach it.
  const commitLimit = Number(maxCommits);
  if (!Number.isSafeInteger(commitLimit) || commitLimit < 0) return emptyMatrix;

  let gitOutput;
  try {
    gitOutput = execSync(
      `git -c core.quotePath=false log --no-merges --diff-filter=ACMR --name-only --format="---COMMIT---" -n ${commitLimit}`,
      { cwd: projectPath, encoding: "utf-8", maxBuffer: 10 * 1024 * 1024, timeout: 15e3 }
    );
  } catch {
    // Deliberate best effort: without usable history there is nothing to add.
    return emptyMatrix;
  }

  // Split the log into per-commit file lists; commits without files are dropped.
  const commits = [];
  let currentFiles = [];
  for (const line of gitOutput.split("\n")) {
    const trimmed = line.trim();
    if (trimmed === "---COMMIT---") {
      if (currentFiles.length > 0) commits.push(currentFiles);
      currentFiles = [];
    } else if (trimmed.length > 0) {
      currentFiles.push(trimmed);
    }
  }
  if (currentFiles.length > 0) commits.push(currentFiles);
  if (commits.length === 0) return emptyMatrix;

  const fileCommitCounts = new Map();
  const coChangeCounts = new Map();
  for (const files of commits) {
    const unique = [...new Set(files)];
    for (const file of unique) {
      fileCommitCounts.set(file, (fileCommitCounts.get(file) ?? 0) + 1);
    }
    // Only the first 20 files of a commit are paired, which bounds the number
    // of pairs a single large commit can contribute.
    const capped = unique.slice(0, 20);
    for (let i = 0; i < capped.length; i++) {
      for (let j = i + 1; j < capped.length; j++) {
        // Order the pair so (a, b) and (b, a) share one key.
        const [a, b] = capped[i] < capped[j] ? [capped[i], capped[j]] : [capped[j], capped[i]];
        const key = `${a}\0${b}`;
        coChangeCounts.set(key, (coChangeCounts.get(key) ?? 0) + 1);
      }
    }
  }

  const entries = new Map();
  const addEntry = (file, entry) => {
    const list = entries.get(file);
    if (list) list.push(entry);
    else entries.set(file, [entry]);
  };
  for (const [key, coCommits] of coChangeCounts) {
    if (coCommits < minCoChanges) continue;
    const [fileA, fileB] = key.split("\0");
    const commitsA = fileCommitCounts.get(fileA) ?? 0;
    const commitsB = fileCommitCounts.get(fileB) ?? 0;
    const union = commitsA + commitsB - coCommits;
    const similarity = union > 0 ? coCommits / union : 0;
    const entry = { fileA, fileB, coCommits, similarity };
    addEntry(fileA, entry);
    // Mirror the entry so a lookup from either file sees the partner as `fileB`.
    addEntry(fileB, { ...entry, fileA: fileB, fileB: fileA });
  }
  for (const list of entries.values()) {
    list.sort((a, b) => b.similarity - a.similarity);
  }
  return { entries, fileCommitCounts, totalCommits: commits.length };
}
|
|
5089
|
+
/**
 * Re-rank search matches using git co-change history.
 *
 * For each of the first `topK` matches (using their original scores), its
 * co-change partners are walked in stored order and each receives
 * `score * boostFactor * similarity`. The walk for a parent stops after five
 * partners or at the first partner whose similarity is below `minSimilarity`.
 * Files not yet in the result are added with the boost as their score. The
 * input array and its items are not modified.
 *
 * @param {Array<{filePath: string, score: number, matchedTerms: string[]}>} matches
 *   Matches; the first `topK` entries are used as boost sources.
 * @param {{entries: Map<string, Array<{fileB: string, similarity: number}>>}} coChangeMatrix
 *   Co-change data keyed by file path.
 * @param {number} [topK=10] - Number of leading matches that propagate a boost.
 * @param {number} [boostFactor=0.25] - Base fraction of a parent's score passed on.
 * @param {number} [minSimilarity=0.15] - Similarity below which the walk stops.
 * @returns {Array<{filePath: string, score: number, matchedTerms: string[]}>}
 *   New array sorted by descending score, or `matches` itself when there are
 *   no matches or no co-change entries.
 */
function boostByGitCoChange(matches, coChangeMatrix, topK = 10, boostFactor = 0.25, minSimilarity = 0.15) {
  if (matches.length === 0 || coChangeMatrix.entries.size === 0) return matches;

  const tag = "[git-cochange]";
  const perParentLimit = 5;

  // Work on copies so the caller's match objects stay untouched.
  const ranked = new Map(
    matches.map((m) => [m.filePath, { ...m, matchedTerms: [...m.matchedTerms] }])
  );

  for (const parent of matches.slice(0, topK)) {
    const partners = coChangeMatrix.entries.get(parent.filePath) ?? [];
    let applied = 0;
    for (const { fileB, similarity } of partners) {
      if (applied >= perParentLimit || similarity < minSimilarity) break;
      const boost = parent.score * boostFactor * similarity;
      const entry = ranked.get(fileB);
      if (entry) {
        entry.score += boost;
        if (!entry.matchedTerms.includes(tag)) entry.matchedTerms.push(tag);
      } else {
        ranked.set(fileB, { filePath: fileB, score: boost, matchedTerms: [tag] });
      }
      applied += 1;
    }
  }

  return [...ranked.values()].sort((a, b) => b.score - a.score);
}
|
|
5122
|
+
// Bundler-generated initializer for src/engine/git-relevance.ts; the module
// body is empty apart from the strict-mode directive.
var init_git_relevance = __esm({
  "src/engine/git-relevance.ts"() {
    "use strict";
  }
});
|
|
5127
|
+
|
|
5128
|
+
// src/engine/multi-hop.ts
|
|
5129
|
+
/**
 * Multi-hop retrieval: start from a direct index query for `task`, then for up
 * to `cfg.maxHops` rounds follow import and call edges from the current seed
 * files and re-query the index with identifiers taken from those seeds.
 *
 * Per hop:
 *   - neighbours of the seeds (imports, callees, callers) that have not been
 *     explored yet are collected;
 *   - identifiers from the seeds' contents are appended to the task text
 *     (first 10 unique terms) and the index is queried again (30 results);
 *   - each neighbour scores `(0.3 + its expanded-query score) * decay`;
 *   - each other unexplored expanded-query result scores `score * decay * 0.5`;
 *   - a score is accepted when it reaches `cfg.minScoreThreshold * decay`,
 *     where `decay = cfg.decayFactor ** hop`;
 *   - the best `cfg.topKPerHop` newly added files seed the next hop.
 *
 * @param {object} index - Search index handed to `query`.
 * @param {string} task - Task description used as the query text.
 * @param {Map<string, Iterable<string>>} deps - File to imported files.
 * @param {Array<{from: string, to: string}>} callEdges - Call-graph edges.
 * @param {Map<string, string>} fileContents - File path to source text.
 * @param {object} [config={}] - Overrides merged over `DEFAULT_CONFIG2`.
 * @returns {{matches: Array<{filePath: string, score: number, matchedTerms: string[]}>,
 *   hops: Array<{hop: number, seedFiles: string[], newFiles: string[], expandedTerms: string[]}>,
 *   totalFilesExplored: number}}
 *   Aggregated matches sorted by descending score, a per-hop trace, and the
 *   number of distinct files that received a score.
 */
function multiHopQuery(index, task, deps, callEdges, fileContents, config = {}) {
  const cfg = { ...DEFAULT_CONFIG2, ...config };

  // Forward and reverse adjacency over all supplied edges.
  const callsTo = new Map();
  const calledBy = new Map();
  const connect = (graph, key, value) => {
    const bucket = graph.get(key);
    if (bucket) bucket.add(value);
    else graph.set(key, new Set([value]));
  };
  for (const { from, to } of callEdges) {
    connect(callsTo, from, to);
    connect(calledBy, to, from);
  }

  const aggregateScores = new Map();
  const aggregateTerms = new Map();
  const explored = new Set();

  // Adds `amount` to a file's running score and merges the hop tag plus any
  // matched terms into its term set.
  const credit = (filePath, amount, tag, extraTerms) => {
    aggregateScores.set(filePath, (aggregateScores.get(filePath) ?? 0) + amount);
    const terms = aggregateTerms.get(filePath) ?? new Set();
    terms.add(tag);
    for (const term of extraTerms) terms.add(term);
    aggregateTerms.set(filePath, terms);
  };

  // Hop 0: the direct query.
  const initialResults = query(index, task, 50);
  for (const { filePath, score, matchedTerms } of initialResults) {
    aggregateScores.set(filePath, score);
    aggregateTerms.set(filePath, new Set(matchedTerms));
    explored.add(filePath);
  }
  let currentSeeds = initialResults.slice(0, cfg.topKPerHop);
  const hops = [
    {
      hop: 0,
      seedFiles: [],
      newFiles: currentSeeds.map((m) => m.filePath),
      expandedTerms: tokenize(task)
    }
  ];

  for (let hop = 1; hop <= cfg.maxHops && currentSeeds.length > 0; hop++) {
    const seedFiles = currentSeeds.map((m) => m.filePath);
    const newFiles = [];

    // Unexplored neighbours of the seeds: imports, then callees, then callers.
    const connectedFiles = new Set();
    for (const seed of seedFiles) {
      const neighbours = [
        ...(deps.get(seed) ?? []),
        ...(callsTo.get(seed) ?? []),
        ...(calledBy.get(seed) ?? [])
      ];
      for (const neighbour of neighbours) {
        if (!explored.has(neighbour)) connectedFiles.add(neighbour);
      }
    }

    // Identifiers from the seeds widen the query for this hop.
    const expandedTerms = [];
    for (const seed of seedFiles) {
      const content = fileContents.get(seed);
      if (!content) continue;
      expandedTerms.push(...extractKeyIdentifiers(content, seed));
    }

    const decayMultiplier = Math.pow(cfg.decayFactor, hop);
    const cutoff = cfg.minScoreThreshold * decayMultiplier;
    const uniqueExpandedTerms = [...new Set(expandedTerms)];
    const expandedQuery = `${task} ${uniqueExpandedTerms.slice(0, 10).join(" ")}`;
    const expandedResults = query(index, expandedQuery, 30);

    // First result per path, mirroring a front-to-back search of the results.
    const resultByPath = new Map();
    for (const result of expandedResults) {
      if (!resultByPath.has(result.filePath)) resultByPath.set(result.filePath, result);
    }

    for (const connected of connectedFiles) {
      const expandedMatch = resultByPath.get(connected);
      const graphScore = 0.3;
      const bm25Score = expandedMatch?.score ?? 0;
      const hopScore = (graphScore + bm25Score) * decayMultiplier;
      if (hopScore < cutoff) continue;
      credit(connected, hopScore, `[hop-${hop}]`, expandedMatch ? expandedMatch.matchedTerms : []);
      if (!explored.has(connected)) {
        newFiles.push(connected);
        explored.add(connected);
      }
    }

    for (const result of expandedResults) {
      if (explored.has(result.filePath)) continue;
      const hopScore = result.score * decayMultiplier * 0.5;
      if (hopScore < cutoff) continue;
      credit(result.filePath, hopScore, `[hop-${hop}-bm25]`, result.matchedTerms);
      newFiles.push(result.filePath);
      explored.add(result.filePath);
    }

    hops.push({ hop, seedFiles, newFiles, expandedTerms: uniqueExpandedTerms.slice(0, 20) });

    // The best newly discovered files become the seeds of the next hop.
    currentSeeds = newFiles
      .map((filePath) => ({ filePath, score: aggregateScores.get(filePath) ?? 0 }))
      .sort((a, b) => b.score - a.score)
      .slice(0, cfg.topKPerHop)
      .map(({ filePath, score }) => ({
        filePath,
        score,
        matchedTerms: [...aggregateTerms.get(filePath) ?? []]
      }));
  }

  const matches = [...aggregateScores].map(([filePath, score]) => ({
    filePath,
    score,
    matchedTerms: [...aggregateTerms.get(filePath) ?? new Set()]
  }));
  matches.sort((a, b) => b.score - a.score);

  return {
    matches,
    hops,
    totalFilesExplored: explored.size
  };
}
|
|
5243
|
+
/**
 * Collect search terms from the names a source file declares.
 *
 * The file extension (text after the last ".", lower-cased) selects the
 * declaration patterns: Java classes/interfaces and public/protected
 * methods, exported JS/TS declarations, line-leading Python class/def
 * statements, and capitalised Go funcs and types. Any other extension
 * yields an empty list.
 *
 * @param {string} content - Full text of the file.
 * @param {string} filePath - Path used only to determine the extension.
 * @returns {string[]} Up to 30 unique name fragments, each at least three
 *   characters long and not listed in NOISE_IDENTIFIERS.
 */
function extractKeyIdentifiers(content, filePath) {
  const extension = filePath.split(".").pop()?.toLowerCase() ?? "";
  const collected = [];

  // Run one declaration pattern over the file and split every captured name.
  const harvest = (declaration, splitName) => {
    for (const found of content.matchAll(declaration)) {
      collected.push(...splitName(found[1]));
    }
  };

  switch (extension) {
    case "java":
      harvest(/(?:class|interface)\s+(\w+)/g, splitCamelCase);
      harvest(/(?:public|protected)\s+[\w<>\[\],\s?]+\s+(\w+)\s*\(/gm, splitCamelCase);
      break;
    case "ts":
    case "tsx":
    case "js":
    case "jsx":
      harvest(/export\s+(?:class|function|const|interface|type)\s+(\w+)/g, splitCamelCase);
      break;
    case "py":
      harvest(/^(?:class|def)\s+(\w+)/gm, splitSnakeCase);
      break;
    case "go":
      harvest(/^func\s+(?:\([^)]+\)\s+)?([A-Z]\w+)/gm, splitCamelCase);
      harvest(/^type\s+([A-Z]\w+)/gm, splitCamelCase);
      break;
    default:
      break;
  }

  const isUseful = (id) => id.length >= 3 && !NOISE_IDENTIFIERS.has(id.toLowerCase());
  return [...new Set(collected)].filter(isUseful).slice(0, 30);
}
|
|
5283
|
+
/**
 * Break a camelCase / PascalCase identifier into lower-cased words.
 *
 * A space is inserted at every lower→upper transition and before the last
 * capital of an upper-case run that is followed by a lower-case letter
 * (so "HTTPServer" becomes "HTTP Server"). Words shorter than three
 * characters are discarded.
 *
 * @param {string} name - Identifier to split.
 * @returns {string[]} Lower-cased words of length >= 3, in source order.
 */
function splitCamelCase(name) {
  const spaced = name
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
  const words = [];
  for (const word of spaced.toLowerCase().split(/\s+/)) {
    if (word.length >= 3) {
      words.push(word);
    }
  }
  return words;
}
|
|
5286
|
+
/**
 * Split a snake_case identifier on underscores.
 *
 * Segments keep their original casing; segments shorter than three
 * characters (including the empty ones produced by leading, trailing or
 * doubled underscores) are dropped.
 *
 * @param {string} name - Identifier to split.
 * @returns {string[]} Segments of length >= 3, in source order.
 */
function splitSnakeCase(name) {
  const segments = [];
  for (const segment of name.split("_")) {
    if (segment.length >= 3) {
      segments.push(segment);
    }
  }
  return segments;
}
|
|
5289
|
+
// Module-level state for src/engine/multi-hop.ts. Both bindings stay
// undefined until init_multi_hop() has run.
var DEFAULT_CONFIG2, NOISE_IDENTIFIERS;
// Initializer wrapped by __esm (presumably the bundler's run-once lazy
// module wrapper — confirm against its definition earlier in the file).
var init_multi_hop = __esm({
  "src/engine/multi-hop.ts"() {
    "use strict";
    init_tfidf();
    // Default multi-hop settings.
    DEFAULT_CONFIG2 = {
      maxHops: 2,
      topKPerHop: 5,
      decayFactor: 0.5,
      minScoreThreshold: 0.2
    };
    // Name fragments that extractKeyIdentifiers() discards (compared lower-cased).
    NOISE_IDENTIFIERS = /* @__PURE__ */ new Set([
      "get", "set", "has", "add", "put", "new", "run", "map",
      "for", "the", "and", "not", "with", "from", "this", "that",
      "test", "spec", "mock", "void", "null", "true", "false",
      "string", "number", "boolean", "int", "impl", "default", "abstract",
      "base", "main", "init", "setup", "util", "utils", "helper", "helpers",
      "common", "config", "model", "entity", "service", "repository",
      "controller", "handler", "interface", "type", "class", "function",
      "const", "return", "import", "export", "private", "public",
      "protected", "static", "final", "override", "async", "await"
    ]);
  }
});
|
|
5366
|
+
|
|
5367
|
+
// src/engine/query-intent.ts
|
|
5368
|
+
/**
 * Classify a task description by the first ACTION_PATTERNS entry whose
 * regular expression matches it.
 *
 * @param {string} task - Free-text task description.
 * @returns {string} The action paired with the first matching pattern, or
 *   "unknown" when no pattern matches.
 */
function detectAction(task) {
  const firstHit = ACTION_PATTERNS.find(([pattern]) => pattern.test(task));
  if (firstHit === undefined) {
    return "unknown";
  }
  return firstHit[1];
}
|
|
5374
|
+
/**
 * List the layers whose keywords occur in the task.
 *
 * Matching is a case-insensitive substring test of each keyword in
 * LAYER_KEYWORDS against the whole task text.
 *
 * @param {string} task - Free-text task description.
 * @returns {string[]} Distinct layer names, in LAYER_KEYWORDS order.
 */
function detectLayers(task) {
  const haystack = task.toLowerCase();
  const found = new Set();
  for (const [keywords, layer] of LAYER_KEYWORDS) {
    const mentioned = keywords.some((keyword) => haystack.includes(keyword));
    if (mentioned) {
      found.add(layer);
    }
  }
  return [...found];
}
|
|
5384
|
+
/**
 * Extract the candidate entity words from a task description.
 *
 * The task is lower-cased, every character other than a-z, 0-9,
 * whitespace and "-" becomes a space, and the text is split on
 * whitespace. A word is kept when it has at least three characters and is
 * not a stop word, action word, operation word or exact layer keyword.
 *
 * @param {string} task - Free-text task description.
 * @returns {string[]} Distinct entity words in order of first appearance.
 */
function extractEntities(task) {
  const tokens = task.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/);
  const isLayerKeyword = (word) => LAYER_KEYWORDS.some(([kws]) => kws.includes(word));
  const kept = tokens.filter(
    (word) =>
      word.length >= 3 &&
      !STOP_WORDS2.has(word) &&
      !ACTION_WORDS.has(word) &&
      !OPERATION_WORDS.has(word) &&
      !isLayerKeyword(word)
  );
  return [...new Set(kept)];
}
|
|
5398
|
+
/**
 * Find the operation verbs (members of OPERATION_WORDS) named in a task.
 *
 * Two passes run over the lower-cased task: whole whitespace-separated
 * words (after punctuation other than "-" is blanked), then the word that
 * directly follows "on", "after", "before" or "during". The second pass
 * can catch an operation that is the leading part of a hyphenated word.
 *
 * @param {string} task - Free-text task description.
 * @returns {string[]} Distinct operations in order of first discovery.
 */
function extractOperations(task) {
  const lowered = task.toLowerCase();
  const found = new Set();

  for (const word of lowered.replace(/[^a-z0-9\s-]/g, " ").split(/\s+/)) {
    if (OPERATION_WORDS.has(word)) {
      found.add(word);
    }
  }

  const triggerPhrases = lowered.match(/\b(on|after|before|during)\s+(\w+)/g) ?? [];
  for (const phrase of triggerPhrases) {
    const [, target] = phrase.split(/\s+/);
    if (target !== undefined && OPERATION_WORDS.has(target)) {
      found.add(target);
    }
  }

  return [...found];
}
|
|
5417
|
+
/**
 * Collect the one- or two-word phrases that follow the prepositions
 * "on", "for", "in", "via", "from" or "through" in a task.
 *
 * Each phrase is lower-cased with its words joined by single spaces. A
 * phrase is dropped when it is shorter than two characters or when the
 * phrase as a whole is a member of STOP_WORDS2.
 *
 * @param {string} task - Free-text task description.
 * @returns {string[]} Distinct qualifier phrases in order of appearance.
 */
function extractQualifiers(task) {
  const phrases = task.match(/\b(on|for|in|via|from|through)\s+(\w+(?:\s+\w+)?)/gi) ?? [];
  const seen = new Set();
  for (const phrase of phrases) {
    const [, ...tail] = phrase.split(/\s+/);
    if (tail.length === 0) {
      continue;
    }
    const qualifier = tail.join(" ").toLowerCase();
    if (qualifier.length >= 2 && !STOP_WORDS2.has(qualifier)) {
      seen.add(qualifier);
    }
  }
  return [...seen];
}
|
|
5433
|
+
/**
 * Parse a task description into a structured intent.
 *
 * @param {string} task - Free-text task description.
 * @returns {{original: string, action: string, entities: string[],
 *   operations: string[], layers: string[], qualifiers: string[],
 *   confidence: number}} The extracted parts plus a confidence equal to
 *   the fraction of four signals that are present: a known action, at
 *   least one entity, at least one operation, at least one layer.
 *   Qualifiers do not contribute to the confidence.
 */
function parseQueryIntent(task) {
  const action = detectAction(task);
  const entities = extractEntities(task);
  const operations = extractOperations(task);
  const layers = detectLayers(task);
  const qualifiers = extractQualifiers(task);

  const signalPresent = [
    action !== "unknown",
    entities.length > 0,
    operations.length > 0,
    layers.length > 0
  ];
  let hits = 0;
  for (const present of signalPresent) {
    if (present) {
      hits += 1;
    }
  }
  const confidence = hits / signalPresent.length;

  return { original: task, action, entities, operations, layers, qualifiers, confidence };
}
|
|
5448
|
+
/**
 * Turn a parsed intent into a query string in which repetition acts as
 * the weight: each entity appears three times, each operation twice, and
 * each layer and qualifier once, in that order.
 *
 * @param {{original: string, entities: Iterable<string>,
 *   operations: Iterable<string>, layers: Iterable<string>,
 *   qualifiers: Iterable<string>}} intent - Parsed query intent.
 * @returns {string} Space-joined weighted terms, or `intent.original`
 *   when the intent contributes no terms at all.
 */
function buildWeightedQuery(intent) {
  const weighted = [
    ...[...intent.entities].flatMap((entity) => [entity, entity, entity]),
    ...[...intent.operations].flatMap((op) => [op, op]),
    ...intent.layers,
    ...intent.qualifiers
  ];
  return weighted.length > 0 ? weighted.join(" ") : intent.original;
}
|
|
5465
|
+
// Lookup tables for src/engine/query-intent.ts. All five bindings stay
// undefined until init_query_intent() has run.
var ACTION_PATTERNS, LAYER_KEYWORDS, STOP_WORDS2, ACTION_WORDS, OPERATION_WORDS;
// Initializer wrapped by __esm (presumably the bundler's run-once lazy
// module wrapper — confirm against its definition earlier in the file).
var init_query_intent = __esm({
  "src/engine/query-intent.ts"() {
    "use strict";
    // [pattern, action] pairs; detectAction() returns the action of the
    // first pattern that matches, so order is significant.
    ACTION_PATTERNS = [
      [/\b(fix|bug|debug|repair|resolve|broken|crash|error|issue|wrong)\b/i, "fix"],
      [/\b(add|implement|create|build|new|feature|introduce|wire)\b/i, "add"],
      [/\b(refactor|restructure|clean|extract|split|move|rename|simplify)\b/i, "refactor"],
      [/\b(trace|follow|understand|find|where|how|flow|path|chain)\b/i, "trace"],
      [/\b(test|spec|coverage|assert|mock|verify)\b/i, "test"],
      [/\b(doc|document|describe|explain|readme|comment)\b/i, "docs"],
      [/\b(remove|delete|deprecate|drop|kill|eliminate)\b/i, "remove"],
      [/\b(optimize|performance|speed|fast|slow|latency|efficient)\b/i, "optimize"]
    ];
    // [keywords, layer] pairs; detectLayers() substring-matches keywords.
    LAYER_KEYWORDS = [
      [["controller", "endpoint", "handler", "router", "route", "api", "rest", "entrypoint"], "endpoint"],
      [["usecase", "use case", "use-case", "interactor", "application service"], "usecase"],
      [["service", "domain service", "business logic"], "service"],
      [["repository", "repo", "dao", "data access", "persistence", "database", "db", "store"], "repository"],
      [["cache", "redis", "memcached", "caching", "ttl", "invalidat"], "cache"],
      [["client", "http client", "api client", "rest client", "feign", "retrofit"], "client"],
      [["model", "entity", "dto", "domain object", "value object", "pojo"], "model"],
      [["config", "configuration", "injector", "module", "bean", "provider", "dependency injection"], "config"],
      [["queue", "kafka", "rabbit", "sqs", "event", "listener", "consumer", "producer", "message"], "queue"],
      [["middleware", "interceptor", "filter", "guard", "pipe"], "middleware"]
    ];
    // Words ignored by extractEntities() and extractQualifiers().
    STOP_WORDS2 = /* @__PURE__ */ new Set([
      "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
      "have", "has", "had", "do", "does", "did", "will", "would", "shall", "should",
      "may", "might", "must", "can", "could", "need", "not", "and", "but", "or",
      "nor", "for", "yet", "so", "in", "on", "at", "to", "from", "by",
      "with", "about", "between", "through", "during", "before", "after", "above", "below", "up",
      "down", "out", "off", "over", "under", "again", "further", "then", "once", "here",
      "there", "when", "where", "why", "how", "all", "each", "every", "both", "few",
      "more", "most", "other", "some", "such", "no", "nor", "only", "own", "same",
      "so", "than", "too", "very", "just", "because", "this", "that", "these", "those",
      "it", "its", "of", "if"
    ]);
    // Verbs describing what to do to the code; excluded from entities.
    ACTION_WORDS = /* @__PURE__ */ new Set([
      "fix", "add", "create", "build", "implement", "refactor", "trace", "follow", "find",
      "update", "modify", "change", "remove", "delete", "debug", "test", "check", "verify",
      "validate", "handle", "process", "resolve", "repair", "optimize", "improve", "speed",
      "clean", "bug", "error", "issue", "problem", "flow", "path", "chain"
    ]);
    // Verbs reported by extractOperations() and excluded from entities.
    OPERATION_WORDS = /* @__PURE__ */ new Set([
      "create", "read", "update", "delete", "save", "load", "fetch", "retrieve", "store",
      "persist", "insert", "remove", "invalidate", "validate", "parse", "transform", "convert",
      "render", "display", "send", "receive", "publish", "subscribe", "emit", "listen",
      "authenticate", "authorize", "encrypt", "decrypt", "hash", "serialize", "deserialize",
      "encode", "decode", "compress", "replicate", "sync", "migrate", "export", "import",
      "upload", "download", "search", "index", "query", "filter", "sort"
    ]);
  }
});
|
|
5674
|
+
|
|
5675
|
+
// src/engine/embeddings.ts
|
|
5676
|
+
/**
 * Build a TF-IDF cosine-similarity index over the documents of a term
 * index.
 *
 * Every document becomes an L2-normalised vector of `tf * idf` weights
 * restricted to the terms present in `index.idf` when this function runs.
 * Vectors are stored sparsely (term -> weight), so memory grows with the
 * number of distinct terms per document rather than with
 * documents x vocabulary.
 *
 * @param {{idf: Map<string, number>, documents: Map<string, {terms: Iterable<[string, number]>}>}} index
 *   Term index providing inverse document frequencies and per-document
 *   term frequencies.
 * @returns {{backend: string, dimensions: number, documentCount: number,
 *   query: (text: string, topK: number) => Array<{filePath: string, score: number}>}}
 *   `dimensions` is the vocabulary size; `query` returns at most `topK`
 *   documents with a positive cosine score, best first.
 */
function buildTfIdfEmbeddingIndex(index) {
  // Snapshot of the vocabulary; terms added to index.idf later are ignored.
  const vocabulary = new Set(index.idf.keys());
  const dimensions = vocabulary.size;

  // Convert (term, frequency) pairs into a sparse unit vector. Weights are
  // rounded to float32 at the same points the former dense Float32Array
  // storage rounded them, so scores stay bit-identical to that version.
  const toUnitVector = (termFrequencies) => {
    const weights = new Map();
    let squaredSum = 0;
    for (const [term, frequency] of termFrequencies) {
      if (!vocabulary.has(term)) continue;
      const weight = frequency * (index.idf.get(term) ?? 0);
      weights.set(term, Math.fround(weight));
      squaredSum += weight * weight;
    }
    const norm = Math.sqrt(squaredSum);
    // A zero (or NaN) norm leaves the vector un-normalised.
    if (norm > 0) {
      for (const [term, weight] of weights) {
        weights.set(term, Math.fround(weight / norm));
      }
    }
    return weights;
  };

  const docVectors = /* @__PURE__ */ new Map();
  for (const [filePath, doc] of index.documents) {
    docVectors.set(filePath, toUnitVector(doc.terms));
  }

  function queryFn(text, topK) {
    const termCounts = /* @__PURE__ */ new Map();
    for (const term of tokenizeForEmbedding(text)) {
      termCounts.set(term, (termCounts.get(term) ?? 0) + 1);
    }
    const queryVec = toUnitVector(termCounts);
    const results = [];
    for (const [filePath, docVec] of docVectors) {
      // Only the query's own terms can contribute to the dot product.
      let dot = 0;
      for (const [term, queryWeight] of queryVec) {
        dot += queryWeight * (docVec.get(term) ?? 0);
      }
      if (dot > 0) {
        results.push({ filePath, score: dot });
      }
    }
    return results.sort((a, b) => b.score - a.score).slice(0, topK);
  }

  return {
    backend: "tfidf-cosine",
    dimensions,
    documentCount: docVectors.size,
    query: queryFn
  };
}
|
|
5746
|
+
/**
 * Merge two ranked result lists with weighted reciprocal rank fusion.
 *
 * An entry at zero-based position `i` of a list contributes
 * `weight / (k + i + 1)` to its file's fused score; contributions from
 * both lists are summed. Only positions matter — the input scores are
 * not read.
 *
 * @param {Array<{filePath: string}>} bm25Results - First ranking, best first.
 * @param {Array<{filePath: string}>} embeddingResults - Second ranking, best first.
 * @param {number} [k=60] - Rank smoothing constant.
 * @param {number} [bm25Weight=0.6] - Weight applied to the first ranking.
 * @param {number} [embeddingWeight=0.4] - Weight applied to the second ranking.
 * @returns {Array<{filePath: string, score: number}>} Fused results sorted
 *   by descending score.
 */
function reciprocalRankFusion(bm25Results, embeddingResults, k = 60, bm25Weight = 0.6, embeddingWeight = 0.4) {
  const fused = /* @__PURE__ */ new Map();
  const accumulate = (ranked, weight) => {
    ranked.forEach((entry, rank) => {
      const previous = fused.get(entry.filePath) ?? 0;
      fused.set(entry.filePath, previous + weight / (k + rank + 1));
    });
  };
  accumulate(bm25Results, bm25Weight);
  accumulate(embeddingResults, embeddingWeight);
  return Array.from(fused, ([filePath, score]) => ({ filePath, score })).sort(
    (a, b) => b.score - a.score
  );
}
|
|
5760
|
+
/**
 * Tokenise free text for the TF-IDF embedding query.
 *
 * camelCase boundaries are split first, then the text is lower-cased,
 * every character outside a-z / 0-9 becomes a separator, and tokens
 * shorter than two characters are dropped.
 *
 * The camelCase split must run before lower-casing: once the text is all
 * lower case the `[a-z][A-Z]` boundary can no longer match, which is why
 * the split previously never took effect.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Lower-case tokens of length >= 2, in source order.
 */
function tokenizeForEmbedding(text) {
  return text
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .toLowerCase()
    .replace(/[^a-z0-9]/g, " ")
    .split(/\s+/)
    .filter((token) => token.length >= 2);
}
|
|
5763
|
+
// Initializer for src/engine/embeddings.ts. The module assigns no
// module-level state, so the body holds only the strict-mode directive.
var init_embeddings = __esm({
  "src/engine/embeddings.ts"() {
    "use strict";
  }
});
|
|
5768
|
+
|
|
3908
5769
|
// src/engine/multi-repo.ts
|
|
3909
5770
|
var multi_repo_exports = {};
|
|
3910
5771
|
__export(multi_repo_exports, {
|
|
@@ -4123,9 +5984,72 @@ var init_multi_repo = __esm({
|
|
|
4123
5984
|
|
|
4124
5985
|
// src/engine/context-pipeline.ts
|
|
4125
5986
|
import { readFileSync as readFileSync6 } from "fs";
|
|
5987
|
+
/**
 * Tell whether a file should be excluded from ranking because its base
 * name matches one of RANKING_NOISE_PATTERNS.
 *
 * Both "/" and "\" are treated as path separators, matching the
 * separator handling in fileTypePenalty(), so Windows-style relative
 * paths are reduced to their base name as well.
 *
 * @param {string} filePath - Relative or absolute file path.
 * @returns {boolean} True when the base name matches a noise pattern.
 */
function isRankingNoise(filePath) {
  // split() always yields at least one element; the fallback is defensive.
  const baseName = filePath.split(/[/\\]/).pop() ?? filePath;
  return RANKING_NOISE_PATTERNS.some((pattern) => pattern.test(baseName));
}
|
|
5991
|
+
/**
 * Score multiplier for a file, based on its kind and the task type.
 *
 * Files are classified from the lower-cased path:
 *  - test:   inside a `test/` or `tests/` directory at any depth
 *            (including the repository root), or named `*.test.*`,
 *            `*.spec.*` or `*_test.*`;
 *  - doc:    `.md`, `.txt` or `.rst`, or inside a `docs/` directory at
 *            any depth;
 *  - config: `.xml`, `.yml`, `.yaml`, `.properties` or `.gradle`.
 *
 * Directory checks accept either start-of-path or a separator before the
 * directory name, so `tests/foo.js` and `packages/x/docs/a.html` are
 * classified the same way as their nested / top-level counterparts.
 *
 * @param {string} filePath - File path using "/" or "\" separators.
 * @param {string} taskType - "debug", "test", "docs", "feature" or
 *   "refactor"; any other value applies no adjustment.
 * @returns {number} Multiplier for the file's score: below 1 demotes,
 *   above 1 promotes, 1 leaves the score unchanged.
 */
function fileTypePenalty(filePath, taskType) {
  const lower = filePath.toLowerCase();
  const isTest = /(?:^|[/\\])tests?[/\\]|\.test\.|\.spec\.|_test\./i.test(lower);
  const isDoc = /\.md$|\.txt$|\.rst$|(?:^|[/\\])docs[/\\]/i.test(lower);
  const isConfig = /\.xml$|\.yml$|\.yaml$|\.properties$|\.gradle$/i.test(lower);

  // Within each task type the first matching file kind wins.
  if (taskType === "debug") {
    if (isTest) return 0.4;
    if (isDoc) return 0.2;
    if (isConfig) return 0.6;
  } else if (taskType === "test") {
    if (isTest) return 1.2;
    if (isDoc) return 0.3;
  } else if (taskType === "docs") {
    if (isDoc) return 1.2;
    if (isTest) return 0.3;
  } else if (taskType === "feature" || taskType === "refactor") {
    if (isTest) return 0.5;
    if (isDoc) return 0.4;
  }
  return 1;
}
|
|
6012
|
+
/**
 * Heuristically decide whether a task describes a multi-step or
 * multi-layer flow.
 *
 * A task is complex when any of these hold (checked in order):
 *  1. it contains a chain/flow indicator (e.g. "when", "via", "trace",
 *     "propagate", "calls");
 *  2. it mentions at least two distinct architectural layers;
 *  3. it has ten or more words longer than two characters;
 *  4. it contains three or more connector words ("on", "for", "in", ...).
 *
 * Layer aliases ("repo"/"repository", "usecase"/"use case") count as a
 * single layer, so one mention of "repository" — which also contains
 * "repo" — no longer satisfies rule 2 on its own. The "propagat" stem
 * accepts any suffix; with a bare trailing word boundary it could not
 * match "propagate" or "propagation".
 *
 * @param {string} task - Free-text task description.
 * @returns {boolean} True when the task looks complex.
 */
function detectComplexQuery(task) {
  const lower = task.toLowerCase();

  const chainIndicators = /\b(when|after|then|through|from .+ to|via|chain|flow|trace|path|propagat\w*|cascade|invalidat\w+ on|calls?|invokes?)\b/;
  if (chainIndicators.test(lower)) return true;

  // Each inner array is one layer together with its spelling variants.
  const layerAliases = [
    ["controller"],
    ["endpoint"],
    ["router"],
    ["handler"],
    ["service"],
    ["usecase", "use case"],
    ["repository", "repo"],
    ["cache"],
    ["database"],
    ["queue"],
    ["client"],
    ["adapter"],
    ["gateway"],
    ["interceptor"],
    ["middleware"],
    ["listener"],
    ["consumer"],
    ["producer"],
    ["publisher"],
    ["subscriber"]
  ];
  const layerCount = layerAliases.filter((aliases) =>
    aliases.some((alias) => lower.includes(alias))
  ).length;
  if (layerCount >= 2) return true;

  const words = lower.split(/\s+/).filter((w) => w.length > 2);
  if (words.length >= 10) return true;

  const entityConnectors = lower.match(/\b(on|for|in|from|to|with|after|before|during)\b/g);
  if (entityConnectors && entityConnectors.length >= 3) return true;

  return false;
}
|
|
4126
6048
|
async function runContextPipeline(input) {
|
|
4127
6049
|
const { projectPath, task, analysis, budget = 5e4 } = input;
|
|
4128
6050
|
const taskType = classifyTask(task);
|
|
6051
|
+
const queryIntent = parseQueryIntent(task);
|
|
6052
|
+
const weightedQuery = buildWeightedQuery(queryIntent);
|
|
4129
6053
|
const fileContentMap = /* @__PURE__ */ new Map();
|
|
4130
6054
|
const fileContents = [];
|
|
4131
6055
|
for (const file of analysis.files) {
|
|
@@ -4137,22 +6061,58 @@ async function runContextPipeline(input) {
|
|
|
4137
6061
|
fileContents.push({ relativePath: file.relativePath, content: "" });
|
|
4138
6062
|
}
|
|
4139
6063
|
}
|
|
4140
|
-
const indexFiles = analysis.files.map((f) =>
|
|
4141
|
-
|
|
4142
|
-
|
|
4143
|
-
|
|
4144
|
-
|
|
6064
|
+
const indexFiles = analysis.files.map((f) => {
|
|
6065
|
+
const raw = fileContentMap.get(f.relativePath);
|
|
6066
|
+
const augmented = raw ? augmentContentWithStructure(raw, f.relativePath) : void 0;
|
|
6067
|
+
return {
|
|
6068
|
+
relativePath: f.relativePath,
|
|
6069
|
+
absolutePath: f.path,
|
|
6070
|
+
content: augmented
|
|
6071
|
+
};
|
|
6072
|
+
});
|
|
4145
6073
|
const { index, stats: indexCacheStats } = buildIndexCached(projectPath, indexFiles);
|
|
4146
|
-
const
|
|
4147
|
-
const
|
|
4148
|
-
|
|
4149
|
-
analysis.files.map((f) => f.relativePath),
|
|
4150
|
-
task
|
|
4151
|
-
);
|
|
6074
|
+
const fileCount = analysis.files.length;
|
|
6075
|
+
const adaptiveTopK = Math.min(Math.max(20, Math.round(fileCount * 0.15)), 100);
|
|
6076
|
+
const allFilePaths = analysis.files.map((f) => f.relativePath);
|
|
4152
6077
|
const depMap = /* @__PURE__ */ new Map();
|
|
4153
6078
|
for (const file of analysis.files) {
|
|
4154
6079
|
depMap.set(file.relativePath, file.imports);
|
|
4155
6080
|
}
|
|
6081
|
+
const callGraph = buildCallGraph(
|
|
6082
|
+
fileContents.filter((f) => f.content.length > 0)
|
|
6083
|
+
);
|
|
6084
|
+
const callEdges = [...analysis.graph.edges.filter((e) => e.type === "call"), ...callGraph.edges];
|
|
6085
|
+
const isComplexQuery = detectComplexQuery(task);
|
|
6086
|
+
const embeddingIndex = buildTfIdfEmbeddingIndex(index);
|
|
6087
|
+
const embeddingResults = embeddingIndex.query(weightedQuery, adaptiveTopK);
|
|
6088
|
+
let bm25Matches;
|
|
6089
|
+
if (isComplexQuery) {
|
|
6090
|
+
const hopResult = multiHopQuery(index, weightedQuery, depMap, callEdges, fileContentMap, {
|
|
6091
|
+
maxHops: 2,
|
|
6092
|
+
topKPerHop: 5,
|
|
6093
|
+
decayFactor: 0.5,
|
|
6094
|
+
minScoreThreshold: 0.15
|
|
6095
|
+
});
|
|
6096
|
+
bm25Matches = hopResult.matches.slice(0, adaptiveTopK);
|
|
6097
|
+
} else {
|
|
6098
|
+
bm25Matches = query(index, weightedQuery, adaptiveTopK);
|
|
6099
|
+
}
|
|
6100
|
+
const fusedResults = reciprocalRankFusion(bm25Matches, embeddingResults, 60, 0.6, 0.4);
|
|
6101
|
+
const rawMatches = fusedResults.slice(0, adaptiveTopK).map((r) => {
|
|
6102
|
+
const bm25Match = bm25Matches.find((m) => m.filePath === r.filePath);
|
|
6103
|
+
return {
|
|
6104
|
+
filePath: r.filePath,
|
|
6105
|
+
score: r.score,
|
|
6106
|
+
matchedTerms: bm25Match?.matchedTerms ?? ["[embedding-only]"]
|
|
6107
|
+
};
|
|
6108
|
+
});
|
|
6109
|
+
const semanticMatches = rawMatches.filter((m) => !isRankingNoise(m.filePath));
|
|
6110
|
+
const pathBoosted = boostByPath(semanticMatches, allFilePaths, task);
|
|
6111
|
+
const layerBoosted = boostByLayer(pathBoosted, allFilePaths, task);
|
|
6112
|
+
const importBoosted = boostByImports(layerBoosted, depMap, 10, 0.4);
|
|
6113
|
+
const callBoosted = boostByCallGraph(importBoosted, callEdges, 10, 0.3);
|
|
6114
|
+
const coChangeMatrix = buildCoChangeMatrix(projectPath, 500, 2);
|
|
6115
|
+
const boostedMatches = boostByGitCoChange(callBoosted, coChangeMatrix, 10, 0.25, 0.15);
|
|
4156
6116
|
const rerankResult = rerank({
|
|
4157
6117
|
task,
|
|
4158
6118
|
candidates: boostedMatches,
|
|
@@ -4161,12 +6121,15 @@ async function runContextPipeline(input) {
|
|
|
4161
6121
|
dependencies: depMap,
|
|
4162
6122
|
allFilePaths: analysis.files.map((f) => f.relativePath)
|
|
4163
6123
|
});
|
|
4164
|
-
const
|
|
4165
|
-
|
|
4166
|
-
|
|
4167
|
-
|
|
4168
|
-
matchedTerms:
|
|
6124
|
+
const rerankerApproved = new Set(rerankResult.files.map((rf) => rf.filePath));
|
|
6125
|
+
const rerankedMatches = boostedMatches.map((m) => ({
|
|
6126
|
+
filePath: m.filePath,
|
|
6127
|
+
score: rerankerApproved.has(m.filePath) ? m.score * 1.5 : m.score,
|
|
6128
|
+
matchedTerms: [...m.matchedTerms]
|
|
4169
6129
|
}));
|
|
6130
|
+
for (const m of rerankedMatches) {
|
|
6131
|
+
m.score *= fileTypePenalty(m.filePath, taskType);
|
|
6132
|
+
}
|
|
4170
6133
|
const learner = await loadLearner(projectPath);
|
|
4171
6134
|
const learnerBoosts = getLearnerBoosts(
|
|
4172
6135
|
learner,
|
|
@@ -4189,17 +6152,38 @@ async function runContextPipeline(input) {
|
|
|
4189
6152
|
const { querySiblingRepos: querySiblingRepos2 } = await Promise.resolve().then(() => (init_multi_repo(), multi_repo_exports));
|
|
4190
6153
|
multiRepo = querySiblingRepos2(input.siblingRepos, task, 5, 0.3);
|
|
4191
6154
|
}
|
|
4192
|
-
return { selection, taskType, fileContentMap, semanticMap, learnerMap, multiRepo, indexCacheStats };
|
|
6155
|
+
return { selection, taskType, fileContentMap, semanticMap, learnerMap, queryIntent, multiRepo, indexCacheStats };
|
|
4193
6156
|
}
|
|
6157
|
+
// Base-name patterns for files that isRankingNoise() excludes from
// ranking. Stays undefined until init_context_pipeline() has run.
var RANKING_NOISE_PATTERNS;
// Initializer for src/engine/context-pipeline.ts: runs the initializers
// of the modules it depends on, then assigns the noise patterns. __esm is
// presumably the bundler's run-once lazy module wrapper — confirm against
// its definition earlier in the file.
var init_context_pipeline = __esm({
  "src/engine/context-pipeline.ts"() {
    "use strict";
    init_selector();
    init_tfidf();
    init_ast_tokenizer();
    init_index_cache();
    init_reranker();
    init_learner();
    init_router();
    init_call_graph();
    init_git_relevance();
    init_multi_hop();
    init_query_intent();
    init_embeddings();
    RANKING_NOISE_PATTERNS = [
      // Repository housekeeping documents (case-insensitive name prefix).
      /^changelog/i,
      /^license/i,
      /^contributing/i,
      /^code_of_conduct/i,
      /^authors/i,
      /^codeowners$/i,
      /^security/i,
      // Dependency lock files.
      /\.lock$/,
      /^package-lock\.json$/,
      /^yarn\.lock$/,
      /^pnpm-lock\.yaml$/,
      /^Gemfile\.lock$/
    ];
  }
});
|
|
4205
6189
|
|