@colbymchenry/codegraph 0.6.8 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -476
- package/dist/bin/codegraph.d.ts +0 -5
- package/dist/bin/codegraph.d.ts.map +1 -1
- package/dist/bin/codegraph.js +217 -237
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/bin/uninstall.d.ts +0 -1
- package/dist/bin/uninstall.d.ts.map +1 -1
- package/dist/bin/uninstall.js +3 -29
- package/dist/bin/uninstall.js.map +1 -1
- package/dist/context/index.d.ts +3 -5
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +531 -52
- package/dist/context/index.js.map +1 -1
- package/dist/db/migrations.d.ts +1 -1
- package/dist/db/migrations.d.ts.map +1 -1
- package/dist/db/migrations.js +10 -1
- package/dist/db/migrations.js.map +1 -1
- package/dist/db/queries.d.ts +53 -0
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +244 -14
- package/dist/db/queries.js.map +1 -1
- package/dist/db/schema.sql +1 -16
- package/dist/extraction/dfm-extractor.d.ts +31 -0
- package/dist/extraction/dfm-extractor.d.ts.map +1 -0
- package/dist/extraction/dfm-extractor.js +151 -0
- package/dist/extraction/dfm-extractor.js.map +1 -0
- package/dist/extraction/grammars.d.ts +9 -1
- package/dist/extraction/grammars.d.ts.map +1 -1
- package/dist/extraction/grammars.js +34 -2
- package/dist/extraction/grammars.js.map +1 -1
- package/dist/extraction/index.d.ts +7 -1
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +373 -22
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/languages/c-cpp.d.ts +4 -0
- package/dist/extraction/languages/c-cpp.d.ts.map +1 -0
- package/dist/extraction/languages/c-cpp.js +126 -0
- package/dist/extraction/languages/c-cpp.js.map +1 -0
- package/dist/extraction/languages/csharp.d.ts +3 -0
- package/dist/extraction/languages/csharp.d.ts.map +1 -0
- package/dist/extraction/languages/csharp.js +72 -0
- package/dist/extraction/languages/csharp.js.map +1 -0
- package/dist/extraction/languages/dart.d.ts +3 -0
- package/dist/extraction/languages/dart.d.ts.map +1 -0
- package/dist/extraction/languages/dart.js +192 -0
- package/dist/extraction/languages/dart.js.map +1 -0
- package/dist/extraction/languages/go.d.ts +3 -0
- package/dist/extraction/languages/go.d.ts.map +1 -0
- package/dist/extraction/languages/go.js +58 -0
- package/dist/extraction/languages/go.js.map +1 -0
- package/dist/extraction/languages/index.d.ts +10 -0
- package/dist/extraction/languages/index.d.ts.map +1 -0
- package/dist/extraction/languages/index.js +43 -0
- package/dist/extraction/languages/index.js.map +1 -0
- package/dist/extraction/languages/java.d.ts +3 -0
- package/dist/extraction/languages/java.d.ts.map +1 -0
- package/dist/extraction/languages/java.js +64 -0
- package/dist/extraction/languages/java.js.map +1 -0
- package/dist/extraction/languages/javascript.d.ts +3 -0
- package/dist/extraction/languages/javascript.d.ts.map +1 -0
- package/dist/extraction/languages/javascript.js +90 -0
- package/dist/extraction/languages/javascript.js.map +1 -0
- package/dist/extraction/languages/kotlin.d.ts +3 -0
- package/dist/extraction/languages/kotlin.d.ts.map +1 -0
- package/dist/extraction/languages/kotlin.js +253 -0
- package/dist/extraction/languages/kotlin.js.map +1 -0
- package/dist/extraction/languages/pascal.d.ts +3 -0
- package/dist/extraction/languages/pascal.d.ts.map +1 -0
- package/dist/extraction/languages/pascal.js +66 -0
- package/dist/extraction/languages/pascal.js.map +1 -0
- package/dist/extraction/languages/php.d.ts +3 -0
- package/dist/extraction/languages/php.d.ts.map +1 -0
- package/dist/extraction/languages/php.js +107 -0
- package/dist/extraction/languages/php.js.map +1 -0
- package/dist/extraction/languages/python.d.ts +3 -0
- package/dist/extraction/languages/python.d.ts.map +1 -0
- package/dist/extraction/languages/python.js +56 -0
- package/dist/extraction/languages/python.js.map +1 -0
- package/dist/extraction/languages/ruby.d.ts +3 -0
- package/dist/extraction/languages/ruby.d.ts.map +1 -0
- package/dist/extraction/languages/ruby.js +114 -0
- package/dist/extraction/languages/ruby.js.map +1 -0
- package/dist/extraction/languages/rust.d.ts +3 -0
- package/dist/extraction/languages/rust.d.ts.map +1 -0
- package/dist/extraction/languages/rust.js +109 -0
- package/dist/extraction/languages/rust.js.map +1 -0
- package/dist/extraction/languages/swift.d.ts +3 -0
- package/dist/extraction/languages/swift.d.ts.map +1 -0
- package/dist/extraction/languages/swift.js +91 -0
- package/dist/extraction/languages/swift.js.map +1 -0
- package/dist/extraction/languages/typescript.d.ts +3 -0
- package/dist/extraction/languages/typescript.d.ts.map +1 -0
- package/dist/extraction/languages/typescript.js +129 -0
- package/dist/extraction/languages/typescript.js.map +1 -0
- package/dist/extraction/liquid-extractor.d.ts +52 -0
- package/dist/extraction/liquid-extractor.d.ts.map +1 -0
- package/dist/extraction/liquid-extractor.js +313 -0
- package/dist/extraction/liquid-extractor.js.map +1 -0
- package/dist/extraction/parse-worker.d.ts +8 -0
- package/dist/extraction/parse-worker.d.ts.map +1 -0
- package/dist/extraction/parse-worker.js +57 -0
- package/dist/extraction/parse-worker.js.map +1 -0
- package/dist/extraction/svelte-extractor.d.ts +56 -0
- package/dist/extraction/svelte-extractor.d.ts.map +1 -0
- package/dist/extraction/svelte-extractor.js +272 -0
- package/dist/extraction/svelte-extractor.js.map +1 -0
- package/dist/extraction/tree-sitter-helpers.d.ts +28 -0
- package/dist/extraction/tree-sitter-helpers.d.ts.map +1 -0
- package/dist/extraction/tree-sitter-helpers.js +103 -0
- package/dist/extraction/tree-sitter-helpers.js.map +1 -0
- package/dist/extraction/tree-sitter-types.d.ts +179 -0
- package/dist/extraction/tree-sitter-types.d.ts.map +1 -0
- package/dist/extraction/tree-sitter-types.js +10 -0
- package/dist/extraction/tree-sitter-types.js.map +1 -0
- package/dist/extraction/tree-sitter.d.ts +67 -125
- package/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.js +1052 -1855
- package/dist/extraction/tree-sitter.js.map +1 -1
- package/dist/graph/traversal.d.ts.map +1 -1
- package/dist/graph/traversal.js +27 -3
- package/dist/graph/traversal.js.map +1 -1
- package/dist/index.d.ts +29 -53
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +88 -114
- package/dist/index.js.map +1 -1
- package/dist/installer/claude-md-template.d.ts +1 -1
- package/dist/installer/claude-md-template.d.ts.map +1 -1
- package/dist/installer/claude-md-template.js +15 -15
- package/dist/installer/config-writer.d.ts +1 -10
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +0 -79
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +3 -4
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +118 -116
- package/dist/installer/index.js.map +1 -1
- package/dist/mcp/index.d.ts +5 -0
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +25 -1
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/tools.d.ts +33 -0
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +432 -21
- package/dist/mcp/tools.js.map +1 -1
- package/dist/resolution/frameworks/csharp.js +29 -84
- package/dist/resolution/frameworks/csharp.js.map +1 -1
- package/dist/resolution/frameworks/express.js +44 -48
- package/dist/resolution/frameworks/express.js.map +1 -1
- package/dist/resolution/frameworks/go.js +34 -70
- package/dist/resolution/frameworks/go.js.map +1 -1
- package/dist/resolution/frameworks/java.js +29 -87
- package/dist/resolution/frameworks/java.js.map +1 -1
- package/dist/resolution/frameworks/laravel.js +6 -6
- package/dist/resolution/frameworks/laravel.js.map +1 -1
- package/dist/resolution/frameworks/python.js +33 -98
- package/dist/resolution/frameworks/python.js.map +1 -1
- package/dist/resolution/frameworks/react.js +53 -76
- package/dist/resolution/frameworks/react.js.map +1 -1
- package/dist/resolution/frameworks/ruby.js +12 -24
- package/dist/resolution/frameworks/ruby.js.map +1 -1
- package/dist/resolution/frameworks/rust.js +26 -66
- package/dist/resolution/frameworks/rust.js.map +1 -1
- package/dist/resolution/frameworks/svelte.js +11 -31
- package/dist/resolution/frameworks/svelte.js.map +1 -1
- package/dist/resolution/frameworks/swift.js +42 -160
- package/dist/resolution/frameworks/swift.js.map +1 -1
- package/dist/resolution/index.d.ts +19 -6
- package/dist/resolution/index.d.ts.map +1 -1
- package/dist/resolution/index.js +300 -141
- package/dist/resolution/index.js.map +1 -1
- package/dist/resolution/name-matcher.d.ts +5 -0
- package/dist/resolution/name-matcher.d.ts.map +1 -1
- package/dist/resolution/name-matcher.js +148 -8
- package/dist/resolution/name-matcher.js.map +1 -1
- package/dist/resolution/types.d.ts +1 -1
- package/dist/resolution/types.d.ts.map +1 -1
- package/dist/search/query-utils.d.ts +26 -1
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +209 -9
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/index.d.ts +2 -4
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +4 -3
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watcher.d.ts +81 -0
- package/dist/sync/watcher.d.ts.map +1 -0
- package/dist/sync/watcher.js +184 -0
- package/dist/sync/watcher.js.map +1 -0
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/ui/shimmer-progress.d.ts +11 -0
- package/dist/ui/shimmer-progress.d.ts.map +1 -0
- package/dist/ui/shimmer-progress.js +90 -0
- package/dist/ui/shimmer-progress.js.map +1 -0
- package/dist/ui/shimmer-worker.d.ts +2 -0
- package/dist/ui/shimmer-worker.d.ts.map +1 -0
- package/dist/ui/shimmer-worker.js +112 -0
- package/dist/ui/shimmer-worker.js.map +1 -0
- package/dist/ui/types.d.ts +17 -0
- package/dist/ui/types.d.ts.map +1 -0
- package/dist/ui/types.js +3 -0
- package/dist/ui/types.js.map +1 -0
- package/dist/vectors/embedder.js +1 -1
- package/dist/vectors/embedder.js.map +1 -1
- package/package.json +7 -12
- package/scripts/postinstall.js +0 -68
package/dist/context/index.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* Context Builder
|
|
4
4
|
*
|
|
5
|
-
* Builds rich context for tasks by combining
|
|
5
|
+
* Builds rich context for tasks by combining FTS search with graph traversal.
|
|
6
6
|
* Outputs structured context ready to inject into Claude.
|
|
7
7
|
*/
|
|
8
8
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
@@ -42,9 +42,11 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
42
42
|
exports.formatContextAsJson = exports.formatContextAsMarkdown = exports.ContextBuilder = void 0;
|
|
43
43
|
exports.createContextBuilder = createContextBuilder;
|
|
44
44
|
const fs = __importStar(require("fs"));
|
|
45
|
+
const path = __importStar(require("path"));
|
|
45
46
|
const formatter_1 = require("./formatter");
|
|
46
47
|
const errors_1 = require("../errors");
|
|
47
48
|
const utils_1 = require("../utils");
|
|
49
|
+
const query_utils_1 = require("../search/query-utils");
|
|
48
50
|
/**
|
|
49
51
|
* Extract likely symbol names from a natural language query
|
|
50
52
|
*
|
|
@@ -82,6 +84,13 @@ function extractSymbolsFromQuery(query) {
|
|
|
82
84
|
symbols.add(match[1]);
|
|
83
85
|
}
|
|
84
86
|
}
|
|
87
|
+
// Extract ALL_CAPS acronyms (2+ chars, e.g., REST, HTTP, LRU, API)
|
|
88
|
+
const acronymPattern = /\b([A-Z]{2,})\b/g;
|
|
89
|
+
while ((match = acronymPattern.exec(query)) !== null) {
|
|
90
|
+
if (match[1]) {
|
|
91
|
+
symbols.add(match[1]);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
85
94
|
// Extract dot.notation and split into parts (e.g., "app.isPackaged" -> ["app", "isPackaged"])
|
|
86
95
|
const dotPattern = /\b([a-zA-Z][a-zA-Z0-9]*(?:\.[a-zA-Z][a-zA-Z0-9]*)+)\b/g;
|
|
87
96
|
while ((match = dotPattern.exec(query)) !== null) {
|
|
@@ -96,13 +105,40 @@ function extractSymbolsFromQuery(query) {
|
|
|
96
105
|
}
|
|
97
106
|
}
|
|
98
107
|
}
|
|
99
|
-
//
|
|
108
|
+
// Extract plain lowercase identifiers (3+ chars, not already matched)
|
|
109
|
+
// Catches symbol names like "undo", "redo", "history", "render", "parse"
|
|
110
|
+
const lowercasePattern = /\b([a-z][a-z0-9]{2,})\b/g;
|
|
111
|
+
while ((match = lowercasePattern.exec(query)) !== null) {
|
|
112
|
+
if (match[1]) {
|
|
113
|
+
symbols.add(match[1]);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
// Filter out common English words that aren't likely symbol names
|
|
100
117
|
const commonWords = new Set([
|
|
101
118
|
'the', 'and', 'for', 'with', 'from', 'this', 'that', 'have', 'been',
|
|
102
119
|
'will', 'would', 'could', 'should', 'does', 'done', 'make', 'made',
|
|
103
120
|
'use', 'used', 'using', 'work', 'works', 'find', 'found', 'show',
|
|
104
121
|
'call', 'called', 'calling', 'get', 'set', 'add', 'all', 'any',
|
|
105
|
-
'how', 'what', 'when', 'where', 'which', 'who', 'why'
|
|
122
|
+
'how', 'what', 'when', 'where', 'which', 'who', 'why',
|
|
123
|
+
'not', 'but', 'are', 'was', 'were', 'has', 'had', 'its',
|
|
124
|
+
'can', 'did', 'may', 'also', 'into', 'than', 'then', 'them',
|
|
125
|
+
'each', 'other', 'some', 'such', 'only', 'same', 'about',
|
|
126
|
+
'after', 'before', 'between', 'through', 'during', 'without',
|
|
127
|
+
'again', 'further', 'once', 'here', 'there', 'both', 'just',
|
|
128
|
+
'more', 'most', 'very', 'being', 'having', 'doing',
|
|
129
|
+
'system', 'need', 'needs', 'want', 'wants', 'like', 'look',
|
|
130
|
+
'change', 'changes', 'changed', 'changing',
|
|
131
|
+
// Common English nouns/verbs that match thousands of unrelated code symbols
|
|
132
|
+
'layer', 'handle', 'handles', 'handling', 'incoming', 'outgoing',
|
|
133
|
+
'data', 'flow', 'flows', 'level', 'levels', 'request', 'requests',
|
|
134
|
+
'response', 'responses', 'implement', 'implements', 'implementation',
|
|
135
|
+
'interface', 'interfaces', 'class', 'classes', 'method', 'methods',
|
|
136
|
+
'trigger', 'triggers', 'affected', 'affect', 'affects',
|
|
137
|
+
'else', 'code', 'failing', 'failed', 'silently', 'decide', 'decides',
|
|
138
|
+
'return', 'returns', 'returned', 'take', 'takes', 'taken',
|
|
139
|
+
'check', 'checks', 'checked', 'create', 'creates', 'created',
|
|
140
|
+
'read', 'reads', 'write', 'writes', 'written',
|
|
141
|
+
'start', 'starts', 'stop', 'stops', 'run', 'runs', 'running',
|
|
106
142
|
]);
|
|
107
143
|
return Array.from(symbols).filter(s => !commonWords.has(s.toLowerCase()));
|
|
108
144
|
}
|
|
@@ -154,12 +190,10 @@ class ContextBuilder {
|
|
|
154
190
|
projectRoot;
|
|
155
191
|
queries;
|
|
156
192
|
traverser;
|
|
157
|
-
|
|
158
|
-
constructor(projectRoot, queries, traverser, vectorManager) {
|
|
193
|
+
constructor(projectRoot, queries, traverser) {
|
|
159
194
|
this.projectRoot = projectRoot;
|
|
160
195
|
this.queries = queries;
|
|
161
196
|
this.traverser = traverser;
|
|
162
|
-
this.vectorManager = vectorManager;
|
|
163
197
|
}
|
|
164
198
|
/**
|
|
165
199
|
* Build context for a task
|
|
@@ -254,74 +288,448 @@ class ContextBuilder {
|
|
|
254
288
|
let exactMatches = [];
|
|
255
289
|
if (symbolsFromQuery.length > 0) {
|
|
256
290
|
try {
|
|
291
|
+
// Get more results so we can apply co-location boosting before trimming
|
|
257
292
|
exactMatches = this.queries.findNodesByExactName(symbolsFromQuery, {
|
|
258
|
-
limit: Math.ceil(opts.searchLimit *
|
|
293
|
+
limit: Math.ceil(opts.searchLimit * 5),
|
|
259
294
|
kinds: opts.nodeKinds && opts.nodeKinds.length > 0 ? opts.nodeKinds : undefined,
|
|
260
295
|
});
|
|
296
|
+
// Co-location boost: when multiple extracted symbols appear in the same file,
|
|
297
|
+
// those results are much more likely to be what the user is looking for.
|
|
298
|
+
// E.g., "scrapeLoop" + "run" both in scrape/scrape.go → boost both.
|
|
299
|
+
if (exactMatches.length > 1) {
|
|
300
|
+
// Build a map of files → how many distinct symbol names matched in that file
|
|
301
|
+
const fileSymbolCounts = new Map();
|
|
302
|
+
for (const r of exactMatches) {
|
|
303
|
+
const names = fileSymbolCounts.get(r.node.filePath) || new Set();
|
|
304
|
+
names.add(r.node.name.toLowerCase());
|
|
305
|
+
fileSymbolCounts.set(r.node.filePath, names);
|
|
306
|
+
}
|
|
307
|
+
// Boost results in files where multiple query symbols co-occur
|
|
308
|
+
exactMatches = exactMatches.map(r => {
|
|
309
|
+
const symbolCount = fileSymbolCounts.get(r.node.filePath)?.size || 1;
|
|
310
|
+
return {
|
|
311
|
+
...r,
|
|
312
|
+
score: symbolCount > 1 ? r.score + (symbolCount - 1) * 20 : r.score,
|
|
313
|
+
};
|
|
314
|
+
});
|
|
315
|
+
exactMatches.sort((a, b) => b.score - a.score);
|
|
316
|
+
}
|
|
317
|
+
// Trim back to reasonable size
|
|
318
|
+
exactMatches = exactMatches.slice(0, Math.ceil(opts.searchLimit * 2));
|
|
261
319
|
(0, errors_1.logDebug)('Exact symbol matches', { count: exactMatches.length });
|
|
262
320
|
}
|
|
263
321
|
catch (error) {
|
|
264
322
|
(0, errors_1.logDebug)('Exact symbol lookup failed', { error: String(error) });
|
|
265
323
|
}
|
|
266
324
|
}
|
|
267
|
-
// Step
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
325
|
+
// Step 2b: Search for extracted symbols as definition (class/interface) prefixes.
|
|
326
|
+
// When the user writes "REST", "bulk", or "allocation", they usually mean classes
|
|
327
|
+
// like RestController, BulkRequest, AllocationService — not nodes named exactly that.
|
|
328
|
+
// Also tries stem variants: "caching" → "cache" finds Cache, CacheBuilder.
|
|
329
|
+
if (symbolsFromQuery.length > 0) {
|
|
330
|
+
const definitionKinds = ['class', 'interface', 'struct', 'trait',
|
|
331
|
+
'protocol', 'enum', 'type_alias'];
|
|
332
|
+
// Expand symbols with stem variants for broader definition matching
|
|
333
|
+
const expandedSymbols = new Set(symbolsFromQuery);
|
|
334
|
+
for (const sym of symbolsFromQuery) {
|
|
335
|
+
for (const variant of (0, query_utils_1.getStemVariants)(sym)) {
|
|
336
|
+
expandedSymbols.add(variant);
|
|
337
|
+
}
|
|
279
338
|
}
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
339
|
+
for (const sym of expandedSymbols) {
|
|
340
|
+
// Title-case the symbol: "REST" → "Rest", "bulk" → "Bulk", "allocation" → "Allocation"
|
|
341
|
+
const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
|
|
342
|
+
if (titleCased === sym)
|
|
343
|
+
continue; // already title-case (e.g., "Engine") — handled by exact match
|
|
344
|
+
// Fetch more results since popular prefixes have many matches
|
|
345
|
+
const prefixResults = this.queries.searchNodes(titleCased, {
|
|
346
|
+
limit: 30,
|
|
347
|
+
kinds: definitionKinds,
|
|
287
348
|
});
|
|
288
|
-
|
|
349
|
+
const matched = [];
|
|
350
|
+
for (const r of prefixResults) {
|
|
351
|
+
if (r.node.name.toLowerCase().startsWith(titleCased.toLowerCase())) {
|
|
352
|
+
// Favor shorter names: "AllocationService" (18 chars) over
|
|
353
|
+
// "AllocationBalancingRoundMetrics" (31 chars). Core classes tend
|
|
354
|
+
// to have concise names; test/helper classes are verbose.
|
|
355
|
+
const brevityBonus = Math.max(0, 10 - (r.node.name.length - titleCased.length) / 3);
|
|
356
|
+
matched.push({ ...r, score: r.score + 15 + brevityBonus });
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
matched.sort((a, b) => b.score - a.score);
|
|
360
|
+
for (const r of matched.slice(0, Math.ceil(opts.searchLimit))) {
|
|
361
|
+
const existing = exactMatches.find(e => e.node.id === r.node.id);
|
|
362
|
+
if (!existing) {
|
|
363
|
+
exactMatches.push(r);
|
|
364
|
+
}
|
|
365
|
+
}
|
|
289
366
|
}
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
367
|
+
exactMatches.sort((a, b) => b.score - a.score);
|
|
368
|
+
exactMatches = exactMatches.slice(0, Math.ceil(opts.searchLimit * 3));
|
|
369
|
+
}
|
|
370
|
+
// Step 3: Run text search for natural language term matching
|
|
371
|
+
// This catches file-name and node-name matches that semantic search may miss,
|
|
372
|
+
// which is critical for template-heavy codebases (e.g., Liquid/Shopify themes)
|
|
373
|
+
// where file names are the primary identifiers.
|
|
374
|
+
let textResults = [];
|
|
375
|
+
try {
|
|
376
|
+
const searchTerms = (0, query_utils_1.extractSearchTerms)(query);
|
|
377
|
+
if (searchTerms.length > 0) {
|
|
378
|
+
// Search each term individually to get broader coverage,
|
|
379
|
+
// then boost results that match multiple terms
|
|
380
|
+
const termResultsMap = new Map();
|
|
381
|
+
// When no explicit kind filter is set, exclude imports — they flood FTS
|
|
382
|
+
// results with qualified name matches (e.g., "REST" matches 445K import paths)
|
|
383
|
+
// but are almost never what exploration queries want.
|
|
384
|
+
const searchKinds = opts.nodeKinds && opts.nodeKinds.length > 0
|
|
385
|
+
? opts.nodeKinds
|
|
386
|
+
: ['file', 'module', 'class', 'struct', 'interface', 'trait', 'protocol',
|
|
387
|
+
'function', 'method', 'property', 'field', 'variable', 'constant',
|
|
388
|
+
'enum', 'enum_member', 'type_alias', 'namespace', 'export',
|
|
389
|
+
'route', 'component'];
|
|
390
|
+
for (const term of searchTerms) {
|
|
391
|
+
const termResults = this.queries.searchNodes(term, {
|
|
392
|
+
limit: opts.searchLimit * 2,
|
|
393
|
+
kinds: searchKinds,
|
|
394
|
+
});
|
|
395
|
+
for (const r of termResults) {
|
|
396
|
+
const existing = termResultsMap.get(r.node.id);
|
|
397
|
+
if (existing) {
|
|
398
|
+
existing.termHits++;
|
|
399
|
+
existing.result.score = Math.max(existing.result.score, r.score);
|
|
400
|
+
}
|
|
401
|
+
else {
|
|
402
|
+
termResultsMap.set(r.node.id, { result: r, termHits: 1 });
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
// Boost results matching multiple terms and sort
|
|
407
|
+
textResults = Array.from(termResultsMap.values())
|
|
408
|
+
.map(({ result, termHits }) => ({
|
|
409
|
+
...result,
|
|
410
|
+
score: result.score + (termHits - 1) * 5,
|
|
411
|
+
}))
|
|
412
|
+
.sort((a, b) => b.score - a.score)
|
|
413
|
+
.slice(0, opts.searchLimit * 2);
|
|
293
414
|
}
|
|
415
|
+
(0, errors_1.logDebug)('Text search results', { count: textResults.length });
|
|
294
416
|
}
|
|
295
|
-
|
|
296
|
-
|
|
417
|
+
catch (error) {
|
|
418
|
+
(0, errors_1.logDebug)('Text search failed', { query, error: String(error) });
|
|
419
|
+
}
|
|
420
|
+
// Step 4: Merge results, taking the max score when duplicates appear
|
|
421
|
+
// across search channels. Exact matches may have lower scores than FTS
|
|
422
|
+
// results for the same node — use the best score from any channel.
|
|
423
|
+
const resultById = new Map();
|
|
297
424
|
let searchResults = [];
|
|
298
|
-
// Add exact matches first
|
|
425
|
+
// Add exact matches first
|
|
299
426
|
for (const result of exactMatches) {
|
|
300
|
-
|
|
301
|
-
|
|
427
|
+
const existing = resultById.get(result.node.id);
|
|
428
|
+
if (existing) {
|
|
429
|
+
existing.score = Math.max(existing.score, result.score);
|
|
430
|
+
}
|
|
431
|
+
else {
|
|
432
|
+
resultById.set(result.node.id, result);
|
|
302
433
|
searchResults.push(result);
|
|
303
434
|
}
|
|
304
435
|
}
|
|
305
|
-
// Add
|
|
306
|
-
for (const result of
|
|
307
|
-
|
|
308
|
-
|
|
436
|
+
// Add text search results, upgrading scores for duplicates
|
|
437
|
+
for (const result of textResults) {
|
|
438
|
+
const existing = resultById.get(result.node.id);
|
|
439
|
+
if (existing) {
|
|
440
|
+
existing.score = Math.max(existing.score, result.score);
|
|
441
|
+
}
|
|
442
|
+
else {
|
|
443
|
+
resultById.set(result.node.id, result);
|
|
309
444
|
searchResults.push(result);
|
|
310
445
|
}
|
|
311
446
|
}
|
|
312
|
-
|
|
313
|
-
|
|
447
|
+
const queryLower = query.toLowerCase();
|
|
448
|
+
const isTestQuery = queryLower.includes('test') || queryLower.includes('spec');
|
|
449
|
+
// Deprioritize test files early so they don't take multi-term boost slots
|
|
450
|
+
if (!isTestQuery) {
|
|
451
|
+
for (const result of searchResults) {
|
|
452
|
+
if ((0, query_utils_1.isTestFile)(result.node.filePath)) {
|
|
453
|
+
result.score *= 0.3;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
// Step 5a: Multi-term co-occurrence re-ranking (applied BEFORE truncation).
|
|
458
|
+
// For multi-word queries like "search execution from request to shard",
|
|
459
|
+
// nodes matching 2+ query terms in their name or path are far more relevant
|
|
460
|
+
// than nodes matching just one generic term. Without this, "ExecutionUtils"
|
|
461
|
+
// (matches only "execution") fills budget slots meant for "ShardSearchRequest"
|
|
462
|
+
// (matches "shard" + "search" + "request").
|
|
463
|
+
const queryTermsForBoost = (0, query_utils_1.extractSearchTerms)(query);
|
|
464
|
+
if (queryTermsForBoost.length >= 2) {
|
|
465
|
+
// Group terms that are substrings of each other (stem variants of the same
|
|
466
|
+
// root word). "indexed", "indexe", "index" should count as ONE concept match,
|
|
467
|
+
// not three. Without this, stem variants inflate matchCount and give false
|
|
468
|
+
// multi-term boosts to symbols matching one root word multiple times.
|
|
469
|
+
const termGroups = [];
|
|
470
|
+
const sorted = [...queryTermsForBoost].sort((a, b) => b.length - a.length);
|
|
471
|
+
const assigned = new Set();
|
|
472
|
+
for (const term of sorted) {
|
|
473
|
+
if (assigned.has(term))
|
|
474
|
+
continue;
|
|
475
|
+
const group = [term];
|
|
476
|
+
assigned.add(term);
|
|
477
|
+
for (const other of sorted) {
|
|
478
|
+
if (assigned.has(other))
|
|
479
|
+
continue;
|
|
480
|
+
if (term.includes(other) || other.includes(term)) {
|
|
481
|
+
group.push(other);
|
|
482
|
+
assigned.add(other);
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
termGroups.push(group);
|
|
486
|
+
}
|
|
487
|
+
// Build a set of exact-match node IDs so we can exempt them from dampening.
|
|
488
|
+
// When the query is "LiveEditMode DevServerPreview", these are specific
|
|
489
|
+
// symbols the user asked for — dampening them because they only match 1
|
|
490
|
+
// term group is counter-productive.
|
|
491
|
+
const exactMatchIds = new Set(exactMatches.map(r => r.node.id));
|
|
492
|
+
for (const result of searchResults) {
|
|
493
|
+
// Check term matches in name (substring) and path DIRECTORIES (exact).
|
|
494
|
+
// Directory segments must match exactly — "search" matches directory
|
|
495
|
+
// "search/" but NOT "elasticsearch/". The class name is checked
|
|
496
|
+
// separately via substring match on the node name.
|
|
497
|
+
const nameLower = result.node.name.toLowerCase();
|
|
498
|
+
const dirSegments = path.dirname(result.node.filePath).toLowerCase().split('/');
|
|
499
|
+
let matchCount = 0;
|
|
500
|
+
for (const group of termGroups) {
|
|
501
|
+
const groupMatches = group.some(term => {
|
|
502
|
+
const inName = nameLower.includes(term);
|
|
503
|
+
const inDir = dirSegments.some(seg => seg === term);
|
|
504
|
+
return inName || inDir;
|
|
505
|
+
});
|
|
506
|
+
if (groupMatches)
|
|
507
|
+
matchCount++;
|
|
508
|
+
}
|
|
509
|
+
if (matchCount >= 2) {
|
|
510
|
+
// Multiplicative boost — 2 terms → 2x, 3 terms → 2.5x
|
|
511
|
+
result.score *= 1 + matchCount * 0.5;
|
|
512
|
+
}
|
|
513
|
+
else if (!exactMatchIds.has(result.node.id)) {
|
|
514
|
+
// Mild dampen for single-term matches — they might be generic
|
|
515
|
+
// but could also be the right result (e.g., "Protocol" class for an IPC query).
|
|
516
|
+
// Exempt exact name matches: they are specific symbols the user queried for.
|
|
517
|
+
result.score *= 0.6;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
searchResults.sort((a, b) => b.score - a.score);
|
|
521
|
+
}
|
|
522
|
+
// Step 5b: CamelCase-boundary matching via LIKE query.
|
|
523
|
+
// FTS can't find "Search" inside "TransportSearchAction" (one FTS token).
|
|
524
|
+
// LIKE reliably finds these substring matches. Results are appended with
|
|
525
|
+
// guaranteed slots so they don't compete with higher-scoring prefix matches.
|
|
526
|
+
if (symbolsFromQuery.length > 0) {
|
|
527
|
+
const camelDefinitionKinds = ['class', 'interface', 'struct', 'trait',
|
|
528
|
+
'protocol', 'enum', 'type_alias'];
|
|
529
|
+
const camelSearchedTerms = new Set();
|
|
530
|
+
const searchIdSet = new Set(searchResults.map(r => r.node.id));
|
|
531
|
+
// Track per-node term hits for multi-term boosting
|
|
532
|
+
const camelNodeTerms = new Map();
|
|
533
|
+
const maxCamelPerTerm = Math.ceil(opts.searchLimit / 2);
|
|
534
|
+
for (const sym of symbolsFromQuery) {
|
|
535
|
+
const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
|
|
536
|
+
if (titleCased.length < 3)
|
|
537
|
+
continue;
|
|
538
|
+
const termKey = titleCased.toLowerCase();
|
|
539
|
+
if (camelSearchedTerms.has(termKey))
|
|
540
|
+
continue;
|
|
541
|
+
camelSearchedTerms.add(termKey);
|
|
542
|
+
// Fetch a large batch — popular terms like "Search" in Elasticsearch
|
|
543
|
+
// have hundreds of substring matches. The LIKE scan cost is the same
|
|
544
|
+
// regardless of LIMIT (SQLite scans all matches to sort), so we fetch
|
|
545
|
+
// generously and let path-relevance scoring pick the best ones.
|
|
546
|
+
const likeResults = this.queries.findNodesByNameSubstring(titleCased, {
|
|
547
|
+
limit: 200,
|
|
548
|
+
kinds: camelDefinitionKinds,
|
|
549
|
+
excludePrefix: true,
|
|
550
|
+
});
|
|
551
|
+
// Filter to CamelCase boundaries, score by path relevance, and take top N
|
|
552
|
+
const termCandidates = [];
|
|
553
|
+
for (const r of likeResults) {
|
|
554
|
+
const name = r.node.name;
|
|
555
|
+
const idx = name.indexOf(titleCased);
|
|
556
|
+
if (idx <= 0)
|
|
557
|
+
continue;
|
|
558
|
+
// Accept CamelCase boundary (lowercase before match) OR
|
|
559
|
+
// acronym boundary (uppercase before match, e.g., RPCProtocol)
|
|
560
|
+
if (!/[a-zA-Z]/.test(name.charAt(idx - 1)))
|
|
561
|
+
continue;
|
|
562
|
+
if (searchIdSet.has(r.node.id))
|
|
563
|
+
continue;
|
|
564
|
+
if ((0, query_utils_1.isTestFile)(r.node.filePath) && !isTestQuery)
|
|
565
|
+
continue;
|
|
566
|
+
const pathScore = (0, query_utils_1.scorePathRelevance)(r.node.filePath, query);
|
|
567
|
+
const brevityBonus = Math.max(0, 6 - (name.length - titleCased.length) / 4);
|
|
568
|
+
termCandidates.push({ node: r.node, score: 8 + brevityBonus + pathScore });
|
|
569
|
+
}
|
|
570
|
+
termCandidates.sort((a, b) => b.score - a.score);
|
|
571
|
+
// Widen the per-term pool for accumulation so multi-term co-occurrences
|
|
572
|
+
// can be discovered. A class matching 3 query terms at CamelCase boundaries
|
|
573
|
+
// is far more relevant than one matching just 1, but it needs to survive
|
|
574
|
+
// the per-term cut for EACH term to accumulate its count.
|
|
575
|
+
const accumPerTerm = maxCamelPerTerm * 4;
|
|
576
|
+
for (const r of termCandidates.slice(0, accumPerTerm)) {
|
|
577
|
+
const existing = camelNodeTerms.get(r.node.id);
|
|
578
|
+
if (existing) {
|
|
579
|
+
existing.termCount++;
|
|
580
|
+
}
|
|
581
|
+
else {
|
|
582
|
+
camelNodeTerms.set(r.node.id, {
|
|
583
|
+
result: r,
|
|
584
|
+
termCount: 1,
|
|
585
|
+
});
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
// Append CamelCase matches with multi-term boost.
|
|
590
|
+
// These are structurally important (class names containing query terms at
|
|
591
|
+
// CamelCase boundaries) but score much lower than FTS results. Scale their
|
|
592
|
+
// scores up so multi-term CamelCase matches can compete with FTS results.
|
|
593
|
+
const camelResults = [];
|
|
594
|
+
for (const [, info] of camelNodeTerms) {
|
|
595
|
+
// Multi-term CamelCase matches are extremely relevant — a class matching
|
|
596
|
+
// 3+ query terms in its name (e.g., ExtensionHostProcess) is almost
|
|
597
|
+
// certainly what the user wants. Scale aggressively.
|
|
598
|
+
info.result.score = info.result.score * (1 + info.termCount) + (info.termCount - 1) * 30;
|
|
599
|
+
camelResults.push(info.result);
|
|
600
|
+
}
|
|
601
|
+
camelResults.sort((a, b) => b.score - a.score);
|
|
602
|
+
const maxCamelTotal = opts.searchLimit;
|
|
603
|
+
for (const r of camelResults.slice(0, maxCamelTotal)) {
|
|
604
|
+
searchResults.push(r);
|
|
605
|
+
searchIdSet.add(r.node.id);
|
|
606
|
+
}
|
|
607
|
+
// Step 5c: Compound term matching — find classes whose name contains 2+
|
|
608
|
+
// query terms at ANY position (not just CamelCase boundaries).
|
|
609
|
+
// The CamelCase step above requires idx > 0, which misses classes that
|
|
610
|
+
// START with a query term (e.g., "SearchShardsRequest" starts with "Search").
|
|
611
|
+
// For multi-word queries, a class matching multiple query terms in its name
|
|
612
|
+
// is almost certainly relevant regardless of position.
|
|
613
|
+
if (symbolsFromQuery.length >= 2) {
|
|
614
|
+
// Collect ALL LIKE results per term (reusing findNodesByNameSubstring)
|
|
615
|
+
// but without the CamelCase boundary or prefix exclusion filters.
|
|
616
|
+
const compoundTermMap = new Map();
|
|
617
|
+
for (const sym of symbolsFromQuery) {
|
|
618
|
+
const titleCased = sym.charAt(0).toUpperCase() + sym.slice(1).toLowerCase();
|
|
619
|
+
if (titleCased.length < 3)
|
|
620
|
+
continue;
|
|
621
|
+
const likeResults = this.queries.findNodesByNameSubstring(titleCased, {
|
|
622
|
+
limit: 200,
|
|
623
|
+
kinds: camelDefinitionKinds,
|
|
624
|
+
excludePrefix: false,
|
|
625
|
+
});
|
|
626
|
+
for (const r of likeResults) {
|
|
627
|
+
if (searchIdSet.has(r.node.id))
|
|
628
|
+
continue;
|
|
629
|
+
if ((0, query_utils_1.isTestFile)(r.node.filePath) && !isTestQuery)
|
|
630
|
+
continue;
|
|
631
|
+
const entry = compoundTermMap.get(r.node.id);
|
|
632
|
+
if (entry) {
|
|
633
|
+
entry.terms.add(titleCased);
|
|
634
|
+
}
|
|
635
|
+
else {
|
|
636
|
+
compoundTermMap.set(r.node.id, { node: r.node, terms: new Set([titleCased]) });
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
// Keep only nodes matching 2+ distinct terms
|
|
641
|
+
const compoundResults = [];
|
|
642
|
+
for (const [, entry] of compoundTermMap) {
|
|
643
|
+
if (entry.terms.size >= 2) {
|
|
644
|
+
const pathScore = (0, query_utils_1.scorePathRelevance)(entry.node.filePath, query);
|
|
645
|
+
const brevityBonus = Math.max(0, 6 - entry.node.name.length / 8);
|
|
646
|
+
compoundResults.push({
|
|
647
|
+
node: entry.node,
|
|
648
|
+
score: 10 + (entry.terms.size - 1) * 20 + pathScore + brevityBonus,
|
|
649
|
+
});
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
compoundResults.sort((a, b) => b.score - a.score);
|
|
653
|
+
const maxCompound = Math.ceil(opts.searchLimit / 2);
|
|
654
|
+
for (const r of compoundResults.slice(0, maxCompound)) {
|
|
655
|
+
searchResults.push(r);
|
|
656
|
+
searchIdSet.add(r.node.id);
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
// Final sort and truncation — all search channels (exact, text, CamelCase,
|
|
661
|
+
// compound) have now contributed. Sort by score so multi-term matches from
|
|
662
|
+
// later steps can outrank dampened single-term matches from earlier steps.
|
|
663
|
+
searchResults.sort((a, b) => b.score - a.score);
|
|
664
|
+
searchResults = searchResults.slice(0, opts.searchLimit * 3);
|
|
314
665
|
// Filter by minimum score
|
|
315
666
|
let filteredResults = searchResults.filter((r) => r.score >= opts.minScore);
|
|
316
667
|
// Resolve imports/exports to their actual definitions
|
|
317
668
|
// If someone searches "terminal" and finds `import { TerminalPanel }`,
|
|
318
669
|
// they want the TerminalPanel class, not the import statement
|
|
319
670
|
filteredResults = this.resolveImportsToDefinitions(filteredResults);
|
|
671
|
+
// Cap entry points so traversal budget isn't spread too thin.
|
|
672
|
+
// With 36 entry points and maxNodes=120, each gets only 3 nodes — useless.
|
|
673
|
+
// Cap to searchLimit so each entry point gets a meaningful traversal budget.
|
|
674
|
+
if (filteredResults.length > opts.searchLimit) {
|
|
675
|
+
filteredResults = filteredResults.slice(0, opts.searchLimit);
|
|
676
|
+
}
|
|
320
677
|
// Add entry points to subgraph
|
|
321
678
|
for (const result of filteredResults) {
|
|
322
679
|
nodes.set(result.node.id, result.node);
|
|
323
680
|
roots.push(result.node.id);
|
|
324
681
|
}
|
|
682
|
+
// Expand type hierarchy for class/interface entry points.
|
|
683
|
+
// BFS often exhausts its per-entry-point budget on contained methods
|
|
684
|
+
// before reaching extends/implements neighbors. This dedicated step
|
|
685
|
+
// ensures subclasses and superclasses always appear in results.
|
|
686
|
+
// Budget: up to maxNodes/4 hierarchy nodes to avoid flooding.
|
|
687
|
+
const typeHierarchyKinds = new Set(['class', 'interface', 'struct', 'trait', 'protocol']);
|
|
688
|
+
const maxHierarchyNodes = Math.ceil(opts.maxNodes / 4);
|
|
689
|
+
let hierarchyNodesAdded = 0;
|
|
690
|
+
for (const result of filteredResults) {
|
|
691
|
+
if (hierarchyNodesAdded >= maxHierarchyNodes)
|
|
692
|
+
break;
|
|
693
|
+
if (typeHierarchyKinds.has(result.node.kind)) {
|
|
694
|
+
const hierarchy = this.traverser.getTypeHierarchy(result.node.id);
|
|
695
|
+
for (const [id, node] of hierarchy.nodes) {
|
|
696
|
+
if (!nodes.has(id)) {
|
|
697
|
+
nodes.set(id, node);
|
|
698
|
+
hierarchyNodesAdded++;
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
for (const edge of hierarchy.edges) {
|
|
702
|
+
const exists = edges.some((e) => e.source === edge.source && e.target === edge.target && e.kind === edge.kind);
|
|
703
|
+
if (!exists) {
|
|
704
|
+
edges.push(edge);
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
// Pass 2: expand hierarchy of newly-discovered parent types to find siblings.
|
|
710
|
+
// E.g., InternalEngine → Engine (parent, from pass 1) → ReadOnlyEngine (sibling).
|
|
711
|
+
if (hierarchyNodesAdded > 0) {
|
|
712
|
+
const pass2Candidates = [...nodes.values()].filter(n => typeHierarchyKinds.has(n.kind) && !roots.includes(n.id));
|
|
713
|
+
for (const candidate of pass2Candidates) {
|
|
714
|
+
if (hierarchyNodesAdded >= maxHierarchyNodes)
|
|
715
|
+
break;
|
|
716
|
+
const siblingHierarchy = this.traverser.getTypeHierarchy(candidate.id);
|
|
717
|
+
for (const [id, node] of siblingHierarchy.nodes) {
|
|
718
|
+
if (!nodes.has(id) && hierarchyNodesAdded < maxHierarchyNodes) {
|
|
719
|
+
nodes.set(id, node);
|
|
720
|
+
hierarchyNodesAdded++;
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
for (const edge of siblingHierarchy.edges) {
|
|
724
|
+
if (nodes.has(edge.source) && nodes.has(edge.target)) {
|
|
725
|
+
const exists = edges.some((e) => e.source === edge.source && e.target === edge.target && e.kind === edge.kind);
|
|
726
|
+
if (!exists) {
|
|
727
|
+
edges.push(edge);
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
}
|
|
325
733
|
// Traverse from each entry point
|
|
326
734
|
for (const result of filteredResults) {
|
|
327
735
|
const traversalResult = this.traverser.traverseBFS(result.node.id, {
|
|
@@ -346,6 +754,8 @@ class ContextBuilder {
|
|
|
346
754
|
}
|
|
347
755
|
}
|
|
348
756
|
// Trim to max nodes if needed
|
|
757
|
+
let finalNodes = nodes;
|
|
758
|
+
let finalEdges = edges;
|
|
349
759
|
if (nodes.size > opts.maxNodes) {
|
|
350
760
|
// Prioritize entry points and their direct neighbors
|
|
351
761
|
const priorityIds = new Set(roots);
|
|
@@ -358,26 +768,95 @@ class ContextBuilder {
|
|
|
358
768
|
}
|
|
359
769
|
}
|
|
360
770
|
// Keep priority nodes, then fill remaining slots
|
|
361
|
-
|
|
771
|
+
finalNodes = new Map();
|
|
362
772
|
for (const id of priorityIds) {
|
|
363
773
|
const node = nodes.get(id);
|
|
364
|
-
if (node &&
|
|
365
|
-
|
|
774
|
+
if (node && finalNodes.size < opts.maxNodes) {
|
|
775
|
+
finalNodes.set(id, node);
|
|
366
776
|
}
|
|
367
777
|
}
|
|
368
778
|
// Fill remaining from other nodes
|
|
369
779
|
for (const [id, node] of nodes) {
|
|
370
|
-
if (
|
|
780
|
+
if (finalNodes.size >= opts.maxNodes)
|
|
371
781
|
break;
|
|
372
|
-
if (!
|
|
373
|
-
|
|
782
|
+
if (!finalNodes.has(id)) {
|
|
783
|
+
finalNodes.set(id, node);
|
|
374
784
|
}
|
|
375
785
|
}
|
|
376
786
|
// Filter edges to only include kept nodes
|
|
377
|
-
|
|
378
|
-
|
|
787
|
+
finalEdges = edges.filter((e) => finalNodes.has(e.source) && finalNodes.has(e.target));
|
|
788
|
+
}
|
|
789
|
+
// Per-file diversity cap: prevent any single file from monopolizing the
|
|
790
|
+
// node budget. When BFS traverses from a method, it follows `contains`
|
|
791
|
+
// to the parent class, then back down to all sibling methods. With
|
|
792
|
+
// multiple entry points in the same class, one file can consume 30-40%
|
|
793
|
+
// of maxNodes. Cap each file to ~20% to ensure cross-file diversity.
|
|
794
|
+
const maxPerFile = Math.max(5, Math.ceil(opts.maxNodes * 0.2));
|
|
795
|
+
const fileCounts = new Map();
|
|
796
|
+
for (const [id, node] of finalNodes) {
|
|
797
|
+
const ids = fileCounts.get(node.filePath) || [];
|
|
798
|
+
ids.push(id);
|
|
799
|
+
fileCounts.set(node.filePath, ids);
|
|
800
|
+
}
|
|
801
|
+
const rootSet = new Set(roots);
|
|
802
|
+
for (const [, nodeIds] of fileCounts) {
|
|
803
|
+
if (nodeIds.length <= maxPerFile)
|
|
804
|
+
continue;
|
|
805
|
+
// Sort: entry points first, then classes/interfaces, then others
|
|
806
|
+
const kindPriority = {
|
|
807
|
+
class: 3, interface: 3, struct: 3, trait: 3, protocol: 3, enum: 3,
|
|
808
|
+
method: 1, function: 1, property: 0, field: 0, variable: 0,
|
|
809
|
+
};
|
|
810
|
+
nodeIds.sort((a, b) => {
|
|
811
|
+
const aRoot = rootSet.has(a) ? 10 : 0;
|
|
812
|
+
const bRoot = rootSet.has(b) ? 10 : 0;
|
|
813
|
+
const aKind = kindPriority[finalNodes.get(a).kind] ?? 0;
|
|
814
|
+
const bKind = kindPriority[finalNodes.get(b).kind] ?? 0;
|
|
815
|
+
return (bRoot + bKind) - (aRoot + aKind);
|
|
816
|
+
});
|
|
817
|
+
// Remove excess nodes (keep the highest-priority ones)
|
|
818
|
+
for (const id of nodeIds.slice(maxPerFile)) {
|
|
819
|
+
finalNodes.delete(id);
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
// Non-production node cap: limit test/sample/integration/example files to
|
|
823
|
+
// at most 15% of the budget. Many codebases have dozens of near-identical
|
|
824
|
+
// test implementations (e.g., 6 Guard classes in integration tests) that
|
|
825
|
+
// individually survive score dampening but collectively flood the result.
|
|
826
|
+
// Test entry points are NOT exempt — they should be evicted too.
|
|
827
|
+
if (!isTestQuery) {
|
|
828
|
+
const maxNonProd = Math.max(3, Math.ceil(opts.maxNodes * 0.15));
|
|
829
|
+
const nonProdIds = [];
|
|
830
|
+
for (const [id, node] of finalNodes) {
|
|
831
|
+
if ((0, query_utils_1.isTestFile)(node.filePath)) {
|
|
832
|
+
nonProdIds.push(id);
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
if (nonProdIds.length > maxNonProd) {
|
|
836
|
+
for (const id of nonProdIds.slice(maxNonProd)) {
|
|
837
|
+
finalNodes.delete(id);
|
|
838
|
+
// Also remove from roots — test file entry points shouldn't anchor results
|
|
839
|
+
const rootIdx = roots.indexOf(id);
|
|
840
|
+
if (rootIdx !== -1)
|
|
841
|
+
roots.splice(rootIdx, 1);
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
}
|
|
845
|
+
// Re-filter edges after per-file and non-production caps
|
|
846
|
+
finalEdges = finalEdges.filter((e) => finalNodes.has(e.source) && finalNodes.has(e.target));
|
|
847
|
+
// Edge recovery: BFS with many entry points leaves most nodes disconnected.
|
|
848
|
+
// Discover edges between already-selected nodes to recover connectivity.
|
|
849
|
+
const recoveryKinds = ['calls', 'extends', 'implements', 'references', 'overrides'];
|
|
850
|
+
const recoveredEdges = this.queries.findEdgesBetweenNodes([...finalNodes.keys()], recoveryKinds);
|
|
851
|
+
const existingEdgeKeys = new Set(finalEdges.map((e) => `${e.source}:${e.target}:${e.kind}`));
|
|
852
|
+
for (const edge of recoveredEdges) {
|
|
853
|
+
const key = `${edge.source}:${edge.target}:${edge.kind}`;
|
|
854
|
+
if (!existingEdgeKeys.has(key)) {
|
|
855
|
+
finalEdges.push(edge);
|
|
856
|
+
existingEdgeKeys.add(key);
|
|
857
|
+
}
|
|
379
858
|
}
|
|
380
|
-
return { nodes, edges, roots };
|
|
859
|
+
return { nodes: finalNodes, edges: finalEdges, roots };
|
|
381
860
|
}
|
|
382
861
|
/**
|
|
383
862
|
* Get the source code for a node
|
|
@@ -559,8 +1038,8 @@ exports.ContextBuilder = ContextBuilder;
|
|
|
559
1038
|
/**
|
|
560
1039
|
* Create a context builder
|
|
561
1040
|
*/
|
|
562
|
-
function createContextBuilder(projectRoot, queries, traverser
|
|
563
|
-
return new ContextBuilder(projectRoot, queries, traverser
|
|
1041
|
+
function createContextBuilder(projectRoot, queries, traverser) {
    // Factory wrapper: forwards the three arguments unchanged to the
    // ContextBuilder constructor and hands back the freshly built instance.
    const builder = new ContextBuilder(projectRoot, queries, traverser);
    return builder;
}
|
|
565
1044
|
// Re-export formatter
|
|
566
1045
|
var formatter_2 = require("./formatter");
|