@colbymchenry/codegraph-darwin-x64 0.9.7 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/dist/bin/codegraph.js +19 -56
- package/lib/dist/bin/codegraph.js.map +1 -1
- package/lib/dist/context/index.d.ts +9 -0
- package/lib/dist/context/index.d.ts.map +1 -1
- package/lib/dist/context/index.js +95 -6
- package/lib/dist/context/index.js.map +1 -1
- package/lib/dist/context/markers.d.ts +19 -0
- package/lib/dist/context/markers.d.ts.map +1 -0
- package/lib/dist/context/markers.js +22 -0
- package/lib/dist/context/markers.js.map +1 -0
- package/lib/dist/extraction/grammars.d.ts +10 -0
- package/lib/dist/extraction/grammars.d.ts.map +1 -1
- package/lib/dist/extraction/grammars.js +13 -0
- package/lib/dist/extraction/grammars.js.map +1 -1
- package/lib/dist/extraction/index.d.ts.map +1 -1
- package/lib/dist/extraction/index.js +17 -2
- package/lib/dist/extraction/index.js.map +1 -1
- package/lib/dist/extraction/tree-sitter.d.ts +26 -0
- package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.js +140 -20
- package/lib/dist/extraction/tree-sitter.js.map +1 -1
- package/lib/dist/index.d.ts +9 -2
- package/lib/dist/index.d.ts.map +1 -1
- package/lib/dist/index.js +17 -2
- package/lib/dist/index.js.map +1 -1
- package/lib/dist/installer/targets/shared.d.ts.map +1 -1
- package/lib/dist/installer/targets/shared.js +3 -2
- package/lib/dist/installer/targets/shared.js.map +1 -1
- package/lib/dist/mcp/engine.d.ts.map +1 -1
- package/lib/dist/mcp/engine.js +12 -38
- package/lib/dist/mcp/engine.js.map +1 -1
- package/lib/dist/mcp/index.d.ts +7 -4
- package/lib/dist/mcp/index.d.ts.map +1 -1
- package/lib/dist/mcp/index.js +46 -39
- package/lib/dist/mcp/index.js.map +1 -1
- package/lib/dist/mcp/proxy.d.ts +35 -0
- package/lib/dist/mcp/proxy.d.ts.map +1 -1
- package/lib/dist/mcp/proxy.js +223 -0
- package/lib/dist/mcp/proxy.js.map +1 -1
- package/lib/dist/mcp/server-instructions.d.ts +1 -1
- package/lib/dist/mcp/server-instructions.d.ts.map +1 -1
- package/lib/dist/mcp/server-instructions.js +18 -19
- package/lib/dist/mcp/server-instructions.js.map +1 -1
- package/lib/dist/mcp/session.d.ts +10 -0
- package/lib/dist/mcp/session.d.ts.map +1 -1
- package/lib/dist/mcp/session.js +7 -5
- package/lib/dist/mcp/session.js.map +1 -1
- package/lib/dist/mcp/tools.d.ts +49 -52
- package/lib/dist/mcp/tools.d.ts.map +1 -1
- package/lib/dist/mcp/tools.js +922 -908
- package/lib/dist/mcp/tools.js.map +1 -1
- package/lib/dist/resolution/callback-synthesizer.d.ts +2 -2
- package/lib/dist/resolution/callback-synthesizer.d.ts.map +1 -1
- package/lib/dist/resolution/callback-synthesizer.js +239 -2
- package/lib/dist/resolution/callback-synthesizer.js.map +1 -1
- package/lib/dist/search/query-utils.d.ts +18 -0
- package/lib/dist/search/query-utils.d.ts.map +1 -1
- package/lib/dist/search/query-utils.js +30 -0
- package/lib/dist/search/query-utils.js.map +1 -1
- package/lib/dist/types.d.ts +8 -0
- package/lib/dist/types.d.ts.map +1 -1
- package/lib/node_modules/.package-lock.json +1 -1
- package/lib/package.json +1 -1
- package/package.json +1 -1
package/lib/dist/mcp/tools.js
CHANGED
|
@@ -4,53 +4,25 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Defines the tools exposed by the CodeGraph MCP server.
|
|
6
6
|
*/
|
|
7
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
-
if (k2 === undefined) k2 = k;
|
|
9
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
-
}
|
|
13
|
-
Object.defineProperty(o, k2, desc);
|
|
14
|
-
}) : (function(o, m, k, k2) {
|
|
15
|
-
if (k2 === undefined) k2 = k;
|
|
16
|
-
o[k2] = m[k];
|
|
17
|
-
}));
|
|
18
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
-
}) : function(o, v) {
|
|
21
|
-
o["default"] = v;
|
|
22
|
-
});
|
|
23
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
-
var ownKeys = function(o) {
|
|
25
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
-
var ar = [];
|
|
27
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
-
return ar;
|
|
29
|
-
};
|
|
30
|
-
return ownKeys(o);
|
|
31
|
-
};
|
|
32
|
-
return function (mod) {
|
|
33
|
-
if (mod && mod.__esModule) return mod;
|
|
34
|
-
var result = {};
|
|
35
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
-
__setModuleDefault(result, mod);
|
|
37
|
-
return result;
|
|
38
|
-
};
|
|
39
|
-
})();
|
|
40
7
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
8
|
exports.ToolHandler = exports.tools = void 0;
|
|
42
9
|
exports.getExploreBudget = getExploreBudget;
|
|
43
10
|
exports.getExploreOutputBudget = getExploreOutputBudget;
|
|
44
11
|
exports.formatStaleBanner = formatStaleBanner;
|
|
45
12
|
exports.formatStaleFooter = formatStaleFooter;
|
|
46
|
-
|
|
13
|
+
exports.getStaticTools = getStaticTools;
|
|
14
|
+
const directory_1 = require("../directory");
|
|
15
|
+
// Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
|
|
16
|
+
// helper in engine.ts. ToolHandler must load to answer tools/list (static
|
|
17
|
+
// schemas), but it must NOT drag in sqlite/query layers before the daemon binds;
|
|
18
|
+
// CodeGraph is pulled in only when a tool actually opens a project. require() is
|
|
19
|
+
// sync + cached (CommonJS build).
|
|
20
|
+
const loadCodeGraph = () => require('../index').default;
|
|
47
21
|
const worktree_1 = require("../sync/worktree");
|
|
48
|
-
const
|
|
22
|
+
const query_utils_1 = require("../search/query-utils");
|
|
49
23
|
const fs_1 = require("fs");
|
|
50
24
|
const utils_1 = require("../utils");
|
|
51
25
|
const generated_detection_1 = require("../extraction/generated-detection");
|
|
52
|
-
const os_1 = require("os");
|
|
53
|
-
const pathModule = __importStar(require("path"));
|
|
54
26
|
const path_1 = require("path");
|
|
55
27
|
/** Maximum output length to prevent context bloat (characters) */
|
|
56
28
|
const MAX_OUTPUT_LENGTH = 15000;
|
|
@@ -107,13 +79,24 @@ function getExploreBudget(fileCount) {
|
|
|
107
79
|
return 5;
|
|
108
80
|
}
|
|
109
81
|
function getExploreOutputBudget(fileCount) {
|
|
82
|
+
// Tiered budget, scaled to project size. The budget is a CEILING (relevance
|
|
83
|
+
// still gates WHAT is included), and it MUST stay under the agent's INLINE
|
|
84
|
+
// tool-result cap (~25K chars). Above that, the host externalizes the result
|
|
85
|
+
// to a file the agent then Reads back — re-introducing a read AND the
|
|
86
|
+
// cache-write cost — which is exactly what a 35K vscode explore did in the
|
|
87
|
+
// n=4 README A/B. So even large repos cap at ~24K: the answer is the handful
|
|
88
|
+
// of ~100-line flow windows the agent would have grep-located and read (it
|
|
89
|
+
// natively reads ~6–9 files, median 100-line ranges), NOT a sprawl of 12
|
|
90
|
+
// files. Concentration onto the flow emerges from this cap + the named-file-
|
|
91
|
+
// first sort dropping peripheral files. Invariant: a larger tier must never
|
|
92
|
+
// get a smaller `maxCharsPerFile` than a smaller tier.
|
|
110
93
|
if (fileCount < 150) {
|
|
111
94
|
return {
|
|
112
95
|
// ITER3: revert iter2's aggressive body shrink (forced Read fallback —
|
|
113
96
|
// the per-file 2.5K cap pushed the agent to Read instead of node).
|
|
114
97
|
// Back to the iter1 shape (13K/4/3.8K) but keep the test-file
|
|
115
|
-
// hard-exclude. The cost lever for this tier lives in
|
|
116
|
-
//
|
|
98
|
+
// hard-exclude. The cost lever for this tier lives in steering the
|
|
99
|
+
// agent to stop after 1-2 calls, not in this budget.
|
|
117
100
|
maxOutputChars: 13000,
|
|
118
101
|
defaultMaxFiles: 4,
|
|
119
102
|
maxCharsPerFile: 3800,
|
|
@@ -145,13 +128,11 @@ function getExploreOutputBudget(fileCount) {
|
|
|
145
128
|
}
|
|
146
129
|
if (fileCount < 5000) {
|
|
147
130
|
return {
|
|
148
|
-
//
|
|
149
|
-
//
|
|
150
|
-
//
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
maxOutputChars: 28000,
|
|
154
|
-
defaultMaxFiles: 10,
|
|
131
|
+
// ~150-line per-file window (the native read unit) × ~6 files, capped at
|
|
132
|
+
// the ~24K inline ceiling so the response is never externalized. Per-file
|
|
133
|
+
// stays ≥ the <500 tier (3800) — monotonic.
|
|
134
|
+
maxOutputChars: 24000,
|
|
135
|
+
defaultMaxFiles: 8,
|
|
155
136
|
maxCharsPerFile: 6500,
|
|
156
137
|
gapThreshold: 12,
|
|
157
138
|
maxSymbolsInFileHeader: 10,
|
|
@@ -163,10 +144,14 @@ function getExploreOutputBudget(fileCount) {
|
|
|
163
144
|
excludeLowValueFiles: false,
|
|
164
145
|
};
|
|
165
146
|
}
|
|
147
|
+
// Large + very-large repos: SAME ~24K inline ceiling (a bigger response just
|
|
148
|
+
// externalizes — see vscode). More files indexed → more CALLS via
|
|
149
|
+
// getExploreBudget, not a bigger single response. Per-file 7000 (≥ smaller
|
|
150
|
+
// tiers) gives the central file a ~180-line orientation window.
|
|
166
151
|
if (fileCount < 15000) {
|
|
167
152
|
return {
|
|
168
|
-
maxOutputChars:
|
|
169
|
-
defaultMaxFiles:
|
|
153
|
+
maxOutputChars: 24000,
|
|
154
|
+
defaultMaxFiles: 8,
|
|
170
155
|
maxCharsPerFile: 7000,
|
|
171
156
|
gapThreshold: 15,
|
|
172
157
|
maxSymbolsInFileHeader: 15,
|
|
@@ -179,8 +164,8 @@ function getExploreOutputBudget(fileCount) {
|
|
|
179
164
|
};
|
|
180
165
|
}
|
|
181
166
|
return {
|
|
182
|
-
maxOutputChars:
|
|
183
|
-
defaultMaxFiles:
|
|
167
|
+
maxOutputChars: 24000,
|
|
168
|
+
defaultMaxFiles: 8,
|
|
184
169
|
maxCharsPerFile: 7000,
|
|
185
170
|
gapThreshold: 15,
|
|
186
171
|
maxSymbolsInFileHeader: 15,
|
|
@@ -206,6 +191,21 @@ function getExploreOutputBudget(fileCount) {
|
|
|
206
191
|
function exploreLineNumbersEnabled() {
|
|
207
192
|
return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0';
|
|
208
193
|
}
|
|
194
|
+
/**
|
|
195
|
+
* Adaptive explore sizing (default ON). `codegraph_explore` skeletonizes OFF-SPINE
|
|
196
|
+
* polymorphic-sibling files — a file whose class is one of ≥3 interchangeable
|
|
197
|
+
* implementations of a shared interface (e.g. OkHttp's `: Interceptor` classes) —
|
|
198
|
+
* to class + member signatures (bodies elided), keeping the on-spine exemplar full.
|
|
199
|
+
* This sizes the response to the answer instead of the budget cap on sibling-heavy
|
|
200
|
+
* flows (OkHttp interceptor-chain explore 28.5k→16.6k, ~28% cheaper than native
|
|
201
|
+
* search, reads flat). It is PROVABLY INERT elsewhere: distinct pipeline steps (no
|
|
202
|
+
* ≥3-implementer supertype, e.g. Excalidraw's `renderStaticScene`) and on-spine
|
|
203
|
+
* files keep full source — output is byte-identical to shipped on excalidraw /
|
|
204
|
+
* tokio / django / vscode / gin. Set `CODEGRAPH_ADAPTIVE_EXPLORE=0` to disable.
|
|
205
|
+
*/
|
|
206
|
+
function adaptiveExploreEnabled() {
|
|
207
|
+
return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
|
|
208
|
+
}
|
|
209
209
|
/**
|
|
210
210
|
* Prefix each line of a source slice with its 1-based line number, matching
|
|
211
211
|
* the Read tool's `cat -n` convention (number + tab) so the agent treats it
|
|
@@ -222,55 +222,6 @@ function numberSourceLines(slice, firstLineNumber) {
|
|
|
222
222
|
}
|
|
223
223
|
return out.join('\n');
|
|
224
224
|
}
|
|
225
|
-
/**
|
|
226
|
-
* Mark a Claude session as having consulted MCP tools.
|
|
227
|
-
* This enables Grep/Glob/Bash commands that would otherwise be blocked.
|
|
228
|
-
*
|
|
229
|
-
* Why the explicit openSync + O_NOFOLLOW dance instead of plain writeFileSync:
|
|
230
|
-
* tmpdir() is world-writable on Linux (mode 1777), so on a shared multi-user
|
|
231
|
-
* machine any other local user can pre-create `codegraph-consulted-<hash>` as
|
|
232
|
-
* a symlink pointing at a file the victim owns. The old `writeFileSync` would
|
|
233
|
-
* happily follow that link and overwrite the target's contents with the ISO
|
|
234
|
-
* timestamp string (CWE-59). The session-id hash provides the predictability
|
|
235
|
-
* gate, but it's defense-in-depth: if a session id ever surfaces in logs,
|
|
236
|
-
* argv, or telemetry the attack becomes trivial, and the right fix is to not
|
|
237
|
-
* follow links from /tmp paths in the first place.
|
|
238
|
-
*/
|
|
239
|
-
function markSessionConsulted(sessionId) {
|
|
240
|
-
try {
|
|
241
|
-
const hash = (0, crypto_1.createHash)('md5').update(sessionId).digest('hex').slice(0, 16);
|
|
242
|
-
const markerPath = (0, path_1.join)((0, os_1.tmpdir)(), `codegraph-consulted-${hash}`);
|
|
243
|
-
// Refuse to follow a pre-planted symlink at the marker path (CWE-59).
|
|
244
|
-
// O_NOFOLLOW (below) is the atomic, TOCTOU-free guard on POSIX, but it is
|
|
245
|
-
// `undefined` on Windows (libuv ignores it), so the bitwise-OR silently
|
|
246
|
-
// drops it and openSync would follow the link. This lstat check closes that
|
|
247
|
-
// gap cross-platform; ENOENT (path is free) falls through to create it.
|
|
248
|
-
try {
|
|
249
|
-
if ((0, fs_1.lstatSync)(markerPath).isSymbolicLink())
|
|
250
|
-
return;
|
|
251
|
-
}
|
|
252
|
-
catch {
|
|
253
|
-
// No existing entry (or stat failed) — nothing to refuse; proceed.
|
|
254
|
-
}
|
|
255
|
-
// O_NOFOLLOW makes openSync throw ELOOP if markerPath is already a symlink.
|
|
256
|
-
// O_CREAT + O_TRUNC keep the original "create-or-overwrite" semantics, and
|
|
257
|
-
// mode 0o600 prevents readback by other local users (the marker payload is
|
|
258
|
-
// benign, but narrowing the exposure costs nothing).
|
|
259
|
-
const flags = fs_1.constants.O_WRONLY | fs_1.constants.O_CREAT | fs_1.constants.O_TRUNC | fs_1.constants.O_NOFOLLOW;
|
|
260
|
-
const fd = (0, fs_1.openSync)(markerPath, flags, 0o600);
|
|
261
|
-
try {
|
|
262
|
-
(0, fs_1.writeSync)(fd, new Date().toISOString());
|
|
263
|
-
}
|
|
264
|
-
finally {
|
|
265
|
-
(0, fs_1.closeSync)(fd);
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
catch {
|
|
269
|
-
// Silently fail - don't break MCP on marker write failure. ELOOP from a
|
|
270
|
-
// planted symlink lands here too, which is the intended behavior: refuse
|
|
271
|
-
// to write rather than overwrite an attacker-chosen target.
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
225
|
/**
|
|
275
226
|
* Per-file staleness banner emitted at the top of a tool response when the
|
|
276
227
|
* file watcher has pending events for files referenced by the response.
|
|
@@ -317,15 +268,16 @@ const projectPathProperty = {
|
|
|
317
268
|
/**
|
|
318
269
|
* All CodeGraph MCP tools
|
|
319
270
|
*
|
|
320
|
-
* Designed for minimal context usage - use
|
|
321
|
-
* and only use other tools for
|
|
271
|
+
* Designed for minimal context usage - use codegraph_explore as the primary tool
|
|
272
|
+
* (one call usually answers the whole question), and only use other tools for
|
|
273
|
+
* targeted follow-up queries.
|
|
322
274
|
*
|
|
323
275
|
* All tools support cross-project queries via the optional `projectPath` parameter.
|
|
324
276
|
*/
|
|
325
277
|
exports.tools = [
|
|
326
278
|
{
|
|
327
279
|
name: 'codegraph_search',
|
|
328
|
-
description: 'Quick symbol search by name. Returns locations only (no code). Use
|
|
280
|
+
description: 'Quick symbol search by name. Returns locations only (no code). Use codegraph_explore instead to get the actual source / understand an area in one call.',
|
|
329
281
|
inputSchema: {
|
|
330
282
|
type: 'object',
|
|
331
283
|
properties: {
|
|
@@ -348,34 +300,9 @@ exports.tools = [
|
|
|
348
300
|
required: ['query'],
|
|
349
301
|
},
|
|
350
302
|
},
|
|
351
|
-
{
|
|
352
|
-
name: 'codegraph_context',
|
|
353
|
-
description: 'PRIMARY TOOL — call FIRST for any "how does X work"/architecture/bug question. Returns entry points + related symbols + key code in one call; usually answers without further search/Read/Grep. Provides CODE context, not product requirements.',
|
|
354
|
-
inputSchema: {
|
|
355
|
-
type: 'object',
|
|
356
|
-
properties: {
|
|
357
|
-
task: {
|
|
358
|
-
type: 'string',
|
|
359
|
-
description: 'Description of the task, bug, or feature to build context for',
|
|
360
|
-
},
|
|
361
|
-
maxNodes: {
|
|
362
|
-
type: 'number',
|
|
363
|
-
description: 'Maximum symbols to include (default: 20)',
|
|
364
|
-
default: 20,
|
|
365
|
-
},
|
|
366
|
-
includeCode: {
|
|
367
|
-
type: 'boolean',
|
|
368
|
-
description: 'Include code snippets for key symbols (default: true)',
|
|
369
|
-
default: true,
|
|
370
|
-
},
|
|
371
|
-
projectPath: projectPathProperty,
|
|
372
|
-
},
|
|
373
|
-
required: ['task'],
|
|
374
|
-
},
|
|
375
|
-
},
|
|
376
303
|
{
|
|
377
304
|
name: 'codegraph_callers',
|
|
378
|
-
description: 'List functions that call <symbol>. For
|
|
305
|
+
description: 'List functions that call <symbol>. For the full flow, use codegraph_explore.',
|
|
379
306
|
inputSchema: {
|
|
380
307
|
type: 'object',
|
|
381
308
|
properties: {
|
|
@@ -395,7 +322,7 @@ exports.tools = [
|
|
|
395
322
|
},
|
|
396
323
|
{
|
|
397
324
|
name: 'codegraph_callees',
|
|
398
|
-
description: 'List functions that <symbol> calls. For
|
|
325
|
+
description: 'List functions that <symbol> calls. For the full flow, use codegraph_explore.',
|
|
399
326
|
inputSchema: {
|
|
400
327
|
type: 'object',
|
|
401
328
|
properties: {
|
|
@@ -435,7 +362,7 @@ exports.tools = [
|
|
|
435
362
|
},
|
|
436
363
|
{
|
|
437
364
|
name: 'codegraph_node',
|
|
438
|
-
description: '
|
|
365
|
+
description: 'SECONDARY (after codegraph_explore): get ONE symbol in full — its location, signature, callers/callees trail, and verbatim body (includeCode=true). When the name is AMBIGUOUS (an overloaded method, or the same method name on different types), it returns EVERY matching definition\'s full body in a single call — so you never need to Read a file to find the specific overload you want. For a heavily-overloaded name, pass `file` (and/or `line`) to pin the exact definition — e.g. the `file:line` a trail or another tool already showed you. Reach for this when explore trimmed a body you need. Use codegraph_explore for several related symbols or the full flow.',
|
|
439
366
|
inputSchema: {
|
|
440
367
|
type: 'object',
|
|
441
368
|
properties: {
|
|
@@ -448,6 +375,14 @@ exports.tools = [
|
|
|
448
375
|
description: 'Include full source code (default: false to minimize context)',
|
|
449
376
|
default: false,
|
|
450
377
|
},
|
|
378
|
+
file: {
|
|
379
|
+
type: 'string',
|
|
380
|
+
description: 'Optional: disambiguate an overloaded name to the definition in this file (path or basename, e.g. "harness.rs").',
|
|
381
|
+
},
|
|
382
|
+
line: {
|
|
383
|
+
type: 'number',
|
|
384
|
+
description: 'Optional: disambiguate to the definition at/around this line (use with the file:line a trail showed you).',
|
|
385
|
+
},
|
|
451
386
|
projectPath: projectPathProperty,
|
|
452
387
|
},
|
|
453
388
|
required: ['symbol'],
|
|
@@ -455,7 +390,7 @@ exports.tools = [
|
|
|
455
390
|
},
|
|
456
391
|
{
|
|
457
392
|
name: 'codegraph_explore',
|
|
458
|
-
description: '
|
|
393
|
+
description: 'PRIMARY TOOL — call FIRST for almost any question: how does X work, architecture, a bug, where/what is X, or surveying an area. Returns the verbatim source of the relevant symbols grouped by file in ONE capped call (Read-equivalent — do NOT re-open shown files). Query can be a natural-language question OR a bag of symbol/file names. Usually the ONLY call you need — answers without further search/node/Read/Grep.',
|
|
459
394
|
inputSchema: {
|
|
460
395
|
type: 'object',
|
|
461
396
|
properties: {
|
|
@@ -516,26 +451,20 @@ exports.tools = [
|
|
|
516
451
|
},
|
|
517
452
|
},
|
|
518
453
|
},
|
|
519
|
-
{
|
|
520
|
-
name: 'codegraph_trace',
|
|
521
|
-
description: 'Call path between two symbols — "how does <from> reach <to>?" Returns the chain with each hop\'s body inlined plus the destination\'s callees, in ONE call. Ideal for flow questions (update→render, request→handler, QuerySet→SQL). If no static path exists the chain broke at dynamic dispatch — the failure response inlines both endpoints + their TO-file siblings.',
|
|
522
|
-
inputSchema: {
|
|
523
|
-
type: 'object',
|
|
524
|
-
properties: {
|
|
525
|
-
from: {
|
|
526
|
-
type: 'string',
|
|
527
|
-
description: 'Symbol the flow starts at (e.g., "QuerySet", "handleRequest", "mutateElement")',
|
|
528
|
-
},
|
|
529
|
-
to: {
|
|
530
|
-
type: 'string',
|
|
531
|
-
description: 'Symbol the flow should reach (e.g., "execute_sql", "render", "setState")',
|
|
532
|
-
},
|
|
533
|
-
projectPath: projectPathProperty,
|
|
534
|
-
},
|
|
535
|
-
required: ['from', 'to'],
|
|
536
|
-
},
|
|
537
|
-
},
|
|
538
454
|
];
|
|
455
|
+
/**
|
|
456
|
+
* Allowlist-filtered tool definitions WITHOUT an engine — the static surface the
|
|
457
|
+
* proxy answers `tools/list` with before any project is open. Mirrors
|
|
458
|
+
* `ToolHandler.getTools()` in the no-CodeGraph case (the dynamic per-repo budget
|
|
459
|
+
* note in a description only adds once `cg` is loaded; the schemas are static).
|
|
460
|
+
*/
|
|
461
|
+
function getStaticTools() {
|
|
462
|
+
const raw = process.env.CODEGRAPH_MCP_TOOLS;
|
|
463
|
+
if (!raw || !raw.trim())
|
|
464
|
+
return exports.tools;
|
|
465
|
+
const allow = new Set(raw.split(',').map(s => s.trim().replace(/^codegraph_/, '')).filter(Boolean));
|
|
466
|
+
return allow.size ? exports.tools.filter(t => allow.has(t.name.replace(/^codegraph_/, ''))) : exports.tools;
|
|
467
|
+
}
|
|
539
468
|
/**
|
|
540
469
|
* Tool handler that executes tools against a CodeGraph instance
|
|
541
470
|
*
|
|
@@ -601,7 +530,7 @@ class ToolHandler {
|
|
|
601
530
|
* Unset/empty → every tool is exposed. Lets an operator (or an A/B harness)
|
|
602
531
|
* trim the tool surface without rebuilding the client config; the ablated
|
|
603
532
|
* tool is then truly absent from ListTools rather than merely denied on call.
|
|
604
|
-
* Matching is on the short form, so "
|
|
533
|
+
* Matching is on the short form, so "node" and "codegraph_node" both work.
|
|
605
534
|
*/
|
|
606
535
|
toolAllowlist() {
|
|
607
536
|
const raw = process.env.CODEGRAPH_MCP_TOOLS;
|
|
@@ -656,11 +585,9 @@ class ToolHandler {
|
|
|
656
585
|
// so it deserves the same gating.
|
|
657
586
|
const TINY_REPO_FILE_THRESHOLD = 500;
|
|
658
587
|
const TINY_REPO_CORE_TOOLS = new Set([
|
|
588
|
+
'codegraph_explore',
|
|
659
589
|
'codegraph_search',
|
|
660
|
-
'codegraph_context',
|
|
661
590
|
'codegraph_node',
|
|
662
|
-
'codegraph_explore',
|
|
663
|
-
'codegraph_trace',
|
|
664
591
|
]);
|
|
665
592
|
if (stats.fileCount < TINY_REPO_FILE_THRESHOLD) {
|
|
666
593
|
visible = visible.filter(t => TINY_REPO_CORE_TOOLS.has(t.name));
|
|
@@ -718,7 +645,7 @@ class ToolHandler {
|
|
|
718
645
|
}
|
|
719
646
|
}
|
|
720
647
|
// Walk up parent directories to find nearest .codegraph/
|
|
721
|
-
const resolvedRoot = (0,
|
|
648
|
+
const resolvedRoot = (0, directory_1.findNearestCodeGraphRoot)(projectPath);
|
|
722
649
|
if (!resolvedRoot) {
|
|
723
650
|
throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
|
|
724
651
|
}
|
|
@@ -740,7 +667,7 @@ class ToolHandler {
|
|
|
740
667
|
return cg;
|
|
741
668
|
}
|
|
742
669
|
// Open and cache under both paths
|
|
743
|
-
const cg =
|
|
670
|
+
const cg = loadCodeGraph().openSync(resolvedRoot);
|
|
744
671
|
this.projectCache.set(resolvedRoot, cg);
|
|
745
672
|
if (projectPath !== resolvedRoot) {
|
|
746
673
|
this.projectCache.set(projectPath, cg);
|
|
@@ -969,9 +896,6 @@ class ToolHandler {
|
|
|
969
896
|
case 'codegraph_search':
|
|
970
897
|
result = await this.handleSearch(args);
|
|
971
898
|
break;
|
|
972
|
-
case 'codegraph_context':
|
|
973
|
-
result = await this.handleContext(args);
|
|
974
|
-
break;
|
|
975
899
|
case 'codegraph_callers':
|
|
976
900
|
result = await this.handleCallers(args);
|
|
977
901
|
break;
|
|
@@ -995,9 +919,6 @@ class ToolHandler {
|
|
|
995
919
|
case 'codegraph_files':
|
|
996
920
|
result = await this.handleFiles(args);
|
|
997
921
|
break;
|
|
998
|
-
case 'codegraph_trace':
|
|
999
|
-
result = await this.handleTrace(args);
|
|
1000
|
-
break;
|
|
1001
922
|
default:
|
|
1002
923
|
return this.errorResult(`Unknown tool: ${toolName}`);
|
|
1003
924
|
}
|
|
@@ -1037,261 +958,6 @@ class ToolHandler {
|
|
|
1037
958
|
const formatted = this.formatSearchResults(ranked);
|
|
1038
959
|
return this.textResult(this.truncateOutput(formatted));
|
|
1039
960
|
}
|
|
1040
|
-
/**
|
|
1041
|
-
* Handle codegraph_context
|
|
1042
|
-
*/
|
|
1043
|
-
async handleContext(args) {
|
|
1044
|
-
const task = this.validateString(args.task, 'task');
|
|
1045
|
-
if (typeof task !== 'string')
|
|
1046
|
-
return task;
|
|
1047
|
-
// Mark session as consulted (enables Grep/Glob/Bash)
|
|
1048
|
-
const sessionId = process.env.CLAUDE_SESSION_ID;
|
|
1049
|
-
if (sessionId) {
|
|
1050
|
-
markSessionConsulted(sessionId);
|
|
1051
|
-
}
|
|
1052
|
-
const cg = this.getCodeGraph(args.projectPath);
|
|
1053
|
-
// On tiny repos (<150 files), trim maxNodes hard — the entire repo
|
|
1054
|
-
// is grep-able in a turn so a 20-node context is wasted budget.
|
|
1055
|
-
// 8 covers the typical 1-3 entry-point + their immediate neighbors
|
|
1056
|
-
// without dragging in the rest of the small codebase.
|
|
1057
|
-
let defaultMaxNodes = 20;
|
|
1058
|
-
let isTinyRepo = false;
|
|
1059
|
-
let isSmallRepo = false;
|
|
1060
|
-
try {
|
|
1061
|
-
const stats = cg.getStats();
|
|
1062
|
-
if (stats.fileCount < 150) {
|
|
1063
|
-
defaultMaxNodes = 8;
|
|
1064
|
-
isTinyRepo = true;
|
|
1065
|
-
}
|
|
1066
|
-
else if (stats.fileCount < 500) {
|
|
1067
|
-
isSmallRepo = true;
|
|
1068
|
-
}
|
|
1069
|
-
}
|
|
1070
|
-
catch {
|
|
1071
|
-
// stats failure — fall back to the standard default
|
|
1072
|
-
}
|
|
1073
|
-
const maxNodes = args.maxNodes || defaultMaxNodes;
|
|
1074
|
-
const includeCode = args.includeCode !== false;
|
|
1075
|
-
const context = await cg.buildContext(task, {
|
|
1076
|
-
maxNodes,
|
|
1077
|
-
includeCode,
|
|
1078
|
-
format: 'markdown',
|
|
1079
|
-
});
|
|
1080
|
-
// Detect if this looks like a feature request (vs bug fix or exploration)
|
|
1081
|
-
const isFeatureQuery = this.looksLikeFeatureRequest(task);
|
|
1082
|
-
const reminder = isFeatureQuery
|
|
1083
|
-
? '\n\n⚠️ **Ask user:** UX preferences, edge cases, acceptance criteria'
|
|
1084
|
-
: '';
|
|
1085
|
-
// Auto-trace for flow queries: when the task is asking "how does X
|
|
1086
|
-
// reach/flow/propagate from A to B", run the trace internally and
|
|
1087
|
-
// append its body to the context response. Saves the agent the
|
|
1088
|
-
// follow-up codegraph_trace call that was the #2 cost driver on
|
|
1089
|
-
// multi-module flow questions (Q3 / etcd Q2 in the audit).
|
|
1090
|
-
const flowTrace = await this.maybeInlineFlowTrace(task, cg);
|
|
1091
|
-
// Iter3 — sufficiency steering on small repos.
|
|
1092
|
-
//
|
|
1093
|
-
// Measured economics on tiny (<150) and small (<500) projects: every
|
|
1094
|
-
// additional MCP tool call costs ~$0.02-0.05 in cache-write tokens
|
|
1095
|
-
// (5K-15K per response at $3.75/1M). The agent reflexively follows
|
|
1096
|
-
// codegraph_context with explore/node even when the context response
|
|
1097
|
-
// is already sufficient — that pattern drove the cost gap that
|
|
1098
|
-
// smaller bodies (iter2) failed to close (smaller bodies just shifted
|
|
1099
|
-
// the agent to Read instead). Direct directive on small-repo
|
|
1100
|
-
// responses: tell the agent the context call IS the comprehensive
|
|
1101
|
-
// pass for a project of this size and that follow-ups should be
|
|
1102
|
-
// narrow (trace from→to, node single-symbol) — not another broad
|
|
1103
|
-
// explore that re-bundles the same content.
|
|
1104
|
-
// ITER4: unified strong directive for both tiny (<150) and small
|
|
1105
|
-
// (<500) tiers — measured iter3 result was that the soft <500
|
|
1106
|
-
// wording was IGNORED on sinatra (5 tool calls, +92% loss) while
|
|
1107
|
-
// the strong <150 wording was followed on cobra/slim (3 calls,
|
|
1108
|
-
// -21%/-22% wins). The single-file-framework problem (sinatra)
|
|
1109
|
-
// is structurally the same as cobra's; both deserve the same
|
|
1110
|
-
// sufficiency steering.
|
|
1111
|
-
let smallRepoTail = '';
|
|
1112
|
-
let smallRepoRouteInline = '';
|
|
1113
|
-
if (isTinyRepo || isSmallRepo) {
|
|
1114
|
-
// Iter12: backend-computed routing manifest for routing queries.
|
|
1115
|
-
// Builds a URL → handler map directly from the graph (each route
|
|
1116
|
-
// node has a `references` edge to its handler), then inlines the
|
|
1117
|
-
// top handler file's source. The agent gets the canonical
|
|
1118
|
-
// routing answer in one MCP call — no need to parse framework
|
|
1119
|
-
// DSL or grep for handlers.
|
|
1120
|
-
//
|
|
1121
|
-
// Replaces iter10's raw route-file inline. The manifest is more
|
|
1122
|
-
// information-dense (parsed URL→handler map vs raw config DSL)
|
|
1123
|
-
// and we still inline the top handler file's source so the agent
|
|
1124
|
-
// has the implementation bodies inline too.
|
|
1125
|
-
const isRouteQuery = /\b(route|routes|routing|request|handler|endpoint|api|controller|middleware|dispatch|invok)/i.test(task);
|
|
1126
|
-
if (isRouteQuery) {
|
|
1127
|
-
try {
|
|
1128
|
-
const manifest = cg.getRoutingManifest(40);
|
|
1129
|
-
if (manifest) {
|
|
1130
|
-
// 1) Compact URL→handler list (~30-60 lines, ~1-2KB).
|
|
1131
|
-
const lines = [
|
|
1132
|
-
`\n\n## Routing manifest (${manifest.totalRoutes} routes, top handler file holds ${manifest.topHandlerFileCount})`,
|
|
1133
|
-
'',
|
|
1134
|
-
'| URL | Handler | Location |',
|
|
1135
|
-
'|---|---|---|',
|
|
1136
|
-
];
|
|
1137
|
-
for (const e of manifest.entries) {
|
|
1138
|
-
lines.push(`| \`${e.url}\` | \`${e.handler}\` | ${e.handlerFile}:${e.handlerLine} |`);
|
|
1139
|
-
}
|
|
1140
|
-
// 2) Inline the top handler file's source.
|
|
1141
|
-
if (manifest.topHandlerFile && manifest.topHandlerFileCount >= 2) {
|
|
1142
|
-
try {
|
|
1143
|
-
const fullPath = pathModule.join(cg.getProjectRoot(), manifest.topHandlerFile);
|
|
1144
|
-
const stat = (0, fs_1.statSync)(fullPath);
|
|
1145
|
-
if (stat.size > 0 && stat.size <= 16000) {
|
|
1146
|
-
const source = (0, fs_1.readFileSync)(fullPath, 'utf-8');
|
|
1147
|
-
const capped = source.length > 7000 ? source.slice(0, 7000) + '\n... (truncated)' : source;
|
|
1148
|
-
const ext = (manifest.topHandlerFile.match(/\.([a-z]+)$/i)?.[1] || '').toLowerCase();
|
|
1149
|
-
const lang = ext === 'rb' ? 'ruby' : ext === 'py' ? 'python' :
|
|
1150
|
-
ext === 'go' ? 'go' : ext === 'rs' ? 'rust' :
|
|
1151
|
-
ext === 'js' || ext === 'jsx' ? 'javascript' :
|
|
1152
|
-
ext === 'ts' || ext === 'tsx' ? 'typescript' :
|
|
1153
|
-
ext === 'java' ? 'java' : ext === 'kt' ? 'kotlin' :
|
|
1154
|
-
ext === 'cs' ? 'csharp' : ext === 'php' ? 'php' :
|
|
1155
|
-
ext === 'swift' ? 'swift' : ext === 'yml' || ext === 'yaml' ? 'yaml' : '';
|
|
1156
|
-
lines.push('');
|
|
1157
|
-
lines.push(`### Top handler file (\`${manifest.topHandlerFile}\` — ${manifest.topHandlerFileCount}/${manifest.totalRoutes} routes, full source inlined — do NOT Read)`);
|
|
1158
|
-
lines.push('');
|
|
1159
|
-
lines.push('```' + lang);
|
|
1160
|
-
lines.push(capped);
|
|
1161
|
-
lines.push('```');
|
|
1162
|
-
}
|
|
1163
|
-
}
|
|
1164
|
-
catch { /* file read failed, skip the source inline */ }
|
|
1165
|
-
}
|
|
1166
|
-
smallRepoRouteInline = lines.join('\n');
|
|
1167
|
-
}
|
|
1168
|
-
}
|
|
1169
|
-
catch {
|
|
1170
|
-
// Manifest build failed — drop silently
|
|
1171
|
-
}
|
|
1172
|
-
}
|
|
1173
|
-
const sizeQualifier = isTinyRepo ? 'under 150' : 'under 500';
|
|
1174
|
-
const routingClause = smallRepoRouteInline
|
|
1175
|
-
? ' The URL→handler manifest and top handler file are also inlined above — answer routing questions from them.'
|
|
1176
|
-
: '';
|
|
1177
|
-
smallRepoTail = `\n\n---\n> **This project is small** (${sizeQualifier} indexed files). The entry points and code above cover the relevant surface — **do NOT call codegraph_explore as a follow-up; its content will largely duplicate this response**. If you need a specific flow, call \`codegraph_trace from→to\`. If you need one specific symbol's body, call \`codegraph_node <name>\`.${routingClause} Otherwise, answer from what is above.`;
|
|
1178
|
-
}
|
|
1179
|
-
// buildContext returns string when format is 'markdown'
|
|
1180
|
-
if (typeof context === 'string') {
|
|
1181
|
-
return this.textResult(this.truncateOutput(context + flowTrace + reminder + smallRepoRouteInline + smallRepoTail));
|
|
1182
|
-
}
|
|
1183
|
-
// If it returns TaskContext, format it
|
|
1184
|
-
return this.textResult(this.truncateOutput(this.formatTaskContext(context) + flowTrace + reminder + smallRepoRouteInline + smallRepoTail));
|
|
1185
|
-
}
|
|
1186
|
-
/**
|
|
1187
|
-
* Detect a flow-style task ("how does X reach Y", "trace the path from A to B")
|
|
1188
|
-
* and pre-run trace between the most likely endpoints, returning the trace
|
|
1189
|
-
* body to splice into the context response. Returns '' for non-flow queries
|
|
1190
|
-
* or when no plausible endpoint pair can be extracted.
|
|
1191
|
-
*
|
|
1192
|
-
* Conservative by design: only fires when the task has both a clear flow
|
|
1193
|
-
* keyword AND at least two distinct PascalCase / camelCase identifiers.
|
|
1194
|
-
* False positives waste a graph query; false negatives just fall back to
|
|
1195
|
-
* the agent calling trace itself (existing path-proximity wiring handles
|
|
1196
|
-
* disambiguation either way).
|
|
1197
|
-
*/
|
|
1198
|
-
async maybeInlineFlowTrace(task, cg) {
|
|
1199
|
-
const lower = task.toLowerCase();
|
|
1200
|
-
const FLOW_KEYWORDS = [
|
|
1201
|
-
'trace ',
|
|
1202
|
-
'from ',
|
|
1203
|
-
'reach ',
|
|
1204
|
-
'flow ',
|
|
1205
|
-
'propagat',
|
|
1206
|
-
'how does ',
|
|
1207
|
-
'how do ',
|
|
1208
|
-
];
|
|
1209
|
-
if (!FLOW_KEYWORDS.some((k) => lower.includes(k)))
|
|
1210
|
-
return '';
|
|
1211
|
-
// Extract candidate symbols — PascalCase or camelCase identifiers ≥3 chars.
|
|
1212
|
-
// Filter out common non-symbol words and the flow keywords themselves.
|
|
1213
|
-
const STOP_WORDS = new Set([
|
|
1214
|
-
'how', 'does', 'the', 'and', 'from', 'through', 'reach', 'reaches',
|
|
1215
|
-
'flow', 'path', 'trace', 'cross', 'module', 'modules', 'where',
|
|
1216
|
-
'update', 'updates', 'updated', 'when', 'what', 'this', 'that',
|
|
1217
|
-
]);
|
|
1218
|
-
const ids = [];
|
|
1219
|
-
const seen = new Set();
|
|
1220
|
-
const re = /\b([A-Z][a-z]+(?:[A-Z][a-z]*)+|[a-z]+[A-Z][a-z]*(?:[A-Z][a-z]*)*)\b/g;
|
|
1221
|
-
let m;
|
|
1222
|
-
while ((m = re.exec(task)) !== null) {
|
|
1223
|
-
const sym = m[1];
|
|
1224
|
-
if (sym.length < 3)
|
|
1225
|
-
continue;
|
|
1226
|
-
const key = sym.toLowerCase();
|
|
1227
|
-
if (STOP_WORDS.has(key) || seen.has(key))
|
|
1228
|
-
continue;
|
|
1229
|
-
seen.add(key);
|
|
1230
|
-
ids.push(sym);
|
|
1231
|
-
}
|
|
1232
|
-
if (ids.length < 2)
|
|
1233
|
-
return '';
|
|
1234
|
-
// The first two distinct symbols, in order of appearance, are the most
|
|
1235
|
-
// likely from/to endpoints — "from X ... through to Y" naturally places
|
|
1236
|
-
// them in that order in the prose. If the trace fails to connect, it
|
|
1237
|
-
// still returns the inlined endpoint bodies (the trace-failure rewrite).
|
|
1238
|
-
const fromSym = ids[0];
|
|
1239
|
-
const toSym = ids[1];
|
|
1240
|
-
let traceResult;
|
|
1241
|
-
try {
|
|
1242
|
-
traceResult = await this.handleTrace({
|
|
1243
|
-
from: fromSym,
|
|
1244
|
-
to: toSym,
|
|
1245
|
-
projectPath: cg.getProjectRoot(),
|
|
1246
|
-
});
|
|
1247
|
-
}
|
|
1248
|
-
catch {
|
|
1249
|
-
return '';
|
|
1250
|
-
}
|
|
1251
|
-
// Extract the textual body. Defensive: handleTrace's contract is the
|
|
1252
|
-
// standard tool-result shape used elsewhere in this file.
|
|
1253
|
-
const body = traceResult.content
|
|
1254
|
-
?.map((c) => (c.type === 'text' ? c.text : ''))
|
|
1255
|
-
.filter(Boolean)
|
|
1256
|
-
.join('\n')
|
|
1257
|
-
.trim();
|
|
1258
|
-
if (!body)
|
|
1259
|
-
return '';
|
|
1260
|
-
return [
|
|
1261
|
-
'',
|
|
1262
|
-
'## Inline flow trace',
|
|
1263
|
-
'',
|
|
1264
|
-
`Auto-traced \`${fromSym}\` → \`${toSym}\` because the query looks like a flow question. No follow-up codegraph_trace is needed for this pair.`,
|
|
1265
|
-
'',
|
|
1266
|
-
body,
|
|
1267
|
-
].join('\n');
|
|
1268
|
-
}
|
|
1269
|
-
/**
|
|
1270
|
-
* Heuristic to detect if a query looks like a feature request
|
|
1271
|
-
*/
|
|
1272
|
-
looksLikeFeatureRequest(task) {
|
|
1273
|
-
const featureKeywords = [
|
|
1274
|
-
'add', 'create', 'implement', 'build', 'enable', 'allow',
|
|
1275
|
-
'new feature', 'support for', 'ability to', 'want to',
|
|
1276
|
-
'should be able', 'need to add', 'swap', 'edit', 'modify'
|
|
1277
|
-
];
|
|
1278
|
-
const bugKeywords = [
|
|
1279
|
-
'fix', 'bug', 'error', 'broken', 'crash', 'issue', 'problem',
|
|
1280
|
-
'not working', 'fails', 'undefined', 'null'
|
|
1281
|
-
];
|
|
1282
|
-
const explorationKeywords = [
|
|
1283
|
-
'how does', 'where is', 'what is', 'find', 'show me',
|
|
1284
|
-
'explain', 'understand', 'explore'
|
|
1285
|
-
];
|
|
1286
|
-
const lowerTask = task.toLowerCase();
|
|
1287
|
-
// If it's clearly a bug or exploration, not a feature
|
|
1288
|
-
if (bugKeywords.some(k => lowerTask.includes(k)))
|
|
1289
|
-
return false;
|
|
1290
|
-
if (explorationKeywords.some(k => lowerTask.includes(k)))
|
|
1291
|
-
return false;
|
|
1292
|
-
// If it matches feature keywords, it's likely a feature request
|
|
1293
|
-
return featureKeywords.some(k => lowerTask.includes(k));
|
|
1294
|
-
}
|
|
1295
961
|
/**
|
|
1296
962
|
* Handle codegraph_callers
|
|
1297
963
|
*/
|
|
@@ -1390,275 +1056,6 @@ class ToolHandler {
|
|
|
1390
1056
|
const formatted = this.formatImpact(symbol, mergedImpact) + allMatches.note;
|
|
1391
1057
|
return this.textResult(this.truncateOutput(formatted));
|
|
1392
1058
|
}
|
|
1393
|
-
/**
|
|
1394
|
-
* Handle codegraph_trace — shortest CALL PATH between two symbols.
|
|
1395
|
-
*
|
|
1396
|
-
* Exposes GraphTraverser.findPath: the chain of functions from `from` to `to`,
|
|
1397
|
-
* each hop annotated with file:line and the call-site line. This is the
|
|
1398
|
-
* capability grep/Read structurally cannot provide. When no static path
|
|
1399
|
-
* exists, the chain has almost certainly broken at dynamic dispatch
|
|
1400
|
-
* (callbacks, descriptors, metaclasses) — we say so and surface the start
|
|
1401
|
-
* symbol's outgoing calls so the agent bridges the one missing hop with
|
|
1402
|
-
* codegraph_node rather than blindly reading.
|
|
1403
|
-
*/
|
|
1404
|
-
async handleTrace(args) {
|
|
1405
|
-
const from = this.validateString(args.from, 'from');
|
|
1406
|
-
if (typeof from !== 'string')
|
|
1407
|
-
return from;
|
|
1408
|
-
const to = this.validateString(args.to, 'to');
|
|
1409
|
-
if (typeof to !== 'string')
|
|
1410
|
-
return to;
|
|
1411
|
-
const cg = this.getCodeGraph(args.projectPath);
|
|
1412
|
-
const fromMatches = this.findAllSymbols(cg, from);
|
|
1413
|
-
if (fromMatches.nodes.length === 0)
|
|
1414
|
-
return this.textResult(`Symbol "${from}" not found in the codebase`);
|
|
1415
|
-
const toMatches = this.findAllSymbols(cg, to);
|
|
1416
|
-
if (toMatches.nodes.length === 0)
|
|
1417
|
-
return this.textResult(`Symbol "${to}" not found in the codebase`);
|
|
1418
|
-
// Trace along call edges only — a true call path. Names can map to several
|
|
1419
|
-
// nodes, so try a few from×to candidate pairs until a usable path turns up.
|
|
1420
|
-
//
|
|
1421
|
-
// MAX_HOPS guard: a BFS shortest path longer than this on a dense call graph
|
|
1422
|
-
// is almost always a spurious wander through unrelated code (django's
|
|
1423
|
-
// `_fetch_all → … → execute_sql` BFS detours through prefetch/filter), not
|
|
1424
|
-
// the real execution flow — and a confident-but-wrong 15-hop trace is worse
|
|
1425
|
-
// than none. Over-cap paths are rejected and reported as "no direct path"
|
|
1426
|
-
// (which, on real code, means the flow breaks at dynamic dispatch).
|
|
1427
|
-
const edgeKinds = ['calls'];
|
|
1428
|
-
const MAX_HOPS = 7;
|
|
1429
|
-
// Path-proximity pairing: in a multi-module repo a symbol name like
|
|
1430
|
-
// `EndBlocker` exists in 20+ modules. FTS picks one almost arbitrarily;
|
|
1431
|
-
// the WRONG pair (e.g. simapp's wrapper EndBlocker paired with gov's Tally)
|
|
1432
|
-
// has no static path, falls through to the dynamic-dispatch failure branch,
|
|
1433
|
-
// and surfaces unrelated bodies — exactly the cosmos-Q3 trace failure mode.
|
|
1434
|
-
// Score every from×to combo by shared file-path prefix length; try the
|
|
1435
|
-
// most-co-located pair first (e.g. `x/gov/abci.go::EndBlocker` ×
|
|
1436
|
-
// `x/gov/keeper/tally.go::Tally` share `x/gov/`).
|
|
1437
|
-
//
|
|
1438
|
-
// Consider the FULL candidate set, not just the FTS top-5: the right
|
|
1439
|
-
// EndBlocker for a gov-module flow may rank 8th in FTS but share the
|
|
1440
|
-
// entire `x/gov/` prefix with the destination. Path-proximity supersedes
|
|
1441
|
-
// FTS for this disambiguation. Findpath trials are still capped by
|
|
1442
|
-
// FINDPATH_PAIR_BUDGET below to bound graph traversal cost.
|
|
1443
|
-
const sharedDirPrefixLen = (a, b) => {
|
|
1444
|
-
const aDir = a.replace(/[^/]+$/, '');
|
|
1445
|
-
const bDir = b.replace(/[^/]+$/, '');
|
|
1446
|
-
let i = 0;
|
|
1447
|
-
while (i < aDir.length && i < bDir.length && aDir[i] === bDir[i])
|
|
1448
|
-
i++;
|
|
1449
|
-
return i;
|
|
1450
|
-
};
|
|
1451
|
-
// Cosmos-Q3 surfaced a second-order failure: `enterprise/group/x/group/`
|
|
1452
|
-
// SHARES MORE of its path with `enterprise/group/x/group/keeper/tally.go`
|
|
1453
|
-
// (24 chars) than `x/gov/abci.go` shares with `x/gov/keeper/tally.go`
|
|
1454
|
-
// (6 chars), so pure shared-prefix prefers the side-experiment module
|
|
1455
|
-
// over the canonical one — even though the user's question is clearly
|
|
1456
|
-
// about the main gov module. Penalize candidates living under prefixes
|
|
1457
|
-
// that conventionally hold extensions / experiments / vendored code, so
|
|
1458
|
-
// the canonical-path pair wins even when its shared prefix is short.
|
|
1459
|
-
const isLessCanonicalPath = (p) => /^(enterprise|contrib|examples?|sample|playground|vendor|third[_-]?party|deprecated|legacy)\//i.test(p);
|
|
1460
|
-
const LESS_CANONICAL_PENALTY = 100; // any canonical candidate beats any less-canonical one
|
|
1461
|
-
const scorePair = (a, b) => sharedDirPrefixLen(a, b)
|
|
1462
|
-
- (isLessCanonicalPath(a) ? LESS_CANONICAL_PENALTY : 0)
|
|
1463
|
-
- (isLessCanonicalPath(b) ? LESS_CANONICAL_PENALTY : 0);
|
|
1464
|
-
const fromCands = fromMatches.nodes;
|
|
1465
|
-
const toCands = toMatches.nodes;
|
|
1466
|
-
const pairs = [];
|
|
1467
|
-
for (const f of fromCands) {
|
|
1468
|
-
for (const t of toCands) {
|
|
1469
|
-
pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) });
|
|
1470
|
-
}
|
|
1471
|
-
}
|
|
1472
|
-
// Sort by shared prefix desc, then by FTS order (already encoded in the
|
|
1473
|
-
// pairs' insertion order — both for f and t). The tiebreaker preserves
|
|
1474
|
-
// findAllSymbols' generated-file-last ranking.
|
|
1475
|
-
pairs.sort((a, b) => b.score - a.score);
|
|
1476
|
-
// Cap how many graph-path probes we attempt so a 50×50 cross-product
|
|
1477
|
-
// doesn't blow up on a god-named symbol like `Get` (well-named flows have
|
|
1478
|
-
// their good pair near the top of the sort anyway).
|
|
1479
|
-
const FINDPATH_PAIR_BUDGET = 20;
|
|
1480
|
-
const fromTry = fromCands;
|
|
1481
|
-
const toTry = toCands;
|
|
1482
|
-
let path = null;
|
|
1483
|
-
let overCap = null;
|
|
1484
|
-
let bestPair = null;
|
|
1485
|
-
let triedPairs = 0;
|
|
1486
|
-
for (const { f, t } of pairs) {
|
|
1487
|
-
if (path)
|
|
1488
|
-
break;
|
|
1489
|
-
if (triedPairs >= FINDPATH_PAIR_BUDGET)
|
|
1490
|
-
break;
|
|
1491
|
-
triedPairs++;
|
|
1492
|
-
const p = cg.findPath(f.id, t.id, edgeKinds);
|
|
1493
|
-
if (p && p.length > 1) {
|
|
1494
|
-
if (p.length <= MAX_HOPS) {
|
|
1495
|
-
path = p;
|
|
1496
|
-
bestPair = { f, t };
|
|
1497
|
-
break;
|
|
1498
|
-
}
|
|
1499
|
-
if (!overCap || p.length < overCap.length) {
|
|
1500
|
-
overCap = p;
|
|
1501
|
-
bestPair = { f, t };
|
|
1502
|
-
}
|
|
1503
|
-
}
|
|
1504
|
-
else if (!bestPair) {
|
|
1505
|
-
// No path yet — remember the top-scored pair so the failure branch
|
|
1506
|
-
// surfaces the most-co-located candidates' bodies, not whatever FTS
|
|
1507
|
-
// happened to put first.
|
|
1508
|
-
bestPair = { f, t };
|
|
1509
|
-
}
|
|
1510
|
-
}
|
|
1511
|
-
if (!path) {
|
|
1512
|
-
// No static path — almost always a dynamic-dispatch break. INSTEAD of
|
|
1513
|
-
// telling the agent to chase the gap with codegraph_node/callers/callees
|
|
1514
|
-
// (which fans out into 3-4 follow-up tool calls + a Read), inline the
|
|
1515
|
-
// material those would have returned right here. Measured on cosmos-Q3:
|
|
1516
|
-
// the failed-trace + subsequent fan-out used to cost ~2× a single
|
|
1517
|
-
// sufficient trace call; this branch closes that gap.
|
|
1518
|
-
// Prefer the path-proximity-best pair we identified above (e.g. gov's
|
|
1519
|
-
// EndBlocker × gov's Tally) over the FTS top-pick (simapp's wrapper).
|
|
1520
|
-
const start = bestPair?.f ?? fromTry[0];
|
|
1521
|
-
const end = bestPair?.t ?? toTry[0];
|
|
1522
|
-
const fileCache = new Map();
|
|
1523
|
-
const lines = [
|
|
1524
|
-
`No direct static call path from "${from}" to "${to}" — the chain almost certainly breaks at dynamic dispatch (a callback / interface dispatch / framework hook / metaclass). Both endpoint bodies + their immediate neighbors are inlined below; answer from them — a follow-up codegraph_node/callers/callees on these would just return what is already here.`,
|
|
1525
|
-
'',
|
|
1526
|
-
];
|
|
1527
|
-
if (overCap) {
|
|
1528
|
-
lines.push(`> Indirect chain of ${overCap.length} hops exists but is over the ${MAX_HOPS}-hop cap (usually a BFS wander through unrelated code, not the real execution flow).`, '');
|
|
1529
|
-
}
|
|
1530
|
-
// Track which node IDs we've already inlined a body for so we don't
|
|
1531
|
-
// double-emit when a callee of FROM is also surfaced separately.
|
|
1532
|
-
const inlinedBodies = new Set();
|
|
1533
|
-
const inlineBody = (n, lineCap, charCap) => {
|
|
1534
|
-
if (inlinedBodies.has(n.id))
|
|
1535
|
-
return false;
|
|
1536
|
-
inlinedBodies.add(n.id);
|
|
1537
|
-
const body = this.sourceRangeAt(cg, n.filePath, n.startLine, n.endLine, fileCache, lineCap, charCap);
|
|
1538
|
-
if (body) {
|
|
1539
|
-
lines.push(body);
|
|
1540
|
-
return true;
|
|
1541
|
-
}
|
|
1542
|
-
return false;
|
|
1543
|
-
};
|
|
1544
|
-
const inlineEndpoint = (label, node) => {
|
|
1545
|
-
lines.push(`### ${label}: \`${node.name}\` (${node.filePath}:${node.startLine}-${node.endLine})`);
|
|
1546
|
-
inlineBody(node, 120, 3600);
|
|
1547
|
-
const callers = cg.getCallers(node.id).slice(0, 6);
|
|
1548
|
-
if (callers.length > 0) {
|
|
1549
|
-
lines.push(`**Callers of \`${node.name}\`:** ` +
|
|
1550
|
-
callers.map(c => `${c.node.name} (${c.node.filePath}:${c.node.startLine})`).join(', '));
|
|
1551
|
-
}
|
|
1552
|
-
const callees = cg.getCallees(node.id).slice(0, 8);
|
|
1553
|
-
if (callees.length > 0) {
|
|
1554
|
-
lines.push(`**\`${node.name}\` calls:** ` +
|
|
1555
|
-
callees.map(c => `${c.node.name} (${c.node.filePath}:${c.node.startLine})`).join(', '));
|
|
1556
|
-
}
|
|
1557
|
-
lines.push('');
|
|
1558
|
-
};
|
|
1559
|
-
inlineEndpoint('FROM', start);
|
|
1560
|
-
if (end.id !== start.id)
|
|
1561
|
-
inlineEndpoint('TO', end);
|
|
1562
|
-
// Inline the OTHER top-level functions/methods in TO's file — that's
|
|
1563
|
-
// where the missing dynamic-dispatch flow usually lives. Concrete
|
|
1564
|
-
// measurement from cosmos-Q1: `msgServer.Send` statically calls only
|
|
1565
|
-
// utility functions (`StringToBytes`, `Wrapf`); its real next-hop
|
|
1566
|
-
// `SendCoins` is invoked via an embedded-interface call (`k.Keeper.SendCoins`)
|
|
1567
|
-
// that static parsing CAN'T see. The flow IS in the same file as the
|
|
1568
|
-
// destination (`x/bank/keeper/send.go`: SendCoins → subUnlockedCoins →
|
|
1569
|
-
// addCoins → setBalance). Pre-inlining those file-mates is what
|
|
1570
|
-
// replaces the agent's "trace fail → search SendCoins → node SendCoins
|
|
1571
|
-
// → trace again" fan-out.
|
|
1572
|
-
const NEIGHBOR_LINES = 40;
|
|
1573
|
-
const NEIGHBOR_CHARS = 1200;
|
|
1574
|
-
const NEIGHBOR_K = 5;
|
|
1575
|
-
const fileSiblings = (anchor) => {
|
|
1576
|
-
// Functions and methods in the same file as the anchor, excluding
|
|
1577
|
-
// the anchor itself and anything we've already inlined. Sort by
|
|
1578
|
-
// distance from the anchor's startLine so the closest symbols come
|
|
1579
|
-
// first (the flow is usually adjacent in the file).
|
|
1580
|
-
const sameFile = cg
|
|
1581
|
-
.getNodesByKind('function')
|
|
1582
|
-
.filter((n) => n.filePath === anchor.filePath)
|
|
1583
|
-
.concat(cg.getNodesByKind('method').filter((n) => n.filePath === anchor.filePath));
|
|
1584
|
-
return sameFile
|
|
1585
|
-
.filter((n) => n.id !== anchor.id && !inlinedBodies.has(n.id))
|
|
1586
|
-
.sort((a, b) => Math.abs(a.startLine - anchor.startLine) - Math.abs(b.startLine - anchor.startLine))
|
|
1587
|
-
.slice(0, NEIGHBOR_K);
|
|
1588
|
-
};
|
|
1589
|
-
const renderSiblings = (label, siblings) => {
|
|
1590
|
-
if (siblings.length === 0)
|
|
1591
|
-
return;
|
|
1592
|
-
lines.push(`### ${label}`);
|
|
1593
|
-
for (const sib of siblings) {
|
|
1594
|
-
lines.push('');
|
|
1595
|
-
lines.push(`- \`${sib.name}\` (${sib.filePath}:${sib.startLine}-${sib.endLine})`);
|
|
1596
|
-
inlineBody(sib, NEIGHBOR_LINES, NEIGHBOR_CHARS);
|
|
1597
|
-
}
|
|
1598
|
-
lines.push('');
|
|
1599
|
-
};
|
|
1600
|
-
renderSiblings(`Other functions in \`${end.filePath}\` (the flow that the dynamic-dispatch hop reaches — bodies inlined)`, fileSiblings(end));
|
|
1601
|
-
lines.push('> Endpoint bodies + the other functions in the destination\'s file are inlined above. Together they typically cover the missing dynamic-dispatch boundary (interface-method calls like `k.Keeper.SendCoins` that static parsing can\'t follow). **No further codegraph_node / codegraph_callers / codegraph_callees / Read / Grep is needed for any symbol already shown here** — call them again only if you need to walk DEEPER than what is inlined.');
|
|
1602
|
-
return this.textResult(this.truncateOutput(lines.join('\n') + fromMatches.note + toMatches.note));
|
|
1603
|
-
}
|
|
1604
|
-
const lines = [
|
|
1605
|
-
`## Trace: ${from} → ${to}`,
|
|
1606
|
-
'',
|
|
1607
|
-
`Full execution path below — ${path.length} hops, each with its body, plus what the destination calls. This is the complete flow; answer from it.`,
|
|
1608
|
-
'',
|
|
1609
|
-
`${path.length} hops:`,
|
|
1610
|
-
'',
|
|
1611
|
-
];
|
|
1612
|
-
// Inline what each hop needs so the agent doesn't Read/Grep to get it: the
|
|
1613
|
-
// call-site source line, the registration site for dynamic-dispatch hops, AND
|
|
1614
|
-
// the hop's own body (capped per hop so the trace stays path-scoped). Earlier
|
|
1615
|
-
// versions inlined only the call-site line, which left agents calling explore
|
|
1616
|
-
// or Read for the bodies — the exact follow-up the ablation experiment measured.
|
|
1617
|
-
const fileCache = new Map();
|
|
1618
|
-
for (let i = 0; i < path.length; i++) {
|
|
1619
|
-
const step = path[i];
|
|
1620
|
-
if (step.edge) {
|
|
1621
|
-
const synth = this.synthEdgeNote(step.edge);
|
|
1622
|
-
if (synth) {
|
|
1623
|
-
lines.push(` ↓ ${synth.label}`);
|
|
1624
|
-
if (synth.registeredAt) {
|
|
1625
|
-
const regSrc = this.sourceLineAt(cg, synth.registeredAt, fileCache);
|
|
1626
|
-
lines.push(` ↳ registered at ${synth.registeredAt}${regSrc ? ` ${regSrc}` : ''}`);
|
|
1627
|
-
}
|
|
1628
|
-
}
|
|
1629
|
-
else {
|
|
1630
|
-
// The call happens in the PREVIOUS hop's file at edge.line.
|
|
1631
|
-
const prev = path[i - 1];
|
|
1632
|
-
const ref = prev && step.edge.line ? `${prev.node.filePath}:${step.edge.line}` : undefined;
|
|
1633
|
-
const callSrc = this.sourceLineAt(cg, ref, fileCache);
|
|
1634
|
-
lines.push(` ↓ ${step.edge.kind}${step.edge.line ? `@${step.edge.line}` : ''}${callSrc ? ` ${callSrc}` : ''}`);
|
|
1635
|
-
}
|
|
1636
|
-
}
|
|
1637
|
-
lines.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine}-${step.node.endLine})`);
|
|
1638
|
-
const body = this.sourceRangeAt(cg, step.node.filePath, step.node.startLine, step.node.endLine, fileCache, 60, 1800);
|
|
1639
|
-
if (body)
|
|
1640
|
-
lines.push(body);
|
|
1641
|
-
}
|
|
1642
|
-
// The "last mile": what the destination does next. Agents otherwise explore/Read
|
|
1643
|
-
// for exactly this (e.g. renderStaticScene → _renderStaticScene → the canvas draw),
|
|
1644
|
-
// so inlining the destination's callees is what actually stops the investigation —
|
|
1645
|
-
// sufficiency, not a "don't explore" instruction.
|
|
1646
|
-
const dest = path[path.length - 1].node;
|
|
1647
|
-
const destCallees = cg.getCallees(dest.id)
|
|
1648
|
-
.filter(c => !path.some(p => p.node.id === c.node.id))
|
|
1649
|
-
.slice(0, 6);
|
|
1650
|
-
if (destCallees.length > 0) {
|
|
1651
|
-
lines.push('', `### \`${dest.name}\` then calls (the destination's immediate work):`);
|
|
1652
|
-
for (const c of destCallees) {
|
|
1653
|
-
lines.push('', `- ${c.node.name} (${c.node.filePath}:${c.node.startLine}-${c.node.endLine})`);
|
|
1654
|
-
const body = this.sourceRangeAt(cg, c.node.filePath, c.node.startLine, c.node.endLine, fileCache, 16, 600);
|
|
1655
|
-
if (body)
|
|
1656
|
-
lines.push(body);
|
|
1657
|
-
}
|
|
1658
|
-
}
|
|
1659
|
-
lines.push('', '> Full path + every hop body + the destination\'s calls are inlined above — the complete flow. Answer from it; a Read is only needed to chase a specific local variable\'s data-flow.');
|
|
1660
|
-
return this.textResult(this.truncateOutput(lines.join('\n')));
|
|
1661
|
-
}
|
|
1662
1059
|
/**
|
|
1663
1060
|
* Describe a synthesized (dynamic-dispatch) edge for human output: how the
|
|
1664
1061
|
* callback was wired up — the bridge static parsing can't see. Returns null
|
|
@@ -1718,83 +1115,15 @@ class ToolHandler {
|
|
|
1718
1115
|
registeredAt,
|
|
1719
1116
|
};
|
|
1720
1117
|
}
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
sourceLineAt(cg, ref, cache) {
|
|
1729
|
-
if (!ref)
|
|
1730
|
-
return null;
|
|
1731
|
-
const i = ref.lastIndexOf(':');
|
|
1732
|
-
if (i < 0)
|
|
1733
|
-
return null;
|
|
1734
|
-
const filePath = ref.slice(0, i);
|
|
1735
|
-
const line = parseInt(ref.slice(i + 1), 10);
|
|
1736
|
-
if (!Number.isFinite(line) || line < 1)
|
|
1737
|
-
return null;
|
|
1738
|
-
let fileLines = cache.get(filePath);
|
|
1739
|
-
if (!fileLines) {
|
|
1740
|
-
const abs = (0, utils_1.validatePathWithinRoot)(cg.getProjectRoot(), filePath);
|
|
1741
|
-
if (!abs || !(0, fs_1.existsSync)(abs))
|
|
1742
|
-
return null;
|
|
1743
|
-
try {
|
|
1744
|
-
fileLines = (0, fs_1.readFileSync)(abs, 'utf-8').split('\n');
|
|
1745
|
-
}
|
|
1746
|
-
catch {
|
|
1747
|
-
return null;
|
|
1748
|
-
}
|
|
1749
|
-
cache.set(filePath, fileLines);
|
|
1750
|
-
}
|
|
1751
|
-
const raw = fileLines[line - 1];
|
|
1752
|
-
if (raw == null)
|
|
1753
|
-
return null;
|
|
1754
|
-
const t = raw.trim();
|
|
1755
|
-
return t.length > 160 ? t.slice(0, 157) + '…' : t;
|
|
1756
|
-
}
|
|
1757
|
-
/**
|
|
1758
|
-
* Read a hop's body — filePath lines [startLine..endLine] — for inlining into
|
|
1759
|
-
* a trace, capped (lines + chars) so the whole path stays path-scoped even on
|
|
1760
|
-
* a 7-hop chain. Dedents to the body's own indentation and marks truncation.
|
|
1761
|
-
* Shares `cache` with sourceLineAt so each file is read at most once per trace.
|
|
1762
|
-
*/
|
|
1763
|
-
sourceRangeAt(cg, filePath, startLine, endLine, cache, maxLines = 28, maxChars = 1200) {
|
|
1764
|
-
if (!Number.isFinite(startLine) || startLine < 1)
|
|
1765
|
-
return null;
|
|
1766
|
-
let fileLines = cache.get(filePath);
|
|
1767
|
-
if (!fileLines) {
|
|
1768
|
-
const abs = (0, utils_1.validatePathWithinRoot)(cg.getProjectRoot(), filePath);
|
|
1769
|
-
if (!abs || !(0, fs_1.existsSync)(abs))
|
|
1770
|
-
return null;
|
|
1771
|
-
try {
|
|
1772
|
-
fileLines = (0, fs_1.readFileSync)(abs, 'utf-8').split('\n');
|
|
1773
|
-
}
|
|
1774
|
-
catch {
|
|
1775
|
-
return null;
|
|
1776
|
-
}
|
|
1777
|
-
cache.set(filePath, fileLines);
|
|
1118
|
+
if (m?.synthesizedBy === 'closure-collection') {
|
|
1119
|
+
const field = m.field ? `\`${String(m.field)}\`` : 'a collection';
|
|
1120
|
+
return {
|
|
1121
|
+
label: `closure collection — runs handlers appended to ${field} (dynamic dispatch)`,
|
|
1122
|
+
compact: `dynamic: runs ${field} handlers${at}`,
|
|
1123
|
+
registeredAt,
|
|
1124
|
+
};
|
|
1778
1125
|
}
|
|
1779
|
-
|
|
1780
|
-
let slice = fileLines.slice(startLine - 1, end);
|
|
1781
|
-
if (slice.length === 0)
|
|
1782
|
-
return null;
|
|
1783
|
-
let omitted = 0;
|
|
1784
|
-
if (slice.length > maxLines) {
|
|
1785
|
-
omitted = slice.length - maxLines;
|
|
1786
|
-
slice = slice.slice(0, maxLines);
|
|
1787
|
-
}
|
|
1788
|
-
const nonBlank = slice.filter(l => l.trim().length > 0);
|
|
1789
|
-
const dedent = nonBlank.length ? Math.min(...nonBlank.map(l => l.length - l.trimStart().length)) : 0;
|
|
1790
|
-
let text = slice.map((l, i) => ` ${startLine + i}\t${l.slice(dedent)}`).join('\n');
|
|
1791
|
-
if (text.length > maxChars) {
|
|
1792
|
-
text = text.slice(0, maxChars).replace(/\n[^\n]*$/, '');
|
|
1793
|
-
omitted = Math.max(omitted, 1);
|
|
1794
|
-
}
|
|
1795
|
-
if (omitted > 0)
|
|
1796
|
-
text += `\n … (+${omitted} more line${omitted === 1 ? '' : 's'})`;
|
|
1797
|
-
return text;
|
|
1126
|
+
return null;
|
|
1798
1127
|
}
|
|
1799
1128
|
/**
|
|
1800
1129
|
* Flow-from-named-symbols: an agent's codegraph_explore query is a bag of
|
|
@@ -1811,6 +1140,7 @@ class ToolHandler {
|
|
|
1811
1140
|
* dropping unrelated `OmsOrderService::list`.
|
|
1812
1141
|
*/
|
|
1813
1142
|
buildFlowFromNamedSymbols(cg, query) {
|
|
1143
|
+
const EMPTY = { text: '', pathNodeIds: new Set(), namedNodeIds: new Set(), uniqueNamedNodeIds: new Set() };
|
|
1814
1144
|
try {
|
|
1815
1145
|
const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
|
|
1816
1146
|
// Strip only a REAL file extension (Create.cs → Create); KEEP qualified
|
|
@@ -1822,7 +1152,7 @@ class ToolHandler {
|
|
|
1822
1152
|
.map((t) => t.replace(FILE_EXT, '').trim())
|
|
1823
1153
|
.filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
|
|
1824
1154
|
if (tokens.length < 2)
|
|
1825
|
-
return
|
|
1155
|
+
return EMPTY;
|
|
1826
1156
|
// Pool of name SEGMENTS (Class + method from every token) used to
|
|
1827
1157
|
// disambiguate an ambiguous SIMPLE name: keep a candidate only if its
|
|
1828
1158
|
// CONTAINER class is itself named in the query.
|
|
@@ -1832,24 +1162,34 @@ class ToolHandler {
|
|
|
1832
1162
|
if (s)
|
|
1833
1163
|
segPool.add(s);
|
|
1834
1164
|
const named = new Map();
|
|
1165
|
+
// Nodes whose token is SPECIFIC — a (near-)unique callable name (<=3 defs in
|
|
1166
|
+
// the whole graph). These are safe to SPARE a file on: the agent named THIS
|
|
1167
|
+
// method (`getResponseWithInterceptorChain`, 1 def). A hyper-polymorphic name
|
|
1168
|
+
// (`as_sql`, 110 defs across every Expression/Compiler subclass) is NOT here,
|
|
1169
|
+
// so naming it doesn't keep every backend variant full and flood the budget.
|
|
1170
|
+
const uniqueNamedNodeIds = new Set();
|
|
1835
1171
|
for (const t of tokens) {
|
|
1836
1172
|
const cands = this.findAllSymbols(cg, t).nodes.filter((n) => CALLABLE.has(n.kind));
|
|
1837
1173
|
// A qualified or otherwise-specific name (<=3 hits) keeps all; an
|
|
1838
1174
|
// ambiguous simple name keeps only candidates whose container is named.
|
|
1839
|
-
const
|
|
1175
|
+
const specific = cands.length <= 3;
|
|
1176
|
+
const pick = specific
|
|
1840
1177
|
? cands
|
|
1841
1178
|
: cands.filter((n) => {
|
|
1842
1179
|
const segs = (n.qualifiedName || '').toLowerCase().split(/::|\./).filter(Boolean);
|
|
1843
1180
|
const container = segs.length >= 2 ? segs[segs.length - 2] : '';
|
|
1844
1181
|
return !!container && segPool.has(container);
|
|
1845
1182
|
});
|
|
1846
|
-
for (const n of pick.slice(0, 6))
|
|
1183
|
+
for (const n of pick.slice(0, 6)) {
|
|
1847
1184
|
named.set(n.id, n);
|
|
1185
|
+
if (specific)
|
|
1186
|
+
uniqueNamedNodeIds.add(n.id);
|
|
1187
|
+
}
|
|
1848
1188
|
if (named.size > 40)
|
|
1849
1189
|
break;
|
|
1850
1190
|
}
|
|
1851
1191
|
if (named.size < 2)
|
|
1852
|
-
return
|
|
1192
|
+
return EMPTY;
|
|
1853
1193
|
const MAX_HOPS = 7;
|
|
1854
1194
|
let best = null;
|
|
1855
1195
|
// BFS the full call graph (incl. synth edges) from each named seed, but
|
|
@@ -1895,23 +1235,208 @@ class ToolHandler {
|
|
|
1895
1235
|
if (!best || chain.length > best.length)
|
|
1896
1236
|
best = chain;
|
|
1897
1237
|
}
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1238
|
+
const hasMain = !!best && best.length >= 3;
|
|
1239
|
+
const pathIds = new Set((best ?? []).map((s) => s.node.id));
|
|
1240
|
+
// Supplementary: dynamic-dispatch (synthesized) edges incident to a NAMED
|
|
1241
|
+
// symbol — the indirect hops an agent would otherwise grep/Read to
|
|
1242
|
+
// reconstruct ("where do the appended `validators` actually run?"). The
|
|
1243
|
+
// synth edge IS that answer, so surface it even when the OTHER end wasn't
|
|
1244
|
+
// named (e.g. the agent names `validate` but not the `didCompleteTask`
|
|
1245
|
+
// that drains the collection). On-topic by construction: only heuristic
|
|
1246
|
+
// edges touching a symbol the agent named; skipped when the hop already
|
|
1247
|
+
// shows in the main chain.
|
|
1248
|
+
const synthLines = [];
|
|
1249
|
+
const synthSeen = new Set();
|
|
1250
|
+
for (const n of named.values()) {
|
|
1251
|
+
if (synthLines.length >= 6)
|
|
1252
|
+
break;
|
|
1253
|
+
for (const { node: other, edge } of [...cg.getCallers(n.id), ...cg.getCallees(n.id)]) {
|
|
1254
|
+
if (synthLines.length >= 6)
|
|
1255
|
+
break;
|
|
1256
|
+
if (edge.provenance !== 'heuristic' || other.id === n.id)
|
|
1257
|
+
continue;
|
|
1258
|
+
if (pathIds.has(edge.source) && pathIds.has(edge.target))
|
|
1259
|
+
continue; // already in the main chain
|
|
1260
|
+
const src = edge.source === n.id ? n : other;
|
|
1261
|
+
const tgt = edge.source === n.id ? other : n;
|
|
1262
|
+
const key = `${src.name}>${tgt.name}`;
|
|
1263
|
+
if (synthSeen.has(key))
|
|
1264
|
+
continue;
|
|
1265
|
+
synthSeen.add(key);
|
|
1266
|
+
const note = this.synthEdgeNote(edge);
|
|
1267
|
+
synthLines.push(`- ${src.name} → ${tgt.name} [${note ? note.compact : edge.kind}]`);
|
|
1268
|
+
}
|
|
1269
|
+
}
|
|
1270
|
+
if (!hasMain && synthLines.length === 0)
|
|
1271
|
+
return EMPTY;
|
|
1272
|
+
const out = [];
|
|
1273
|
+
if (hasMain) {
|
|
1274
|
+
out.push('## Flow (call path among the symbols you queried)', '');
|
|
1275
|
+
for (let i = 0; i < best.length; i++) {
|
|
1276
|
+
const step = best[i];
|
|
1277
|
+
if (step.edge) {
|
|
1278
|
+
const sy = this.synthEdgeNote(step.edge);
|
|
1279
|
+
out.push(` ↓ ${sy ? sy.compact : step.edge.kind}`);
|
|
1280
|
+
}
|
|
1281
|
+
out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
|
|
1906
1282
|
}
|
|
1907
|
-
out.push(
|
|
1283
|
+
out.push('');
|
|
1908
1284
|
}
|
|
1909
|
-
|
|
1910
|
-
|
|
1285
|
+
if (synthLines.length) {
|
|
1286
|
+
out.push('## Dynamic-dispatch links among your symbols', '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)', '', ...synthLines, '');
|
|
1287
|
+
}
|
|
1288
|
+
out.push('> Full source for these symbols is below — the call flow among them, followed by their bodies.', '');
|
|
1289
|
+
// namedNodeIds = every callable the agent explicitly named (a superset of
|
|
1290
|
+
// the spine). A file holding one is something the agent asked to SEE, so it
|
|
1291
|
+
// must keep full source even if it's an off-spine polymorphic sibling — the
|
|
1292
|
+
// agent named `getResponseWithInterceptorChain` / `SQLCompiler.execute_sql`
|
|
1293
|
+
// as the mechanism, not as an interchangeable leaf. See the skeleton gate.
|
|
1294
|
+
return { text: out.join('\n'), pathNodeIds: pathIds, namedNodeIds: new Set(named.keys()), uniqueNamedNodeIds };
|
|
1911
1295
|
}
|
|
1912
1296
|
catch {
|
|
1297
|
+
return EMPTY;
|
|
1298
|
+
}
|
|
1299
|
+
}
|
|
1300
|
+
/**
|
|
1301
|
+
* Compact "blast radius" for the entry symbols of an explore result: who
|
|
1302
|
+
* depends on each (callers) and which test files cover it — LOCATIONS ONLY,
|
|
1303
|
+
* no source, so the agent knows what to update / re-verify before editing
|
|
1304
|
+
* without reaching for a separate impact call. Always-on, but skips symbols
|
|
1305
|
+
* that have no dependents (nothing to warn about), and returns '' when none
|
|
1306
|
+
* qualify so a leaf-only exploration stays clean.
|
|
1307
|
+
*/
|
|
1308
|
+
buildBlastRadiusSection(cg, subgraph) {
|
|
1309
|
+
const ROOT_CAP = 5; // only the symbols the query actually targeted
|
|
1310
|
+
const FILE_CAP = 4; // caller files listed per symbol before "+N more"
|
|
1311
|
+
const MEANINGFUL = new Set([
|
|
1312
|
+
'function', 'method', 'class', 'interface', 'struct', 'trait', 'protocol',
|
|
1313
|
+
'enum', 'type_alias', 'component', 'constant', 'variable', 'property', 'field',
|
|
1314
|
+
]);
|
|
1315
|
+
const rel = (p) => p.replace(/\\/g, '/');
|
|
1316
|
+
const roots = subgraph.roots
|
|
1317
|
+
.map((id) => subgraph.nodes.get(id))
|
|
1318
|
+
.filter((n) => !!n && MEANINGFUL.has(n.kind))
|
|
1319
|
+
.slice(0, ROOT_CAP);
|
|
1320
|
+
if (roots.length === 0)
|
|
1913
1321
|
return '';
|
|
1322
|
+
const entries = [];
|
|
1323
|
+
for (const root of roots) {
|
|
1324
|
+
let callers = [];
|
|
1325
|
+
try {
|
|
1326
|
+
callers = cg.getCallers(root.id);
|
|
1327
|
+
}
|
|
1328
|
+
catch { /* skip this root */ }
|
|
1329
|
+
const seen = new Set();
|
|
1330
|
+
const uniq = [];
|
|
1331
|
+
for (const c of callers) {
|
|
1332
|
+
if (c?.node && !seen.has(c.node.id)) {
|
|
1333
|
+
seen.add(c.node.id);
|
|
1334
|
+
uniq.push(c.node);
|
|
1335
|
+
}
|
|
1336
|
+
}
|
|
1337
|
+
if (uniq.length === 0)
|
|
1338
|
+
continue; // no blast radius → nothing to flag
|
|
1339
|
+
const callerFiles = [...new Set(uniq.map((n) => rel(n.filePath)))];
|
|
1340
|
+
const testFiles = callerFiles.filter((f) => (0, query_utils_1.isTestFile)(f));
|
|
1341
|
+
const nonTest = callerFiles.filter((f) => !(0, query_utils_1.isTestFile)(f));
|
|
1342
|
+
const shown = nonTest.slice(0, FILE_CAP).map((f) => `\`${f}\``).join(', ');
|
|
1343
|
+
const more = nonTest.length > FILE_CAP ? ` +${nonTest.length - FILE_CAP} more` : '';
|
|
1344
|
+
const where = nonTest.length > 0 ? ` in ${shown}${more}` : '';
|
|
1345
|
+
const tests = testFiles.length > 0
|
|
1346
|
+
? `; tests: ${testFiles.slice(0, FILE_CAP).map((f) => `\`${f}\``).join(', ')}${testFiles.length > FILE_CAP ? ` +${testFiles.length - FILE_CAP}` : ''}`
|
|
1347
|
+
: '; ⚠️ no covering tests found';
|
|
1348
|
+
entries.push(`- \`${root.name}\` (${rel(root.filePath)}:${root.startLine}) — ${uniq.length} caller${uniq.length === 1 ? '' : 's'}${where}${tests}`);
|
|
1349
|
+
}
|
|
1350
|
+
if (entries.length === 0)
|
|
1351
|
+
return '';
|
|
1352
|
+
return [
|
|
1353
|
+
'### Blast radius — what depends on these (update/verify before editing)',
|
|
1354
|
+
'',
|
|
1355
|
+
...entries,
|
|
1356
|
+
'',
|
|
1357
|
+
].join('\n');
|
|
1358
|
+
}
|
|
1359
|
+
/**
|
|
1360
|
+
* Graph-connectivity relevance via Random-Walk-with-Restart (personalized
|
|
1361
|
+
* PageRank) from the query's matched SEED nodes over the call/reference graph.
|
|
1362
|
+
*
|
|
1363
|
+
* This is the ranking signal text search (FTS/bm25) CANNOT provide, and it's
|
|
1364
|
+
* codegraph's home turf: relevance by STRUCTURE, not words. A file whose
|
|
1365
|
+
* symbols are call-connected to the matched cluster accrues walk mass and
|
|
1366
|
+
* ranks high; a lone TEXT match — e.g. `LensSwitcher.swift` matched the word
|
|
1367
|
+
* "switch" from `switchOrganization`, but calls none of `setUser`/`fetchUser`
|
|
1368
|
+
* — gets only its own restart probability and ranks ~0. Immune to the
|
|
1369
|
+
* tokenization trap that fools term matching, deterministic, no embeddings.
|
|
1370
|
+
*
|
|
1371
|
+
* Undirected adjacency (reachability both ways), restart α=0.25 to the seeds,
|
|
1372
|
+
* power iteration to convergence. Bounded to the already-relevant subgraph, so
|
|
1373
|
+
* it's a few hundred nodes × ~25 iterations — negligible cost.
|
|
1374
|
+
*/
|
|
1375
|
+
computeGraphRelevance(nodeIds, edges, seedIds) {
|
|
1376
|
+
const out = new Map();
|
|
1377
|
+
const n = nodeIds.length;
|
|
1378
|
+
if (n === 0)
|
|
1379
|
+
return out;
|
|
1380
|
+
const idx = new Map();
|
|
1381
|
+
for (let i = 0; i < n; i++)
|
|
1382
|
+
idx.set(nodeIds[i], i);
|
|
1383
|
+
const RANK_EDGES = new Set([
|
|
1384
|
+
'calls', 'references', 'extends', 'implements', 'overrides',
|
|
1385
|
+
'instantiates', 'returns', 'type_of', 'imports',
|
|
1386
|
+
]);
|
|
1387
|
+
const adj = Array.from({ length: n }, () => []);
|
|
1388
|
+
for (const e of edges) {
|
|
1389
|
+
if (!RANK_EDGES.has(e.kind))
|
|
1390
|
+
continue;
|
|
1391
|
+
const i = idx.get(e.source);
|
|
1392
|
+
const j = idx.get(e.target);
|
|
1393
|
+
if (i === undefined || j === undefined || i === j)
|
|
1394
|
+
continue;
|
|
1395
|
+
adj[i].push(j);
|
|
1396
|
+
adj[j].push(i); // undirected — reachable either direction
|
|
1397
|
+
}
|
|
1398
|
+
// Restart vector: uniform over seeds present in the candidate set. (Falls
|
|
1399
|
+
// back to uniform-over-all if no seed landed in the set, so we never return
|
|
1400
|
+
// all-zero.)
|
|
1401
|
+
const r = new Array(n).fill(0);
|
|
1402
|
+
let rsum = 0;
|
|
1403
|
+
for (const id of seedIds) {
|
|
1404
|
+
const i = idx.get(id);
|
|
1405
|
+
if (i !== undefined) {
|
|
1406
|
+
r[i] = 1;
|
|
1407
|
+
rsum += 1;
|
|
1408
|
+
}
|
|
1409
|
+
}
|
|
1410
|
+
if (rsum === 0) {
|
|
1411
|
+
for (let i = 0; i < n; i++)
|
|
1412
|
+
r[i] = 1;
|
|
1413
|
+
rsum = n;
|
|
1414
|
+
}
|
|
1415
|
+
for (let i = 0; i < n; i++)
|
|
1416
|
+
r[i] /= rsum;
|
|
1417
|
+
const alpha = 0.25;
|
|
1418
|
+
let s = r.slice();
|
|
1419
|
+
for (let iter = 0; iter < 25; iter++) {
|
|
1420
|
+
const next = new Array(n).fill(0);
|
|
1421
|
+
for (let i = 0; i < n; i++) {
|
|
1422
|
+
const si = s[i];
|
|
1423
|
+
if (si === 0)
|
|
1424
|
+
continue;
|
|
1425
|
+
const d = adj[i].length;
|
|
1426
|
+
if (d === 0) {
|
|
1427
|
+
next[i] += si;
|
|
1428
|
+
continue;
|
|
1429
|
+
} // dangling: keep its mass
|
|
1430
|
+
const share = si / d;
|
|
1431
|
+
for (const j of adj[i])
|
|
1432
|
+
next[j] += share;
|
|
1433
|
+
}
|
|
1434
|
+
for (let i = 0; i < n; i++)
|
|
1435
|
+
s[i] = (1 - alpha) * next[i] + alpha * r[i];
|
|
1914
1436
|
}
|
|
1437
|
+
for (let i = 0; i < n; i++)
|
|
1438
|
+
out.set(nodeIds[i], s[i]);
|
|
1439
|
+
return out;
|
|
1915
1440
|
}
|
|
1916
1441
|
/**
|
|
1917
1442
|
* Handle codegraph_explore — deep exploration in a single call
|
|
@@ -1991,9 +1516,74 @@ class ToolHandler {
|
|
|
1991
1516
|
glueNodeIds.add(nb.id);
|
|
1992
1517
|
}
|
|
1993
1518
|
}
|
|
1519
|
+
// Named-symbol seeding: findRelevantContext is an FTS/text rank, so a query
|
|
1520
|
+
// that's a BAG of symbol names skewed toward one phase (Alamofire: 5 build
|
|
1521
|
+
// terms, each a high-frequency name, vs 3 validate terms) lets the
|
|
1522
|
+
// lower-frequency names fall below the search cut — their definitions, and
|
|
1523
|
+
// whole files (Validation.swift), never get gathered, so they can never
|
|
1524
|
+
// render and the agent Reads them. Resolve EACH named token to its
|
|
1525
|
+
// substantive definition (skip empty stubs + test files, same relevance the
|
|
1526
|
+
// trace endpoint picker uses) and inject it as an entry, so every symbol the
|
|
1527
|
+
// agent explicitly named is in the subgraph and its file is scored.
|
|
1528
|
+
const namedSeedIds = new Set();
|
|
1529
|
+
{
|
|
1530
|
+
const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
|
|
1531
|
+
const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
|
|
1532
|
+
const isTestPath = (p) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
|
|
1533
|
+
const bodyLines = (n) => Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
|
|
1534
|
+
const tokens = [...new Set(query.split(/[\s,()[\]]+/)
|
|
1535
|
+
.map((t) => t.replace(FILE_EXT, '').trim())
|
|
1536
|
+
.filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
|
|
1537
|
+
// PascalCase tokens in the query are type/file disambiguators — when the
|
|
1538
|
+
// agent writes "DataRequest task validate", the `task`/`validate` it wants
|
|
1539
|
+
// are DataRequest's, NOT the same-named overloads in Validation.swift /
|
|
1540
|
+
// Concurrency.swift / the abstract base. Used below to bias overloaded
|
|
1541
|
+
// names toward the file/class the query also names.
|
|
1542
|
+
const typeTokens = tokens.filter((o) => /^[A-Z][A-Za-z0-9]{3,}/.test(o));
|
|
1543
|
+
const inNamedContext = (n) => typeTokens.some((ct) => {
|
|
1544
|
+
const lc = ct.toLowerCase();
|
|
1545
|
+
return n.filePath.toLowerCase().includes(lc) || n.qualifiedName.toLowerCase().includes(lc);
|
|
1546
|
+
});
|
|
1547
|
+
for (const t of tokens) {
|
|
1548
|
+
// Enumerate ALL defs of a bare token via the direct index, not FTS — a
|
|
1549
|
+
// 50+-overload name (tokio `poll`) ranks the wanted def (`Harness::poll`)
|
|
1550
|
+
// below the FTS cut, so findAllSymbols would never see it and the
|
|
1551
|
+
// type-token bias below couldn't pick the harness.rs one. (Same fix as
|
|
1552
|
+
// codegraph_node's findSymbolMatches.) Qualified tokens keep findAllSymbols.
|
|
1553
|
+
const isQual = /[.\/]|::/.test(t);
|
|
1554
|
+
const raw = isQual ? this.findAllSymbols(cg, t).nodes : cg.getNodesByName(t);
|
|
1555
|
+
const cands = raw
|
|
1556
|
+
.filter((n) => CALLABLE.has(n.kind) && !isTestPath(n.filePath))
|
|
1557
|
+
.sort((a, b) => (bodyLines(b) > 1 ? 1 : 0) - (bodyLines(a) > 1 ? 1 : 0) || bodyLines(b) - bodyLines(a));
|
|
1558
|
+
// A specific name (<=3 defs) injects all its defs. An overloaded name
|
|
1559
|
+
// (`validate` = 10, `request` = 44) would flood the subgraph, so inject
|
|
1560
|
+
// only: the overloads whose file/class the query ALSO names (the agent
|
|
1561
|
+
// told us which one it wants — DataRequest's, not Validation.swift's),
|
|
1562
|
+
// capped; else fall back to the single most-substantive def. This is the
|
|
1563
|
+
// explore-side mirror of codegraph_node's overload disambiguation.
|
|
1564
|
+
let picks;
|
|
1565
|
+
if (cands.length <= 3) {
|
|
1566
|
+
picks = cands;
|
|
1567
|
+
}
|
|
1568
|
+
else {
|
|
1569
|
+
const ctx = cands.filter(inNamedContext);
|
|
1570
|
+
picks = ctx.length > 0 ? ctx.slice(0, 4) : cands.slice(0, 1);
|
|
1571
|
+
}
|
|
1572
|
+
for (const n of picks) {
|
|
1573
|
+
if (!subgraph.nodes.has(n.id))
|
|
1574
|
+
subgraph.nodes.set(n.id, n);
|
|
1575
|
+
// Mark as a named seed EVEN IF the FTS gather already had it — being
|
|
1576
|
+
// "named by the agent" is independent of whether search happened to
|
|
1577
|
+
// surface it, and it drives the +50 score, the gate, and the
|
|
1578
|
+
// named-file sort below. (Previously only NEW injections were marked,
|
|
1579
|
+
// so a named symbol FTS already gathered never sorted to the top.)
|
|
1580
|
+
namedSeedIds.add(n.id);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1994
1584
|
// Step 2: Group nodes by file, score by relevance
|
|
1995
1585
|
const fileGroups = new Map();
|
|
1996
|
-
const entryNodeIds = new Set(subgraph.roots);
|
|
1586
|
+
const entryNodeIds = new Set([...subgraph.roots, ...namedSeedIds]);
|
|
1997
1587
|
// Build a set of nodes directly connected to entry points (depth 1)
|
|
1998
1588
|
const connectedToEntry = new Set();
|
|
1999
1589
|
for (const edge of subgraph.edges) {
|
|
@@ -2008,8 +1598,16 @@ class ToolHandler {
|
|
|
2008
1598
|
continue;
|
|
2009
1599
|
const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
|
|
2010
1600
|
group.nodes.push(node);
|
|
2011
|
-
// Score:
|
|
2012
|
-
|
|
1601
|
+
// Score: a NAMED-SEED node (a symbol the agent named that FTS missed, now
|
|
1602
|
+
// injected) is worth far more than a mere reference — its file is where the
|
|
1603
|
+
// answer lives. Without this, an incidental file that name-drops the flow
|
|
1604
|
+
// (Combine.swift references request/task → score 23 from connected nodes)
|
|
1605
|
+
// outranks the file that DEFINES a named symbol (Validation.swift's
|
|
1606
|
+
// `validate` → 10) and steals its render slot. Definition ≫ reference.
|
|
1607
|
+
if (namedSeedIds.has(node.id)) {
|
|
1608
|
+
group.score += 50;
|
|
1609
|
+
}
|
|
1610
|
+
else if (entryNodeIds.has(node.id)) {
|
|
2013
1611
|
group.score += 10;
|
|
2014
1612
|
}
|
|
2015
1613
|
else if (connectedToEntry.has(node.id)) {
|
|
@@ -2042,40 +1640,124 @@ class ToolHandler {
|
|
|
2042
1640
|
/\bicons?\b/.test(lp) ||
|
|
2043
1641
|
/\bi18n\b/.test(lp));
|
|
2044
1642
|
};
|
|
2045
|
-
//
|
|
2046
|
-
//
|
|
2047
|
-
//
|
|
2048
|
-
//
|
|
2049
|
-
//
|
|
2050
|
-
// the query itself is about tests —
|
|
2051
|
-
// tests" case
|
|
2052
|
-
|
|
1643
|
+
// Hard-exclude test/spec files (ALL tiers, not just tiny). One slipped test
|
|
1644
|
+
// file dominates the per-file budget on small repos (cobra's `command_test.go`
|
|
1645
|
+
// displaced `args.go`) AND wastes budget on large ones (Django's
|
|
1646
|
+
// `custom_lookups/tests.py` ate ~2.3 KB of the 28 KB cap, crowding out the
|
|
1647
|
+
// SQLCompiler mechanism the agent then Read). A test file almost never answers
|
|
1648
|
+
// an architecture question. Skip when the query itself is about tests — the
|
|
1649
|
+
// legitimate "explore the tests" case — and only cut if ≥2 non-test candidates
|
|
1650
|
+
// remain (else tests are the only signal for this area).
|
|
1651
|
+
{
|
|
2053
1652
|
const queryMentionsTests = /\b(test|tests|testing|spec|verify|verifies)\b/i.test(query);
|
|
2054
1653
|
if (!queryMentionsTests) {
|
|
2055
1654
|
const nonLow = relevantFiles.filter(([p]) => !isLowValue(p));
|
|
2056
|
-
// Only apply the hard-filter if we still have at least 2 non-test
|
|
2057
|
-
// candidates after the cut — otherwise the agent is asking about an
|
|
2058
|
-
// area where tests are the only signal, and we should not strip them.
|
|
2059
1655
|
if (nonLow.length >= 2) {
|
|
2060
1656
|
relevantFiles = nonLow;
|
|
2061
1657
|
}
|
|
2062
1658
|
}
|
|
2063
1659
|
}
|
|
2064
|
-
//
|
|
1660
|
+
// Secondary signal: how many DISTINCT query terms each file matches (path +
|
|
1661
|
+
// symbol names). Kept only as a tiebreak — the PRIMARY relevance is graph
|
|
1662
|
+
// connectivity below. (Term counting alone tied the real central file with
|
|
1663
|
+
// incidental same-word matches; it's a weak text signal, not the ranker.)
|
|
1664
|
+
const uniqueQueryTerms = [...new Set(queryTerms)].filter(t => t.length >= 3);
|
|
1665
|
+
const fileTermHits = new Map();
|
|
1666
|
+
for (const [fp, group] of relevantFiles) {
|
|
1667
|
+
const hay = fp.toLowerCase() + ' ' + group.nodes.map(n => n.name.toLowerCase()).join(' ');
|
|
1668
|
+
let hits = 0;
|
|
1669
|
+
for (const t of uniqueQueryTerms)
|
|
1670
|
+
if (hay.includes(t))
|
|
1671
|
+
hits++;
|
|
1672
|
+
fileTermHits.set(fp, hits);
|
|
1673
|
+
}
|
|
1674
|
+
// PRIMARY relevance: graph connectivity (Random-Walk-with-Restart from the
|
|
1675
|
+
// matched seeds — see computeGraphRelevance). Aggregate each file's nodes'
|
|
1676
|
+
// walk mass. This is the signal text search lacks: the real cluster
|
|
1677
|
+
// (org-user.storage.ts, call-connected to the matches) accrues mass; a lone
|
|
1678
|
+
// text match (LensSwitcher.swift, matched "switch" but calls nothing in the
|
|
1679
|
+
// flow) gets only its restart probability → ~0, and is dropped by the gate.
|
|
1680
|
+
const nodeRwr = this.computeGraphRelevance([...subgraph.nodes.keys()], subgraph.edges, entryNodeIds);
|
|
1681
|
+
const fileGraphScore = new Map();
|
|
1682
|
+
for (const node of subgraph.nodes.values()) {
|
|
1683
|
+
fileGraphScore.set(node.filePath, (fileGraphScore.get(node.filePath) ?? 0) + (nodeRwr.get(node.id) ?? 0));
|
|
1684
|
+
}
|
|
1685
|
+
const maxGraph = Math.max(0, ...fileGraphScore.values());
|
|
1686
|
+
// Central file(s): the 1-2 most graph-central files that also match the
|
|
1687
|
+
// query textually (so a connected hub-utility with no term match isn't
|
|
1688
|
+
// mistaken for the subject). The heart of the answer — they earn the larger
|
|
1689
|
+
// WHOLE-FILE ceiling below (a god-file central file still exceeds it and
|
|
1690
|
+
// falls to generous full-method sectioning — never a whole dump).
|
|
1691
|
+
const centralFiles = new Set([...fileGraphScore.entries()]
|
|
1692
|
+
.filter(([fp, g]) => g > 0 && (fileTermHits.get(fp) ?? 0) >= 1)
|
|
1693
|
+
.sort((a, b) => b[1] - a[1] || (fileTermHits.get(b[0]) ?? 0) - (fileTermHits.get(a[0]) ?? 0))
|
|
1694
|
+
.slice(0, 2)
|
|
1695
|
+
.map(([f]) => f));
|
|
1696
|
+
// Files that DEFINE a symbol the agent named (or a subgraph root). These are
|
|
1697
|
+
// the highest-relevance files there are — the agent asked for them by name —
|
|
1698
|
+
// so the connectivity gate below must never drop them, even when their RWR
|
|
1699
|
+
// mass is low (a leaf family file like codec.ts is call-connected to little
|
|
1700
|
+
// but is exactly what the agent queried). Without this protection the gate
|
|
1701
|
+
// prunes a named file and the agent Reads it back.
|
|
1702
|
+
const entryFiles = new Set();
|
|
1703
|
+
for (const id of entryNodeIds) {
|
|
1704
|
+
const n = subgraph.nodes.get(id);
|
|
1705
|
+
if (n)
|
|
1706
|
+
entryFiles.add(n.filePath);
|
|
1707
|
+
}
|
|
1708
|
+
// Relevance gate (so the generous budget is a CEILING, not a target): keep a
|
|
1709
|
+
// file only if it is STRUCTURALLY relevant by ANY of:
|
|
1710
|
+
// - graph score within a fraction of the top (it's on/near the flow), OR
|
|
1711
|
+
// - central (a query entry-point lives here), OR
|
|
1712
|
+
// - it DEFINES a symbol the agent named (entryFiles), OR
|
|
1713
|
+
// - it matches >= 2 DISTINCT named query terms — a strong text signal that
|
|
1714
|
+
// the agent is asking about this file even when nothing calls it (codec.ts:
|
|
1715
|
+
// the agent named `encode`/`Codec`/`JsonCodec`, all leaf classes with zero
|
|
1716
|
+
// RWR mass — graph alone wrongly drops it).
|
|
1717
|
+
// A lone text match on one shared word (LensSwitcher: term=1, g~0) is still
|
|
1718
|
+
// dropped, so the budget never fills with incidental files. Guarded so it
|
|
1719
|
+
// never prunes below 2.
|
|
1720
|
+
if (maxGraph > 0) {
|
|
1721
|
+
const gated = relevantFiles.filter(([fp]) => (fileGraphScore.get(fp) ?? 0) >= maxGraph * 0.06
|
|
1722
|
+
|| centralFiles.has(fp)
|
|
1723
|
+
|| entryFiles.has(fp)
|
|
1724
|
+
|| (fileTermHits.get(fp) ?? 0) >= 2);
|
|
1725
|
+
if (gated.length >= 2)
|
|
1726
|
+
relevantFiles = gated;
|
|
1727
|
+
}
|
|
1728
|
+
// Sort files: graph-central first, then distinct-term match, then the
|
|
1729
|
+
// existing low-value/generated/score tiebreaks.
|
|
1730
|
+
// Files that DEFINE a symbol the agent NAMED. These sort first — ahead of
|
|
1731
|
+
// graph connectivity — because the agent asked for them by name. Without
|
|
1732
|
+
// this, a named leaf override reached only by dynamic dispatch (Alamofire's
|
|
1733
|
+
// `DataRequest.task`/`validate`, low RWR mass) sorts below the high-
|
|
1734
|
+
// connectivity abstract base (`Request.swift`) and the same-named overloads
|
|
1735
|
+
// in other files (`Validation.swift`), falls outside the budget, and the
|
|
1736
|
+
// agent Reads it. The named file is the answer — rank it at the top.
|
|
1737
|
+
const namedSeedFiles = new Set();
|
|
1738
|
+
for (const id of namedSeedIds) {
|
|
1739
|
+
const n = subgraph.nodes.get(id);
|
|
1740
|
+
if (n)
|
|
1741
|
+
namedSeedFiles.add(n.filePath);
|
|
1742
|
+
}
|
|
2065
1743
|
const sortedFiles = relevantFiles.sort((a, b) => {
|
|
2066
1744
|
const aPath = a[0].toLowerCase();
|
|
2067
1745
|
const bPath = b[0].toLowerCase();
|
|
2068
|
-
//
|
|
2069
|
-
const
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
const
|
|
2076
|
-
const
|
|
2077
|
-
if (
|
|
2078
|
-
return
|
|
1746
|
+
// Agent-named files first (it asked for a symbol defined here by name).
|
|
1747
|
+
const aNamed = namedSeedFiles.has(a[0]) ? 1 : 0;
|
|
1748
|
+
const bNamed = namedSeedFiles.has(b[0]) ? 1 : 0;
|
|
1749
|
+
if (aNamed !== bNamed)
|
|
1750
|
+
return bNamed - aNamed;
|
|
1751
|
+
// Graph connectivity is the next key (small epsilon so near-ties fall
|
|
1752
|
+
// through to the text signal rather than coin-flipping on float noise).
|
|
1753
|
+
const aG = fileGraphScore.get(a[0]) ?? 0;
|
|
1754
|
+
const bG = fileGraphScore.get(b[0]) ?? 0;
|
|
1755
|
+
if (Math.abs(aG - bG) > maxGraph * 0.01)
|
|
1756
|
+
return bG - aG;
|
|
1757
|
+
const aHits = fileTermHits.get(a[0]) ?? 0;
|
|
1758
|
+
const bHits = fileTermHits.get(b[0]) ?? 0;
|
|
1759
|
+
if (aHits !== bHits)
|
|
1760
|
+
return bHits - aHits;
|
|
2079
1761
|
const aLow = isLowValue(aPath);
|
|
2080
1762
|
const bLow = isLowValue(bPath);
|
|
2081
1763
|
if (aLow !== bLow)
|
|
@@ -2101,6 +1783,12 @@ class ToolHandler {
|
|
|
2101
1783
|
`Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`,
|
|
2102
1784
|
'',
|
|
2103
1785
|
];
|
|
1786
|
+
// Blast radius (always-on, compact): for the entry symbols, who depends on
|
|
1787
|
+
// them + which tests cover them — locations only, no source — so the agent
|
|
1788
|
+
// knows what to update/verify before editing without a separate call.
|
|
1789
|
+
const blastRadius = this.buildBlastRadiusSection(cg, subgraph);
|
|
1790
|
+
if (blastRadius)
|
|
1791
|
+
lines.push(blastRadius);
|
|
2104
1792
|
// Relationship map — show how symbols connect
|
|
2105
1793
|
const significantEdges = subgraph.edges.filter(e => e.kind !== 'contains' // skip contains — it's implied by file grouping
|
|
2106
1794
|
);
|
|
@@ -2132,6 +1820,64 @@ class ToolHandler {
|
|
|
2132
1820
|
}
|
|
2133
1821
|
}
|
|
2134
1822
|
// Step 4: Read contiguous file sections
|
|
1823
|
+
// Compute the flow spine once — used both to prepend the Flow section (below)
|
|
1824
|
+
// and to gate adaptive source sizing: files on the spine get full source,
|
|
1825
|
+
// off-spine peers skeletonize.
|
|
1826
|
+
const flow = this.buildFlowFromNamedSymbols(cg, query);
|
|
1827
|
+
// Polymorphic-sibling detector used by adaptive sizing. A node that
// implements/extends a supertype with at least MIN_SIBLINGS incoming
// implements/extends edges is treated as one of many INTERCHANGEABLE
// implementations (e.g. OkHttp's `: Interceptor` classes — one in full plus
// the rest as signatures is enough), as opposed to a DISTINCT pipeline step
// that shares no supertype and must stay full. The per-supertype answer is
// memoised in `siblingSuper`, so each supertype costs one incoming-edge
// query no matter how many files are checked.
const MIN_SIBLINGS = 3;
const siblingSuper = new Map();
const isPolymorphicSibling = (nodes) => {
    const isInheritance = (edge) => edge.kind === 'implements' || edge.kind === 'extends';
    // True when `superId` has >= MIN_SIBLINGS implementers/subclasses (cached).
    const hasManyImplementers = (superId) => {
        const cached = siblingSuper.get(superId);
        if (cached !== undefined)
            return cached;
        const implementers = cg.getIncomingEdges(superId).filter(isInheritance);
        const verdict = implementers.length >= MIN_SIBLINGS;
        siblingSuper.set(superId, verdict);
        return verdict;
    };
    for (const member of nodes) {
        for (const edge of cg.getOutgoingEdges(member.id)) {
            if (isInheritance(edge) && hasManyImplementers(edge.target))
                return true;
        }
    }
    return false;
};
|
|
1854
|
+
// Detects a "family" file: one whose own nodes include a type (class,
// interface, struct, trait, protocol or type alias) that has at least
// MIN_SIBLINGS incoming implements/extends edges — e.g. Django's compiler.py,
// which holds `SQLCompiler` together with its subclasses. Such files are
// large, so they should STILL skeletonize even when the agent named a method
// in them; rendering one in full would consume budget the sibling files need.
// The result only OVERRIDES the named-callable spare computed further down —
// on its own it never spares a file. (OkHttp's RealCall implements a mixin but
// defines no supertype with >= MIN_SIBLINGS implementers, so the named spare
// keeps it full.) Answers are memoised per node id in `superMany`.
const superMany = new Map();
const definesPolymorphicSupertype = (nodes) => {
    const TYPE_KINDS = ['class', 'interface', 'struct', 'trait', 'protocol', 'type_alias'];
    const isInheritance = (edge) => edge.kind === 'implements' || edge.kind === 'extends';
    for (const candidate of nodes) {
        if (!TYPE_KINDS.includes(candidate.kind))
            continue;
        let verdict = superMany.get(candidate.id);
        if (verdict === undefined) {
            const implementers = cg.getIncomingEdges(candidate.id).filter(isInheritance);
            verdict = implementers.length >= MIN_SIBLINGS;
            superMany.set(candidate.id, verdict);
        }
        if (verdict)
            return true;
    }
    return false;
};
|
|
2135
1881
|
lines.push('### Source Code');
|
|
2136
1882
|
lines.push('');
|
|
2137
1883
|
lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
|
|
@@ -2142,8 +1888,15 @@ class ToolHandler {
|
|
|
2142
1888
|
for (const [filePath, group] of sortedFiles) {
|
|
2143
1889
|
if (filesIncluded >= maxFiles)
|
|
2144
1890
|
break;
|
|
2145
|
-
|
|
2146
|
-
|
|
1891
|
+
// A file DEFINES a named/spine symbol (the answer) vs merely references the
|
|
1892
|
+
// flow. Past 90% budget, stop pulling INCIDENTAL files — but keep scanning
|
|
1893
|
+
// for necessary ones, which render even past the cap (bounded by maxFiles).
|
|
1894
|
+
// Without this `continue` (was an unconditional `break`), the loop stopped
|
|
1895
|
+
// after the build + validators-exec files and never reached the ranked-in
|
|
1896
|
+
// validate-logic file (Alamofire's Validation.swift).
|
|
1897
|
+
const fileNecessary = group.nodes.some(n => entryNodeIds.has(n.id) || flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id));
|
|
1898
|
+
if (!fileNecessary && totalChars > budget.maxOutputChars * 0.9)
|
|
1899
|
+
continue;
|
|
2147
1900
|
const absPath = (0, utils_1.validatePathWithinRoot)(projectRoot, filePath);
|
|
2148
1901
|
if (!absPath || !(0, fs_1.existsSync)(absPath))
|
|
2149
1902
|
continue;
|
|
@@ -2156,14 +1909,160 @@ class ToolHandler {
|
|
|
2156
1909
|
}
|
|
2157
1910
|
const fileLines = fileContent.split('\n');
|
|
2158
1911
|
const lang = group.nodes[0]?.language || '';
|
|
2159
|
-
//
|
|
2160
|
-
//
|
|
2161
|
-
//
|
|
2162
|
-
//
|
|
2163
|
-
//
|
|
1912
|
+
// Adaptive sizing (CODEGRAPH_ADAPTIVE_EXPLORE, default on): collapse a file
|
|
1913
|
+
// to a per-symbol view when it's a redundant member of a polymorphic family.
|
|
1914
|
+
// Engages iff ALL hold:
|
|
1915
|
+
// 1. a flow spine exists,
|
|
1916
|
+
// 2. no symbol in the file is on that spine (it's not the mechanism path),
|
|
1917
|
+
// 3. it IS a polymorphic sibling (≥ MIN_SIBLINGS impls of a shared supertype),
|
|
1918
|
+
// 4. it is NOT SPARED, where a file is spared iff the agent named a
|
|
1919
|
+
// (near-)UNIQUE callable in it (`getResponseWithInterceptorChain`, 1 def →
|
|
1920
|
+
// keep RealCall.kt full) UNLESS the file DEFINES the family supertype (a
|
|
1921
|
+
// base+subclasses "family" file like Django's compiler.py — collapse it).
|
|
1922
|
+
// Uniqueness matters: `as_sql` has 110 defs across every Compiler/Expression
|
|
1923
|
+
// subclass; naming it must NOT keep every backend variant + test file full
|
|
1924
|
+
// and flood the budget. That's why the spare reads uniqueNamedNodeIds.
|
|
1925
|
+
// Within a collapsed file the render is PER-SYMBOL (condition B): a method the
|
|
1926
|
+
// agent NAMED or that's on the spine is shown with its FULL body (so the agent
|
|
1927
|
+
// doesn't Read the file back for it — Django's SQLCompiler.execute_sql/as_sql);
|
|
1928
|
+
// every other symbol is just its signature. So the base mechanism survives while
|
|
1929
|
+
// the file's other ~80 symbols + the redundant subclasses collapse to one line each.
|
|
1930
|
+
const spareNamed = group.nodes.some(n => flow.uniqueNamedNodeIds.has(n.id));
|
|
1931
|
+
const fileDefinesSuper = definesPolymorphicSupertype(group.nodes);
|
|
1932
|
+
const spared = spareNamed && !fileDefinesSuper;
|
|
1933
|
+
const CALLABLE_BODY = new Set(['method', 'function', 'constructor', 'component']);
|
|
1934
|
+
const hasSpineNode = group.nodes.some(n => flow.pathNodeIds.has(n.id));
|
|
1935
|
+
// On-spine god-file: the flow path runs THROUGH this file, but it also holds
|
|
1936
|
+
// many OTHER named methods, and rendering all of them in full blows the
|
|
1937
|
+
// per-file budget and starves the other flow files (Alamofire: the agent
|
|
1938
|
+
// names ~7 Session.swift methods — the build spine PLUS off-path
|
|
1939
|
+
// task/didCompleteTask — far past the whole response budget). Engage the
|
|
1940
|
+
// per-symbol view to keep the SPINE full and collapse the off-path named
|
|
1941
|
+
// methods to signatures. Only when there IS off-path content to shed —
|
|
1942
|
+
// otherwise the spine is irreducible (a sequential flow has no redundancy),
|
|
1943
|
+
// so leave it to the normal full render.
|
|
1944
|
+
const namedBodyChars = group.nodes
|
|
1945
|
+
.filter(n => CALLABLE_BODY.has(n.kind) && (flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id)))
|
|
1946
|
+
.reduce((s, n) => s + fileLines.slice(n.startLine - 1, n.endLine).join('\n').length, 0);
|
|
1947
|
+
const onSpineGodFile = hasSpineNode
|
|
1948
|
+
&& namedBodyChars > budget.maxCharsPerFile
|
|
1949
|
+
&& group.nodes.some(n => CALLABLE_BODY.has(n.kind) && flow.uniqueNamedNodeIds.has(n.id) && !flow.pathNodeIds.has(n.id));
|
|
1950
|
+
if (adaptiveExploreEnabled() && flow.pathNodeIds.size > 0
|
|
1951
|
+
&& (onSpineGodFile || (!hasSpineNode && isPolymorphicSibling(group.nodes) && !spared))) {
|
|
1952
|
+
const syms = group.nodes
|
|
1953
|
+
.filter(n => n.kind !== 'import' && n.kind !== 'export' && n.startLine > 0)
|
|
1954
|
+
.sort((a, b) => a.startLine - b.startLine);
|
|
1955
|
+
// Pass 1: choose which symbols get a FULL body, by priority, greedily within
|
|
1956
|
+
// a per-file body cap — so one huge family file can't body every named method
|
|
1957
|
+
// and crowd out the other flow files (Django's query.py). A symbol earns a
|
|
1958
|
+
// body if it's on-spine, or UNIQUELY named (`SQLCompiler.execute_sql`), or a
|
|
1959
|
+
// co-named method WHEN this file DEFINES the family supertype (so the base
|
|
1960
|
+
// `SQLCompiler.as_sql` body shows, but the 110 leaf `as_sql` overrides — and
|
|
1961
|
+
// OkHttp's 5 `intercept`s if the agent names `intercept` — stay signatures).
|
|
1962
|
+
const prio = (n) => !CALLABLE_BODY.has(n.kind) ? 99
|
|
1963
|
+
: flow.pathNodeIds.has(n.id) ? 0
|
|
1964
|
+
: flow.uniqueNamedNodeIds.has(n.id) ? 1
|
|
1965
|
+
: (fileDefinesSuper && flow.namedNodeIds.has(n.id)) ? 2 : 99;
|
|
1966
|
+
// One ~250-line WINDOW per file. syms are taken by priority (spine first,
|
|
1967
|
+
// then uniquely-named, then family-base), and the cap applies to ALL of
|
|
1968
|
+
// them — including the spine — so a big-spine god-file (tokio's worker.rs:
|
|
1969
|
+
// run→run_task→next_task→steal_work) can't eat the whole response and
|
|
1970
|
+
// starve the co-flow file (harness.rs's poll). The native agent windows
|
|
1971
|
+
// such a file too (~190 lines at a time), so this mimics, not truncates.
|
|
1972
|
+
// Always emit ≥1 (never an empty section).
|
|
1973
|
+
const bodyCap = budget.maxCharsPerFile * 1.5;
|
|
1974
|
+
const bodyIds = new Set();
|
|
1975
|
+
let bodyChars = 0;
|
|
1976
|
+
for (const n of syms.filter(n => prio(n) < 99 && n.endLine >= n.startLine).sort((a, b) => prio(a) - prio(b))) {
|
|
1977
|
+
const sz = fileLines.slice(n.startLine - 1, n.endLine).join('\n').length;
|
|
1978
|
+
if (bodyChars + sz > bodyCap && bodyIds.size > 0)
|
|
1979
|
+
continue;
|
|
1980
|
+
bodyIds.add(n.id);
|
|
1981
|
+
bodyChars += sz;
|
|
1982
|
+
}
|
|
1983
|
+
// Pass 2: render in line order — full body for chosen symbols, else the
|
|
1984
|
+
// signature line (capped, with a "+N more" tail so the structure map of a
|
|
1985
|
+
// god-file doesn't itself bloat the budget).
|
|
1986
|
+
const skel = [];
|
|
1987
|
+
let coveredUntil = 0; // skip symbols already inside an emitted body
|
|
1988
|
+
let sigCount = 0, sigDropped = 0;
|
|
1989
|
+
const SIG_MAX = Math.max(12, budget.maxSymbolsInFileHeader * 2);
|
|
1990
|
+
for (const n of syms) {
|
|
1991
|
+
if (n.startLine <= coveredUntil)
|
|
1992
|
+
continue;
|
|
1993
|
+
if (bodyIds.has(n.id)) {
|
|
1994
|
+
const end = n.endLine;
|
|
1995
|
+
const body = fileLines.slice(n.startLine - 1, end).join('\n');
|
|
1996
|
+
skel.push(exploreLineNumbersEnabled() ? numberSourceLines(body, n.startLine) : body);
|
|
1997
|
+
coveredUntil = end;
|
|
1998
|
+
}
|
|
1999
|
+
else {
|
|
2000
|
+
// Elide the body, emit the signature. node.startLine can point at a
|
|
2001
|
+
// decorator/annotation, so scan forward for the line that names the symbol.
|
|
2002
|
+
let lineNo = n.startLine;
|
|
2003
|
+
for (let k = 0; k < 4; k++) {
|
|
2004
|
+
if ((fileLines[n.startLine - 1 + k] || '').includes(n.name)) {
|
|
2005
|
+
lineNo = n.startLine + k;
|
|
2006
|
+
break;
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
if (lineNo <= coveredUntil)
|
|
2010
|
+
continue;
|
|
2011
|
+
if (sigCount >= SIG_MAX) {
|
|
2012
|
+
sigDropped++;
|
|
2013
|
+
continue;
|
|
2014
|
+
}
|
|
2015
|
+
const sig = (fileLines[lineNo - 1] || '').trim();
|
|
2016
|
+
if (sig) {
|
|
2017
|
+
skel.push(exploreLineNumbersEnabled() ? `${lineNo}\t${sig}` : sig);
|
|
2018
|
+
sigCount++;
|
|
2019
|
+
}
|
|
2020
|
+
}
|
|
2021
|
+
}
|
|
2022
|
+
if (sigDropped > 0)
|
|
2023
|
+
skel.push(`… +${sigDropped} more (signatures elided)`);
|
|
2024
|
+
if (skel.length > 0) {
|
|
2025
|
+
const names = [...new Set(group.nodes.filter(n => n.kind !== 'import' && n.kind !== 'export').map(n => n.name))]
|
|
2026
|
+
.slice(0, budget.maxSymbolsInFileHeader).join(', ');
|
|
2027
|
+
// Steer the agent to codegraph_explore for an elided body — NEVER to
|
|
2028
|
+
// Read. The old "Read for more" / "Read for a full body" tags invited
|
|
2029
|
+
// a Read of the very file just skeletonized; on a central, wanted file
|
|
2030
|
+
// (Session.swift, DataRequest.swift) that fired an over-investigation
|
|
2031
|
+
// spiral (the agent Read the skeletonized file, then kept digging).
|
|
2032
|
+
// CLAUDE.md: explore output must never tell the agent to Read.
|
|
2033
|
+
const tag = bodyIds.size > 0
|
|
2034
|
+
? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
|
|
2035
|
+
: 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
|
|
2036
|
+
lines.push(`#### ${filePath} — ${names} · ${tag}`, '', '```' + lang, skel.join('\n'), '```', '');
|
|
2037
|
+
totalChars += skel.join('\n').length + 120;
|
|
2038
|
+
filesIncluded++;
|
|
2039
|
+
continue;
|
|
2040
|
+
}
|
|
2041
|
+
}
|
|
2042
|
+
// Whole-file rule: if a relevant file is small enough to afford, return it
|
|
2043
|
+
// ENTIRELY instead of clustering. Clustering exists to tame god-files
|
|
2044
|
+
// (App.tsx ~13k lines); on a ~134-line component a cluster is a lossy
|
|
2045
|
+
// subset of a file the agent will just Read in full anyway — costing a
|
|
2046
|
+
// round-trip and a re-read every later turn. Reserve clustering for files
|
|
2164
2047
|
// too big to ship whole. Still bounded by the total maxOutputChars check.
|
|
2165
|
-
|
|
2166
|
-
|
|
2048
|
+
//
|
|
2049
|
+
// CENTRAL files (where the query's entry points live) get a larger — but
|
|
2050
|
+
// bounded — ceiling: they're the heart of the answer, the file(s) the agent
|
|
2051
|
+
// would Read whole, so a genuinely small one comes back whole rather than as
|
|
2052
|
+
// thin clusters. A LARGE central file (the 791-line org-user store) exceeds
|
|
2053
|
+
// the ceiling and falls through to sectioning/clustering below — full method
|
|
2054
|
+
// bodies + signatures — so we never dump (or overflow on) a whole god-file.
|
|
2055
|
+
const isCentralFile = centralFiles.has(filePath);
|
|
2056
|
+
// Central files get a slightly larger whole-file window than peripheral ones,
|
|
2057
|
+
// but a TIGHT one (~1.5× the per-file cap): the native read of a central file
|
|
2058
|
+
// is a ~150–250 line orientation window, NOT the whole file. A flat "whole
|
|
2059
|
+
// central file" both overflowed the inline cap AND starved the co-flow files
|
|
2060
|
+
// (worker.rs ate the budget, dropping harness.rs's poll). A larger central
|
|
2061
|
+
// file falls through to per-method windowing/clustering below.
|
|
2062
|
+
const WHOLE_FILE_MAX_LINES = isCentralFile ? 280 : 220;
|
|
2063
|
+
const WHOLE_FILE_MAX_CHARS = isCentralFile
|
|
2064
|
+
? Math.min(Math.max(0, budget.maxOutputChars - totalChars - 200), Math.round(budget.maxCharsPerFile * 1.5))
|
|
2065
|
+
: budget.maxCharsPerFile * 3;
|
|
2167
2066
|
if (fileLines.length <= WHOLE_FILE_MAX_LINES && fileContent.length <= WHOLE_FILE_MAX_CHARS) {
|
|
2168
2067
|
const body = fileContent.replace(/\n+$/, '');
|
|
2169
2068
|
let wholeSection = exploreLineNumbersEnabled() ? numberSourceLines(body, 1) : body;
|
|
@@ -2173,12 +2072,12 @@ class ToolHandler {
|
|
|
2173
2072
|
const headerNames = uniqSymbols.slice(0, budget.maxSymbolsInFileHeader);
|
|
2174
2073
|
const omitted = uniqSymbols.length - headerNames.length;
|
|
2175
2074
|
const wholeHeader = `#### ${filePath} — ${omitted > 0 ? `${headerNames.join(', ')}, +${omitted} more` : headerNames.join(', ')}`;
|
|
2176
|
-
if (totalChars + wholeSection.length + 200 > budget.maxOutputChars) {
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
wholeSection = wholeSection.slice(0, remaining) + '\n... (trimmed) ...';
|
|
2075
|
+
if (!fileNecessary && totalChars + wholeSection.length + 200 > budget.maxOutputChars) {
|
|
2076
|
+
// Don't slice a whole file mid-method: an incidental file that doesn't
|
|
2077
|
+
// fit is skipped; a necessary one (below) renders in full. Half a file
|
|
2078
|
+
// forces the Read this is meant to prevent.
|
|
2181
2079
|
anyFileTrimmed = true;
|
|
2080
|
+
continue;
|
|
2182
2081
|
}
|
|
2183
2082
|
lines.push(wholeHeader, '', '```' + lang, wholeSection, '```', '');
|
|
2184
2083
|
totalChars += wholeSection.length + 200;
|
|
@@ -2205,14 +2104,33 @@ class ToolHandler {
|
|
|
2205
2104
|
// Alamofire is the canonical case: the `Session` class spans ~1,400
|
|
2206
2105
|
// lines). We want the granular symbols inside, not the envelope.
|
|
2207
2106
|
const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
|
|
2208
|
-
|
|
2209
|
-
|
|
2107
|
+
// Cluster from this file's gathered nodes PLUS any callable the agent NAMED that
|
|
2108
|
+
// lives here. Explore's relevance gather can miss a named method def in a huge
|
|
2109
|
+
// non-sibling file — Django's query.py is 3,040 lines and `_fetch_all` (L2237)
|
|
2110
|
+
// was gathered only as call-reference edges, never as a def, so it formed no
|
|
2111
|
+
// cluster and the agent Read it back. Inject named defs directly and rank them
|
|
2112
|
+
// ABOVE connected/glue nodes (importance 9) so their cluster wins the per-file
|
|
2113
|
+
// budget — the agent explicitly asked for these symbols.
|
|
2114
|
+
const rangeNodes = new Map();
|
|
2115
|
+
for (const n of group.nodes)
|
|
2116
|
+
if (n.startLine > 0 && n.endLine > 0)
|
|
2117
|
+
rangeNodes.set(n.id, n);
|
|
2118
|
+
for (const id of flow.namedNodeIds) {
|
|
2119
|
+
if (rangeNodes.has(id))
|
|
2120
|
+
continue;
|
|
2121
|
+
const n = cg.getNode(id);
|
|
2122
|
+
if (n && n.filePath === filePath && n.startLine > 0 && n.endLine > 0)
|
|
2123
|
+
rangeNodes.set(id, n);
|
|
2124
|
+
}
|
|
2125
|
+
const ranges = [...rangeNodes.values()]
|
|
2210
2126
|
// Drop whole-file envelope nodes (containers covering >50% of the file).
|
|
2211
2127
|
.filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
|
|
2212
2128
|
.map(n => {
|
|
2213
2129
|
let importance = 1;
|
|
2214
2130
|
if (entryNodeIds.has(n.id))
|
|
2215
2131
|
importance = 10;
|
|
2132
|
+
else if (flow.namedNodeIds.has(n.id))
|
|
2133
|
+
importance = 9; // agent named it → keep its cluster
|
|
2216
2134
|
else if (glueNodeIds.has(n.id))
|
|
2217
2135
|
importance = 6; // bridging caller/callee of an entry
|
|
2218
2136
|
else if (connectedToEntry.has(n.id))
|
|
@@ -2313,6 +2231,13 @@ class ToolHandler {
|
|
|
2313
2231
|
return b.c.score - a.c.score;
|
|
2314
2232
|
return a.span - b.span;
|
|
2315
2233
|
});
|
|
2234
|
+
// Per-file budget is the SMALLER of the per-file cap and what's left of the
|
|
2235
|
+
// total output cap — so selection (which ranks by importance) keeps the
|
|
2236
|
+
// high-importance clusters and drops peripheral ones, instead of the
|
|
2237
|
+
// downstream source-order trim slicing off whatever comes last in the file.
|
|
2238
|
+
// That source-order slice is what cut Django's `_fetch_all` (L2237, importance
|
|
2239
|
+
// 9 — agent-named) when query.py was the last of four big files to be emitted.
|
|
2240
|
+
const fileBudget = Math.min(budget.maxCharsPerFile, Math.max(0, budget.maxOutputChars - totalChars - 200));
|
|
2316
2241
|
const chosenIndices = new Set();
|
|
2317
2242
|
let projectedChars = 0;
|
|
2318
2243
|
for (const rc of rankedClusters) {
|
|
@@ -2325,7 +2250,7 @@ class ToolHandler {
|
|
|
2325
2250
|
projectedChars += sectionLen;
|
|
2326
2251
|
continue;
|
|
2327
2252
|
}
|
|
2328
|
-
if (projectedChars + sectionLen >
|
|
2253
|
+
if (projectedChars + sectionLen > fileBudget)
|
|
2329
2254
|
continue;
|
|
2330
2255
|
chosenIndices.add(rc.idx);
|
|
2331
2256
|
projectedChars += sectionLen;
|
|
@@ -2333,7 +2258,6 @@ class ToolHandler {
|
|
|
2333
2258
|
// Emit chosen clusters in source order so the file reads top-to-bottom.
|
|
2334
2259
|
let fileSection = '';
|
|
2335
2260
|
const allSymbols = [];
|
|
2336
|
-
let fileTrimmed = false;
|
|
2337
2261
|
for (let i = 0; i < clusters.length; i++) {
|
|
2338
2262
|
if (!chosenIndices.has(i))
|
|
2339
2263
|
continue;
|
|
@@ -2344,13 +2268,12 @@ class ToolHandler {
|
|
|
2344
2268
|
fileSection += section;
|
|
2345
2269
|
allSymbols.push(...cluster.symbols);
|
|
2346
2270
|
}
|
|
2347
|
-
//
|
|
2348
|
-
//
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
if (chosenIndices.size < clusters.length || fileTrimmed) {
|
|
2271
|
+
// A chosen cluster is a COMPLETE method-range — we never cut through a body.
|
|
2272
|
+
// An oversize single cluster (a long monolithic function) renders in FULL:
|
|
2273
|
+
// half a method is useless (the agent just Reads the rest for the other half),
|
|
2274
|
+
// which is the very fallback explore exists to prevent. A pathological file is
|
|
2275
|
+
// bounded by the per-file cluster SELECTION above + the total hard ceiling.
|
|
2276
|
+
if (chosenIndices.size < clusters.length) {
|
|
2354
2277
|
anyFileTrimmed = true;
|
|
2355
2278
|
}
|
|
2356
2279
|
// Dedupe + cap the symbols list shown in the per-file header. Some
|
|
@@ -2371,22 +2294,22 @@ class ToolHandler {
|
|
|
2371
2294
|
? `${headerSymbols.join(', ')}, +${omittedCount} more`
|
|
2372
2295
|
: headerSymbols.join(', ');
|
|
2373
2296
|
const fileHeader = `#### ${filePath} — ${headerSuffix}`;
|
|
2374
|
-
//
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2297
|
+
// The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
|
|
2298
|
+
// the agent named (or that's on the flow spine) renders even when the
|
|
2299
|
+
// nominal total is used up — it's the answer, and the set is bounded by
|
|
2300
|
+
// maxFiles AND by true-spine/named-seeding having already trimmed each file
|
|
2301
|
+
// to its necessary content. A file that merely REFERENCES the flow
|
|
2302
|
+
// (Combine.swift name-drops request/task) is incidental → still capped, so
|
|
2303
|
+
// freed budget never leaks into noise. This is the last god-file layer:
|
|
2304
|
+
// build (Session, true-spined) + validators-exec (Request) + validate
|
|
2305
|
+
// (DataRequest/Validation) all render, instead of the cap dropping whichever
|
|
2306
|
+
// phase the file order happened to put last.
|
|
2307
|
+
if (!fileNecessary && totalChars + fileSection.length + 200 > budget.maxOutputChars) {
|
|
2308
|
+
// Incidental file that doesn't fit: SKIP it whole — never slice mid-method.
|
|
2309
|
+
// Keep scanning for necessary files (which bypass this cap and render in
|
|
2310
|
+
// full, bounded by the hard ceiling).
|
|
2388
2311
|
anyFileTrimmed = true;
|
|
2389
|
-
|
|
2312
|
+
continue;
|
|
2390
2313
|
}
|
|
2391
2314
|
lines.push(fileHeader);
|
|
2392
2315
|
lines.push('');
|
|
@@ -2443,17 +2366,26 @@ class ToolHandler {
|
|
|
2443
2366
|
// Stats unavailable — skip budget note
|
|
2444
2367
|
}
|
|
2445
2368
|
}
|
|
2446
|
-
//
|
|
2447
|
-
//
|
|
2448
|
-
//
|
|
2449
|
-
//
|
|
2450
|
-
//
|
|
2451
|
-
//
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2369
|
+
// Final ceiling — an ABSOLUTE inline cap, not a multiple of the budget. The
|
|
2370
|
+
// render loop renders necessary (named/spine) files even a bit past
|
|
2371
|
+
// maxOutputChars and caps only incidental ones, so this is the last safety.
|
|
2372
|
+
// It MUST stay under the host's inline tool-result limit (~25K chars): above
|
|
2373
|
+
// that the result is externalized to a file the agent Reads back (a 35K
|
|
2374
|
+
// vscode explore did exactly this in the n=4 A/B). So allow a little
|
|
2375
|
+
// necessary overflow above the 24K budget, but hard-stop at 25K — never into
|
|
2376
|
+
// externalize territory.
|
|
2377
|
+
const output = flow.text + lines.join('\n');
|
|
2378
|
+
const hardCeiling = Math.min(Math.round(budget.maxOutputChars * 1.5), 25000);
|
|
2379
|
+
if (output.length > hardCeiling) {
|
|
2380
|
+
// Cut at a FILE-SECTION boundary (the last `#### ` header before the
|
|
2381
|
+
// ceiling) so we drop whole trailing file-sections rather than slicing
|
|
2382
|
+
// through a method body — a half-rendered method just forces the Read this
|
|
2383
|
+
// tool exists to prevent. Fall back to a line boundary only if no section
|
|
2384
|
+
// header sits in the back half (degenerate single-giant-section case).
|
|
2385
|
+
const cut = output.slice(0, hardCeiling);
|
|
2386
|
+
const lastSection = cut.lastIndexOf('\n#### ');
|
|
2387
|
+
const boundary = lastSection > hardCeiling * 0.5 ? lastSection : cut.lastIndexOf('\n');
|
|
2388
|
+
const safe = boundary > 0 ? cut.slice(0, boundary) : cut;
|
|
2457
2389
|
return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
|
|
2458
2390
|
}
|
|
2459
2391
|
return this.textResult(output);
|
|
@@ -2468,29 +2400,110 @@ class ToolHandler {
|
|
|
2468
2400
|
const cg = this.getCodeGraph(args.projectPath);
|
|
2469
2401
|
// Default to false to minimize context usage
|
|
2470
2402
|
const includeCode = args.includeCode === true;
|
|
2471
|
-
const
|
|
2472
|
-
|
|
2403
|
+
const fileHint = typeof args.file === 'string' && args.file.trim() ? args.file.trim() : undefined;
|
|
2404
|
+
const lineHint = typeof args.line === 'number' && args.line > 0 ? args.line : undefined;
|
|
2405
|
+
let matches = this.findSymbolMatches(cg, symbol);
|
|
2406
|
+
if (matches.length === 0) {
|
|
2473
2407
|
return this.textResult(`Symbol "${symbol}" not found in the codebase`);
|
|
2474
2408
|
}
|
|
2409
|
+
// Disambiguate a heavily-overloaded name to a specific definition the caller
|
|
2410
|
+
// pinned by file/line (the `file:line` a trail or another tool showed it) —
|
|
2411
|
+
// so it can fetch e.g. `Harness::poll` at harness.rs:153 out of 50+ `poll`s
|
|
2412
|
+
// instead of Reading. file matches by path suffix/substring; line prefers the
|
|
2413
|
+
// def whose body contains it, else the nearest start. Only narrows (never
|
|
2414
|
+
// empties — if a hint matches nothing it's ignored).
|
|
2415
|
+
if (matches.length > 1 && (fileHint || lineHint !== undefined)) {
|
|
2416
|
+
const norm = (p) => p.replace(/\\/g, '/').toLowerCase();
|
|
2417
|
+
let narrowed = matches;
|
|
2418
|
+
if (fileHint) {
|
|
2419
|
+
const fh = norm(fileHint);
|
|
2420
|
+
const byFile = narrowed.filter((n) => norm(n.filePath).endsWith(fh) || norm(n.filePath).includes(fh));
|
|
2421
|
+
if (byFile.length > 0)
|
|
2422
|
+
narrowed = byFile;
|
|
2423
|
+
}
|
|
2424
|
+
if (lineHint !== undefined && narrowed.length > 1) {
|
|
2425
|
+
const containing = narrowed.filter((n) => n.startLine <= lineHint && (n.endLine ?? n.startLine) >= lineHint);
|
|
2426
|
+
narrowed = containing.length > 0
|
|
2427
|
+
? containing
|
|
2428
|
+
: [...narrowed].sort((a, b) => Math.abs(a.startLine - lineHint) - Math.abs(b.startLine - lineHint)).slice(0, 1);
|
|
2429
|
+
}
|
|
2430
|
+
if (narrowed.length > 0)
|
|
2431
|
+
matches = narrowed;
|
|
2432
|
+
}
|
|
2433
|
+
// Single definition — the common case.
|
|
2434
|
+
if (matches.length === 1) {
|
|
2435
|
+
return this.textResult(this.truncateOutput(await this.renderNodeSection(cg, matches[0], includeCode)));
|
|
2436
|
+
}
|
|
2437
|
+
// Multiple definitions share this name — overloads, or same-named methods on
|
|
2438
|
+
// different types (Alamofire `didCompleteTask`/`task`/`validate`, gin
|
|
2439
|
+
// `reset`). Returning ONE forces the agent to guess, and when it guesses
|
|
2440
|
+
// wrong it READS the file to find the right overload — the dominant
|
|
2441
|
+
// codegraph_node read cause on Swift/Go. So return them ALL: pack as many
|
|
2442
|
+
// FULL bodies as fit a char budget (the agent gets the one it needs in this
|
|
2443
|
+
// one call, no follow-up parameter to learn), and list any remainder by
|
|
2444
|
+
// file:line so a large overload set can't overflow the per-tool cap.
|
|
2445
|
+
const header = `**${matches.length} definitions named "${symbol}"**`;
|
|
2446
|
+
if (!includeCode) {
|
|
2447
|
+
const list = matches.map((n) => `- \`${n.name}\` (${n.kind}) — ${n.filePath}:${n.startLine}`);
|
|
2448
|
+
return this.textResult(this.truncateOutput([header, '', 'Re-query with `includeCode: true` to get every body in one call — no need to pick one first.', '', ...list].join('\n')));
|
|
2449
|
+
}
|
|
2450
|
+
const BODY_BUDGET = 12000; // leaves room under MAX_OUTPUT_LENGTH for the header + list
|
|
2451
|
+
// The CHAR budget is the real limiter — keep the count cap high so a set of
|
|
2452
|
+
// SHORT overloads (Alamofire's 10 `validate` variants, each a few lines) all
|
|
2453
|
+
// render in full rather than relegating the one the agent wanted to a
|
|
2454
|
+
// bodiless list. Only a set of many LARGE bodies hits the char budget first.
|
|
2455
|
+
const HARD_CAP = 16;
|
|
2456
|
+
const rendered = [];
|
|
2457
|
+
const listed = [];
|
|
2458
|
+
let used = 0;
|
|
2459
|
+
for (const n of matches) {
|
|
2460
|
+
if (rendered.length >= HARD_CAP) {
|
|
2461
|
+
listed.push(n);
|
|
2462
|
+
continue;
|
|
2463
|
+
}
|
|
2464
|
+
const section = await this.renderNodeSection(cg, n, true);
|
|
2465
|
+
// Always emit the first; emit the rest only while within the char budget.
|
|
2466
|
+
if (rendered.length === 0 || used + section.length <= BODY_BUDGET) {
|
|
2467
|
+
rendered.push(section);
|
|
2468
|
+
used += section.length;
|
|
2469
|
+
}
|
|
2470
|
+
else {
|
|
2471
|
+
listed.push(n);
|
|
2472
|
+
}
|
|
2473
|
+
}
|
|
2474
|
+
const out = [
|
|
2475
|
+
header,
|
|
2476
|
+
`Returning ${rendered.length} in full${listed.length ? `; ${listed.length} more listed below` : ''} — pick the one you need (no Read required).`,
|
|
2477
|
+
'',
|
|
2478
|
+
rendered.join('\n\n---\n\n'),
|
|
2479
|
+
];
|
|
2480
|
+
if (listed.length) {
|
|
2481
|
+
const LIST_CAP = 20;
|
|
2482
|
+
const shownList = listed.slice(0, LIST_CAP);
|
|
2483
|
+
out.push('', '### Other definitions', ...shownList.map((n) => `- \`${n.name}\` (${n.kind}) — ${n.filePath}:${n.startLine}`));
|
|
2484
|
+
if (listed.length > LIST_CAP)
|
|
2485
|
+
out.push(`- … +${listed.length - LIST_CAP} more`);
|
|
2486
|
+
out.push('', `> Need one of these in full? Call codegraph_node again with \`file\` (e.g. \`"${listed[0].filePath.split('/').pop()}"\`) or \`line\` — do NOT Read it.`);
|
|
2487
|
+
}
|
|
2488
|
+
return this.textResult(this.truncateOutput(out.join('\n')));
|
|
2489
|
+
}
|
|
2490
|
+
/** Render one symbol: details + (optional) body/outline + its caller/callee trail. */
|
|
2491
|
+
async renderNodeSection(cg, node, includeCode) {
|
|
2475
2492
|
let code = null;
|
|
2476
2493
|
let outline = null;
|
|
2477
2494
|
if (includeCode) {
|
|
2478
2495
|
// For container symbols (class/interface/struct/…), the full body is the
|
|
2479
|
-
// sum of every method body — a wall of source
|
|
2480
|
-
//
|
|
2481
|
-
//
|
|
2482
|
-
|
|
2483
|
-
|
|
2484
|
-
if (CONTAINER_NODE_KINDS.has(match.node.kind)) {
|
|
2485
|
-
outline = this.buildContainerOutline(cg, match.node);
|
|
2496
|
+
// sum of every method body — a wall of source. Return a structural outline
|
|
2497
|
+
// (members + signatures + line numbers) instead; leaf symbols return their
|
|
2498
|
+
// full body.
|
|
2499
|
+
if (CONTAINER_NODE_KINDS.has(node.kind)) {
|
|
2500
|
+
outline = this.buildContainerOutline(cg, node);
|
|
2486
2501
|
}
|
|
2487
2502
|
if (!outline) {
|
|
2488
|
-
code = await cg.getCode(
|
|
2503
|
+
code = await cg.getCode(node.id);
|
|
2489
2504
|
}
|
|
2490
2505
|
}
|
|
2491
|
-
|
|
2492
|
-
const formatted = this.formatNodeDetails(match.node, code, outline) + trail + match.note;
|
|
2493
|
-
return this.textResult(this.truncateOutput(formatted));
|
|
2506
|
+
return this.formatNodeDetails(node, code, outline) + this.formatTrail(cg, node);
|
|
2494
2507
|
}
|
|
2495
2508
|
/**
|
|
2496
2509
|
* Build the "trail" for a symbol: its direct callees (what it calls) and
|
|
@@ -2830,51 +2843,55 @@ class ToolHandler {
|
|
|
2830
2843
|
const segments = node.filePath.split('/').filter((s) => s.length > 0);
|
|
2831
2844
|
return containerHints.every((hint) => segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint));
|
|
2832
2845
|
}
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2846
|
+
/**
|
|
2847
|
+
* Find ALL definitions matching a name, ranked, so codegraph_node can return
|
|
2848
|
+
* every overload instead of guessing one (the wrong guess → a Read). Keepers
|
|
2849
|
+
* rank before generated stubs (.pb.go etc.); stable within a group preserves
|
|
2850
|
+
* FTS order. Returns [] when nothing matches; a qualified lookup that finds no
|
|
2851
|
+
* exact match returns [] rather than a misleading fuzzy file hit (#173); a
|
|
2852
|
+
* bare name with no exact match falls back to the single top fuzzy result.
|
|
2853
|
+
*/
|
|
2854
|
+
findSymbolMatches(cg, symbol) {
|
|
2837
2855
|
const isQualified = /[.\/]|::/.test(symbol);
|
|
2838
|
-
|
|
2856
|
+
// For a bare name, enumerate EVERY exact-name definition via the direct index
|
|
2857
|
+
// (not FTS, which caps + ranks): tokio's `poll` has 50+ defs and the one the
|
|
2858
|
+
// caller wants (`Harness::poll` at harness.rs:153) ranks below any search cut,
|
|
2859
|
+
// so it could be neither rendered nor pinned by the file/line disambiguator —
|
|
2860
|
+
// and the agent Read it. With the full set, the multi-overload render + the
|
|
2861
|
+
// file/line filter can both reach it.
|
|
2862
|
+
if (!isQualified) {
|
|
2863
|
+
const exact = cg.getNodesByName(symbol);
|
|
2864
|
+
if (exact.length > 0) {
|
|
2865
|
+
return [...exact].sort((a, b) => ((0, generated_detection_1.isGeneratedFile)(a.filePath) ? 1 : 0) - ((0, generated_detection_1.isGeneratedFile)(b.filePath) ? 1 : 0));
|
|
2866
|
+
}
|
|
2867
|
+
// No exact match — use the single top fuzzy result (e.g. a file basename).
|
|
2868
|
+
const fuzzy = cg.searchNodes(symbol, { limit: 10 });
|
|
2869
|
+
return fuzzy[0] ? [fuzzy[0].node] : [];
|
|
2870
|
+
}
|
|
2871
|
+
// Qualified lookup (`Session.request`, `stage_apply::run`): FTS + matchesSymbol.
|
|
2872
|
+
const limit = 50;
|
|
2839
2873
|
let results = cg.searchNodes(symbol, { limit });
|
|
2840
|
-
// FTS strips colons
|
|
2841
|
-
//
|
|
2842
|
-
//
|
|
2874
|
+
// FTS strips colons, so `stage_apply::run` searches the literal
|
|
2875
|
+
// `stage_applyrun` and finds nothing. Re-search by the bare last part and
|
|
2876
|
+
// let `matchesSymbol` filter by qualifier.
|
|
2843
2877
|
if (isQualified && results.length === 0) {
|
|
2844
2878
|
const tail = lastQualifierPart(symbol);
|
|
2845
2879
|
if (tail && tail !== symbol)
|
|
2846
2880
|
results = cg.searchNodes(tail, { limit });
|
|
2847
2881
|
}
|
|
2848
|
-
if (results.length === 0
|
|
2849
|
-
return
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
|
|
2861
|
-
const aGen = (0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0;
|
|
2862
|
-
const bGen = (0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0;
|
|
2863
|
-
return aGen - bGen;
|
|
2864
|
-
});
|
|
2865
|
-
// Multiple exact matches - pick first, note the others
|
|
2866
|
-
const picked = ranked[0].node;
|
|
2867
|
-
const others = ranked.slice(1).map(r => `${r.node.name} (${r.node.kind}) at ${r.node.filePath}:${r.node.startLine}`);
|
|
2868
|
-
const note = `\n\n> **Note:** ${ranked.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`;
|
|
2869
|
-
return { node: picked, note };
|
|
2870
|
-
}
|
|
2871
|
-
// No exact match. For qualified lookups, don't silently fall back
|
|
2872
|
-
// to a fuzzy result — the user typed a specific qualifier, and
|
|
2873
|
-
// resolving `stage_apply::nonexistent_fn` to the unrelated
|
|
2874
|
-
// `stage_apply.rs` file would be actively misleading (#173).
|
|
2875
|
-
if (isQualified)
|
|
2876
|
-
return null;
|
|
2877
|
-
return { node: results[0].node, note: '' };
|
|
2882
|
+
if (results.length === 0)
|
|
2883
|
+
return [];
|
|
2884
|
+
const exactMatches = results.filter((r) => this.matchesSymbol(r.node, symbol));
|
|
2885
|
+
if (exactMatches.length === 0) {
|
|
2886
|
+
// No exact match — a qualified lookup must not fall back to a fuzzy file
|
|
2887
|
+
// hit (#173); a bare name may use the single top fuzzy result.
|
|
2888
|
+
return isQualified ? [] : results[0] ? [results[0].node] : [];
|
|
2889
|
+
}
|
|
2890
|
+
// Down-rank generated files (.pb.go, .pulsar.go, _grpc.pb.go, …) so a flow
|
|
2891
|
+
// query prefers the keeper implementation over the protobuf-generated stub.
|
|
2892
|
+
return [...exactMatches]
|
|
2893
|
+
.sort((a, b) => ((0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0) - ((0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0))
|
|
2894
|
+
.map((r) => r.node);
|
|
2878
2895
|
}
|
|
2879
2896
|
/**
|
|
2880
2897
|
* Find ALL symbols matching a name. Used by callers/callees/impact to aggregate
|
|
@@ -3016,9 +3033,6 @@ class ToolHandler {
|
|
|
3016
3033
|
}
|
|
3017
3034
|
return lines.join('\n');
|
|
3018
3035
|
}
|
|
3019
|
-
formatTaskContext(context) {
|
|
3020
|
-
return context.summary || 'No context found';
|
|
3021
|
-
}
|
|
3022
3036
|
textResult(text) {
|
|
3023
3037
|
return {
|
|
3024
3038
|
content: [{ type: 'text', text }],
|