@colbymchenry/codegraph-darwin-x64 0.9.6 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/dist/bin/codegraph.js +28 -45
- package/lib/dist/bin/codegraph.js.map +1 -1
- package/lib/dist/context/formatter.d.ts.map +1 -1
- package/lib/dist/context/formatter.js +25 -6
- package/lib/dist/context/formatter.js.map +1 -1
- package/lib/dist/context/index.d.ts.map +1 -1
- package/lib/dist/context/index.js +31 -0
- package/lib/dist/context/index.js.map +1 -1
- package/lib/dist/db/queries.d.ts +74 -0
- package/lib/dist/db/queries.d.ts.map +1 -1
- package/lib/dist/db/queries.js +182 -0
- package/lib/dist/db/queries.js.map +1 -1
- package/lib/dist/extraction/generated-detection.d.ts +30 -0
- package/lib/dist/extraction/generated-detection.d.ts.map +1 -0
- package/lib/dist/extraction/generated-detection.js +80 -0
- package/lib/dist/extraction/generated-detection.js.map +1 -0
- package/lib/dist/extraction/grammars.d.ts +10 -0
- package/lib/dist/extraction/grammars.d.ts.map +1 -1
- package/lib/dist/extraction/grammars.js +13 -0
- package/lib/dist/extraction/grammars.js.map +1 -1
- package/lib/dist/extraction/index.d.ts.map +1 -1
- package/lib/dist/extraction/index.js +21 -6
- package/lib/dist/extraction/index.js.map +1 -1
- package/lib/dist/extraction/languages/java.d.ts.map +1 -1
- package/lib/dist/extraction/languages/java.js +6 -0
- package/lib/dist/extraction/languages/java.js.map +1 -1
- package/lib/dist/extraction/languages/kotlin.d.ts.map +1 -1
- package/lib/dist/extraction/languages/kotlin.js +6 -0
- package/lib/dist/extraction/languages/kotlin.js.map +1 -1
- package/lib/dist/extraction/tree-sitter-types.d.ts +10 -0
- package/lib/dist/extraction/tree-sitter-types.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.d.ts +25 -0
- package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/lib/dist/extraction/tree-sitter.js +125 -1
- package/lib/dist/extraction/tree-sitter.js.map +1 -1
- package/lib/dist/extraction/wasm-runtime-flags.d.ts.map +1 -1
- package/lib/dist/extraction/wasm-runtime-flags.js +1 -0
- package/lib/dist/extraction/wasm-runtime-flags.js.map +1 -1
- package/lib/dist/index.d.ts +33 -1
- package/lib/dist/index.d.ts.map +1 -1
- package/lib/dist/index.js +37 -1
- package/lib/dist/index.js.map +1 -1
- package/lib/dist/installer/config-writer.d.ts +7 -8
- package/lib/dist/installer/config-writer.d.ts.map +1 -1
- package/lib/dist/installer/config-writer.js +7 -27
- package/lib/dist/installer/config-writer.js.map +1 -1
- package/lib/dist/installer/index.d.ts +2 -19
- package/lib/dist/installer/index.d.ts.map +1 -1
- package/lib/dist/installer/index.js +5 -36
- package/lib/dist/installer/index.js.map +1 -1
- package/lib/dist/installer/instructions-template.d.ts +11 -21
- package/lib/dist/installer/instructions-template.d.ts.map +1 -1
- package/lib/dist/installer/instructions-template.js +12 -56
- package/lib/dist/installer/instructions-template.js.map +1 -1
- package/lib/dist/installer/targets/antigravity.d.ts.map +1 -1
- package/lib/dist/installer/targets/antigravity.js +1 -0
- package/lib/dist/installer/targets/antigravity.js.map +1 -1
- package/lib/dist/installer/targets/claude.d.ts +10 -1
- package/lib/dist/installer/targets/claude.d.ts.map +1 -1
- package/lib/dist/installer/targets/claude.js +25 -40
- package/lib/dist/installer/targets/claude.js.map +1 -1
- package/lib/dist/installer/targets/codex.d.ts.map +1 -1
- package/lib/dist/installer/targets/codex.js +15 -13
- package/lib/dist/installer/targets/codex.js.map +1 -1
- package/lib/dist/installer/targets/cursor.d.ts.map +1 -1
- package/lib/dist/installer/targets/cursor.js +9 -38
- package/lib/dist/installer/targets/cursor.js.map +1 -1
- package/lib/dist/installer/targets/gemini.d.ts.map +1 -1
- package/lib/dist/installer/targets/gemini.js +15 -13
- package/lib/dist/installer/targets/gemini.js.map +1 -1
- package/lib/dist/installer/targets/kiro.d.ts.map +1 -1
- package/lib/dist/installer/targets/kiro.js +9 -27
- package/lib/dist/installer/targets/kiro.js.map +1 -1
- package/lib/dist/installer/targets/opencode.d.ts.map +1 -1
- package/lib/dist/installer/targets/opencode.js +15 -13
- package/lib/dist/installer/targets/opencode.js.map +1 -1
- package/lib/dist/installer/targets/types.d.ts +0 -15
- package/lib/dist/installer/targets/types.d.ts.map +1 -1
- package/lib/dist/mcp/engine.d.ts +6 -1
- package/lib/dist/mcp/engine.d.ts.map +1 -1
- package/lib/dist/mcp/engine.js +21 -42
- package/lib/dist/mcp/engine.js.map +1 -1
- package/lib/dist/mcp/index.d.ts +7 -4
- package/lib/dist/mcp/index.d.ts.map +1 -1
- package/lib/dist/mcp/index.js +46 -39
- package/lib/dist/mcp/index.js.map +1 -1
- package/lib/dist/mcp/proxy.d.ts +35 -0
- package/lib/dist/mcp/proxy.d.ts.map +1 -1
- package/lib/dist/mcp/proxy.js +223 -0
- package/lib/dist/mcp/proxy.js.map +1 -1
- package/lib/dist/mcp/server-instructions.d.ts +1 -1
- package/lib/dist/mcp/server-instructions.d.ts.map +1 -1
- package/lib/dist/mcp/server-instructions.js +2 -0
- package/lib/dist/mcp/server-instructions.js.map +1 -1
- package/lib/dist/mcp/session.d.ts +10 -0
- package/lib/dist/mcp/session.d.ts.map +1 -1
- package/lib/dist/mcp/session.js +7 -5
- package/lib/dist/mcp/session.js.map +1 -1
- package/lib/dist/mcp/tools.d.ts +39 -1
- package/lib/dist/mcp/tools.d.ts.map +1 -1
- package/lib/dist/mcp/tools.js +968 -96
- package/lib/dist/mcp/tools.js.map +1 -1
- package/lib/dist/resolution/callback-synthesizer.d.ts +2 -2
- package/lib/dist/resolution/callback-synthesizer.d.ts.map +1 -1
- package/lib/dist/resolution/callback-synthesizer.js +395 -29
- package/lib/dist/resolution/callback-synthesizer.js.map +1 -1
- package/lib/dist/resolution/import-resolver.d.ts +10 -0
- package/lib/dist/resolution/import-resolver.d.ts.map +1 -1
- package/lib/dist/resolution/import-resolver.js +34 -0
- package/lib/dist/resolution/import-resolver.js.map +1 -1
- package/lib/dist/resolution/index.d.ts.map +1 -1
- package/lib/dist/resolution/index.js +15 -0
- package/lib/dist/resolution/index.js.map +1 -1
- package/lib/dist/sync/git-hooks.d.ts.map +1 -1
- package/lib/dist/sync/git-hooks.js +2 -0
- package/lib/dist/sync/git-hooks.js.map +1 -1
- package/lib/dist/sync/worktree.d.ts.map +1 -1
- package/lib/dist/sync/worktree.js +1 -0
- package/lib/dist/sync/worktree.js.map +1 -1
- package/lib/node_modules/.package-lock.json +1 -1
- package/lib/package.json +1 -1
- package/package.json +1 -1
- package/lib/dist/installer/claude-md-template.d.ts +0 -14
- package/lib/dist/installer/claude-md-template.d.ts.map +0 -1
- package/lib/dist/installer/claude-md-template.js +0 -21
- package/lib/dist/installer/claude-md-template.js.map +0 -1
package/lib/dist/mcp/tools.js
CHANGED
|
@@ -43,12 +43,21 @@ exports.getExploreBudget = getExploreBudget;
|
|
|
43
43
|
exports.getExploreOutputBudget = getExploreOutputBudget;
|
|
44
44
|
exports.formatStaleBanner = formatStaleBanner;
|
|
45
45
|
exports.formatStaleFooter = formatStaleFooter;
|
|
46
|
-
|
|
46
|
+
exports.getStaticTools = getStaticTools;
|
|
47
|
+
const directory_1 = require("../directory");
|
|
48
|
+
// Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
|
|
49
|
+
// helper in engine.ts. ToolHandler must load to answer tools/list (static
|
|
50
|
+
// schemas), but it must NOT drag in sqlite/query layers before the daemon binds;
|
|
51
|
+
// CodeGraph is pulled in only when a tool actually opens a project. require() is
|
|
52
|
+
// sync + cached (CommonJS build).
|
|
53
|
+
const loadCodeGraph = () => require('../index').default;
|
|
47
54
|
const worktree_1 = require("../sync/worktree");
|
|
48
55
|
const crypto_1 = require("crypto");
|
|
49
56
|
const fs_1 = require("fs");
|
|
50
57
|
const utils_1 = require("../utils");
|
|
58
|
+
const generated_detection_1 = require("../extraction/generated-detection");
|
|
51
59
|
const os_1 = require("os");
|
|
60
|
+
const pathModule = __importStar(require("path"));
|
|
52
61
|
const path_1 = require("path");
|
|
53
62
|
/** Maximum output length to prevent context bloat (characters) */
|
|
54
63
|
const MAX_OUTPUT_LENGTH = 15000;
|
|
@@ -105,18 +114,40 @@ function getExploreBudget(fileCount) {
|
|
|
105
114
|
return 5;
|
|
106
115
|
}
|
|
107
116
|
function getExploreOutputBudget(fileCount) {
|
|
117
|
+
if (fileCount < 150) {
|
|
118
|
+
return {
|
|
119
|
+
// ITER3: revert iter2's aggressive body shrink (forced Read fallback —
|
|
120
|
+
// the per-file 2.5K cap pushed the agent to Read instead of node).
|
|
121
|
+
// Back to the iter1 shape (13K/4/3.8K) but keep the test-file
|
|
122
|
+
// hard-exclude. The cost lever for this tier lives in handleContext
|
|
123
|
+
// (steering the agent to stop after 1-2 calls), not in this budget.
|
|
124
|
+
maxOutputChars: 13000,
|
|
125
|
+
defaultMaxFiles: 4,
|
|
126
|
+
maxCharsPerFile: 3800,
|
|
127
|
+
gapThreshold: 7,
|
|
128
|
+
maxSymbolsInFileHeader: 5,
|
|
129
|
+
maxEdgesPerRelationshipKind: 4,
|
|
130
|
+
includeRelationships: false,
|
|
131
|
+
includeAdditionalFiles: false,
|
|
132
|
+
includeCompletenessSignal: false,
|
|
133
|
+
includeBudgetNote: false,
|
|
134
|
+
excludeLowValueFiles: true,
|
|
135
|
+
};
|
|
136
|
+
}
|
|
108
137
|
if (fileCount < 500) {
|
|
109
138
|
return {
|
|
139
|
+
// ITER3: same revert/keep-filter pattern as <150.
|
|
110
140
|
maxOutputChars: 18000,
|
|
111
141
|
defaultMaxFiles: 5,
|
|
112
142
|
maxCharsPerFile: 3800,
|
|
113
143
|
gapThreshold: 8,
|
|
114
144
|
maxSymbolsInFileHeader: 6,
|
|
115
145
|
maxEdgesPerRelationshipKind: 6,
|
|
116
|
-
includeRelationships:
|
|
146
|
+
includeRelationships: false,
|
|
117
147
|
includeAdditionalFiles: false,
|
|
118
148
|
includeCompletenessSignal: false,
|
|
119
149
|
includeBudgetNote: false,
|
|
150
|
+
excludeLowValueFiles: true,
|
|
120
151
|
};
|
|
121
152
|
}
|
|
122
153
|
if (fileCount < 5000) {
|
|
@@ -136,6 +167,7 @@ function getExploreOutputBudget(fileCount) {
|
|
|
136
167
|
includeAdditionalFiles: true,
|
|
137
168
|
includeCompletenessSignal: true,
|
|
138
169
|
includeBudgetNote: true,
|
|
170
|
+
excludeLowValueFiles: false,
|
|
139
171
|
};
|
|
140
172
|
}
|
|
141
173
|
if (fileCount < 15000) {
|
|
@@ -150,6 +182,7 @@ function getExploreOutputBudget(fileCount) {
|
|
|
150
182
|
includeAdditionalFiles: true,
|
|
151
183
|
includeCompletenessSignal: true,
|
|
152
184
|
includeBudgetNote: true,
|
|
185
|
+
excludeLowValueFiles: false,
|
|
153
186
|
};
|
|
154
187
|
}
|
|
155
188
|
return {
|
|
@@ -163,6 +196,7 @@ function getExploreOutputBudget(fileCount) {
|
|
|
163
196
|
includeAdditionalFiles: true,
|
|
164
197
|
includeCompletenessSignal: true,
|
|
165
198
|
includeBudgetNote: true,
|
|
199
|
+
excludeLowValueFiles: false,
|
|
166
200
|
};
|
|
167
201
|
}
|
|
168
202
|
/**
|
|
@@ -179,6 +213,21 @@ function getExploreOutputBudget(fileCount) {
|
|
|
179
213
|
function exploreLineNumbersEnabled() {
|
|
180
214
|
return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0';
|
|
181
215
|
}
|
|
216
|
+
/**
 * Adaptive explore sizing (default ON). `codegraph_explore` skeletonizes OFF-SPINE
 * polymorphic-sibling files — a file whose class is one of ≥3 interchangeable
 * implementations of a shared interface (e.g. OkHttp's `: Interceptor` classes) —
 * to class + member signatures (bodies elided), keeping the on-spine exemplar full.
 * This sizes the response to the answer instead of the budget cap on sibling-heavy
 * flows (OkHttp interceptor-chain explore 28.5k→16.6k, ~28% cheaper than native
 * search, reads flat). It is PROVABLY INERT elsewhere: distinct pipeline steps (no
 * ≥3-implementer supertype, e.g. Excalidraw's `renderStaticScene`) and on-spine
 * files keep full source — output is byte-identical to shipped on excalidraw /
 * tokio / django / vscode / gin.
 *
 * Set `CODEGRAPH_ADAPTIVE_EXPLORE` to `0` or `false` to disable. The value is
 * trimmed and compared case-insensitively, so `FALSE`, `False` and ` 0 ` disable
 * the feature as well. An unset or empty variable, and any other value, leaves
 * it enabled.
 *
 * @returns {boolean} `true` unless the environment variable opts out.
 */
function adaptiveExploreEnabled() {
    const raw = process.env.CODEGRAPH_ADAPTIVE_EXPLORE;
    if (raw === undefined) {
        return true;
    }
    // Normalize before comparing: environment values are typed by hand, so a
    // different letter case or stray whitespace must not silently keep the
    // feature switched on.
    const flag = raw.trim().toLowerCase();
    return flag !== '0' && flag !== 'false';
}
|
|
182
231
|
/**
|
|
183
232
|
* Prefix each line of a source slice with its 1-based line number, matching
|
|
184
233
|
* the Read tool's `cat -n` convention (number + tab) so the agent treats it
|
|
@@ -323,7 +372,7 @@ exports.tools = [
|
|
|
323
372
|
},
|
|
324
373
|
{
|
|
325
374
|
name: 'codegraph_context',
|
|
326
|
-
description: 'PRIMARY TOOL — call
|
|
375
|
+
description: 'PRIMARY TOOL — call FIRST for any "how does X work"/architecture/bug question. Returns entry points + related symbols + key code in one call; usually answers without further search/Read/Grep. Provides CODE context, not product requirements.',
|
|
327
376
|
inputSchema: {
|
|
328
377
|
type: 'object',
|
|
329
378
|
properties: {
|
|
@@ -348,7 +397,7 @@ exports.tools = [
|
|
|
348
397
|
},
|
|
349
398
|
{
|
|
350
399
|
name: 'codegraph_callers',
|
|
351
|
-
description: '
|
|
400
|
+
description: 'List functions that call <symbol>. For deep flow use codegraph_trace.',
|
|
352
401
|
inputSchema: {
|
|
353
402
|
type: 'object',
|
|
354
403
|
properties: {
|
|
@@ -368,7 +417,7 @@ exports.tools = [
|
|
|
368
417
|
},
|
|
369
418
|
{
|
|
370
419
|
name: 'codegraph_callees',
|
|
371
|
-
description: '
|
|
420
|
+
description: 'List functions that <symbol> calls. For deep flow use codegraph_trace.',
|
|
372
421
|
inputSchema: {
|
|
373
422
|
type: 'object',
|
|
374
423
|
properties: {
|
|
@@ -388,7 +437,7 @@ exports.tools = [
|
|
|
388
437
|
},
|
|
389
438
|
{
|
|
390
439
|
name: 'codegraph_impact',
|
|
391
|
-
description: '
|
|
440
|
+
description: 'List symbols affected by changing <symbol>. Use before a refactor.',
|
|
392
441
|
inputSchema: {
|
|
393
442
|
type: 'object',
|
|
394
443
|
properties: {
|
|
@@ -408,7 +457,7 @@ exports.tools = [
|
|
|
408
457
|
},
|
|
409
458
|
{
|
|
410
459
|
name: 'codegraph_node',
|
|
411
|
-
description: '
|
|
460
|
+
description: 'One symbol\'s location, signature, callers/callees trail. includeCode=true returns the verbatim body. Use codegraph_trace for full paths instead of chaining nodes.',
|
|
412
461
|
inputSchema: {
|
|
413
462
|
type: 'object',
|
|
414
463
|
properties: {
|
|
@@ -428,7 +477,7 @@ exports.tools = [
|
|
|
428
477
|
},
|
|
429
478
|
{
|
|
430
479
|
name: 'codegraph_explore',
|
|
431
|
-
description: '
|
|
480
|
+
description: 'Source of SEVERAL related symbols grouped by file, in one capped call. Query is a bag of symbol/file names (not a question). Returned source is verbatim Read-equivalent — do not re-open shown files. Prefer over chained codegraph_node.',
|
|
432
481
|
inputSchema: {
|
|
433
482
|
type: 'object',
|
|
434
483
|
properties: {
|
|
@@ -448,7 +497,7 @@ exports.tools = [
|
|
|
448
497
|
},
|
|
449
498
|
{
|
|
450
499
|
name: 'codegraph_status',
|
|
451
|
-
description: '
|
|
500
|
+
description: 'Index health check (files / nodes / edges). Skip unless debugging.',
|
|
452
501
|
inputSchema: {
|
|
453
502
|
type: 'object',
|
|
454
503
|
properties: {
|
|
@@ -458,7 +507,7 @@ exports.tools = [
|
|
|
458
507
|
},
|
|
459
508
|
{
|
|
460
509
|
name: 'codegraph_files',
|
|
461
|
-
description: '
|
|
510
|
+
description: 'Indexed file tree with language + symbol counts. Faster than Glob for project layout.',
|
|
462
511
|
inputSchema: {
|
|
463
512
|
type: 'object',
|
|
464
513
|
properties: {
|
|
@@ -491,7 +540,7 @@ exports.tools = [
|
|
|
491
540
|
},
|
|
492
541
|
{
|
|
493
542
|
name: 'codegraph_trace',
|
|
494
|
-
description: '
|
|
543
|
+
description: 'Call path between two symbols — "how does <from> reach <to>?" Returns the chain with each hop\'s body inlined plus the destination\'s callees, in ONE call. Ideal for flow questions (update→render, request→handler, QuerySet→SQL). If no static path exists the chain broke at dynamic dispatch — the failure response inlines both endpoints + their TO-file siblings.',
|
|
495
544
|
inputSchema: {
|
|
496
545
|
type: 'object',
|
|
497
546
|
properties: {
|
|
@@ -509,6 +558,19 @@ exports.tools = [
|
|
|
509
558
|
},
|
|
510
559
|
},
|
|
511
560
|
];
|
|
561
|
+
/**
 * Static, engine-free view of the tool definitions: what the proxy can answer
 * `tools/list` with before any project has been opened. Per the original note,
 * this mirrors `ToolHandler.getTools()` in its no-CodeGraph case — only the
 * dynamic per-repo budget note in a description needs a loaded `cg`; the
 * schemas themselves are static.
 *
 * The optional allowlist comes from the `CODEGRAPH_MCP_TOOLS` environment
 * variable: a comma-separated list of tool names, each accepted with or
 * without the `codegraph_` prefix.
 *
 * @returns {Array} `exports.tools` unchanged when the variable is unset, blank,
 *   or yields no usable names; otherwise the subset whose names are allowlisted.
 */
function getStaticTools() {
    const PREFIX = /^codegraph_/;
    const configured = process.env.CODEGRAPH_MCP_TOOLS;
    if (!configured || !configured.trim()) {
        return exports.tools;
    }
    // Collect the short (prefix-less) names, ignoring empty entries such as
    // the ones produced by a trailing or doubled comma.
    const allowed = new Set();
    for (const entry of configured.split(',')) {
        const shortName = entry.trim().replace(PREFIX, '');
        if (shortName) {
            allowed.add(shortName);
        }
    }
    if (allowed.size === 0) {
        return exports.tools;
    }
    return exports.tools.filter((tool) => allowed.has(tool.name.replace(PREFIX, '')));
}
|
|
512
574
|
/**
|
|
513
575
|
* Tool handler that executes tools against a CodeGraph instance
|
|
514
576
|
*
|
|
@@ -528,6 +590,14 @@ class ToolHandler {
|
|
|
528
590
|
// once and every later tool call reuses the result — never shelling out to
|
|
529
591
|
// git on the hot path. `undefined` = not computed yet; `null` = no mismatch.
|
|
530
592
|
worktreeMismatchCache = new Map();
|
|
593
|
+
// Gate that the MCP engine pokes after `cg.open()` so the first tool call
|
|
594
|
+
// blocks on the post-open filesystem reconcile (catch-up sync). Without
|
|
595
|
+
// this, a tool call that races past `catchUpSync()` serves rows for files
|
|
596
|
+
// that were deleted (or edited) while no MCP server was running — and the
|
|
597
|
+
// per-file staleness banner can't help, because `getPendingFiles()` is
|
|
598
|
+
// populated by the watcher, not by catch-up. Cleared on first await so
|
|
599
|
+
// subsequent calls don't pay any cost.
|
|
600
|
+
catchUpGate = null;
|
|
531
601
|
constructor(cg) {
|
|
532
602
|
this.cg = cg;
|
|
533
603
|
}
|
|
@@ -537,6 +607,16 @@ class ToolHandler {
|
|
|
537
607
|
setDefaultCodeGraph(cg) {
|
|
538
608
|
this.cg = cg;
|
|
539
609
|
}
|
|
610
|
+
/**
|
|
611
|
+
* Engine-only: register the catch-up sync promise so the next `execute()`
|
|
612
|
+
* call awaits it before serving. The handler swallows rejections (the
|
|
613
|
+
* engine logs them) so a sync failure never propagates as a tool error;
|
|
614
|
+
* we still want to serve a best-effort result over the same potentially-
|
|
615
|
+
* stale data, which is what would have happened without the gate.
|
|
616
|
+
*/
|
|
617
|
+
setCatchUpGate(p) {
|
|
618
|
+
this.catchUpGate = p;
|
|
619
|
+
}
|
|
540
620
|
/**
|
|
541
621
|
* Record the directory the server tried to resolve the default project from.
|
|
542
622
|
* Used only to make the "no default project" error actionable.
|
|
@@ -579,7 +659,7 @@ class ToolHandler {
|
|
|
579
659
|
*/
|
|
580
660
|
getTools() {
|
|
581
661
|
const allow = this.toolAllowlist();
|
|
582
|
-
|
|
662
|
+
let visible = allow
|
|
583
663
|
? exports.tools.filter(t => allow.has(t.name.replace(/^codegraph_/, '')))
|
|
584
664
|
: exports.tools;
|
|
585
665
|
if (!this.cg)
|
|
@@ -587,6 +667,39 @@ class ToolHandler {
|
|
|
587
667
|
try {
|
|
588
668
|
const stats = this.cg.getStats();
|
|
589
669
|
const budget = getExploreBudget(stats.fileCount);
|
|
670
|
+
// Tiny-repo tool gating: on projects under TINY_REPO_FILE_THRESHOLD
|
|
671
|
+
// files, only expose the 5 core tools (search, context, node,
|
|
672
|
+
// explore, trace). The 5 omitted tools (callers, callees, impact,
|
|
673
|
+
// status, files) reduce to one grep at this scale.
|
|
674
|
+
//
|
|
675
|
+
// n=2 audits ruled out cutting below 5 tools:
|
|
676
|
+
// - 3-tool gate (search + context + trace): cost regressed on
|
|
677
|
+
// cobra/ky/sinatra. The agent fell back to raw Reads to cover
|
|
678
|
+
// what codegraph_node + codegraph_explore would have answered.
|
|
679
|
+
// - 1-tool gate (search only): catastrophic regression — express
|
|
680
|
+
// went from -43% WIN to +107% LOSS. With only search, the agent
|
|
681
|
+
// can't navigate the call graph structurally and reads everything.
|
|
682
|
+
//
|
|
683
|
+
// 5 is the empirical lower bound. Tools beyond search/context/
|
|
684
|
+
// node/explore/trace pay overhead that the agent doesn't recoup
|
|
685
|
+
// on tiny-repo flow questions.
|
|
686
|
+
// ITER4: raise threshold 150 → 500 so single-file frameworks
|
|
687
|
+
// (sinatra at 159, slim_framework around 200) also get the
|
|
688
|
+
// 5-tool surface. The empirical 5-tool floor was set on <150
|
|
689
|
+
// probes; iter3 measurement showed sinatra is structurally the
|
|
690
|
+
// SAME problem as cobra (single-file WITHOUT-arm Read wins),
|
|
691
|
+
// so it deserves the same gating.
|
|
692
|
+
const TINY_REPO_FILE_THRESHOLD = 500;
|
|
693
|
+
const TINY_REPO_CORE_TOOLS = new Set([
|
|
694
|
+
'codegraph_search',
|
|
695
|
+
'codegraph_context',
|
|
696
|
+
'codegraph_node',
|
|
697
|
+
'codegraph_explore',
|
|
698
|
+
'codegraph_trace',
|
|
699
|
+
]);
|
|
700
|
+
if (stats.fileCount < TINY_REPO_FILE_THRESHOLD) {
|
|
701
|
+
visible = visible.filter(t => TINY_REPO_CORE_TOOLS.has(t.name));
|
|
702
|
+
}
|
|
590
703
|
return visible.map(tool => {
|
|
591
704
|
if (tool.name === 'codegraph_explore') {
|
|
592
705
|
return {
|
|
@@ -640,7 +753,7 @@ class ToolHandler {
|
|
|
640
753
|
}
|
|
641
754
|
}
|
|
642
755
|
// Walk up parent directories to find nearest .codegraph/
|
|
643
|
-
const resolvedRoot = (0,
|
|
756
|
+
const resolvedRoot = (0, directory_1.findNearestCodeGraphRoot)(projectPath);
|
|
644
757
|
if (!resolvedRoot) {
|
|
645
758
|
throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
|
|
646
759
|
}
|
|
@@ -662,7 +775,7 @@ class ToolHandler {
|
|
|
662
775
|
return cg;
|
|
663
776
|
}
|
|
664
777
|
// Open and cache under both paths
|
|
665
|
-
const cg =
|
|
778
|
+
const cg = loadCodeGraph().openSync(resolvedRoot);
|
|
666
779
|
this.projectCache.set(resolvedRoot, cg);
|
|
667
780
|
if (projectPath !== resolvedRoot) {
|
|
668
781
|
this.projectCache.set(projectPath, cg);
|
|
@@ -842,6 +955,19 @@ class ToolHandler {
|
|
|
842
955
|
*/
|
|
843
956
|
async execute(toolName, args) {
|
|
844
957
|
try {
|
|
958
|
+
// Block the first tool call on the engine's post-open reconcile so we
|
|
959
|
+
// never serve rows for files deleted/edited while no MCP server was
|
|
960
|
+
// running. The gate is cleared after first await — subsequent calls
|
|
961
|
+
// pay nothing. Catch-up failures are logged by the engine; we
|
|
962
|
+
// proceed regardless so a transient sync error never breaks tools.
|
|
963
|
+
if (this.catchUpGate) {
|
|
964
|
+
const gate = this.catchUpGate;
|
|
965
|
+
this.catchUpGate = null;
|
|
966
|
+
try {
|
|
967
|
+
await gate;
|
|
968
|
+
}
|
|
969
|
+
catch { /* engine already logged */ }
|
|
970
|
+
}
|
|
845
971
|
// Honor the optional tool allowlist (CODEGRAPH_MCP_TOOLS): a trimmed
|
|
846
972
|
// surface rejects ablated tools defensively even if a client cached them.
|
|
847
973
|
if (!this.isToolAllowed(toolName)) {
|
|
@@ -935,7 +1061,15 @@ class ToolHandler {
|
|
|
935
1061
|
if (results.length === 0) {
|
|
936
1062
|
return this.textResult(`No results found for "${query}"`);
|
|
937
1063
|
}
|
|
938
|
-
|
|
1064
|
+
// Down-rank generated files within the FTS-returned set so a search
|
|
1065
|
+
// for "Send" surfaces the hand-written keeper before .pb.go stubs
|
|
1066
|
+
// that share the name. Stable: only reorders generated vs. not.
|
|
1067
|
+
const ranked = [...results].sort((a, b) => {
|
|
1068
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0;
|
|
1069
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0;
|
|
1070
|
+
return aGen - bGen;
|
|
1071
|
+
});
|
|
1072
|
+
const formatted = this.formatSearchResults(ranked);
|
|
939
1073
|
return this.textResult(this.truncateOutput(formatted));
|
|
940
1074
|
}
|
|
941
1075
|
/**
|
|
@@ -951,7 +1085,27 @@ class ToolHandler {
|
|
|
951
1085
|
markSessionConsulted(sessionId);
|
|
952
1086
|
}
|
|
953
1087
|
const cg = this.getCodeGraph(args.projectPath);
|
|
954
|
-
|
|
1088
|
+
// On tiny repos (<150 files), trim maxNodes hard — the entire repo
|
|
1089
|
+
// is grep-able in a turn so a 20-node context is wasted budget.
|
|
1090
|
+
// 8 covers the typical 1-3 entry-point + their immediate neighbors
|
|
1091
|
+
// without dragging in the rest of the small codebase.
|
|
1092
|
+
let defaultMaxNodes = 20;
|
|
1093
|
+
let isTinyRepo = false;
|
|
1094
|
+
let isSmallRepo = false;
|
|
1095
|
+
try {
|
|
1096
|
+
const stats = cg.getStats();
|
|
1097
|
+
if (stats.fileCount < 150) {
|
|
1098
|
+
defaultMaxNodes = 8;
|
|
1099
|
+
isTinyRepo = true;
|
|
1100
|
+
}
|
|
1101
|
+
else if (stats.fileCount < 500) {
|
|
1102
|
+
isSmallRepo = true;
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
catch {
|
|
1106
|
+
// stats failure — fall back to the standard default
|
|
1107
|
+
}
|
|
1108
|
+
const maxNodes = args.maxNodes || defaultMaxNodes;
|
|
955
1109
|
const includeCode = args.includeCode !== false;
|
|
956
1110
|
const context = await cg.buildContext(task, {
|
|
957
1111
|
maxNodes,
|
|
@@ -963,12 +1117,189 @@ class ToolHandler {
|
|
|
963
1117
|
const reminder = isFeatureQuery
|
|
964
1118
|
? '\n\n⚠️ **Ask user:** UX preferences, edge cases, acceptance criteria'
|
|
965
1119
|
: '';
|
|
1120
|
+
// Auto-trace for flow queries: when the task is asking "how does X
|
|
1121
|
+
// reach/flow/propagate from A to B", run the trace internally and
|
|
1122
|
+
// append its body to the context response. Saves the agent the
|
|
1123
|
+
// follow-up codegraph_trace call that was the #2 cost driver on
|
|
1124
|
+
// multi-module flow questions (Q3 / etcd Q2 in the audit).
|
|
1125
|
+
const flowTrace = await this.maybeInlineFlowTrace(task, cg);
|
|
1126
|
+
// Iter3 — sufficiency steering on small repos.
|
|
1127
|
+
//
|
|
1128
|
+
// Measured economics on tiny (<150) and small (<500) projects: every
|
|
1129
|
+
// additional MCP tool call costs ~$0.02-0.05 in cache-write tokens
|
|
1130
|
+
// (5K-15K per response at $3.75/1M). The agent reflexively follows
|
|
1131
|
+
// codegraph_context with explore/node even when the context response
|
|
1132
|
+
// is already sufficient — that pattern drove the cost gap that
|
|
1133
|
+
// smaller bodies (iter2) failed to close (smaller bodies just shifted
|
|
1134
|
+
// the agent to Read instead). Direct directive on small-repo
|
|
1135
|
+
// responses: tell the agent the context call IS the comprehensive
|
|
1136
|
+
// pass for a project of this size and that follow-ups should be
|
|
1137
|
+
// narrow (trace from→to, node single-symbol) — not another broad
|
|
1138
|
+
// explore that re-bundles the same content.
|
|
1139
|
+
// ITER4: unified strong directive for both tiny (<150) and small
|
|
1140
|
+
// (<500) tiers — measured iter3 result was that the soft <500
|
|
1141
|
+
// wording was IGNORED on sinatra (5 tool calls, +92% loss) while
|
|
1142
|
+
// the strong <150 wording was followed on cobra/slim (3 calls,
|
|
1143
|
+
// -21%/-22% wins). The single-file-framework problem (sinatra)
|
|
1144
|
+
// is structurally the same as cobra's; both deserve the same
|
|
1145
|
+
// sufficiency steering.
|
|
1146
|
+
let smallRepoTail = '';
|
|
1147
|
+
let smallRepoRouteInline = '';
|
|
1148
|
+
if (isTinyRepo || isSmallRepo) {
|
|
1149
|
+
// Iter12: backend-computed routing manifest for routing queries.
|
|
1150
|
+
// Builds a URL → handler map directly from the graph (each route
|
|
1151
|
+
// node has a `references` edge to its handler), then inlines the
|
|
1152
|
+
// top handler file's source. The agent gets the canonical
|
|
1153
|
+
// routing answer in one MCP call — no need to parse framework
|
|
1154
|
+
// DSL or grep for handlers.
|
|
1155
|
+
//
|
|
1156
|
+
// Replaces iter10's raw route-file inline. The manifest is more
|
|
1157
|
+
// information-dense (parsed URL→handler map vs raw config DSL)
|
|
1158
|
+
// and we still inline the top handler file's source so the agent
|
|
1159
|
+
// has the implementation bodies inline too.
|
|
1160
|
+
const isRouteQuery = /\b(route|routes|routing|request|handler|endpoint|api|controller|middleware|dispatch|invok)/i.test(task);
|
|
1161
|
+
if (isRouteQuery) {
|
|
1162
|
+
try {
|
|
1163
|
+
const manifest = cg.getRoutingManifest(40);
|
|
1164
|
+
if (manifest) {
|
|
1165
|
+
// 1) Compact URL→handler list (~30-60 lines, ~1-2KB).
|
|
1166
|
+
const lines = [
|
|
1167
|
+
`\n\n## Routing manifest (${manifest.totalRoutes} routes, top handler file holds ${manifest.topHandlerFileCount})`,
|
|
1168
|
+
'',
|
|
1169
|
+
'| URL | Handler | Location |',
|
|
1170
|
+
'|---|---|---|',
|
|
1171
|
+
];
|
|
1172
|
+
for (const e of manifest.entries) {
|
|
1173
|
+
lines.push(`| \`${e.url}\` | \`${e.handler}\` | ${e.handlerFile}:${e.handlerLine} |`);
|
|
1174
|
+
}
|
|
1175
|
+
// 2) Inline the top handler file's source.
|
|
1176
|
+
if (manifest.topHandlerFile && manifest.topHandlerFileCount >= 2) {
|
|
1177
|
+
try {
|
|
1178
|
+
const fullPath = pathModule.join(cg.getProjectRoot(), manifest.topHandlerFile);
|
|
1179
|
+
const stat = (0, fs_1.statSync)(fullPath);
|
|
1180
|
+
if (stat.size > 0 && stat.size <= 16000) {
|
|
1181
|
+
const source = (0, fs_1.readFileSync)(fullPath, 'utf-8');
|
|
1182
|
+
const capped = source.length > 7000 ? source.slice(0, 7000) + '\n... (truncated)' : source;
|
|
1183
|
+
const ext = (manifest.topHandlerFile.match(/\.([a-z]+)$/i)?.[1] || '').toLowerCase();
|
|
1184
|
+
const lang = ext === 'rb' ? 'ruby' : ext === 'py' ? 'python' :
|
|
1185
|
+
ext === 'go' ? 'go' : ext === 'rs' ? 'rust' :
|
|
1186
|
+
ext === 'js' || ext === 'jsx' ? 'javascript' :
|
|
1187
|
+
ext === 'ts' || ext === 'tsx' ? 'typescript' :
|
|
1188
|
+
ext === 'java' ? 'java' : ext === 'kt' ? 'kotlin' :
|
|
1189
|
+
ext === 'cs' ? 'csharp' : ext === 'php' ? 'php' :
|
|
1190
|
+
ext === 'swift' ? 'swift' : ext === 'yml' || ext === 'yaml' ? 'yaml' : '';
|
|
1191
|
+
lines.push('');
|
|
1192
|
+
lines.push(`### Top handler file (\`${manifest.topHandlerFile}\` — ${manifest.topHandlerFileCount}/${manifest.totalRoutes} routes, full source inlined — do NOT Read)`);
|
|
1193
|
+
lines.push('');
|
|
1194
|
+
lines.push('```' + lang);
|
|
1195
|
+
lines.push(capped);
|
|
1196
|
+
lines.push('```');
|
|
1197
|
+
}
|
|
1198
|
+
}
|
|
1199
|
+
catch { /* file read failed, skip the source inline */ }
|
|
1200
|
+
}
|
|
1201
|
+
smallRepoRouteInline = lines.join('\n');
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
catch {
|
|
1205
|
+
// Manifest build failed — drop silently
|
|
1206
|
+
}
|
|
1207
|
+
}
|
|
1208
|
+
const sizeQualifier = isTinyRepo ? 'under 150' : 'under 500';
|
|
1209
|
+
const routingClause = smallRepoRouteInline
|
|
1210
|
+
? ' The URL→handler manifest and top handler file are also inlined above — answer routing questions from them.'
|
|
1211
|
+
: '';
|
|
1212
|
+
smallRepoTail = `\n\n---\n> **This project is small** (${sizeQualifier} indexed files). The entry points and code above cover the relevant surface — **do NOT call codegraph_explore as a follow-up; its content will largely duplicate this response**. If you need a specific flow, call \`codegraph_trace from→to\`. If you need one specific symbol's body, call \`codegraph_node <name>\`.${routingClause} Otherwise, answer from what is above.`;
|
|
1213
|
+
}
|
|
966
1214
|
// buildContext returns string when format is 'markdown'
|
|
967
1215
|
if (typeof context === 'string') {
|
|
968
|
-
return this.textResult(this.truncateOutput(context + reminder));
|
|
1216
|
+
return this.textResult(this.truncateOutput(context + flowTrace + reminder + smallRepoRouteInline + smallRepoTail));
|
|
969
1217
|
}
|
|
970
1218
|
// If it returns TaskContext, format it
|
|
971
|
-
return this.textResult(this.truncateOutput(this.formatTaskContext(context) + reminder));
|
|
1219
|
+
return this.textResult(this.truncateOutput(this.formatTaskContext(context) + flowTrace + reminder + smallRepoRouteInline + smallRepoTail));
|
|
1220
|
+
}
|
|
1221
|
+
/**
|
|
1222
|
+
* Detect a flow-style task ("how does X reach Y", "trace the path from A to B")
|
|
1223
|
+
* and pre-run trace between the most likely endpoints, returning the trace
|
|
1224
|
+
* body to splice into the context response. Returns '' for non-flow queries
|
|
1225
|
+
* or when no plausible endpoint pair can be extracted.
|
|
1226
|
+
*
|
|
1227
|
+
* Conservative by design: only fires when the task has both a clear flow
|
|
1228
|
+
* keyword AND at least two distinct PascalCase / camelCase identifiers.
|
|
1229
|
+
* False positives waste a graph query; false negatives just fall back to
|
|
1230
|
+
* the agent calling trace itself (existing path-proximity wiring handles
|
|
1231
|
+
* disambiguation either way).
|
|
1232
|
+
*/
|
|
1233
|
+
async maybeInlineFlowTrace(task, cg) {
|
|
1234
|
+
const lower = task.toLowerCase();
|
|
1235
|
+
const FLOW_KEYWORDS = [
|
|
1236
|
+
'trace ',
|
|
1237
|
+
'from ',
|
|
1238
|
+
'reach ',
|
|
1239
|
+
'flow ',
|
|
1240
|
+
'propagat',
|
|
1241
|
+
'how does ',
|
|
1242
|
+
'how do ',
|
|
1243
|
+
];
|
|
1244
|
+
if (!FLOW_KEYWORDS.some((k) => lower.includes(k)))
|
|
1245
|
+
return '';
|
|
1246
|
+
// Extract candidate symbols — PascalCase or camelCase identifiers ≥3 chars.
|
|
1247
|
+
// Filter out common non-symbol words and the flow keywords themselves.
|
|
1248
|
+
const STOP_WORDS = new Set([
|
|
1249
|
+
'how', 'does', 'the', 'and', 'from', 'through', 'reach', 'reaches',
|
|
1250
|
+
'flow', 'path', 'trace', 'cross', 'module', 'modules', 'where',
|
|
1251
|
+
'update', 'updates', 'updated', 'when', 'what', 'this', 'that',
|
|
1252
|
+
]);
|
|
1253
|
+
const ids = [];
|
|
1254
|
+
const seen = new Set();
|
|
1255
|
+
const re = /\b([A-Z][a-z]+(?:[A-Z][a-z]*)+|[a-z]+[A-Z][a-z]*(?:[A-Z][a-z]*)*)\b/g;
|
|
1256
|
+
let m;
|
|
1257
|
+
while ((m = re.exec(task)) !== null) {
|
|
1258
|
+
const sym = m[1];
|
|
1259
|
+
if (sym.length < 3)
|
|
1260
|
+
continue;
|
|
1261
|
+
const key = sym.toLowerCase();
|
|
1262
|
+
if (STOP_WORDS.has(key) || seen.has(key))
|
|
1263
|
+
continue;
|
|
1264
|
+
seen.add(key);
|
|
1265
|
+
ids.push(sym);
|
|
1266
|
+
}
|
|
1267
|
+
if (ids.length < 2)
|
|
1268
|
+
return '';
|
|
1269
|
+
// The first two distinct symbols, in order of appearance, are the most
|
|
1270
|
+
// likely from/to endpoints — "from X ... through to Y" naturally places
|
|
1271
|
+
// them in that order in the prose. If the trace fails to connect, it
|
|
1272
|
+
// still returns the inlined endpoint bodies (the trace-failure rewrite).
|
|
1273
|
+
const fromSym = ids[0];
|
|
1274
|
+
const toSym = ids[1];
|
|
1275
|
+
let traceResult;
|
|
1276
|
+
try {
|
|
1277
|
+
traceResult = await this.handleTrace({
|
|
1278
|
+
from: fromSym,
|
|
1279
|
+
to: toSym,
|
|
1280
|
+
projectPath: cg.getProjectRoot(),
|
|
1281
|
+
});
|
|
1282
|
+
}
|
|
1283
|
+
catch {
|
|
1284
|
+
return '';
|
|
1285
|
+
}
|
|
1286
|
+
// Extract the textual body. Defensive: handleTrace's contract is the
|
|
1287
|
+
// standard tool-result shape used elsewhere in this file.
|
|
1288
|
+
const body = traceResult.content
|
|
1289
|
+
?.map((c) => (c.type === 'text' ? c.text : ''))
|
|
1290
|
+
.filter(Boolean)
|
|
1291
|
+
.join('\n')
|
|
1292
|
+
.trim();
|
|
1293
|
+
if (!body)
|
|
1294
|
+
return '';
|
|
1295
|
+
return [
|
|
1296
|
+
'',
|
|
1297
|
+
'## Inline flow trace',
|
|
1298
|
+
'',
|
|
1299
|
+
`Auto-traced \`${fromSym}\` → \`${toSym}\` because the query looks like a flow question. No follow-up codegraph_trace is needed for this pair.`,
|
|
1300
|
+
'',
|
|
1301
|
+
body,
|
|
1302
|
+
].join('\n');
|
|
972
1303
|
}
|
|
973
1304
|
/**
|
|
974
1305
|
* Heuristic to detect if a query looks like a feature request
|
|
@@ -1130,46 +1461,200 @@ class ToolHandler {
|
|
|
1130
1461
|
// (which, on real code, means the flow breaks at dynamic dispatch).
|
|
1131
1462
|
const edgeKinds = ['calls'];
|
|
1132
1463
|
const MAX_HOPS = 7;
|
|
1133
|
-
|
|
1134
|
-
|
|
1464
|
+
// Path-proximity pairing: in a multi-module repo a symbol name like
|
|
1465
|
+
// `EndBlocker` exists in 20+ modules. FTS picks one almost arbitrarily;
|
|
1466
|
+
// the WRONG pair (e.g. simapp's wrapper EndBlocker paired with gov's Tally)
|
|
1467
|
+
// has no static path, falls through to the dynamic-dispatch failure branch,
|
|
1468
|
+
// and surfaces unrelated bodies — exactly the cosmos-Q3 trace failure mode.
|
|
1469
|
+
// Score every from×to combo by shared file-path prefix length; try the
|
|
1470
|
+
// most-co-located pair first (e.g. `x/gov/abci.go::EndBlocker` ×
|
|
1471
|
+
// `x/gov/keeper/tally.go::Tally` share `x/gov/`).
|
|
1472
|
+
//
|
|
1473
|
+
// Consider the FULL candidate set, not just the FTS top-5: the right
|
|
1474
|
+
// EndBlocker for a gov-module flow may rank 8th in FTS but share the
|
|
1475
|
+
// entire `x/gov/` prefix with the destination. Path-proximity supersedes
|
|
1476
|
+
// FTS for this disambiguation. Findpath trials are still capped by
|
|
1477
|
+
// FINDPATH_PAIR_BUDGET below to bound graph traversal cost.
|
|
1478
|
+
// Length, in characters, of the directory prefix shared by two file paths.
// The file name (everything after the last '/') is ignored, and only WHOLE
// path segments count: `x/gov/` vs `x/govern/` share `x/` (2), not `x/gov` (5).
// A raw character comparison would credit unrelated sibling directories that
// merely start with the same letters.
const sharedDirPrefixLen = (a, b) => {
    const aDir = a.replace(/[^/]+$/, '');
    const bDir = b.replace(/[^/]+$/, '');
    const limit = Math.min(aDir.length, bDir.length);
    let i = 0;
    while (i < limit && aDir[i] === bDir[i])
        i++;
    // Back up to the last '/' inside the common run so a partially matching
    // directory name contributes nothing.
    while (i > 0 && aDir[i - 1] !== '/')
        i--;
    return i;
};
|
|
1486
|
+
// Cosmos-Q3 surfaced a second-order failure: `enterprise/group/x/group/`
|
|
1487
|
+
// SHARES MORE of its path with `enterprise/group/x/group/keeper/tally.go`
|
|
1488
|
+
// (24 chars) than `x/gov/abci.go` shares with `x/gov/keeper/tally.go`
|
|
1489
|
+
// (6 chars), so pure shared-prefix prefers the side-experiment module
|
|
1490
|
+
// over the canonical one — even though the user's question is clearly
|
|
1491
|
+
// about the main gov module. Penalize candidates living under prefixes
|
|
1492
|
+
// that conventionally hold extensions / experiments / vendored code, so
|
|
1493
|
+
// the canonical-path pair wins even when its shared prefix is short.
|
|
1494
|
+
// True when the path's FIRST segment is a directory that conventionally holds
// extensions, experiments or vendored code (case-insensitive). Only the root
// segment is inspected; `pkg/vendor/...` does not match.
const isLessCanonicalPath = (p) => {
    const lessCanonicalRoot = /^(enterprise|contrib|examples?|sample|playground|vendor|third[_-]?party|deprecated|legacy)\//i;
    return lessCanonicalRoot.test(p);
};
|
|
1495
|
+
const LESS_CANONICAL_PENALTY = 100; // any canonical candidate beats any less-canonical one
|
|
1496
|
+
// Co-location score for a (from, to) file-path pair: the shared directory
// prefix length, minus LESS_CANONICAL_PENALTY once for each side that lives
// under a less-canonical root.
const scorePair = (a, b) => {
    let score = sharedDirPrefixLen(a, b);
    if (isLessCanonicalPath(a))
        score -= LESS_CANONICAL_PENALTY;
    if (isLessCanonicalPath(b))
        score -= LESS_CANONICAL_PENALTY;
    return score;
};
|
|
1499
|
+
const fromCands = fromMatches.nodes;
|
|
1500
|
+
const toCands = toMatches.nodes;
|
|
1501
|
+
// Candidate relevance: an overloaded name (Alamofire has 44 `request`s, most
|
|
1502
|
+
// of them EMPTY EventMonitor protocol-conformance stubs `func request(…){}`)
|
|
1503
|
+
// floods the pool with no-op decls. Shared-dir-prefix alone then MISLEADS —
|
|
1504
|
+
// two unrelated `Source/Features/` delegate stubs outscore the real
|
|
1505
|
+
// `Source/Core/Session.request` × `Source/Core/…task` pair the agent meant,
|
|
1506
|
+
// so trace resolves to stubs, finds no path, and the agent reads by line.
|
|
1507
|
+
// Penalize empty stubs and test-file symbols so a substantive entry point
|
|
1508
|
+
// wins; among real methods this is ~flat, so path-proximity still decides
|
|
1509
|
+
// (cosmos EndBlocker disambiguation is unaffected — none of its candidates
|
|
1510
|
+
// are stubs/tests).
|
|
1511
|
+
// True when the path sits inside a test-like directory (test(s), spec(s),
// __tests__, testdata, mock(s), fixture(s)) or the file name carries a
// `.test.<ext>` / `.spec.<ext>` infix. Matching is case-insensitive.
const isTestPath = (p) => {
    const inTestDir = /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p);
    if (inTestDir)
        return true;
    return /\.(test|spec)\.[a-z]+$/i.test(p);
};
|
|
1512
|
+
// Relevance bonus/penalty for a candidate node, added on top of the pair's
// path score:
//   + body length in lines, capped at 20 (a substantive body is more likely
//     the symbol the caller meant)
//   - 40 for an empty / one-line stub (protocol no-op, declaration only)
//   - 150 when the node lives in a test path
// A missing endLine is treated as a zero-length body.
const nodeRelevance = (n) => {
    const lastLine = n.endLine ?? n.startLine;
    const bodyLines = Math.max(0, lastLine - n.startLine);
    const stubPenalty = bodyLines <= 1 ? 40 : 0;
    const testPenalty = isTestPath(n.filePath) ? 150 : 0;
    return Math.min(bodyLines, 20) - stubPenalty - testPenalty;
};
|
|
1521
|
+
const pairs = [];
|
|
1522
|
+
for (const f of fromCands) {
|
|
1523
|
+
for (const t of toCands) {
|
|
1524
|
+
pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) + nodeRelevance(f) + nodeRelevance(t) });
|
|
1525
|
+
}
|
|
1526
|
+
}
|
|
1527
|
+
// Sort by combined pair score desc (shared prefix, less-canonical penalty, node relevance), then by FTS order (already encoded in the
|
|
1528
|
+
// pairs' insertion order — both for f and t). The tiebreaker preserves
|
|
1529
|
+
// findAllSymbols' generated-file-last ranking.
|
|
1530
|
+
pairs.sort((a, b) => b.score - a.score);
|
|
1531
|
+
// Cap how many graph-path probes we attempt so a 50×50 cross-product
|
|
1532
|
+
// doesn't blow up on a god-named symbol like `Get` (well-named flows have
|
|
1533
|
+
// their good pair near the top of the sort anyway).
|
|
1534
|
+
const FINDPATH_PAIR_BUDGET = 20;
|
|
1535
|
+
const fromTry = fromCands;
|
|
1536
|
+
const toTry = toCands;
|
|
1135
1537
|
let path = null;
|
|
1136
1538
|
let overCap = null;
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1539
|
+
let bestPair = null;
|
|
1540
|
+
let triedPairs = 0;
|
|
1541
|
+
for (const { f, t } of pairs) {
|
|
1542
|
+
if (path)
|
|
1543
|
+
break;
|
|
1544
|
+
if (triedPairs >= FINDPATH_PAIR_BUDGET)
|
|
1545
|
+
break;
|
|
1546
|
+
triedPairs++;
|
|
1547
|
+
const p = cg.findPath(f.id, t.id, edgeKinds);
|
|
1548
|
+
if (p && p.length > 1) {
|
|
1142
1549
|
if (p.length <= MAX_HOPS) {
|
|
1143
1550
|
path = p;
|
|
1551
|
+
bestPair = { f, t };
|
|
1144
1552
|
break;
|
|
1145
1553
|
}
|
|
1146
|
-
if (!overCap || p.length < overCap.length)
|
|
1554
|
+
if (!overCap || p.length < overCap.length) {
|
|
1147
1555
|
overCap = p;
|
|
1556
|
+
bestPair = { f, t };
|
|
1557
|
+
}
|
|
1558
|
+
}
|
|
1559
|
+
else if (!bestPair) {
|
|
1560
|
+
// No path yet — remember the top-scored pair so the failure branch
|
|
1561
|
+
// surfaces the most-co-located candidates' bodies, not whatever FTS
|
|
1562
|
+
// happened to put first.
|
|
1563
|
+
bestPair = { f, t };
|
|
1148
1564
|
}
|
|
1149
|
-
if (path)
|
|
1150
|
-
break;
|
|
1151
1565
|
}
|
|
1152
1566
|
if (!path) {
|
|
1153
|
-
// No static path — almost always a dynamic-dispatch break.
|
|
1154
|
-
//
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1567
|
+
// No static path — almost always a dynamic-dispatch break. INSTEAD of
|
|
1568
|
+
// telling the agent to chase the gap with codegraph_node/callers/callees
|
|
1569
|
+
// (which fans out into 3-4 follow-up tool calls + a Read), inline the
|
|
1570
|
+
// material those would have returned right here. Measured on cosmos-Q3:
|
|
1571
|
+
// the failed-trace + subsequent fan-out used to cost ~2× a single
|
|
1572
|
+
// sufficient trace call; this branch closes that gap.
|
|
1573
|
+
// Prefer the path-proximity-best pair we identified above (e.g. gov's
|
|
1574
|
+
// EndBlocker × gov's Tally) over the FTS top-pick (simapp's wrapper).
|
|
1575
|
+
const start = bestPair?.f ?? fromTry[0];
|
|
1576
|
+
const end = bestPair?.t ?? toTry[0];
|
|
1577
|
+
const fileCache = new Map();
|
|
1158
1578
|
const lines = [
|
|
1159
|
-
`No direct call path from "${from}" to "${to}".`,
|
|
1579
|
+
`No direct static call path from "${from}" to "${to}" — the chain almost certainly breaks at dynamic dispatch (a callback / interface dispatch / framework hook / metaclass). Both endpoint bodies + their immediate neighbors are inlined below; answer from them — a follow-up codegraph_node/callers/callees on these would just return what is already here.`,
|
|
1160
1580
|
'',
|
|
1161
|
-
(overCap
|
|
1162
|
-
? `(Only a ${overCap.length}-hop indirect chain connects them — almost certainly a BFS wander through unrelated code, not the real flow.) `
|
|
1163
|
-
: '') +
|
|
1164
|
-
'The direct chain most likely breaks at **dynamic dispatch** (a callback, descriptor, ' +
|
|
1165
|
-
'metaclass, or attribute-as-callable) that static parsing cannot resolve into an edge. ' +
|
|
1166
|
-
`Inspect \`${start.name}\` (${start.filePath}:${start.startLine}) with codegraph_node ` +
|
|
1167
|
-
'(includeCode=true) — its body usually shows the dynamic call to follow next.',
|
|
1168
1581
|
];
|
|
1169
|
-
if (
|
|
1170
|
-
lines.push(
|
|
1582
|
+
if (overCap) {
|
|
1583
|
+
lines.push(`> Indirect chain of ${overCap.length} hops exists but is over the ${MAX_HOPS}-hop cap (usually a BFS wander through unrelated code, not the real execution flow).`, '');
|
|
1171
1584
|
}
|
|
1172
|
-
|
|
1585
|
+
// Track which node IDs we've already inlined a body for so we don't
|
|
1586
|
+
// double-emit when a callee of FROM is also surfaced separately.
|
|
1587
|
+
const inlinedBodies = new Set();
|
|
1588
|
+
// Append the source of node `n` to `lines` at most once per node id.
// Returns true only when a body was actually appended. The id is recorded
// BEFORE the source lookup, so a node whose source cannot be read is not
// retried later.
const inlineBody = (n, lineCap, charCap) => {
    const alreadyInlined = inlinedBodies.has(n.id);
    if (alreadyInlined)
        return false;
    inlinedBodies.add(n.id);
    const snippet = this.sourceRangeAt(cg, n.filePath, n.startLine, n.endLine, fileCache, lineCap, charCap);
    if (!snippet)
        return false;
    lines.push(snippet);
    return true;
};
|
|
1599
|
+
// Emit one trace endpoint: a heading with its location, its body (capped at
// 120 lines / 3600 chars), then up to 6 callers and up to 8 callees as
// one-line lists, followed by a blank separator line.
const inlineEndpoint = (label, node) => {
    // endLine may be absent on a node (other code in this file falls back to
    // startLine); print a single line number rather than "N-undefined".
    const span = node.endLine != null
        ? `${node.startLine}-${node.endLine}`
        : `${node.startLine}`;
    lines.push(`### ${label}: \`${node.name}\` (${node.filePath}:${span})`);
    inlineBody(node, 120, 3600);
    const describe = (c) => `${c.node.name} (${c.node.filePath}:${c.node.startLine})`;
    const callers = cg.getCallers(node.id).slice(0, 6);
    if (callers.length > 0) {
        lines.push(`**Callers of \`${node.name}\`:** ` + callers.map(describe).join(', '));
    }
    const callees = cg.getCallees(node.id).slice(0, 8);
    if (callees.length > 0) {
        lines.push(`**\`${node.name}\` calls:** ` + callees.map(describe).join(', '));
    }
    lines.push('');
};
|
|
1614
|
+
inlineEndpoint('FROM', start);
|
|
1615
|
+
if (end.id !== start.id)
|
|
1616
|
+
inlineEndpoint('TO', end);
|
|
1617
|
+
// Inline the OTHER top-level functions/methods in TO's file — that's
|
|
1618
|
+
// where the missing dynamic-dispatch flow usually lives. Concrete
|
|
1619
|
+
// measurement from cosmos-Q1: `msgServer.Send` statically calls only
|
|
1620
|
+
// utility functions (`StringToBytes`, `Wrapf`); its real next-hop
|
|
1621
|
+
// `SendCoins` is invoked via an embedded-interface call (`k.Keeper.SendCoins`)
|
|
1622
|
+
// that static parsing CAN'T see. The flow IS in the same file as the
|
|
1623
|
+
// destination (`x/bank/keeper/send.go`: SendCoins → subUnlockedCoins →
|
|
1624
|
+
// addCoins → setBalance). Pre-inlining those file-mates is what
|
|
1625
|
+
// replaces the agent's "trace fail → search SendCoins → node SendCoins
|
|
1626
|
+
// → trace again" fan-out.
|
|
1627
|
+
const NEIGHBOR_LINES = 40;
|
|
1628
|
+
const NEIGHBOR_CHARS = 1200;
|
|
1629
|
+
const NEIGHBOR_K = 5;
|
|
1630
|
+
// Functions and methods defined in the same file as `anchor`, excluding the
// anchor itself and any node whose body was already inlined. Ordered by
// start-line distance from the anchor (closest first) and limited to
// NEIGHBOR_K entries.
const fileSiblings = (anchor) => {
    const inAnchorFile = (n) => n.filePath === anchor.filePath;
    const distance = (n) => Math.abs(n.startLine - anchor.startLine);
    const functions = cg.getNodesByKind('function').filter(inAnchorFile);
    const methods = cg.getNodesByKind('method').filter(inAnchorFile);
    const candidates = [...functions, ...methods]
        .filter((n) => n.id !== anchor.id && !inlinedBodies.has(n.id));
    candidates.sort((x, y) => distance(x) - distance(y));
    return candidates.slice(0, NEIGHBOR_K);
};
|
|
1644
|
+
// Emit a "### <label>" section listing each sibling as a bullet with its
// location, followed by its body (capped at NEIGHBOR_LINES / NEIGHBOR_CHARS).
// Emits nothing at all when there are no siblings.
const renderSiblings = (label, siblings) => {
    if (siblings.length === 0)
        return;
    lines.push(`### ${label}`);
    for (const sib of siblings) {
        // endLine may be absent on a node; avoid printing "N-undefined".
        const span = sib.endLine != null
            ? `${sib.startLine}-${sib.endLine}`
            : `${sib.startLine}`;
        lines.push('');
        lines.push(`- \`${sib.name}\` (${sib.filePath}:${span})`);
        inlineBody(sib, NEIGHBOR_LINES, NEIGHBOR_CHARS);
    }
    lines.push('');
};
|
|
1655
|
+
renderSiblings(`Other functions in \`${end.filePath}\` (the flow that the dynamic-dispatch hop reaches — bodies inlined)`, fileSiblings(end));
|
|
1656
|
+
lines.push('> Endpoint bodies + the other functions in the destination\'s file are inlined above. Together they typically cover the missing dynamic-dispatch boundary (interface-method calls like `k.Keeper.SendCoins` that static parsing can\'t follow). **No further codegraph_node / codegraph_callers / codegraph_callees / Read / Grep is needed for any symbol already shown here** — call them again only if you need to walk DEEPER than what is inlined.');
|
|
1657
|
+
return this.textResult(this.truncateOutput(lines.join('\n') + fromMatches.note + toMatches.note));
|
|
1173
1658
|
}
|
|
1174
1659
|
const lines = [
|
|
1175
1660
|
`## Trace: ${from} → ${to}`,
|
|
@@ -1288,6 +1773,14 @@ class ToolHandler {
|
|
|
1288
1773
|
registeredAt,
|
|
1289
1774
|
};
|
|
1290
1775
|
}
|
|
1776
|
+
if (m?.synthesizedBy === 'closure-collection') {
|
|
1777
|
+
const field = m.field ? `\`${String(m.field)}\`` : 'a collection';
|
|
1778
|
+
return {
|
|
1779
|
+
label: `closure collection — runs handlers appended to ${field} (dynamic dispatch)`,
|
|
1780
|
+
compact: `dynamic: runs ${field} handlers${at}`,
|
|
1781
|
+
registeredAt,
|
|
1782
|
+
};
|
|
1783
|
+
}
|
|
1291
1784
|
return null;
|
|
1292
1785
|
}
|
|
1293
1786
|
/**
|
|
@@ -1381,6 +1874,7 @@ class ToolHandler {
|
|
|
1381
1874
|
* dropping unrelated `OmsOrderService::list`.
|
|
1382
1875
|
*/
|
|
1383
1876
|
buildFlowFromNamedSymbols(cg, query) {
|
|
1877
|
+
const EMPTY = { text: '', pathNodeIds: new Set(), namedNodeIds: new Set(), uniqueNamedNodeIds: new Set() };
|
|
1384
1878
|
try {
|
|
1385
1879
|
const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
|
|
1386
1880
|
// Strip only a REAL file extension (Create.cs → Create); KEEP qualified
|
|
@@ -1392,7 +1886,7 @@ class ToolHandler {
|
|
|
1392
1886
|
.map((t) => t.replace(FILE_EXT, '').trim())
|
|
1393
1887
|
.filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
|
|
1394
1888
|
if (tokens.length < 2)
|
|
1395
|
-
return
|
|
1889
|
+
return EMPTY;
|
|
1396
1890
|
// Pool of name SEGMENTS (Class + method from every token) used to
|
|
1397
1891
|
// disambiguate an ambiguous SIMPLE name: keep a candidate only if its
|
|
1398
1892
|
// CONTAINER class is itself named in the query.
|
|
@@ -1402,24 +1896,34 @@ class ToolHandler {
|
|
|
1402
1896
|
if (s)
|
|
1403
1897
|
segPool.add(s);
|
|
1404
1898
|
const named = new Map();
|
|
1899
|
+
// Nodes whose token is SPECIFIC — a (near-)unique callable name (<=3 defs in
|
|
1900
|
+
// the whole graph). These are safe to SPARE a file on: the agent named THIS
|
|
1901
|
+
// method (`getResponseWithInterceptorChain`, 1 def). A hyper-polymorphic name
|
|
1902
|
+
// (`as_sql`, 110 defs across every Expression/Compiler subclass) is NOT here,
|
|
1903
|
+
// so naming it doesn't keep every backend variant full and flood the budget.
|
|
1904
|
+
const uniqueNamedNodeIds = new Set();
|
|
1405
1905
|
for (const t of tokens) {
|
|
1406
1906
|
const cands = this.findAllSymbols(cg, t).nodes.filter((n) => CALLABLE.has(n.kind));
|
|
1407
1907
|
// A qualified or otherwise-specific name (<=3 hits) keeps all; an
|
|
1408
1908
|
// ambiguous simple name keeps only candidates whose container is named.
|
|
1409
|
-
const
|
|
1909
|
+
const specific = cands.length <= 3;
|
|
1910
|
+
const pick = specific
|
|
1410
1911
|
? cands
|
|
1411
1912
|
: cands.filter((n) => {
|
|
1412
1913
|
const segs = (n.qualifiedName || '').toLowerCase().split(/::|\./).filter(Boolean);
|
|
1413
1914
|
const container = segs.length >= 2 ? segs[segs.length - 2] : '';
|
|
1414
1915
|
return !!container && segPool.has(container);
|
|
1415
1916
|
});
|
|
1416
|
-
for (const n of pick.slice(0, 6))
|
|
1917
|
+
for (const n of pick.slice(0, 6)) {
|
|
1417
1918
|
named.set(n.id, n);
|
|
1919
|
+
if (specific)
|
|
1920
|
+
uniqueNamedNodeIds.add(n.id);
|
|
1921
|
+
}
|
|
1418
1922
|
if (named.size > 40)
|
|
1419
1923
|
break;
|
|
1420
1924
|
}
|
|
1421
1925
|
if (named.size < 2)
|
|
1422
|
-
return
|
|
1926
|
+
return EMPTY;
|
|
1423
1927
|
const MAX_HOPS = 7;
|
|
1424
1928
|
let best = null;
|
|
1425
1929
|
// BFS the full call graph (incl. synth edges) from each named seed, but
|
|
@@ -1465,22 +1969,66 @@ class ToolHandler {
|
|
|
1465
1969
|
if (!best || chain.length > best.length)
|
|
1466
1970
|
best = chain;
|
|
1467
1971
|
}
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1972
|
+
const hasMain = !!best && best.length >= 3;
|
|
1973
|
+
const pathIds = new Set((best ?? []).map((s) => s.node.id));
|
|
1974
|
+
// Supplementary: dynamic-dispatch (synthesized) edges incident to a NAMED
|
|
1975
|
+
// symbol — the indirect hops an agent would otherwise grep/Read to
|
|
1976
|
+
// reconstruct ("where do the appended `validators` actually run?"). The
|
|
1977
|
+
// synth edge IS that answer, so surface it even when the OTHER end wasn't
|
|
1978
|
+
// named (e.g. the agent names `validate` but not the `didCompleteTask`
|
|
1979
|
+
// that drains the collection). On-topic by construction: only heuristic
|
|
1980
|
+
// edges touching a symbol the agent named; skipped when the hop already
|
|
1981
|
+
// shows in the main chain.
|
|
1982
|
+
const synthLines = [];
|
|
1983
|
+
const synthSeen = new Set();
|
|
1984
|
+
for (const n of named.values()) {
|
|
1985
|
+
if (synthLines.length >= 6)
|
|
1986
|
+
break;
|
|
1987
|
+
for (const { node: other, edge } of [...cg.getCallers(n.id), ...cg.getCallees(n.id)]) {
|
|
1988
|
+
if (synthLines.length >= 6)
|
|
1989
|
+
break;
|
|
1990
|
+
if (edge.provenance !== 'heuristic' || other.id === n.id)
|
|
1991
|
+
continue;
|
|
1992
|
+
if (pathIds.has(edge.source) && pathIds.has(edge.target))
|
|
1993
|
+
continue; // already in the main chain
|
|
1994
|
+
const src = edge.source === n.id ? n : other;
|
|
1995
|
+
const tgt = edge.source === n.id ? other : n;
|
|
1996
|
+
const key = `${src.name}>${tgt.name}`;
|
|
1997
|
+
if (synthSeen.has(key))
|
|
1998
|
+
continue;
|
|
1999
|
+
synthSeen.add(key);
|
|
2000
|
+
const note = this.synthEdgeNote(edge);
|
|
2001
|
+
synthLines.push(`- ${src.name} → ${tgt.name} [${note ? note.compact : edge.kind}]`);
|
|
1476
2002
|
}
|
|
1477
|
-
out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
|
|
1478
2003
|
}
|
|
1479
|
-
|
|
1480
|
-
|
|
2004
|
+
if (!hasMain && synthLines.length === 0)
|
|
2005
|
+
return EMPTY;
|
|
2006
|
+
const out = [];
|
|
2007
|
+
if (hasMain) {
|
|
2008
|
+
out.push('## Flow (call path among the symbols you queried)', '');
|
|
2009
|
+
for (let i = 0; i < best.length; i++) {
|
|
2010
|
+
const step = best[i];
|
|
2011
|
+
if (step.edge) {
|
|
2012
|
+
const sy = this.synthEdgeNote(step.edge);
|
|
2013
|
+
out.push(` ↓ ${sy ? sy.compact : step.edge.kind}`);
|
|
2014
|
+
}
|
|
2015
|
+
out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
|
|
2016
|
+
}
|
|
2017
|
+
out.push('');
|
|
2018
|
+
}
|
|
2019
|
+
if (synthLines.length) {
|
|
2020
|
+
out.push('## Dynamic-dispatch links among your symbols', '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)', '', ...synthLines, '');
|
|
2021
|
+
}
|
|
2022
|
+
out.push('> Full source for these symbols is below; codegraph_trace(from,to) for the exact path between two endpoints.', '');
|
|
2023
|
+
// namedNodeIds = every callable the agent explicitly named (a superset of
|
|
2024
|
+
// the spine). A file holding one is something the agent asked to SEE, so it
|
|
2025
|
+
// must keep full source even if it's an off-spine polymorphic sibling — the
|
|
2026
|
+
// agent named `getResponseWithInterceptorChain` / `SQLCompiler.execute_sql`
|
|
2027
|
+
// as the mechanism, not as an interchangeable leaf. See the skeleton gate.
|
|
2028
|
+
return { text: out.join('\n'), pathNodeIds: pathIds, namedNodeIds: new Set(named.keys()), uniqueNamedNodeIds };
|
|
1481
2029
|
}
|
|
1482
2030
|
catch {
|
|
1483
|
-
return
|
|
2031
|
+
return EMPTY;
|
|
1484
2032
|
}
|
|
1485
2033
|
}
|
|
1486
2034
|
/**
|
|
@@ -1561,9 +2109,42 @@ class ToolHandler {
|
|
|
1561
2109
|
glueNodeIds.add(nb.id);
|
|
1562
2110
|
}
|
|
1563
2111
|
}
|
|
2112
|
+
// Named-symbol seeding: findRelevantContext is an FTS/text rank, so a query
|
|
2113
|
+
// that's a BAG of symbol names skewed toward one phase (Alamofire: 5 build
|
|
2114
|
+
// terms, each a high-frequency name, vs 3 validate terms) lets the
|
|
2115
|
+
// lower-frequency names fall below the search cut — their definitions, and
|
|
2116
|
+
// whole files (Validation.swift), never get gathered, so they can never
|
|
2117
|
+
// render and the agent Reads them. Resolve EACH named token to its
|
|
2118
|
+
// substantive definition (skip empty stubs + test files, same relevance the
|
|
2119
|
+
// trace endpoint picker uses) and inject it as an entry, so every symbol the
|
|
2120
|
+
// agent explicitly named is in the subgraph and its file is scored.
|
|
2121
|
+
const namedSeedIds = new Set();
|
|
2122
|
+
{
|
|
2123
|
+
const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
|
|
2124
|
+
const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
|
|
2125
|
+
// True when the path is under a test-like directory (test(s), spec(s),
// __tests__, testdata, mock(s), fixture(s)) or ends in `.test.<ext>` /
// `.spec.<ext>`. Case-insensitive.
const isTestPath = (p) => [
    /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i,
    /\.(test|spec)\.[a-z]+$/i,
].some((pattern) => pattern.test(p));
|
|
2126
|
+
// Number of lines spanned by a node's body (endLine - startLine), never
// negative. A missing endLine counts as a zero-length body.
const bodyLines = (n) => {
    const lastLine = n.endLine ?? n.startLine;
    return Math.max(0, lastLine - n.startLine);
};
|
|
2127
|
+
const tokens = [...new Set(query.split(/[\s,()[\]]+/)
|
|
2128
|
+
.map((t) => t.replace(FILE_EXT, '').trim())
|
|
2129
|
+
.filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
|
|
2130
|
+
for (const t of tokens) {
|
|
2131
|
+
const cands = this.findAllSymbols(cg, t).nodes
|
|
2132
|
+
.filter((n) => CALLABLE.has(n.kind) && !isTestPath(n.filePath))
|
|
2133
|
+
.sort((a, b) => (bodyLines(b) > 1 ? 1 : 0) - (bodyLines(a) > 1 ? 1 : 0) || bodyLines(b) - bodyLines(a));
|
|
2134
|
+
// A specific name (<=3 defs) injects all its defs; an overloaded name
|
|
2135
|
+
// (`request` = 44, mostly stubs) injects only the single most substantive
|
|
2136
|
+
// one, so the build-overload flood doesn't crowd the subgraph.
|
|
2137
|
+
for (const n of cands.slice(0, cands.length <= 3 ? cands.length : 1)) {
|
|
2138
|
+
if (!subgraph.nodes.has(n.id)) {
|
|
2139
|
+
subgraph.nodes.set(n.id, n);
|
|
2140
|
+
namedSeedIds.add(n.id);
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
1564
2145
|
// Step 2: Group nodes by file, score by relevance
|
|
1565
2146
|
const fileGroups = new Map();
|
|
1566
|
-
const entryNodeIds = new Set(subgraph.roots);
|
|
2147
|
+
const entryNodeIds = new Set([...subgraph.roots, ...namedSeedIds]);
|
|
1567
2148
|
// Build a set of nodes directly connected to entry points (depth 1)
|
|
1568
2149
|
const connectedToEntry = new Set();
|
|
1569
2150
|
for (const edge of subgraph.edges) {
|
|
@@ -1578,8 +2159,16 @@ class ToolHandler {
|
|
|
1578
2159
|
continue;
|
|
1579
2160
|
const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
|
|
1580
2161
|
group.nodes.push(node);
|
|
1581
|
-
// Score:
|
|
1582
|
-
|
|
2162
|
+
// Score: a NAMED-SEED node (a symbol the agent named that FTS missed, now
|
|
2163
|
+
// injected) is worth far more than a mere reference — its file is where the
|
|
2164
|
+
// answer lives. Without this, an incidental file that name-drops the flow
|
|
2165
|
+
// (Combine.swift references request/task → score 23 from connected nodes)
|
|
2166
|
+
// outranks the file that DEFINES a named symbol (Validation.swift's
|
|
2167
|
+
// `validate` → 10) and steals its render slot. Definition ≫ reference.
|
|
2168
|
+
if (namedSeedIds.has(node.id)) {
|
|
2169
|
+
group.score += 50;
|
|
2170
|
+
}
|
|
2171
|
+
else if (entryNodeIds.has(node.id)) {
|
|
1583
2172
|
group.score += 10;
|
|
1584
2173
|
}
|
|
1585
2174
|
else if (connectedToEntry.has(node.id)) {
|
|
@@ -1591,9 +2180,44 @@ class ToolHandler {
|
|
|
1591
2180
|
fileGroups.set(node.filePath, group);
|
|
1592
2181
|
}
|
|
1593
2182
|
// Only include files that have entry points or nodes directly connected to entry points
|
|
1594
|
-
|
|
2183
|
+
let relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3);
|
|
1595
2184
|
// Extract query terms for relevance checking
|
|
1596
2185
|
const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3);
|
|
2186
|
+
// Test/spec/icon/i18n file detector — used both for the pre-sort hard
|
|
2187
|
+
// filter (all tiers) and the comparator deprioritization (all tiers).
|
|
2188
|
+
// Low-value file detector: test/spec files across the per-language naming
// conventions listed below, plus icon and i18n paths. The path is lowercased
// first, so every pattern is effectively case-insensitive.
const isLowValue = (p) => {
    const lp = p.toLowerCase();
    const lowValuePatterns = [
        /\/(tests?|__tests?__|spec)\//, // test/spec directory
        /_test\.go$/, // Go
        /(?:^|\/)test_[^/]+\.py$/, // Python test_*.py
        /_test\.py$/, // Python *_test.py
        /_spec\.rb$/, // Ruby spec
        /_test\.rb$/, // Ruby test
        /\.(test|spec)\.[jt]sx?$/, // JS/TS
        /(test|spec|tests)\.(java|kt|scala)$/, // JVM
        /(tests?|spec)\.cs$/, // C#
        /tests?\.swift$/, // Swift
        /_test\.dart$/, // Dart
        /\bicons?\b/, // icon assets
        /\bi18n\b/, // translations
    ];
    return lowValuePatterns.some((pattern) => pattern.test(lp));
};
|
|
2204
|
+
// Hard-exclude test/spec files (ALL tiers, not just tiny). One slipped test
|
|
2205
|
+
// file dominates the per-file budget on small repos (cobra's `command_test.go`
|
|
2206
|
+
// displaced `args.go`) AND wastes budget on large ones (Django's
|
|
2207
|
+
// `custom_lookups/tests.py` ate ~2.3 KB of the 28 KB cap, crowding out the
|
|
2208
|
+
// SQLCompiler mechanism the agent then Read). A test file almost never answers
|
|
2209
|
+
// an architecture question. Skip when the query itself is about tests — the
|
|
2210
|
+
// legitimate "explore the tests" case — and only cut if ≥2 non-test candidates
|
|
2211
|
+
// remain (else tests are the only signal for this area).
|
|
2212
|
+
{
|
|
2213
|
+
const queryMentionsTests = /\b(test|tests|testing|spec|verify|verifies)\b/i.test(query);
|
|
2214
|
+
if (!queryMentionsTests) {
|
|
2215
|
+
const nonLow = relevantFiles.filter(([p]) => !isLowValue(p));
|
|
2216
|
+
if (nonLow.length >= 2) {
|
|
2217
|
+
relevantFiles = nonLow;
|
|
2218
|
+
}
|
|
2219
|
+
}
|
|
2220
|
+
}
|
|
1597
2221
|
// Sort files: highest relevance first, deprioritize low-value files
|
|
1598
2222
|
const sortedFiles = relevantFiles.sort((a, b) => {
|
|
1599
2223
|
const aPath = a[0].toLowerCase();
|
|
@@ -1609,14 +2233,20 @@ class ToolHandler {
|
|
|
1609
2233
|
const bRelevant = hasQueryRelevance(bPath, b[1].nodes);
|
|
1610
2234
|
if (aRelevant !== bRelevant)
|
|
1611
2235
|
return aRelevant ? -1 : 1;
|
|
1612
|
-
// Deprioritize test files, icon files, and i18n files
|
|
1613
|
-
const isLowValue = (p) => /\/(tests?|__tests?__|spec)\//i.test(p) ||
|
|
1614
|
-
/\bicons?\b/i.test(p) ||
|
|
1615
|
-
/\bi18n\b/i.test(p);
|
|
1616
2236
|
const aLow = isLowValue(aPath);
|
|
1617
2237
|
const bLow = isLowValue(bPath);
|
|
1618
2238
|
if (aLow !== bLow)
|
|
1619
2239
|
return aLow ? 1 : -1;
|
|
2240
|
+
// Deprioritize generated source (.pb.go / .pulsar.go / _mocks.go / …) —
|
|
2241
|
+
// the agent rarely needs to see the protobuf scaffold or gomock output
|
|
2242
|
+
// when asking about the actual flow, and dumping their bodies inflates
|
|
2243
|
+
// the response (the cosmos Q3 explore otherwise leads with
|
|
2244
|
+
// `expected_keepers_mocks.go`, displacing the real `tally.go` content
|
|
2245
|
+
// and forcing the agent to Read tally.go anyway).
|
|
2246
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a[0]);
|
|
2247
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b[0]);
|
|
2248
|
+
if (aGen !== bGen)
|
|
2249
|
+
return aGen ? 1 : -1;
|
|
1620
2250
|
if (a[1].score !== b[1].score)
|
|
1621
2251
|
return b[1].score - a[1].score;
|
|
1622
2252
|
return b[1].nodes.length - a[1].nodes.length;
|
|
@@ -1659,6 +2289,64 @@ class ToolHandler {
|
|
|
1659
2289
|
}
|
|
1660
2290
|
}
|
|
1661
2291
|
// Step 4: Read contiguous file sections
|
|
2292
|
+
// Compute the flow spine once — used both to prepend the Flow section (below)
|
|
2293
|
+
// and to gate adaptive source sizing: files on the spine get full source,
|
|
2294
|
+
// off-spine peers skeletonize.
|
|
2295
|
+
const flow = this.buildFlowFromNamedSymbols(cg, query);
|
|
2296
|
+
// Polymorphic-sibling detector for adaptive sizing. A class that implements/
|
|
2297
|
+
// extends a supertype shared by >= MIN_SIBLINGS classes is one of many
|
|
2298
|
+
// INTERCHANGEABLE implementations (OkHttp's 14 `: Interceptor` classes —
|
|
2299
|
+
// showing one + the rest as signatures is enough), as opposed to a DISTINCT
|
|
2300
|
+
// pipeline step (Excalidraw's `renderStaticScene`, which shares no supertype and
|
|
2301
|
+
// must stay full or the agent loses real content). Only off-spine sibling files
|
|
2302
|
+
// skeletonize; distinct steps and on-spine files keep full source. Cache
|
|
2303
|
+
// supertype→(has ≥N implementers) so this stays a handful of edge queries.
|
|
2304
|
+
const MIN_SIBLINGS = 3;
// Edge kinds that express "is an implementation/subclass of".
const INHERITANCE_EDGE_KINDS = new Set(['implements', 'extends']);
// Node kinds that can act as a supertype for a polymorphic family.
const SUPERTYPE_NODE_KINDS = new Set(['class', 'interface', 'struct', 'trait', 'protocol', 'type_alias']);
/**
 * Memoized test: does `nodeId` have at least MIN_SIBLINGS incoming
 * `implements`/`extends` edges?
 *
 * Shared by both detectors below so the counting rule lives in one place.
 * Counting stops as soon as the threshold is reached.
 *
 * @param {*} nodeId - id of the candidate supertype node (used as the cache key).
 * @param {Map<*, boolean>} cache - per-detector memo of previous answers.
 * @returns {boolean} true when the node has >= MIN_SIBLINGS implementers/subclasses.
 */
const hasManyImplementers = (nodeId, cache) => {
    const cached = cache.get(nodeId);
    if (cached !== undefined)
        return cached;
    let count = 0;
    for (const edge of cg.getIncomingEdges(nodeId)) {
        if (!INHERITANCE_EDGE_KINDS.has(edge.kind))
            continue;
        count++;
        if (count >= MIN_SIBLINGS)
            break; // threshold reached — no need to look at the remaining edges
    }
    const many = count >= MIN_SIBLINGS;
    cache.set(nodeId, many);
    return many;
};
// supertype id → (has ≥ MIN_SIBLINGS implementers), filled lazily by isPolymorphicSibling.
const siblingSuper = new Map();
/**
 * True when any of `nodes` implements/extends a supertype that is shared by
 * at least MIN_SIBLINGS implementers, i.e. the file holds one of many
 * interchangeable implementations.
 *
 * @param {Array<{id: *}>} nodes - the graph nodes gathered for one file.
 * @returns {boolean}
 */
const isPolymorphicSibling = (nodes) => {
    for (const n of nodes) {
        for (const e of cg.getOutgoingEdges(n.id)) {
            if (!INHERITANCE_EDGE_KINDS.has(e.kind))
                continue;
            if (hasManyImplementers(e.target, siblingSuper))
                return true;
        }
    }
    return false;
};
// A file that DEFINES a polymorphic supertype (a class/interface with ≥
// MIN_SIBLINGS implementers) AND co-locates its subclasses is a redundant
// "family" file — Django's compiler.py holds `SQLCompiler` + its 4 subclasses
// (SQLInsert/Update/Delete/AggregateCompiler) in 2,266 lines. Such files are
// huge and read-anyway, so they should STILL skeletonize even when the agent
// named a method in them: a full one eats ~6.5K of the explore budget (Django
// is pinned at the 28K cap, truncating), starving the sibling files the agent
// then Reads. This flag OVERRIDES the named-callable spare below — it does NOT
// by itself spare a file. (OkHttp's RealCall implements the `Lockable` mixin
// but defines no ≥3-impl supertype, so the named spare keeps it full.)
// type node id → (has ≥ MIN_SIBLINGS implementers), filled lazily by definesPolymorphicSupertype.
const superMany = new Map();
/**
 * True when any type-like node in `nodes` (class, interface, struct, trait,
 * protocol or type alias) is itself the target of at least MIN_SIBLINGS
 * `implements`/`extends` edges.
 *
 * @param {Array<{id: *, kind: string}>} nodes - the graph nodes gathered for one file.
 * @returns {boolean}
 */
const definesPolymorphicSupertype = (nodes) => {
    for (const n of nodes) {
        if (!SUPERTYPE_NODE_KINDS.has(n.kind))
            continue;
        if (hasManyImplementers(n.id, superMany))
            return true;
    }
    return false;
};
|
|
1662
2350
|
lines.push('### Source Code');
|
|
1663
2351
|
lines.push('');
|
|
1664
2352
|
lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
|
|
@@ -1669,8 +2357,15 @@ class ToolHandler {
|
|
|
1669
2357
|
for (const [filePath, group] of sortedFiles) {
|
|
1670
2358
|
if (filesIncluded >= maxFiles)
|
|
1671
2359
|
break;
|
|
1672
|
-
|
|
1673
|
-
|
|
2360
|
+
// A file DEFINES a named/spine symbol (the answer) vs merely references the
|
|
2361
|
+
// flow. Past 90% budget, stop pulling INCIDENTAL files — but keep scanning
|
|
2362
|
+
// for necessary ones, which render even past the cap (bounded by maxFiles).
|
|
2363
|
+
// Without this `continue` (was an unconditional `break`), the loop stopped
|
|
2364
|
+
// after the build + validators-exec files and never reached the ranked-in
|
|
2365
|
+
// validate-logic file (Alamofire's Validation.swift).
|
|
2366
|
+
const fileNecessary = group.nodes.some(n => entryNodeIds.has(n.id) || flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id));
|
|
2367
|
+
if (!fileNecessary && totalChars > budget.maxOutputChars * 0.9)
|
|
2368
|
+
continue;
|
|
1674
2369
|
const absPath = (0, utils_1.validatePathWithinRoot)(projectRoot, filePath);
|
|
1675
2370
|
if (!absPath || !(0, fs_1.existsSync)(absPath))
|
|
1676
2371
|
continue;
|
|
@@ -1683,6 +2378,131 @@ class ToolHandler {
|
|
|
1683
2378
|
}
|
|
1684
2379
|
const fileLines = fileContent.split('\n');
|
|
1685
2380
|
const lang = group.nodes[0]?.language || '';
|
|
2381
|
+
// Adaptive sizing (CODEGRAPH_ADAPTIVE_EXPLORE, default on): collapse a file
|
|
2382
|
+
// to a per-symbol view when it's a redundant member of a polymorphic family.
|
|
2383
|
+
// Engages iff ALL hold:
|
|
2384
|
+
// 1. a flow spine exists,
|
|
2385
|
+
// 2. no symbol in the file is on that spine (it's not the mechanism path),
|
|
2386
|
+
// 3. it IS a polymorphic sibling (≥ MIN_SIBLINGS impls of a shared supertype),
|
|
2387
|
+
// 4. it is NOT SPARED, where a file is spared iff the agent named a
|
|
2388
|
+
// (near-)UNIQUE callable in it (`getResponseWithInterceptorChain`, 1 def →
|
|
2389
|
+
// keep RealCall.kt full) UNLESS the file DEFINES the family supertype (a
|
|
2390
|
+
// base+subclasses "family" file like Django's compiler.py — collapse it).
|
|
2391
|
+
// Uniqueness matters: `as_sql` has 110 defs across every Compiler/Expression
|
|
2392
|
+
// subclass; naming it must NOT keep every backend variant + test file full
|
|
2393
|
+
// and flood the budget. That's why the spare reads uniqueNamedNodeIds.
|
|
2394
|
+
// Within a collapsed file the render is PER-SYMBOL (condition B): a method the
|
|
2395
|
+
// agent NAMED or that's on the spine is shown with its FULL body (so the agent
|
|
2396
|
+
// doesn't Read the file back for it — Django's SQLCompiler.execute_sql/as_sql);
|
|
2397
|
+
// every other symbol is just its signature. So the base mechanism survives while
|
|
2398
|
+
// the file's other ~80 symbols + the redundant subclasses collapse to one line each.
|
|
2399
|
+
const spareNamed = group.nodes.some(n => flow.uniqueNamedNodeIds.has(n.id));
|
|
2400
|
+
const fileDefinesSuper = definesPolymorphicSupertype(group.nodes);
|
|
2401
|
+
const spared = spareNamed && !fileDefinesSuper;
|
|
2402
|
+
const CALLABLE_BODY = new Set(['method', 'function', 'constructor', 'component']);
|
|
2403
|
+
const hasSpineNode = group.nodes.some(n => flow.pathNodeIds.has(n.id));
|
|
2404
|
+
// On-spine god-file: the flow path runs THROUGH this file, but it also holds
|
|
2405
|
+
// many OTHER named methods, and rendering all of them in full blows the
|
|
2406
|
+
// per-file budget and starves the other flow files (Alamofire: the agent
|
|
2407
|
+
// names ~7 Session.swift methods — the build spine PLUS off-path
|
|
2408
|
+
// task/didCompleteTask — far past the whole response budget). Engage the
|
|
2409
|
+
// per-symbol view to keep the SPINE full and collapse the off-path named
|
|
2410
|
+
// methods to signatures. Only when there IS off-path content to shed —
|
|
2411
|
+
// otherwise the spine is irreducible (a sequential flow has no redundancy),
|
|
2412
|
+
// so leave it to the normal full render.
|
|
2413
|
+
const namedBodyChars = group.nodes
|
|
2414
|
+
.filter(n => CALLABLE_BODY.has(n.kind) && (flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id)))
|
|
2415
|
+
.reduce((s, n) => s + fileLines.slice(n.startLine - 1, Math.min(n.endLine, n.startLine + 220)).join('\n').length, 0);
|
|
2416
|
+
const onSpineGodFile = hasSpineNode
|
|
2417
|
+
&& namedBodyChars > budget.maxCharsPerFile
|
|
2418
|
+
&& group.nodes.some(n => CALLABLE_BODY.has(n.kind) && flow.uniqueNamedNodeIds.has(n.id) && !flow.pathNodeIds.has(n.id));
|
|
2419
|
+
if (adaptiveExploreEnabled() && flow.pathNodeIds.size > 0
|
|
2420
|
+
&& (onSpineGodFile || (!hasSpineNode && isPolymorphicSibling(group.nodes) && !spared))) {
|
|
2421
|
+
const syms = group.nodes
|
|
2422
|
+
.filter(n => n.kind !== 'import' && n.kind !== 'export' && n.startLine > 0)
|
|
2423
|
+
.sort((a, b) => a.startLine - b.startLine);
|
|
2424
|
+
// Pass 1: choose which symbols get a FULL body, by priority, greedily within
|
|
2425
|
+
// a per-file body cap — so one huge family file can't body every named method
|
|
2426
|
+
// and crowd out the other flow files (Django's query.py). A symbol earns a
|
|
2427
|
+
// body if it's on-spine, or UNIQUELY named (`SQLCompiler.execute_sql`), or a
|
|
2428
|
+
// co-named method WHEN this file DEFINES the family supertype (so the base
|
|
2429
|
+
// `SQLCompiler.as_sql` body shows, but the 110 leaf `as_sql` overrides — and
|
|
2430
|
+
// OkHttp's 5 `intercept`s if the agent names `intercept` — stay signatures).
|
|
2431
|
+
// Rank a symbol for a full-body slot: 0 = on the flow spine, 1 = uniquely
// named by the agent, 2 = co-named method in a file that defines the family
// supertype, 99 = signature only (including every non-callable kind).
const prio = (n) => {
    if (!CALLABLE_BODY.has(n.kind))
        return 99;
    if (flow.pathNodeIds.has(n.id))
        return 0;
    if (flow.uniqueNamedNodeIds.has(n.id))
        return 1;
    if (fileDefinesSuper && flow.namedNodeIds.has(n.id))
        return 2;
    return 99;
};
|
|
2435
|
+
const bodyCap = budget.maxCharsPerFile * 2;
|
|
2436
|
+
const bodyIds = new Set();
|
|
2437
|
+
let bodyChars = 0;
|
|
2438
|
+
for (const n of syms.filter(n => prio(n) < 99 && n.endLine >= n.startLine).sort((a, b) => prio(a) - prio(b))) {
|
|
2439
|
+
const sz = fileLines.slice(n.startLine - 1, Math.min(n.endLine, n.startLine + 220)).join('\n').length;
|
|
2440
|
+
// Spine methods (prio 0) ALWAYS get a full body — the cap governs the
|
|
2441
|
+
// off-path extras (unique-named, family base), never the flow path itself.
|
|
2442
|
+
if (prio(n) > 0 && bodyChars + sz > bodyCap && bodyIds.size > 0)
|
|
2443
|
+
continue;
|
|
2444
|
+
bodyIds.add(n.id);
|
|
2445
|
+
bodyChars += sz;
|
|
2446
|
+
}
|
|
2447
|
+
// Pass 2: render in line order — full body for chosen symbols, else the
|
|
2448
|
+
// signature line (capped, with a "+N more" tail so the structure map of a
|
|
2449
|
+
// god-file doesn't itself bloat the budget).
|
|
2450
|
+
const skel = [];
|
|
2451
|
+
let coveredUntil = 0; // skip symbols already inside an emitted body
|
|
2452
|
+
let sigCount = 0, sigDropped = 0;
|
|
2453
|
+
const SIG_MAX = Math.max(12, budget.maxSymbolsInFileHeader * 2);
|
|
2454
|
+
for (const n of syms) {
|
|
2455
|
+
if (n.startLine <= coveredUntil)
|
|
2456
|
+
continue;
|
|
2457
|
+
if (bodyIds.has(n.id)) {
|
|
2458
|
+
const end = Math.min(n.endLine, n.startLine + 220);
|
|
2459
|
+
const body = fileLines.slice(n.startLine - 1, end).join('\n');
|
|
2460
|
+
skel.push(exploreLineNumbersEnabled() ? numberSourceLines(body, n.startLine) : body);
|
|
2461
|
+
coveredUntil = end;
|
|
2462
|
+
}
|
|
2463
|
+
else {
|
|
2464
|
+
// Elide the body, emit the signature. node.startLine can point at a
|
|
2465
|
+
// decorator/annotation, so scan forward for the line that names the symbol.
|
|
2466
|
+
let lineNo = n.startLine;
|
|
2467
|
+
for (let k = 0; k < 4; k++) {
|
|
2468
|
+
if ((fileLines[n.startLine - 1 + k] || '').includes(n.name)) {
|
|
2469
|
+
lineNo = n.startLine + k;
|
|
2470
|
+
break;
|
|
2471
|
+
}
|
|
2472
|
+
}
|
|
2473
|
+
if (lineNo <= coveredUntil)
|
|
2474
|
+
continue;
|
|
2475
|
+
if (sigCount >= SIG_MAX) {
|
|
2476
|
+
sigDropped++;
|
|
2477
|
+
continue;
|
|
2478
|
+
}
|
|
2479
|
+
const sig = (fileLines[lineNo - 1] || '').trim();
|
|
2480
|
+
if (sig) {
|
|
2481
|
+
skel.push(exploreLineNumbersEnabled() ? `${lineNo}\t${sig}` : sig);
|
|
2482
|
+
sigCount++;
|
|
2483
|
+
}
|
|
2484
|
+
}
|
|
2485
|
+
}
|
|
2486
|
+
if (sigDropped > 0)
|
|
2487
|
+
skel.push(`… +${sigDropped} more (signatures elided)`);
|
|
2488
|
+
if (skel.length > 0) {
|
|
2489
|
+
const names = [...new Set(group.nodes.filter(n => n.kind !== 'import' && n.kind !== 'export').map(n => n.name))]
|
|
2490
|
+
.slice(0, budget.maxSymbolsInFileHeader).join(', ');
|
|
2491
|
+
// Steer the agent to codegraph_explore for an elided body — NEVER to
|
|
2492
|
+
// Read. The old "Read for more" / "Read for a full body" tags invited
|
|
2493
|
+
// a Read of the very file just skeletonized; on a central, wanted file
|
|
2494
|
+
// (Session.swift, DataRequest.swift) that fired an over-investigation
|
|
2495
|
+
// spiral (the agent Read the skeletonized file, then kept digging).
|
|
2496
|
+
// CLAUDE.md: explore output must never tell the agent to Read.
|
|
2497
|
+
const tag = bodyIds.size > 0
|
|
2498
|
+
? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
|
|
2499
|
+
: 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
|
|
2500
|
+
lines.push(`#### ${filePath} — ${names} · ${tag}`, '', '```' + lang, skel.join('\n'), '```', '');
|
|
2501
|
+
totalChars += skel.join('\n').length + 120;
|
|
2502
|
+
filesIncluded++;
|
|
2503
|
+
continue;
|
|
2504
|
+
}
|
|
2505
|
+
}
|
|
1686
2506
|
// Whole-small-file rule: if a relevant file is small enough to afford,
|
|
1687
2507
|
// return it ENTIRELY instead of clustering. Clustering exists to tame
|
|
1688
2508
|
// god-files (App.tsx ~13k lines); on a ~134-line component a cluster is a
|
|
@@ -1732,14 +2552,33 @@ class ToolHandler {
|
|
|
1732
2552
|
// Alamofire is the canonical case: the `Session` class spans ~1,400
|
|
1733
2553
|
// lines). We want the granular symbols inside, not the envelope.
|
|
1734
2554
|
const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
|
|
1735
|
-
|
|
1736
|
-
|
|
2555
|
+
// Cluster from this file's gathered nodes PLUS any callable the agent NAMED that
|
|
2556
|
+
// lives here. Explore's relevance gather can miss a named method def in a huge
|
|
2557
|
+
// non-sibling file — Django's query.py is 3,040 lines and `_fetch_all` (L2237)
|
|
2558
|
+
// was gathered only as call-reference edges, never as a def, so it formed no
|
|
2559
|
+
// cluster and the agent Read it back. Inject named defs directly and rank them
|
|
2560
|
+
// ABOVE connected/glue nodes (importance 9) so their cluster wins the per-file
|
|
2561
|
+
// budget — the agent explicitly asked for these symbols.
|
|
2562
|
+
const rangeNodes = new Map();
|
|
2563
|
+
for (const n of group.nodes)
|
|
2564
|
+
if (n.startLine > 0 && n.endLine > 0)
|
|
2565
|
+
rangeNodes.set(n.id, n);
|
|
2566
|
+
for (const id of flow.namedNodeIds) {
|
|
2567
|
+
if (rangeNodes.has(id))
|
|
2568
|
+
continue;
|
|
2569
|
+
const n = cg.getNode(id);
|
|
2570
|
+
if (n && n.filePath === filePath && n.startLine > 0 && n.endLine > 0)
|
|
2571
|
+
rangeNodes.set(id, n);
|
|
2572
|
+
}
|
|
2573
|
+
const ranges = [...rangeNodes.values()]
|
|
1737
2574
|
// Drop whole-file envelope nodes (containers covering >50% of the file).
|
|
1738
2575
|
.filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
|
|
1739
2576
|
.map(n => {
|
|
1740
2577
|
let importance = 1;
|
|
1741
2578
|
if (entryNodeIds.has(n.id))
|
|
1742
2579
|
importance = 10;
|
|
2580
|
+
else if (flow.namedNodeIds.has(n.id))
|
|
2581
|
+
importance = 9; // agent named it → keep its cluster
|
|
1743
2582
|
else if (glueNodeIds.has(n.id))
|
|
1744
2583
|
importance = 6; // bridging caller/callee of an entry
|
|
1745
2584
|
else if (connectedToEntry.has(n.id))
|
|
@@ -1840,6 +2679,13 @@ class ToolHandler {
|
|
|
1840
2679
|
return b.c.score - a.c.score;
|
|
1841
2680
|
return a.span - b.span;
|
|
1842
2681
|
});
|
|
2682
|
+
// Per-file budget is the SMALLER of the per-file cap and what's left of the
|
|
2683
|
+
// total output cap — so selection (which ranks by importance) keeps the
|
|
2684
|
+
// high-importance clusters and drops peripheral ones, instead of the
|
|
2685
|
+
// downstream source-order trim slicing off whatever comes last in the file.
|
|
2686
|
+
// That source-order slice is what cut Django's `_fetch_all` (L2237, importance
|
|
2687
|
+
// 9 — agent-named) when query.py was the last of four big files to be emitted.
|
|
2688
|
+
const fileBudget = Math.min(budget.maxCharsPerFile, Math.max(0, budget.maxOutputChars - totalChars - 200));
|
|
1843
2689
|
const chosenIndices = new Set();
|
|
1844
2690
|
let projectedChars = 0;
|
|
1845
2691
|
for (const rc of rankedClusters) {
|
|
@@ -1852,7 +2698,7 @@ class ToolHandler {
|
|
|
1852
2698
|
projectedChars += sectionLen;
|
|
1853
2699
|
continue;
|
|
1854
2700
|
}
|
|
1855
|
-
if (projectedChars + sectionLen >
|
|
2701
|
+
if (projectedChars + sectionLen > fileBudget)
|
|
1856
2702
|
continue;
|
|
1857
2703
|
chosenIndices.add(rc.idx);
|
|
1858
2704
|
projectedChars += sectionLen;
|
|
@@ -1898,22 +2744,22 @@ class ToolHandler {
|
|
|
1898
2744
|
? `${headerSymbols.join(', ')}, +${omittedCount} more`
|
|
1899
2745
|
: headerSymbols.join(', ');
|
|
1900
2746
|
const fileHeader = `#### ${filePath} — ${headerSuffix}`;
|
|
1901
|
-
//
|
|
1902
|
-
|
|
2747
|
+
// The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
|
|
2748
|
+
// the agent named (or that's on the flow spine) renders even when the
|
|
2749
|
+
// nominal total is used up — it's the answer, and the set is bounded by
|
|
2750
|
+
// maxFiles AND by true-spine/named-seeding having already trimmed each file
|
|
2751
|
+
// to its necessary content. A file that merely REFERENCES the flow
|
|
2752
|
+
// (Combine.swift name-drops request/task) is incidental → still capped, so
|
|
2753
|
+
// freed budget never leaks into noise. This is the last god-file layer:
|
|
2754
|
+
// build (Session, true-spined) + validators-exec (Request) + validate
|
|
2755
|
+
// (DataRequest/Validation) all render, instead of the cap dropping whichever
|
|
2756
|
+
// phase the file order happened to put last.
|
|
2757
|
+
if (!fileNecessary && totalChars + fileSection.length + 200 > budget.maxOutputChars) {
|
|
1903
2758
|
const remaining = budget.maxOutputChars - totalChars - 200;
|
|
1904
2759
|
if (remaining < 500)
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
lines.push(fileHeader);
|
|
1908
|
-
lines.push('');
|
|
1909
|
-
lines.push('```' + lang);
|
|
1910
|
-
lines.push(trimmed);
|
|
1911
|
-
lines.push('```');
|
|
1912
|
-
lines.push('');
|
|
1913
|
-
totalChars += trimmed.length + 200;
|
|
1914
|
-
filesIncluded++;
|
|
2760
|
+
continue; // incidental file, no room — skip it, keep scanning for necessary ones
|
|
2761
|
+
fileSection = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
|
|
1915
2762
|
anyFileTrimmed = true;
|
|
1916
|
-
break;
|
|
1917
2763
|
}
|
|
1918
2764
|
lines.push(fileHeader);
|
|
1919
2765
|
lines.push('');
|
|
@@ -1976,11 +2822,20 @@ class ToolHandler {
|
|
|
1976
2822
|
// maxOutputChars (observed 30k against a 28k tier cap). A fat explore
|
|
1977
2823
|
// payload persists in the agent's context and is re-read as cache-input
|
|
1978
2824
|
// on every subsequent turn, so the overrun is paid many times over.
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
2825
|
+
// Final ceiling. The render loop is now the authority on WHAT to emit — it
|
|
2826
|
+
// renders necessary files (named/spine) even past maxOutputChars and caps
|
|
2827
|
+
// only incidental ones, all bounded by maxFiles + per-file true-spine — so
|
|
2828
|
+
// this is a SAFETY ceiling above that necessary content, not a hard cut
|
|
2829
|
+
// through it. Cutting at a flat maxOutputChars here undid the whole point:
|
|
2830
|
+
// Alamofire's loop assembles build+validators-exec+validate (~15K) and a 13K
|
|
2831
|
+
// slice dropped the validate phase the agent then Read. Allow necessary
|
|
2832
|
+
// overflow up to 1.5× (still bounds a pathological monolith).
|
|
2833
|
+
const output = flow.text + lines.join('\n');
|
|
2834
|
+
const hardCeiling = Math.round(budget.maxOutputChars * 1.5);
|
|
2835
|
+
if (output.length > hardCeiling) {
|
|
2836
|
+
const cut = output.slice(0, hardCeiling);
|
|
1982
2837
|
const lastNewline = cut.lastIndexOf('\n');
|
|
1983
|
-
const safe = lastNewline >
|
|
2838
|
+
const safe = lastNewline > hardCeiling * 0.8 ? cut.slice(0, lastNewline) : cut;
|
|
1984
2839
|
return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
|
|
1985
2840
|
}
|
|
1986
2841
|
return this.textResult(output);
|
|
@@ -2380,10 +3235,19 @@ class ToolHandler {
|
|
|
2380
3235
|
return { node: exactMatches[0].node, note: '' };
|
|
2381
3236
|
}
|
|
2382
3237
|
if (exactMatches.length > 1) {
|
|
3238
|
+
// Down-rank generated files (.pb.go, .pulsar.go, _grpc.pb.go, …)
|
|
3239
|
+
// so a query like "Send" prefers the keeper implementation over
|
|
3240
|
+
// the protobuf-generated interface stub. Stable sort preserves
|
|
3241
|
+
// FTS order within each group. See generated-detection.ts.
|
|
3242
|
+
const ranked = [...exactMatches].sort((a, b) => {
|
|
3243
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0;
|
|
3244
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0;
|
|
3245
|
+
return aGen - bGen;
|
|
3246
|
+
});
|
|
2383
3247
|
// Multiple exact matches - pick first, note the others
|
|
2384
|
-
const picked =
|
|
2385
|
-
const others =
|
|
2386
|
-
const note = `\n\n> **Note:** ${
|
|
3248
|
+
const picked = ranked[0].node;
|
|
3249
|
+
const others = ranked.slice(1).map(r => `${r.node.name} (${r.node.kind}) at ${r.node.filePath}:${r.node.startLine}`);
|
|
3250
|
+
const note = `\n\n> **Note:** ${ranked.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`;
|
|
2387
3251
|
return { node: picked, note };
|
|
2388
3252
|
}
|
|
2389
3253
|
// No exact match. For qualified lookups, don't silently fall back
|
|
@@ -2416,9 +3280,17 @@ class ToolHandler {
|
|
|
2416
3280
|
const node = exactMatches[0]?.node ?? results[0].node;
|
|
2417
3281
|
return { nodes: [node], note: '' };
|
|
2418
3282
|
}
|
|
2419
|
-
|
|
2420
|
-
|
|
2421
|
-
|
|
3283
|
+
// Same generated-file down-rank as findSymbol — keeps callers/callees
|
|
3284
|
+
// /impact aggregation aligned (a query against "Send" returns the
|
|
3285
|
+
// hand-written implementations before the protobuf scaffold).
|
|
3286
|
+
const ranked = [...exactMatches].sort((a, b) => {
|
|
3287
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0;
|
|
3288
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0;
|
|
3289
|
+
return aGen - bGen;
|
|
3290
|
+
});
|
|
3291
|
+
const locations = ranked.map(r => `${r.node.kind} at ${r.node.filePath}:${r.node.startLine}`);
|
|
3292
|
+
const note = `\n\n> **Note:** Aggregated results across ${ranked.length} symbols named "${symbol}": ${locations.join(', ')}`;
|
|
3293
|
+
return { nodes: ranked.map(r => r.node), note };
|
|
2422
3294
|
}
|
|
2423
3295
|
/**
|
|
2424
3296
|
* Truncate output if it exceeds the maximum length
|