@colbymchenry/codegraph-darwin-x64 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/lib/dist/bin/codegraph.js +18 -20
  2. package/lib/dist/bin/codegraph.js.map +1 -1
  3. package/lib/dist/extraction/grammars.d.ts +10 -0
  4. package/lib/dist/extraction/grammars.d.ts.map +1 -1
  5. package/lib/dist/extraction/grammars.js +13 -0
  6. package/lib/dist/extraction/grammars.js.map +1 -1
  7. package/lib/dist/extraction/index.d.ts.map +1 -1
  8. package/lib/dist/extraction/index.js +17 -2
  9. package/lib/dist/extraction/index.js.map +1 -1
  10. package/lib/dist/extraction/tree-sitter.js +1 -1
  11. package/lib/dist/extraction/tree-sitter.js.map +1 -1
  12. package/lib/dist/index.d.ts +2 -1
  13. package/lib/dist/index.d.ts.map +1 -1
  14. package/lib/dist/index.js +8 -1
  15. package/lib/dist/index.js.map +1 -1
  16. package/lib/dist/mcp/engine.d.ts.map +1 -1
  17. package/lib/dist/mcp/engine.js +12 -38
  18. package/lib/dist/mcp/engine.js.map +1 -1
  19. package/lib/dist/mcp/index.d.ts +7 -4
  20. package/lib/dist/mcp/index.d.ts.map +1 -1
  21. package/lib/dist/mcp/index.js +46 -39
  22. package/lib/dist/mcp/index.js.map +1 -1
  23. package/lib/dist/mcp/proxy.d.ts +35 -0
  24. package/lib/dist/mcp/proxy.d.ts.map +1 -1
  25. package/lib/dist/mcp/proxy.js +223 -0
  26. package/lib/dist/mcp/proxy.js.map +1 -1
  27. package/lib/dist/mcp/session.d.ts +10 -0
  28. package/lib/dist/mcp/session.d.ts.map +1 -1
  29. package/lib/dist/mcp/session.js +7 -5
  30. package/lib/dist/mcp/session.js.map +1 -1
  31. package/lib/dist/mcp/tools.d.ts +8 -1
  32. package/lib/dist/mcp/tools.d.ts.map +1 -1
  33. package/lib/dist/mcp/tools.js +438 -56
  34. package/lib/dist/mcp/tools.js.map +1 -1
  35. package/lib/dist/resolution/callback-synthesizer.d.ts +2 -2
  36. package/lib/dist/resolution/callback-synthesizer.d.ts.map +1 -1
  37. package/lib/dist/resolution/callback-synthesizer.js +239 -2
  38. package/lib/dist/resolution/callback-synthesizer.js.map +1 -1
  39. package/lib/node_modules/.package-lock.json +1 -1
  40. package/lib/package.json +1 -1
  41. package/package.json +1 -1
@@ -43,7 +43,14 @@ exports.getExploreBudget = getExploreBudget;
43
43
  exports.getExploreOutputBudget = getExploreOutputBudget;
44
44
  exports.formatStaleBanner = formatStaleBanner;
45
45
  exports.formatStaleFooter = formatStaleFooter;
46
- const index_1 = __importStar(require("../index"));
46
+ exports.getStaticTools = getStaticTools;
47
+ const directory_1 = require("../directory");
48
+ // Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
49
+ // helper in engine.ts. ToolHandler must load to answer tools/list (static
50
+ // schemas), but it must NOT drag in sqlite/query layers before the daemon binds;
51
+ // CodeGraph is pulled in only when a tool actually opens a project. require() is
52
+ // sync + cached (CommonJS build).
53
+ const loadCodeGraph = () => require('../index').default;
47
54
  const worktree_1 = require("../sync/worktree");
48
55
  const crypto_1 = require("crypto");
49
56
  const fs_1 = require("fs");
@@ -206,6 +213,21 @@ function getExploreOutputBudget(fileCount) {
206
213
  function exploreLineNumbersEnabled() {
207
214
  return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0';
208
215
  }
216
+ /**
217
+ * Adaptive explore sizing (default ON). `codegraph_explore` skeletonizes OFF-SPINE
218
+ * polymorphic-sibling files — a file whose class is one of ≥3 interchangeable
219
+ * implementations of a shared interface (e.g. OkHttp's `: Interceptor` classes) —
220
+ * to class + member signatures (bodies elided), keeping the on-spine exemplar full.
221
+ * This sizes the response to the answer instead of the budget cap on sibling-heavy
222
+ * flows (OkHttp interceptor-chain explore 28.5k→16.6k, ~28% cheaper than native
223
+ * search, reads flat). It is PROVABLY INERT elsewhere: distinct pipeline steps (no
224
+ * ≥3-implementer supertype, e.g. Excalidraw's `renderStaticScene`) and on-spine
225
+ * files keep full source — output is byte-identical to shipped on excalidraw /
226
+ * tokio / django / vscode / gin. Set `CODEGRAPH_ADAPTIVE_EXPLORE=0` to disable.
227
+ */
228
+ function adaptiveExploreEnabled() {
229
+ return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
230
+ }
209
231
  /**
210
232
  * Prefix each line of a source slice with its 1-based line number, matching
211
233
  * the Read tool's `cat -n` convention (number + tab) so the agent treats it
@@ -536,6 +558,19 @@ exports.tools = [
536
558
  },
537
559
  },
538
560
  ];
561
+ /**
562
+ * Allowlist-filtered tool definitions WITHOUT an engine — the static surface the
563
+ * proxy answers `tools/list` with before any project is open. Mirrors
564
+ * `ToolHandler.getTools()` in the no-CodeGraph case (the dynamic per-repo budget
565
+ * note in a description only adds once `cg` is loaded; the schemas are static).
566
+ */
567
+ function getStaticTools() {
568
+ const raw = process.env.CODEGRAPH_MCP_TOOLS;
569
+ if (!raw || !raw.trim())
570
+ return exports.tools;
571
+ const allow = new Set(raw.split(',').map(s => s.trim().replace(/^codegraph_/, '')).filter(Boolean));
572
+ return allow.size ? exports.tools.filter(t => allow.has(t.name.replace(/^codegraph_/, ''))) : exports.tools;
573
+ }
539
574
  /**
540
575
  * Tool handler that executes tools against a CodeGraph instance
541
576
  *
@@ -718,7 +753,7 @@ class ToolHandler {
718
753
  }
719
754
  }
720
755
  // Walk up parent directories to find nearest .codegraph/
721
- const resolvedRoot = (0, index_1.findNearestCodeGraphRoot)(projectPath);
756
+ const resolvedRoot = (0, directory_1.findNearestCodeGraphRoot)(projectPath);
722
757
  if (!resolvedRoot) {
723
758
  throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
724
759
  }
@@ -740,7 +775,7 @@ class ToolHandler {
740
775
  return cg;
741
776
  }
742
777
  // Open and cache under both paths
743
- const cg = index_1.default.openSync(resolvedRoot);
778
+ const cg = loadCodeGraph().openSync(resolvedRoot);
744
779
  this.projectCache.set(resolvedRoot, cg);
745
780
  if (projectPath !== resolvedRoot) {
746
781
  this.projectCache.set(projectPath, cg);
@@ -1463,10 +1498,30 @@ class ToolHandler {
1463
1498
  - (isLessCanonicalPath(b) ? LESS_CANONICAL_PENALTY : 0);
1464
1499
  const fromCands = fromMatches.nodes;
1465
1500
  const toCands = toMatches.nodes;
1501
+ // Candidate relevance: an overloaded name (Alamofire has 44 `request`s, most
1502
+ // of them EMPTY EventMonitor protocol-conformance stubs `func request(…){}`)
1503
+ // floods the pool with no-op decls. Shared-dir-prefix alone then MISLEADS —
1504
+ // two unrelated `Source/Features/` delegate stubs outscore the real
1505
+ // `Source/Core/Session.request` × `Source/Core/…task` pair the agent meant,
1506
+ // so trace resolves to stubs, finds no path, and the agent reads by line.
1507
+ // Penalize empty stubs and test-file symbols so a substantive entry point
1508
+ // wins; among real methods this is ~flat, so path-proximity still decides
1509
+ // (cosmos EndBlocker disambiguation is unaffected — none of its candidates
1510
+ // are stubs/tests).
1511
+ const isTestPath = (p) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
1512
+ const nodeRelevance = (n) => {
1513
+ const bodyLines = Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
1514
+ let s = Math.min(bodyLines, 20); // a substantive body is more likely the meant symbol
1515
+ if (bodyLines <= 1)
1516
+ s -= 40; // empty/one-line stub (protocol no-op, decl-only) — almost never the trace endpoint
1517
+ if (isTestPath(n.filePath))
1518
+ s -= 150; // a Source/ symbol is meant over a Tests/ same-named one
1519
+ return s;
1520
+ };
1466
1521
  const pairs = [];
1467
1522
  for (const f of fromCands) {
1468
1523
  for (const t of toCands) {
1469
- pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) });
1524
+ pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) + nodeRelevance(f) + nodeRelevance(t) });
1470
1525
  }
1471
1526
  }
1472
1527
  // Sort by shared prefix desc, then by FTS order (already encoded in the
@@ -1718,6 +1773,14 @@ class ToolHandler {
1718
1773
  registeredAt,
1719
1774
  };
1720
1775
  }
1776
+ if (m?.synthesizedBy === 'closure-collection') {
1777
+ const field = m.field ? `\`${String(m.field)}\`` : 'a collection';
1778
+ return {
1779
+ label: `closure collection — runs handlers appended to ${field} (dynamic dispatch)`,
1780
+ compact: `dynamic: runs ${field} handlers${at}`,
1781
+ registeredAt,
1782
+ };
1783
+ }
1721
1784
  return null;
1722
1785
  }
1723
1786
  /**
@@ -1811,6 +1874,7 @@ class ToolHandler {
1811
1874
  * dropping unrelated `OmsOrderService::list`.
1812
1875
  */
1813
1876
  buildFlowFromNamedSymbols(cg, query) {
1877
+ const EMPTY = { text: '', pathNodeIds: new Set(), namedNodeIds: new Set(), uniqueNamedNodeIds: new Set() };
1814
1878
  try {
1815
1879
  const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
1816
1880
  // Strip only a REAL file extension (Create.cs → Create); KEEP qualified
@@ -1822,7 +1886,7 @@ class ToolHandler {
1822
1886
  .map((t) => t.replace(FILE_EXT, '').trim())
1823
1887
  .filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
1824
1888
  if (tokens.length < 2)
1825
- return '';
1889
+ return EMPTY;
1826
1890
  // Pool of name SEGMENTS (Class + method from every token) used to
1827
1891
  // disambiguate an ambiguous SIMPLE name: keep a candidate only if its
1828
1892
  // CONTAINER class is itself named in the query.
@@ -1832,24 +1896,34 @@ class ToolHandler {
1832
1896
  if (s)
1833
1897
  segPool.add(s);
1834
1898
  const named = new Map();
1899
+ // Nodes whose token is SPECIFIC — a (near-)unique callable name (<=3 defs in
1900
+ // the whole graph). These are safe to SPARE a file on: the agent named THIS
1901
+ // method (`getResponseWithInterceptorChain`, 1 def). A hyper-polymorphic name
1902
+ // (`as_sql`, 110 defs across every Expression/Compiler subclass) is NOT here,
1903
+ // so naming it doesn't keep every backend variant full and flood the budget.
1904
+ const uniqueNamedNodeIds = new Set();
1835
1905
  for (const t of tokens) {
1836
1906
  const cands = this.findAllSymbols(cg, t).nodes.filter((n) => CALLABLE.has(n.kind));
1837
1907
  // A qualified or otherwise-specific name (<=3 hits) keeps all; an
1838
1908
  // ambiguous simple name keeps only candidates whose container is named.
1839
- const pick = cands.length <= 3
1909
+ const specific = cands.length <= 3;
1910
+ const pick = specific
1840
1911
  ? cands
1841
1912
  : cands.filter((n) => {
1842
1913
  const segs = (n.qualifiedName || '').toLowerCase().split(/::|\./).filter(Boolean);
1843
1914
  const container = segs.length >= 2 ? segs[segs.length - 2] : '';
1844
1915
  return !!container && segPool.has(container);
1845
1916
  });
1846
- for (const n of pick.slice(0, 6))
1917
+ for (const n of pick.slice(0, 6)) {
1847
1918
  named.set(n.id, n);
1919
+ if (specific)
1920
+ uniqueNamedNodeIds.add(n.id);
1921
+ }
1848
1922
  if (named.size > 40)
1849
1923
  break;
1850
1924
  }
1851
1925
  if (named.size < 2)
1852
- return '';
1926
+ return EMPTY;
1853
1927
  const MAX_HOPS = 7;
1854
1928
  let best = null;
1855
1929
  // BFS the full call graph (incl. synth edges) from each named seed, but
@@ -1895,22 +1969,66 @@ class ToolHandler {
1895
1969
  if (!best || chain.length > best.length)
1896
1970
  best = chain;
1897
1971
  }
1898
- if (!best || best.length < 3)
1899
- return '';
1900
- const out = ['## Flow (call path among the symbols you queried)', ''];
1901
- for (let i = 0; i < best.length; i++) {
1902
- const step = best[i];
1903
- if (step.edge) {
1904
- const sy = this.synthEdgeNote(step.edge);
1905
- out.push(` ↓ ${sy ? sy.compact : step.edge.kind}`);
1972
+ const hasMain = !!best && best.length >= 3;
1973
+ const pathIds = new Set((best ?? []).map((s) => s.node.id));
1974
+ // Supplementary: dynamic-dispatch (synthesized) edges incident to a NAMED
1975
+ // symbol — the indirect hops an agent would otherwise grep/Read to
1976
+ // reconstruct ("where do the appended `validators` actually run?"). The
1977
+ // synth edge IS that answer, so surface it even when the OTHER end wasn't
1978
+ // named (e.g. the agent names `validate` but not the `didCompleteTask`
1979
+ // that drains the collection). On-topic by construction: only heuristic
1980
+ // edges touching a symbol the agent named; skipped when the hop already
1981
+ // shows in the main chain.
1982
+ const synthLines = [];
1983
+ const synthSeen = new Set();
1984
+ for (const n of named.values()) {
1985
+ if (synthLines.length >= 6)
1986
+ break;
1987
+ for (const { node: other, edge } of [...cg.getCallers(n.id), ...cg.getCallees(n.id)]) {
1988
+ if (synthLines.length >= 6)
1989
+ break;
1990
+ if (edge.provenance !== 'heuristic' || other.id === n.id)
1991
+ continue;
1992
+ if (pathIds.has(edge.source) && pathIds.has(edge.target))
1993
+ continue; // already in the main chain
1994
+ const src = edge.source === n.id ? n : other;
1995
+ const tgt = edge.source === n.id ? other : n;
1996
+ const key = `${src.name}>${tgt.name}`;
1997
+ if (synthSeen.has(key))
1998
+ continue;
1999
+ synthSeen.add(key);
2000
+ const note = this.synthEdgeNote(edge);
2001
+ synthLines.push(`- ${src.name} → ${tgt.name} [${note ? note.compact : edge.kind}]`);
2002
+ }
2003
+ }
2004
+ if (!hasMain && synthLines.length === 0)
2005
+ return EMPTY;
2006
+ const out = [];
2007
+ if (hasMain) {
2008
+ out.push('## Flow (call path among the symbols you queried)', '');
2009
+ for (let i = 0; i < best.length; i++) {
2010
+ const step = best[i];
2011
+ if (step.edge) {
2012
+ const sy = this.synthEdgeNote(step.edge);
2013
+ out.push(` ↓ ${sy ? sy.compact : step.edge.kind}`);
2014
+ }
2015
+ out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
1906
2016
  }
1907
- out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
2017
+ out.push('');
1908
2018
  }
1909
- out.push('', '> Full source for these symbols is below; codegraph_trace(from,to) for the exact path between two endpoints.', '');
1910
- return out.join('\n');
2019
+ if (synthLines.length) {
2020
+ out.push('## Dynamic-dispatch links among your symbols', '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)', '', ...synthLines, '');
2021
+ }
2022
+ out.push('> Full source for these symbols is below; codegraph_trace(from,to) for the exact path between two endpoints.', '');
2023
+ // namedNodeIds = every callable the agent explicitly named (a superset of
2024
+ // the spine). A file holding one is something the agent asked to SEE, so it
2025
+ // must keep full source even if it's an off-spine polymorphic sibling — the
2026
+ // agent named `getResponseWithInterceptorChain` / `SQLCompiler.execute_sql`
2027
+ // as the mechanism, not as an interchangeable leaf. See the skeleton gate.
2028
+ return { text: out.join('\n'), pathNodeIds: pathIds, namedNodeIds: new Set(named.keys()), uniqueNamedNodeIds };
1911
2029
  }
1912
2030
  catch {
1913
- return '';
2031
+ return EMPTY;
1914
2032
  }
1915
2033
  }
1916
2034
  /**
@@ -1991,9 +2109,42 @@ class ToolHandler {
1991
2109
  glueNodeIds.add(nb.id);
1992
2110
  }
1993
2111
  }
2112
+ // Named-symbol seeding: findRelevantContext is an FTS/text rank, so a query
2113
+ // that's a BAG of symbol names skewed toward one phase (Alamofire: 5 build
2114
+ // terms, each a high-frequency name, vs 3 validate terms) lets the
2115
+ // lower-frequency names fall below the search cut — their definitions, and
2116
+ // whole files (Validation.swift), never get gathered, so they can never
2117
+ // render and the agent Reads them. Resolve EACH named token to its
2118
+ // substantive definition (skip empty stubs + test files, same relevance the
2119
+ // trace endpoint picker uses) and inject it as an entry, so every symbol the
2120
+ // agent explicitly named is in the subgraph and its file is scored.
2121
+ const namedSeedIds = new Set();
2122
+ {
2123
+ const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
2124
+ const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
2125
+ const isTestPath = (p) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
2126
+ const bodyLines = (n) => Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
2127
+ const tokens = [...new Set(query.split(/[\s,()[\]]+/)
2128
+ .map((t) => t.replace(FILE_EXT, '').trim())
2129
+ .filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
2130
+ for (const t of tokens) {
2131
+ const cands = this.findAllSymbols(cg, t).nodes
2132
+ .filter((n) => CALLABLE.has(n.kind) && !isTestPath(n.filePath))
2133
+ .sort((a, b) => (bodyLines(b) > 1 ? 1 : 0) - (bodyLines(a) > 1 ? 1 : 0) || bodyLines(b) - bodyLines(a));
2134
+ // A specific name (<=3 defs) injects all its defs; an overloaded name
2135
+ // (`request` = 44, mostly stubs) injects only the single most substantive
2136
+ // one, so the build-overload flood doesn't crowd the subgraph.
2137
+ for (const n of cands.slice(0, cands.length <= 3 ? cands.length : 1)) {
2138
+ if (!subgraph.nodes.has(n.id)) {
2139
+ subgraph.nodes.set(n.id, n);
2140
+ namedSeedIds.add(n.id);
2141
+ }
2142
+ }
2143
+ }
2144
+ }
1994
2145
  // Step 2: Group nodes by file, score by relevance
1995
2146
  const fileGroups = new Map();
1996
- const entryNodeIds = new Set(subgraph.roots);
2147
+ const entryNodeIds = new Set([...subgraph.roots, ...namedSeedIds]);
1997
2148
  // Build a set of nodes directly connected to entry points (depth 1)
1998
2149
  const connectedToEntry = new Set();
1999
2150
  for (const edge of subgraph.edges) {
@@ -2008,8 +2159,16 @@ class ToolHandler {
2008
2159
  continue;
2009
2160
  const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
2010
2161
  group.nodes.push(node);
2011
- // Score: entry point nodes worth 10, directly connected worth 3, others worth 1
2012
- if (entryNodeIds.has(node.id)) {
2162
+ // Score: a NAMED-SEED node (a symbol the agent named that FTS missed, now
2163
+ // injected) is worth far more than a mere reference — its file is where the
2164
+ // answer lives. Without this, an incidental file that name-drops the flow
2165
+ // (Combine.swift references request/task → score 23 from connected nodes)
2166
+ // outranks the file that DEFINES a named symbol (Validation.swift's
2167
+ // `validate` → 10) and steals its render slot. Definition ≫ reference.
2168
+ if (namedSeedIds.has(node.id)) {
2169
+ group.score += 50;
2170
+ }
2171
+ else if (entryNodeIds.has(node.id)) {
2013
2172
  group.score += 10;
2014
2173
  }
2015
2174
  else if (connectedToEntry.has(node.id)) {
@@ -2042,20 +2201,18 @@ class ToolHandler {
2042
2201
  /\bicons?\b/.test(lp) ||
2043
2202
  /\bi18n\b/.test(lp));
2044
2203
  };
2045
- // Tiny-tier hard-exclude: on small projects (`excludeLowValueFiles`
2046
- // budget flag), one slipped test/spec file dominates the per-file budget
2047
- // (cobra's `command_test.go` displaced `args.go` and contributed ~10KB of
2048
- // pure noise to "How does cobra parse commands?"). The sort-step
2049
- // deprioritization isn't enough at small N. Skip the hard-exclude when
2050
- // the query itself is about tests — that's the legitimate "explore the
2051
- // tests" case where the agent does want them.
2052
- if (budget.excludeLowValueFiles) {
2204
+ // Hard-exclude test/spec files (ALL tiers, not just tiny). One slipped test
2205
+ // file dominates the per-file budget on small repos (cobra's `command_test.go`
2206
+ // displaced `args.go`) AND wastes budget on large ones (Django's
2207
+ // `custom_lookups/tests.py` ate ~2.3 KB of the 28 KB cap, crowding out the
2208
+ // SQLCompiler mechanism the agent then Read). A test file almost never answers
2209
+ // an architecture question. Skip when the query itself is about tests — the
2210
+ // legitimate "explore the tests" case — and only cut if ≥2 non-test candidates
2211
+ // remain (else tests are the only signal for this area).
2212
+ {
2053
2213
  const queryMentionsTests = /\b(test|tests|testing|spec|verify|verifies)\b/i.test(query);
2054
2214
  if (!queryMentionsTests) {
2055
2215
  const nonLow = relevantFiles.filter(([p]) => !isLowValue(p));
2056
- // Only apply the hard-filter if we still have at least 2 non-test
2057
- // candidates after the cut — otherwise the agent is asking about an
2058
- // area where tests are the only signal, and we should not strip them.
2059
2216
  if (nonLow.length >= 2) {
2060
2217
  relevantFiles = nonLow;
2061
2218
  }
@@ -2132,6 +2289,64 @@ class ToolHandler {
2132
2289
  }
2133
2290
  }
2134
2291
  // Step 4: Read contiguous file sections
2292
+ // Compute the flow spine once — used both to prepend the Flow section (below)
2293
+ // and to gate adaptive source sizing: files on the spine get full source,
2294
+ // off-spine peers skeletonize.
2295
+ const flow = this.buildFlowFromNamedSymbols(cg, query);
2296
+ // Polymorphic-sibling detector for adaptive sizing. A class that implements/
2297
+ // extends a supertype shared by >= MIN_SIBLINGS classes is one of many
2298
+ // INTERCHANGEABLE implementations (OkHttp's 14 `: Interceptor` classes —
2299
+ // showing one + the rest as signatures is enough), as opposed to a DISTINCT
2300
+ // pipeline step (Excalidraw's `renderStaticScene`, which shares no supertype and
2301
+ // must stay full or the agent loses real content). Only off-spine sibling files
2302
+ // skeletonize; distinct steps and on-spine files keep full source. Cache
2303
+ // supertype→(has ≥N implementers) so this stays a handful of edge queries.
2304
+ const MIN_SIBLINGS = 3;
2305
+ const siblingSuper = new Map();
2306
+ const isPolymorphicSibling = (nodes) => {
2307
+ for (const n of nodes) {
2308
+ for (const e of cg.getOutgoingEdges(n.id)) {
2309
+ if (e.kind !== 'implements' && e.kind !== 'extends')
2310
+ continue;
2311
+ let many = siblingSuper.get(e.target);
2312
+ if (many === undefined) {
2313
+ many = cg.getIncomingEdges(e.target)
2314
+ .filter((x) => x.kind === 'implements' || x.kind === 'extends').length >= MIN_SIBLINGS;
2315
+ siblingSuper.set(e.target, many);
2316
+ }
2317
+ if (many)
2318
+ return true;
2319
+ }
2320
+ }
2321
+ return false;
2322
+ };
2323
+ // A file that DEFINES a polymorphic supertype (a class/interface with ≥
2324
+ // MIN_SIBLINGS implementers) AND co-locates its subclasses is a redundant
2325
+ // "family" file — Django's compiler.py holds `SQLCompiler` + its 4 subclasses
2326
+ // (SQLInsert/Update/Delete/AggregateCompiler) in 2,266 lines. Such files are
2327
+ // huge and read-anyway, so they should STILL skeletonize even when the agent
2328
+ // named a method in them: a full one eats ~6.5K of the explore budget (Django
2329
+ // is pinned at the 28K cap, truncating), starving the sibling files the agent
2330
+ // then Reads. This flag OVERRIDES the named-callable spare below — it does NOT
2331
+ // by itself spare a file. (OkHttp's RealCall implements the `Lockable` mixin
2332
+ // but defines no ≥3-impl supertype, so the named spare keeps it full.)
2333
+ const superMany = new Map();
2334
+ const definesPolymorphicSupertype = (nodes) => {
2335
+ for (const n of nodes) {
2336
+ if (n.kind !== 'class' && n.kind !== 'interface' && n.kind !== 'struct'
2337
+ && n.kind !== 'trait' && n.kind !== 'protocol' && n.kind !== 'type_alias')
2338
+ continue;
2339
+ let many = superMany.get(n.id);
2340
+ if (many === undefined) {
2341
+ many = cg.getIncomingEdges(n.id)
2342
+ .filter((x) => x.kind === 'implements' || x.kind === 'extends').length >= MIN_SIBLINGS;
2343
+ superMany.set(n.id, many);
2344
+ }
2345
+ if (many)
2346
+ return true;
2347
+ }
2348
+ return false;
2349
+ };
2135
2350
  lines.push('### Source Code');
2136
2351
  lines.push('');
2137
2352
  lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
@@ -2142,8 +2357,15 @@ class ToolHandler {
2142
2357
  for (const [filePath, group] of sortedFiles) {
2143
2358
  if (filesIncluded >= maxFiles)
2144
2359
  break;
2145
- if (totalChars > budget.maxOutputChars * 0.9)
2146
- break;
2360
+ // A file DEFINES a named/spine symbol (the answer) vs merely references the
2361
+ // flow. Past 90% budget, stop pulling INCIDENTAL files — but keep scanning
2362
+ // for necessary ones, which render even past the cap (bounded by maxFiles).
2363
+ // Without this `continue` (was an unconditional `break`), the loop stopped
2364
+ // after the build + validators-exec files and never reached the ranked-in
2365
+ // validate-logic file (Alamofire's Validation.swift).
2366
+ const fileNecessary = group.nodes.some(n => entryNodeIds.has(n.id) || flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id));
2367
+ if (!fileNecessary && totalChars > budget.maxOutputChars * 0.9)
2368
+ continue;
2147
2369
  const absPath = (0, utils_1.validatePathWithinRoot)(projectRoot, filePath);
2148
2370
  if (!absPath || !(0, fs_1.existsSync)(absPath))
2149
2371
  continue;
@@ -2156,6 +2378,131 @@ class ToolHandler {
2156
2378
  }
2157
2379
  const fileLines = fileContent.split('\n');
2158
2380
  const lang = group.nodes[0]?.language || '';
2381
+ // Adaptive sizing (CODEGRAPH_ADAPTIVE_EXPLORE, default on): collapse a file
2382
+ // to a per-symbol view when it's a redundant member of a polymorphic family.
2383
+ // Engages iff ALL hold:
2384
+ // 1. a flow spine exists,
2385
+ // 2. no symbol in the file is on that spine (it's not the mechanism path),
2386
+ // 3. it IS a polymorphic sibling (≥ MIN_SIBLINGS impls of a shared supertype),
2387
+ // 4. it is NOT SPARED, where a file is spared iff the agent named a
2388
+ // (near-)UNIQUE callable in it (`getResponseWithInterceptorChain`, 1 def →
2389
+ // keep RealCall.kt full) UNLESS the file DEFINES the family supertype (a
2390
+ // base+subclasses "family" file like Django's compiler.py — collapse it).
2391
+ // Uniqueness matters: `as_sql` has 110 defs across every Compiler/Expression
2392
+ // subclass; naming it must NOT keep every backend variant + test file full
2393
+ // and flood the budget. That's why the spare reads uniqueNamedNodeIds.
2394
+ // Within a collapsed file the render is PER-SYMBOL (condition B): a method the
2395
+ // agent NAMED or that's on the spine is shown with its FULL body (so the agent
2396
+ // doesn't Read the file back for it — Django's SQLCompiler.execute_sql/as_sql);
2397
+ // every other symbol is just its signature. So the base mechanism survives while
2398
+ // the file's other ~80 symbols + the redundant subclasses collapse to one line each.
2399
+ const spareNamed = group.nodes.some(n => flow.uniqueNamedNodeIds.has(n.id));
2400
+ const fileDefinesSuper = definesPolymorphicSupertype(group.nodes);
2401
+ const spared = spareNamed && !fileDefinesSuper;
2402
+ const CALLABLE_BODY = new Set(['method', 'function', 'constructor', 'component']);
2403
+ const hasSpineNode = group.nodes.some(n => flow.pathNodeIds.has(n.id));
2404
+ // On-spine god-file: the flow path runs THROUGH this file, but it also holds
2405
+ // many OTHER named methods, and rendering all of them in full blows the
2406
+ // per-file budget and starves the other flow files (Alamofire: the agent
2407
+ // names ~7 Session.swift methods — the build spine PLUS off-path
2408
+ // task/didCompleteTask — far past the whole response budget). Engage the
2409
+ // per-symbol view to keep the SPINE full and collapse the off-path named
2410
+ // methods to signatures. Only when there IS off-path content to shed —
2411
+ // otherwise the spine is irreducible (a sequential flow has no redundancy),
2412
+ // so leave it to the normal full render.
2413
+ const namedBodyChars = group.nodes
2414
+ .filter(n => CALLABLE_BODY.has(n.kind) && (flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id)))
2415
+ .reduce((s, n) => s + fileLines.slice(n.startLine - 1, Math.min(n.endLine, n.startLine + 220)).join('\n').length, 0);
2416
+ const onSpineGodFile = hasSpineNode
2417
+ && namedBodyChars > budget.maxCharsPerFile
2418
+ && group.nodes.some(n => CALLABLE_BODY.has(n.kind) && flow.uniqueNamedNodeIds.has(n.id) && !flow.pathNodeIds.has(n.id));
2419
+ if (adaptiveExploreEnabled() && flow.pathNodeIds.size > 0
2420
+ && (onSpineGodFile || (!hasSpineNode && isPolymorphicSibling(group.nodes) && !spared))) {
2421
+ const syms = group.nodes
2422
+ .filter(n => n.kind !== 'import' && n.kind !== 'export' && n.startLine > 0)
2423
+ .sort((a, b) => a.startLine - b.startLine);
2424
+ // Pass 1: choose which symbols get a FULL body, by priority, greedily within
2425
+ // a per-file body cap — so one huge family file can't body every named method
2426
+ // and crowd out the other flow files (Django's query.py). A symbol earns a
2427
+ // body if it's on-spine, or UNIQUELY named (`SQLCompiler.execute_sql`), or a
2428
+ // co-named method WHEN this file DEFINES the family supertype (so the base
2429
+ // `SQLCompiler.as_sql` body shows, but the 110 leaf `as_sql` overrides — and
2430
+ // OkHttp's 5 `intercept`s if the agent names `intercept` — stay signatures).
2431
+ const prio = (n) => !CALLABLE_BODY.has(n.kind) ? 99
2432
+ : flow.pathNodeIds.has(n.id) ? 0
2433
+ : flow.uniqueNamedNodeIds.has(n.id) ? 1
2434
+ : (fileDefinesSuper && flow.namedNodeIds.has(n.id)) ? 2 : 99;
2435
+ const bodyCap = budget.maxCharsPerFile * 2;
2436
+ const bodyIds = new Set();
2437
+ let bodyChars = 0;
2438
+ for (const n of syms.filter(n => prio(n) < 99 && n.endLine >= n.startLine).sort((a, b) => prio(a) - prio(b))) {
2439
+ const sz = fileLines.slice(n.startLine - 1, Math.min(n.endLine, n.startLine + 220)).join('\n').length;
2440
+ // Spine methods (prio 0) ALWAYS get a full body — the cap governs the
2441
+ // off-path extras (unique-named, family base), never the flow path itself.
2442
+ if (prio(n) > 0 && bodyChars + sz > bodyCap && bodyIds.size > 0)
2443
+ continue;
2444
+ bodyIds.add(n.id);
2445
+ bodyChars += sz;
2446
+ }
2447
+ // Pass 2: render in line order — full body for chosen symbols, else the
2448
+ // signature line (capped, with a "+N more" tail so the structure map of a
2449
+ // god-file doesn't itself bloat the budget).
2450
+ const skel = [];
2451
+ let coveredUntil = 0; // skip symbols already inside an emitted body
2452
+ let sigCount = 0, sigDropped = 0;
2453
+ const SIG_MAX = Math.max(12, budget.maxSymbolsInFileHeader * 2);
2454
+ for (const n of syms) {
2455
+ if (n.startLine <= coveredUntil)
2456
+ continue;
2457
+ if (bodyIds.has(n.id)) {
2458
+ const end = Math.min(n.endLine, n.startLine + 220);
2459
+ const body = fileLines.slice(n.startLine - 1, end).join('\n');
2460
+ skel.push(exploreLineNumbersEnabled() ? numberSourceLines(body, n.startLine) : body);
2461
+ coveredUntil = end;
2462
+ }
2463
+ else {
2464
+ // Elide the body, emit the signature. node.startLine can point at a
2465
+ // decorator/annotation, so scan forward for the line that names the symbol.
2466
+ let lineNo = n.startLine;
2467
+ for (let k = 0; k < 4; k++) {
2468
+ if ((fileLines[n.startLine - 1 + k] || '').includes(n.name)) {
2469
+ lineNo = n.startLine + k;
2470
+ break;
2471
+ }
2472
+ }
2473
+ if (lineNo <= coveredUntil)
2474
+ continue;
2475
+ if (sigCount >= SIG_MAX) {
2476
+ sigDropped++;
2477
+ continue;
2478
+ }
2479
+ const sig = (fileLines[lineNo - 1] || '').trim();
2480
+ if (sig) {
2481
+ skel.push(exploreLineNumbersEnabled() ? `${lineNo}\t${sig}` : sig);
2482
+ sigCount++;
2483
+ }
2484
+ }
2485
+ }
2486
+ if (sigDropped > 0)
2487
+ skel.push(`… +${sigDropped} more (signatures elided)`);
2488
+ if (skel.length > 0) {
2489
+ const names = [...new Set(group.nodes.filter(n => n.kind !== 'import' && n.kind !== 'export').map(n => n.name))]
2490
+ .slice(0, budget.maxSymbolsInFileHeader).join(', ');
2491
+ // Steer the agent to codegraph_explore for an elided body — NEVER to
2492
+ // Read. The old "Read for more" / "Read for a full body" tags invited
2493
+ // a Read of the very file just skeletonized; on a central, wanted file
2494
+ // (Session.swift, DataRequest.swift) that fired an over-investigation
2495
+ // spiral (the agent Read the skeletonized file, then kept digging).
2496
+ // CLAUDE.md: explore output must never tell the agent to Read.
2497
+ const tag = bodyIds.size > 0
2498
+ ? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
2499
+ : 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
2500
+ lines.push(`#### ${filePath} — ${names} · ${tag}`, '', '```' + lang, skel.join('\n'), '```', '');
2501
+ totalChars += skel.join('\n').length + 120;
2502
+ filesIncluded++;
2503
+ continue;
2504
+ }
2505
+ }
2159
2506
  // Whole-small-file rule: if a relevant file is small enough to afford,
2160
2507
  // return it ENTIRELY instead of clustering. Clustering exists to tame
2161
2508
  // god-files (App.tsx ~13k lines); on a ~134-line component a cluster is a
@@ -2205,14 +2552,33 @@ class ToolHandler {
2205
2552
  // Alamofire is the canonical case: the `Session` class spans ~1,400
2206
2553
  // lines). We want the granular symbols inside, not the envelope.
2207
2554
  const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
2208
- const ranges = group.nodes
2209
- .filter(n => n.startLine > 0 && n.endLine > 0)
2555
+ // Cluster from this file's gathered nodes PLUS any callable the agent NAMED that
2556
+ // lives here. Explore's relevance gather can miss a named method def in a huge
2557
+ // non-sibling file — Django's query.py is 3,040 lines and `_fetch_all` (L2237)
2558
+ // was gathered only as call-reference edges, never as a def, so it formed no
2559
+ // cluster and the agent Read it back. Inject named defs directly and rank them
2560
+ // at importance 9 (ABOVE connected/glue nodes) so their cluster wins the per-file
2561
+ // budget — the agent explicitly asked for these symbols.
2562
+ const rangeNodes = new Map();
2563
+ for (const n of group.nodes)
2564
+ if (n.startLine > 0 && n.endLine > 0)
2565
+ rangeNodes.set(n.id, n);
2566
+ for (const id of flow.namedNodeIds) {
2567
+ if (rangeNodes.has(id))
2568
+ continue;
2569
+ const n = cg.getNode(id);
2570
+ if (n && n.filePath === filePath && n.startLine > 0 && n.endLine > 0)
2571
+ rangeNodes.set(id, n);
2572
+ }
2573
+ const ranges = [...rangeNodes.values()]
2210
2574
  // Drop whole-file envelope nodes (containers covering >50% of the file).
2211
2575
  .filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
2212
2576
  .map(n => {
2213
2577
  let importance = 1;
2214
2578
  if (entryNodeIds.has(n.id))
2215
2579
  importance = 10;
2580
+ else if (flow.namedNodeIds.has(n.id))
2581
+ importance = 9; // agent named it → keep its cluster
2216
2582
  else if (glueNodeIds.has(n.id))
2217
2583
  importance = 6; // bridging caller/callee of an entry
2218
2584
  else if (connectedToEntry.has(n.id))
@@ -2313,6 +2679,13 @@ class ToolHandler {
2313
2679
  return b.c.score - a.c.score;
2314
2680
  return a.span - b.span;
2315
2681
  });
2682
+ // Per-file budget is the SMALLER of the per-file cap and what's left of the
2683
+ // total output cap — so selection (which ranks by importance) keeps the
2684
+ // high-importance clusters and drops peripheral ones, instead of the
2685
+ // downstream source-order trim slicing off whatever comes last in the file.
2686
+ // That source-order slice is what cut Django's `_fetch_all` (L2237, importance
2687
+ // 9 — agent-named) when query.py was the last of four big files to be emitted.
2688
+ const fileBudget = Math.min(budget.maxCharsPerFile, Math.max(0, budget.maxOutputChars - totalChars - 200));
2316
2689
  const chosenIndices = new Set();
2317
2690
  let projectedChars = 0;
2318
2691
  for (const rc of rankedClusters) {
@@ -2325,7 +2698,7 @@ class ToolHandler {
2325
2698
  projectedChars += sectionLen;
2326
2699
  continue;
2327
2700
  }
2328
- if (projectedChars + sectionLen > budget.maxCharsPerFile)
2701
+ if (projectedChars + sectionLen > fileBudget)
2329
2702
  continue;
2330
2703
  chosenIndices.add(rc.idx);
2331
2704
  projectedChars += sectionLen;
@@ -2371,22 +2744,22 @@ class ToolHandler {
2371
2744
  ? `${headerSymbols.join(', ')}, +${omittedCount} more`
2372
2745
  : headerSymbols.join(', ');
2373
2746
  const fileHeader = `#### ${filePath} — ${headerSuffix}`;
2374
- // Respect the total output cap on a file-by-file basis.
2375
- if (totalChars + fileSection.length + 200 > budget.maxOutputChars) {
2747
+ // The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
2748
+ // the agent named (or that's on the flow spine) renders even when the
2749
+ // nominal total is used up — it's the answer, and the set is bounded by
2750
+ // maxFiles AND by true-spine/named-seeding having already trimmed each file
2751
+ // to its necessary content. A file that merely REFERENCES the flow
2752
+ // (Combine.swift name-drops request/task) is incidental → still capped, so
2753
+ // freed budget never leaks into noise. This is the last god-file layer:
2754
+ // build (Session, true-spined) + validators-exec (Request) + validate
2755
+ // (DataRequest/Validation) all render, instead of the cap dropping whichever
2756
+ // phase the file order happened to put last.
2757
+ if (!fileNecessary && totalChars + fileSection.length + 200 > budget.maxOutputChars) {
2376
2758
  const remaining = budget.maxOutputChars - totalChars - 200;
2377
2759
  if (remaining < 500)
2378
- break;
2379
- const trimmed = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
2380
- lines.push(fileHeader);
2381
- lines.push('');
2382
- lines.push('```' + lang);
2383
- lines.push(trimmed);
2384
- lines.push('```');
2385
- lines.push('');
2386
- totalChars += trimmed.length + 200;
2387
- filesIncluded++;
2760
+ continue; // incidental file, no room — skip it, keep scanning for necessary ones
2761
+ fileSection = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
2388
2762
  anyFileTrimmed = true;
2389
- break;
2390
2763
  }
2391
2764
  lines.push(fileHeader);
2392
2765
  lines.push('');
@@ -2449,11 +2822,20 @@ class ToolHandler {
2449
2822
  // maxOutputChars (observed 30k against a 28k tier cap). A fat explore
2450
2823
  // payload persists in the agent's context and is re-read as cache-input
2451
2824
  // on every subsequent turn, so the overrun is paid many times over.
2452
- const output = this.buildFlowFromNamedSymbols(cg, query) + lines.join('\n');
2453
- if (output.length > budget.maxOutputChars) {
2454
- const cut = output.slice(0, budget.maxOutputChars);
2825
+ // Final ceiling. The render loop is now the authority on WHAT to emit — it
2826
+ // renders necessary files (named/spine) even past maxOutputChars and caps
2827
+ // only incidental ones, all bounded by maxFiles + per-file true-spine — so
2828
+ // this is a SAFETY ceiling above that necessary content, not a hard cut
2829
+ // through it. Cutting at a flat maxOutputChars here undid the whole point:
2830
+ // Alamofire's loop assembles build+validators-exec+validate (~15K) and a 13K
2831
+ // slice dropped the validate phase the agent then Read. Allow necessary
2832
+ // overflow up to 1.5× (still bounds a pathological monolith).
2833
+ const output = flow.text + lines.join('\n');
2834
+ const hardCeiling = Math.round(budget.maxOutputChars * 1.5);
2835
+ if (output.length > hardCeiling) {
2836
+ const cut = output.slice(0, hardCeiling);
2455
2837
  const lastNewline = cut.lastIndexOf('\n');
2456
- const safe = lastNewline > budget.maxOutputChars * 0.8 ? cut.slice(0, lastNewline) : cut;
2838
+ const safe = lastNewline > hardCeiling * 0.8 ? cut.slice(0, lastNewline) : cut;
2457
2839
  return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
2458
2840
  }
2459
2841
  return this.textResult(output);