npm - @colbymchenry/codegraph-darwin-x64 - Versions diffs - 0.9.7 → 0.9.8 - Mend

@colbymchenry/codegraph-darwin-x64 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/lib/dist/bin/codegraph.js +18 -20
package/lib/dist/bin/codegraph.js.map +1 -1
package/lib/dist/extraction/grammars.d.ts +10 -0
package/lib/dist/extraction/grammars.d.ts.map +1 -1
package/lib/dist/extraction/grammars.js +13 -0
package/lib/dist/extraction/grammars.js.map +1 -1
package/lib/dist/extraction/index.d.ts.map +1 -1
package/lib/dist/extraction/index.js +17 -2
package/lib/dist/extraction/index.js.map +1 -1
package/lib/dist/extraction/tree-sitter.js +1 -1
package/lib/dist/extraction/tree-sitter.js.map +1 -1
package/lib/dist/index.d.ts +2 -1
package/lib/dist/index.d.ts.map +1 -1
package/lib/dist/index.js +8 -1
package/lib/dist/index.js.map +1 -1
package/lib/dist/mcp/engine.d.ts.map +1 -1
package/lib/dist/mcp/engine.js +12 -38
package/lib/dist/mcp/engine.js.map +1 -1
package/lib/dist/mcp/index.d.ts +7 -4
package/lib/dist/mcp/index.d.ts.map +1 -1
package/lib/dist/mcp/index.js +46 -39
package/lib/dist/mcp/index.js.map +1 -1
package/lib/dist/mcp/proxy.d.ts +35 -0
package/lib/dist/mcp/proxy.d.ts.map +1 -1
package/lib/dist/mcp/proxy.js +223 -0
package/lib/dist/mcp/proxy.js.map +1 -1
package/lib/dist/mcp/session.d.ts +10 -0
package/lib/dist/mcp/session.d.ts.map +1 -1
package/lib/dist/mcp/session.js +7 -5
package/lib/dist/mcp/session.js.map +1 -1
package/lib/dist/mcp/tools.d.ts +8 -1
package/lib/dist/mcp/tools.d.ts.map +1 -1
package/lib/dist/mcp/tools.js +438 -56
package/lib/dist/mcp/tools.js.map +1 -1
package/lib/dist/resolution/callback-synthesizer.d.ts +2 -2
package/lib/dist/resolution/callback-synthesizer.d.ts.map +1 -1
package/lib/dist/resolution/callback-synthesizer.js +239 -2
package/lib/dist/resolution/callback-synthesizer.js.map +1 -1
package/lib/node_modules/.package-lock.json +1 -1
package/lib/package.json +1 -1
package/package.json +1 -1

package/lib/dist/mcp/tools.js CHANGED

@@ -43,7 +43,14 @@ exports.getExploreBudget = getExploreBudget;
 exports.getExploreOutputBudget = getExploreOutputBudget;
 exports.formatStaleBanner = formatStaleBanner;
 exports.formatStaleFooter = formatStaleFooter;
-const index_1 = __importStar(require("../index"));
+exports.getStaticTools = getStaticTools;
+const directory_1 = require("../directory");
+// Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
+// helper in engine.ts. ToolHandler must load to answer tools/list (static
+// schemas), but it must NOT drag in sqlite/query layers before the daemon binds;
+// CodeGraph is pulled in only when a tool actually opens a project. require() is
+// sync + cached (CommonJS build).
+const loadCodeGraph = () => require('../index').default;
 const worktree_1 = require("../sync/worktree");
 const crypto_1 = require("crypto");
 const fs_1 = require("fs");
@@ -206,6 +213,21 @@ function getExploreOutputBudget(fileCount) {
 function exploreLineNumbersEnabled() {
     return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0';
 }
+/**
+ * Adaptive explore sizing (default ON). `codegraph_explore` skeletonizes OFF-SPINE
+ * polymorphic-sibling files — a file whose class is one of ≥3 interchangeable
+ * implementations of a shared interface (e.g. OkHttp's `: Interceptor` classes) —
+ * to class + member signatures (bodies elided), keeping the on-spine exemplar full.
+ * This sizes the response to the answer instead of the budget cap on sibling-heavy
+ * flows (OkHttp interceptor-chain explore 28.5k→16.6k, ~28% cheaper than native
+ * search, reads flat). It is PROVABLY INERT elsewhere: distinct pipeline steps (no
+ * ≥3-implementer supertype, e.g. Excalidraw's `renderStaticScene`) and on-spine
+ * files keep full source — output is byte-identical to shipped on excalidraw /
+ * tokio / django / vscode / gin. Set `CODEGRAPH_ADAPTIVE_EXPLORE=0` to disable.
+ */
+function adaptiveExploreEnabled() {
+    return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
+}
 /**
  * Prefix each line of a source slice with its 1-based line number, matching
  * the Read tool's `cat -n` convention (number + tab) so the agent treats it
@@ -536,6 +558,19 @@ exports.tools = [
         },
     },
 ];
+/**
+ * Allowlist-filtered tool definitions WITHOUT an engine — the static surface the
+ * proxy answers `tools/list` with before any project is open. Mirrors
+ * `ToolHandler.getTools()` in the no-CodeGraph case (the dynamic per-repo budget
+ * note in a description only adds once `cg` is loaded; the schemas are static).
+ */
+function getStaticTools() {
+    const raw = process.env.CODEGRAPH_MCP_TOOLS;
+    if (!raw || !raw.trim())
+        return exports.tools;
+    const allow = new Set(raw.split(',').map(s => s.trim().replace(/^codegraph_/, '')).filter(Boolean));
+    return allow.size ? exports.tools.filter(t => allow.has(t.name.replace(/^codegraph_/, ''))) : exports.tools;
+}
 /**
  * Tool handler that executes tools against a CodeGraph instance
  *
@@ -718,7 +753,7 @@ class ToolHandler {
             }
         }
         // Walk up parent directories to find nearest .codegraph/
-        const resolvedRoot = (0, index_1.findNearestCodeGraphRoot)(projectPath);
+        const resolvedRoot = (0, directory_1.findNearestCodeGraphRoot)(projectPath);
         if (!resolvedRoot) {
             throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
         }
@@ -740,7 +775,7 @@ class ToolHandler {
             return cg;
         }
         // Open and cache under both paths
-        const cg = index_1.default.openSync(resolvedRoot);
+        const cg = loadCodeGraph().openSync(resolvedRoot);
         this.projectCache.set(resolvedRoot, cg);
         if (projectPath !== resolvedRoot) {
             this.projectCache.set(projectPath, cg);
@@ -1463,10 +1498,30 @@ class ToolHandler {
             - (isLessCanonicalPath(b) ? LESS_CANONICAL_PENALTY : 0);
         const fromCands = fromMatches.nodes;
         const toCands = toMatches.nodes;
+        // Candidate relevance: an overloaded name (Alamofire has 44 `request`s, most
+        // of them EMPTY EventMonitor protocol-conformance stubs `func request(…){}`)
+        // floods the pool with no-op decls. Shared-dir-prefix alone then MISLEADS —
+        // two unrelated `Source/Features/` delegate stubs outscore the real
+        // `Source/Core/Session.request` × `Source/Core/…task` pair the agent meant,
+        // so trace resolves to stubs, finds no path, and the agent reads by line.
+        // Penalize empty stubs and test-file symbols so a substantive entry point
+        // wins; among real methods this is ~flat, so path-proximity still decides
+        // (cosmos EndBlocker disambiguation is unaffected — none of its candidates
+        // are stubs/tests).
+        const isTestPath = (p) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
+        const nodeRelevance = (n) => {
+            const bodyLines = Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
+            let s = Math.min(bodyLines, 20); // a substantive body is more likely the meant symbol
+            if (bodyLines <= 1)
+                s -= 40; // empty/one-line stub (protocol no-op, decl-only) — almost never the trace endpoint
+            if (isTestPath(n.filePath))
+                s -= 150; // a Source/ symbol is meant over a Tests/ same-named one
+            return s;
+        };
         const pairs = [];
         for (const f of fromCands) {
             for (const t of toCands) {
-                pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) });
+                pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) + nodeRelevance(f) + nodeRelevance(t) });
             }
         }
         // Sort by shared prefix desc, then by FTS order (already encoded in the
@@ -1718,6 +1773,14 @@ class ToolHandler {
                 registeredAt,
             };
         }
+        if (m?.synthesizedBy === 'closure-collection') {
+            const field = m.field ? `\`${String(m.field)}\`` : 'a collection';
+            return {
+                label: `closure collection — runs handlers appended to ${field} (dynamic dispatch)`,
+                compact: `dynamic: runs ${field} handlers${at}`,
+                registeredAt,
+            };
+        }
         return null;
     }
     /**
@@ -1811,6 +1874,7 @@ class ToolHandler {
      * dropping unrelated `OmsOrderService::list`.
      */
     buildFlowFromNamedSymbols(cg, query) {
+        const EMPTY = { text: '', pathNodeIds: new Set(), namedNodeIds: new Set(), uniqueNamedNodeIds: new Set() };
         try {
             const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
             // Strip only a REAL file extension (Create.cs → Create); KEEP qualified
@@ -1822,7 +1886,7 @@ class ToolHandler {
                     .map((t) => t.replace(FILE_EXT, '').trim())
                     .filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
             if (tokens.length < 2)
-                return '';
+                return EMPTY;
             // Pool of name SEGMENTS (Class + method from every token) used to
             // disambiguate an ambiguous SIMPLE name: keep a candidate only if its
             // CONTAINER class is itself named in the query.
@@ -1832,24 +1896,34 @@ class ToolHandler {
                     if (s)
                         segPool.add(s);
             const named = new Map();
+            // Nodes whose token is SPECIFIC — a (near-)unique callable name (<=3 defs in
+            // the whole graph). These are safe to SPARE a file on: the agent named THIS
+            // method (`getResponseWithInterceptorChain`, 1 def). A hyper-polymorphic name
+            // (`as_sql`, 110 defs across every Expression/Compiler subclass) is NOT here,
+            // so naming it doesn't keep every backend variant full and flood the budget.
+            const uniqueNamedNodeIds = new Set();
             for (const t of tokens) {
                 const cands = this.findAllSymbols(cg, t).nodes.filter((n) => CALLABLE.has(n.kind));
                 // A qualified or otherwise-specific name (<=3 hits) keeps all; an
                 // ambiguous simple name keeps only candidates whose container is named.
-                const pick = cands.length <= 3
+                const specific = cands.length <= 3;
+                const pick = specific
                     ? cands
                     : cands.filter((n) => {
                         const segs = (n.qualifiedName || '').toLowerCase().split(/::|\./).filter(Boolean);
                         const container = segs.length >= 2 ? segs[segs.length - 2] : '';
                         return !!container && segPool.has(container);
                     });
-                for (const n of pick.slice(0, 6))
+                for (const n of pick.slice(0, 6)) {
                     named.set(n.id, n);
+                    if (specific)
+                        uniqueNamedNodeIds.add(n.id);
+                }
                 if (named.size > 40)
                     break;
             }
             if (named.size < 2)
-                return '';
+                return EMPTY;
             const MAX_HOPS = 7;
             let best = null;
             // BFS the full call graph (incl. synth edges) from each named seed, but
@@ -1895,22 +1969,66 @@ class ToolHandler {
                 if (!best || chain.length > best.length)
                     best = chain;
             }
-            if (!best || best.length < 3)
-                return '';
-            const out = ['## Flow (call path among the symbols you queried)', ''];
-            for (let i = 0; i < best.length; i++) {
-                const step = best[i];
-                if (step.edge) {
-                    const sy = this.synthEdgeNote(step.edge);
-                    out.push(`   ↓ ${sy ? sy.compact : step.edge.kind}`);
+            const hasMain = !!best && best.length >= 3;
+            const pathIds = new Set((best ?? []).map((s) => s.node.id));
+            // Supplementary: dynamic-dispatch (synthesized) edges incident to a NAMED
+            // symbol — the indirect hops an agent would otherwise grep/Read to
+            // reconstruct ("where do the appended `validators` actually run?"). The
+            // synth edge IS that answer, so surface it even when the OTHER end wasn't
+            // named (e.g. the agent names `validate` but not the `didCompleteTask`
+            // that drains the collection). On-topic by construction: only heuristic
+            // edges touching a symbol the agent named; skipped when the hop already
+            // shows in the main chain.
+            const synthLines = [];
+            const synthSeen = new Set();
+            for (const n of named.values()) {
+                if (synthLines.length >= 6)
+                    break;
+                for (const { node: other, edge } of [...cg.getCallers(n.id), ...cg.getCallees(n.id)]) {
+                    if (synthLines.length >= 6)
+                        break;
+                    if (edge.provenance !== 'heuristic' || other.id === n.id)
+                        continue;
+                    if (pathIds.has(edge.source) && pathIds.has(edge.target))
+                        continue; // already in the main chain
+                    const src = edge.source === n.id ? n : other;
+                    const tgt = edge.source === n.id ? other : n;
+                    const key = `${src.name}>${tgt.name}`;
+                    if (synthSeen.has(key))
+                        continue;
+                    synthSeen.add(key);
+                    const note = this.synthEdgeNote(edge);
+                    synthLines.push(`- ${src.name} → ${tgt.name}   [${note ? note.compact : edge.kind}]`);
+                }
+            }
+            if (!hasMain && synthLines.length === 0)
+                return EMPTY;
+            const out = [];
+            if (hasMain) {
+                out.push('## Flow (call path among the symbols you queried)', '');
+                for (let i = 0; i < best.length; i++) {
+                    const step = best[i];
+                    if (step.edge) {
+                        const sy = this.synthEdgeNote(step.edge);
+                        out.push(`   ↓ ${sy ? sy.compact : step.edge.kind}`);
+                    }
+                    out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
                 }
-                out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
+                out.push('');
             }
-            out.push('', '> Full source for these symbols is below; codegraph_trace(from,to) for the exact path between two endpoints.', '');
-            return out.join('\n');
+            if (synthLines.length) {
+                out.push('## Dynamic-dispatch links among your symbols', '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)', '', ...synthLines, '');
+            }
+            out.push('> Full source for these symbols is below; codegraph_trace(from,to) for the exact path between two endpoints.', '');
+            // namedNodeIds = every callable the agent explicitly named (a superset of
+            // the spine). A file holding one is something the agent asked to SEE, so it
+            // must keep full source even if it's an off-spine polymorphic sibling — the
+            // agent named `getResponseWithInterceptorChain` / `SQLCompiler.execute_sql`
+            // as the mechanism, not as an interchangeable leaf. See the skeleton gate.
+            return { text: out.join('\n'), pathNodeIds: pathIds, namedNodeIds: new Set(named.keys()), uniqueNamedNodeIds };
         }
         catch {
-            return '';
+            return EMPTY;
         }
     }
     /**
@@ -1991,9 +2109,42 @@ class ToolHandler {
                 glueNodeIds.add(nb.id);
             }
         }
+        // Named-symbol seeding: findRelevantContext is an FTS/text rank, so a query
+        // that's a BAG of symbol names skewed toward one phase (Alamofire: 5 build
+        // terms, each a high-frequency name, vs 3 validate terms) lets the
+        // lower-frequency names fall below the search cut — their definitions, and
+        // whole files (Validation.swift), never get gathered, so they can never
+        // render and the agent Reads them. Resolve EACH named token to its
+        // substantive definition (skip empty stubs + test files, same relevance the
+        // trace endpoint picker uses) and inject it as an entry, so every symbol the
+        // agent explicitly named is in the subgraph and its file is scored.
+        const namedSeedIds = new Set();
+        {
+            const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
+            const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
+            const isTestPath = (p) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
+            const bodyLines = (n) => Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
+            const tokens = [...new Set(query.split(/[\s,()[\]]+/)
+                    .map((t) => t.replace(FILE_EXT, '').trim())
+                    .filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
+            for (const t of tokens) {
+                const cands = this.findAllSymbols(cg, t).nodes
+                    .filter((n) => CALLABLE.has(n.kind) && !isTestPath(n.filePath))
+                    .sort((a, b) => (bodyLines(b) > 1 ? 1 : 0) - (bodyLines(a) > 1 ? 1 : 0) || bodyLines(b) - bodyLines(a));
+                // A specific name (<=3 defs) injects all its defs; an overloaded name
+                // (`request` = 44, mostly stubs) injects only the single most substantive
+                // one, so the build-overload flood doesn't crowd the subgraph.
+                for (const n of cands.slice(0, cands.length <= 3 ? cands.length : 1)) {
+                    if (!subgraph.nodes.has(n.id)) {
+                        subgraph.nodes.set(n.id, n);
+                        namedSeedIds.add(n.id);
+                    }
+                }
+            }
+        }
         // Step 2: Group nodes by file, score by relevance
         const fileGroups = new Map();
-        const entryNodeIds = new Set(subgraph.roots);
+        const entryNodeIds = new Set([...subgraph.roots, ...namedSeedIds]);
         // Build a set of nodes directly connected to entry points (depth 1)
         const connectedToEntry = new Set();
         for (const edge of subgraph.edges) {
@@ -2008,8 +2159,16 @@ class ToolHandler {
                 continue;
             const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
             group.nodes.push(node);
-            // Score: entry point nodes worth 10, directly connected worth 3, others worth 1
-            if (entryNodeIds.has(node.id)) {
+            // Score: a NAMED-SEED node (a symbol the agent named that FTS missed, now
+            // injected) is worth far more than a mere reference — its file is where the
+            // answer lives. Without this, an incidental file that name-drops the flow
+            // (Combine.swift references request/task → score 23 from connected nodes)
+            // outranks the file that DEFINES a named symbol (Validation.swift's
+            // `validate` → 10) and steals its render slot. Definition ≫ reference.
+            if (namedSeedIds.has(node.id)) {
+                group.score += 50;
+            }
+            else if (entryNodeIds.has(node.id)) {
                 group.score += 10;
             }
             else if (connectedToEntry.has(node.id)) {
@@ -2042,20 +2201,18 @@ class ToolHandler {
                 /\bicons?\b/.test(lp) ||
                 /\bi18n\b/.test(lp));
         };
-        // Tiny-tier hard-exclude: on small projects (`excludeLowValueFiles`
-        // budget flag), one slipped test/spec file dominates the per-file budget
-        // (cobra's `command_test.go` displaced `args.go` and contributed ~10KB of
-        // pure noise to "How does cobra parse commands?"). The sort-step
-        // deprioritization isn't enough at small N. Skip the hard-exclude when
-        // the query itself is about tests — that's the legitimate "explore the
-        // tests" case where the agent does want them.
-        if (budget.excludeLowValueFiles) {
+        // Hard-exclude test/spec files (ALL tiers, not just tiny). One slipped test
+        // file dominates the per-file budget on small repos (cobra's `command_test.go`
+        // displaced `args.go`) AND wastes budget on large ones (Django's
+        // `custom_lookups/tests.py` ate ~2.3 KB of the 28 KB cap, crowding out the
+        // SQLCompiler mechanism the agent then Read). A test file almost never answers
+        // an architecture question. Skip when the query itself is about tests — the
+        // legitimate "explore the tests" case — and only cut if ≥2 non-test candidates
+        // remain (else tests are the only signal for this area).
+        {
             const queryMentionsTests = /\b(test|tests|testing|spec|verify|verifies)\b/i.test(query);
             if (!queryMentionsTests) {
                 const nonLow = relevantFiles.filter(([p]) => !isLowValue(p));
-                // Only apply the hard-filter if we still have at least 2 non-test
-                // candidates after the cut — otherwise the agent is asking about an
-                // area where tests are the only signal, and we should not strip them.
                 if (nonLow.length >= 2) {
                     relevantFiles = nonLow;
                 }
@@ -2132,6 +2289,64 @@ class ToolHandler {
             }
         }
         // Step 4: Read contiguous file sections
+        // Compute the flow spine once — used both to prepend the Flow section (below)
+        // and to gate adaptive source sizing: files on the spine get full source,
+        // off-spine peers skeletonize.
+        const flow = this.buildFlowFromNamedSymbols(cg, query);
+        // Polymorphic-sibling detector for adaptive sizing. A class that implements/
+        // extends a supertype shared by >= MIN_SIBLINGS classes is one of many
+        // INTERCHANGEABLE implementations (OkHttp's 14 `: Interceptor` classes —
+        // showing one + the rest as signatures is enough), as opposed to a DISTINCT
+        // pipeline step (Excalidraw's `renderStaticScene`, which shares no supertype and
+        // must stay full or the agent loses real content). Only off-spine sibling files
+        // skeletonize; distinct steps and on-spine files keep full source. Cache
+        // supertype→(has ≥N implementers) so this stays a handful of edge queries.
+        const MIN_SIBLINGS = 3;
+        const siblingSuper = new Map();
+        const isPolymorphicSibling = (nodes) => {
+            for (const n of nodes) {
+                for (const e of cg.getOutgoingEdges(n.id)) {
+                    if (e.kind !== 'implements' && e.kind !== 'extends')
+                        continue;
+                    let many = siblingSuper.get(e.target);
+                    if (many === undefined) {
+                        many = cg.getIncomingEdges(e.target)
+                            .filter((x) => x.kind === 'implements' || x.kind === 'extends').length >= MIN_SIBLINGS;
+                        siblingSuper.set(e.target, many);
+                    }
+                    if (many)
+                        return true;
+                }
+            }
+            return false;
+        };
+        // A file that DEFINES a polymorphic supertype (a class/interface with ≥
+        // MIN_SIBLINGS implementers) AND co-locates its subclasses is a redundant
+        // "family" file — Django's compiler.py holds `SQLCompiler` + its 4 subclasses
+        // (SQLInsert/Update/Delete/AggregateCompiler) in 2,266 lines. Such files are
+        // huge and read-anyway, so they should STILL skeletonize even when the agent
+        // named a method in them: a full one eats ~6.5K of the explore budget (Django
+        // is pinned at the 28K cap, truncating), starving the sibling files the agent
+        // then Reads. This flag OVERRIDES the named-callable spare below — it does NOT
+        // by itself spare a file. (OkHttp's RealCall implements the `Lockable` mixin
+        // but defines no ≥3-impl supertype, so the named spare keeps it full.)
+        const superMany = new Map();
+        const definesPolymorphicSupertype = (nodes) => {
+            for (const n of nodes) {
+                if (n.kind !== 'class' && n.kind !== 'interface' && n.kind !== 'struct'
+                    && n.kind !== 'trait' && n.kind !== 'protocol' && n.kind !== 'type_alias')
+                    continue;
+                let many = superMany.get(n.id);
+                if (many === undefined) {
+                    many = cg.getIncomingEdges(n.id)
+                        .filter((x) => x.kind === 'implements' || x.kind === 'extends').length >= MIN_SIBLINGS;
+                    superMany.set(n.id, many);
+                }
+                if (many)
+                    return true;
+            }
+            return false;
+        };
         lines.push('### Source Code');
         lines.push('');
         lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
@@ -2142,8 +2357,15 @@ class ToolHandler {
         for (const [filePath, group] of sortedFiles) {
             if (filesIncluded >= maxFiles)
                 break;
-            if (totalChars > budget.maxOutputChars * 0.9)
-                break;
+            // A file DEFINES a named/spine symbol (the answer) vs merely references the
+            // flow. Past 90% budget, stop pulling INCIDENTAL files — but keep scanning
+            // for necessary ones, which render even past the cap (bounded by maxFiles).
+            // Without this `continue` (was an unconditional `break`), the loop stopped
+            // after the build + validators-exec files and never reached the ranked-in
+            // validate-logic file (Alamofire's Validation.swift).
+            const fileNecessary = group.nodes.some(n => entryNodeIds.has(n.id) || flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id));
+            if (!fileNecessary && totalChars > budget.maxOutputChars * 0.9)
+                continue;
             const absPath = (0, utils_1.validatePathWithinRoot)(projectRoot, filePath);
             if (!absPath || !(0, fs_1.existsSync)(absPath))
                 continue;
@@ -2156,6 +2378,131 @@ class ToolHandler {
             }
             const fileLines = fileContent.split('\n');
             const lang = group.nodes[0]?.language || '';
+            // Adaptive sizing (CODEGRAPH_ADAPTIVE_EXPLORE, default on): collapse a file
+            // to a per-symbol view when it's a redundant member of a polymorphic family.
+            // Engages iff ALL hold:
+            //   1. a flow spine exists,
+            //   2. no symbol in the file is on that spine (it's not the mechanism path),
+            //   3. it IS a polymorphic sibling (≥ MIN_SIBLINGS impls of a shared supertype),
+            //   4. it is NOT SPARED, where a file is spared iff the agent named a
+            //      (near-)UNIQUE callable in it (`getResponseWithInterceptorChain`, 1 def →
+            //      keep RealCall.kt full) UNLESS the file DEFINES the family supertype (a
+            //      base+subclasses "family" file like Django's compiler.py — collapse it).
+            //      Uniqueness matters: `as_sql` has 110 defs across every Compiler/Expression
+            //      subclass; naming it must NOT keep every backend variant + test file full
+            //      and flood the budget. That's why the spare reads uniqueNamedNodeIds.
+            // Within a collapsed file the render is PER-SYMBOL (condition B): a method the
+            // agent NAMED or that's on the spine is shown with its FULL body (so the agent
+            // doesn't Read the file back for it — Django's SQLCompiler.execute_sql/as_sql);
+            // every other symbol is just its signature. So the base mechanism survives while
+            // the file's other ~80 symbols + the redundant subclasses collapse to one line each.
+            const spareNamed = group.nodes.some(n => flow.uniqueNamedNodeIds.has(n.id));
+            const fileDefinesSuper = definesPolymorphicSupertype(group.nodes);
+            const spared = spareNamed && !fileDefinesSuper;
+            const CALLABLE_BODY = new Set(['method', 'function', 'constructor', 'component']);
+            const hasSpineNode = group.nodes.some(n => flow.pathNodeIds.has(n.id));
+            // On-spine god-file: the flow path runs THROUGH this file, but it also holds
+            // many OTHER named methods, and rendering all of them in full blows the
+            // per-file budget and starves the other flow files (Alamofire: the agent
+            // names ~7 Session.swift methods — the build spine PLUS off-path
+            // task/didCompleteTask — far past the whole response budget). Engage the
+            // per-symbol view to keep the SPINE full and collapse the off-path named
+            // methods to signatures. Only when there IS off-path content to shed —
+            // otherwise the spine is irreducible (a sequential flow has no redundancy),
+            // so leave it to the normal full render.
+            const namedBodyChars = group.nodes
+                .filter(n => CALLABLE_BODY.has(n.kind) && (flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id)))
+                .reduce((s, n) => s + fileLines.slice(n.startLine - 1, Math.min(n.endLine, n.startLine + 220)).join('\n').length, 0);
+            const onSpineGodFile = hasSpineNode
+                && namedBodyChars > budget.maxCharsPerFile
+                && group.nodes.some(n => CALLABLE_BODY.has(n.kind) && flow.uniqueNamedNodeIds.has(n.id) && !flow.pathNodeIds.has(n.id));
+            if (adaptiveExploreEnabled() && flow.pathNodeIds.size > 0
+                && (onSpineGodFile || (!hasSpineNode && isPolymorphicSibling(group.nodes) && !spared))) {
+                const syms = group.nodes
+                    .filter(n => n.kind !== 'import' && n.kind !== 'export' && n.startLine > 0)
+                    .sort((a, b) => a.startLine - b.startLine);
+                // Pass 1: choose which symbols get a FULL body, by priority, greedily within
+                // a per-file body cap — so one huge family file can't body every named method
+                // and crowd out the other flow files (Django's query.py). A symbol earns a
+                // body if it's on-spine, or UNIQUELY named (`SQLCompiler.execute_sql`), or a
+                // co-named method WHEN this file DEFINES the family supertype (so the base
+                // `SQLCompiler.as_sql` body shows, but the 110 leaf `as_sql` overrides — and
+                // OkHttp's 5 `intercept`s if the agent names `intercept` — stay signatures).
+                const prio = (n) => !CALLABLE_BODY.has(n.kind) ? 99
+                    : flow.pathNodeIds.has(n.id) ? 0
+                        : flow.uniqueNamedNodeIds.has(n.id) ? 1
+                            : (fileDefinesSuper && flow.namedNodeIds.has(n.id)) ? 2 : 99;
+                const bodyCap = budget.maxCharsPerFile * 2;
+                const bodyIds = new Set();
+                let bodyChars = 0;
+                for (const n of syms.filter(n => prio(n) < 99 && n.endLine >= n.startLine).sort((a, b) => prio(a) - prio(b))) {
+                    const sz = fileLines.slice(n.startLine - 1, Math.min(n.endLine, n.startLine + 220)).join('\n').length;
+                    // Spine methods (prio 0) ALWAYS get a full body — the cap governs the
+                    // off-path extras (unique-named, family base), never the flow path itself.
+                    if (prio(n) > 0 && bodyChars + sz > bodyCap && bodyIds.size > 0)
+                        continue;
+                    bodyIds.add(n.id);
+                    bodyChars += sz;
+                }
+                // Pass 2: render in line order — full body for chosen symbols, else the
+                // signature line (capped, with a "+N more" tail so the structure map of a
+                // god-file doesn't itself bloat the budget).
+                const skel = [];
+                let coveredUntil = 0; // skip symbols already inside an emitted body
+                let sigCount = 0, sigDropped = 0;
+                const SIG_MAX = Math.max(12, budget.maxSymbolsInFileHeader * 2);
+                for (const n of syms) {
+                    if (n.startLine <= coveredUntil)
+                        continue;
+                    if (bodyIds.has(n.id)) {
+                        const end = Math.min(n.endLine, n.startLine + 220);
+                        const body = fileLines.slice(n.startLine - 1, end).join('\n');
+                        skel.push(exploreLineNumbersEnabled() ? numberSourceLines(body, n.startLine) : body);
+                        coveredUntil = end;
+                    }
+                    else {
+                        // Elide the body, emit the signature. node.startLine can point at a decorator/annotation,
+                        // so scan startLine and the next 3 lines for the first containing the name (else keep startLine).
+                        let lineNo = n.startLine;
+                        for (let k = 0; k < 4; k++) {
+                            if ((fileLines[n.startLine - 1 + k] || '').includes(n.name)) {
+                                lineNo = n.startLine + k;
+                                break;
+                            }
+                        }
+                        if (lineNo <= coveredUntil)
+                            continue;
+                        if (sigCount >= SIG_MAX) {
+                            sigDropped++;
+                            continue;
+                        }
+                        const sig = (fileLines[lineNo - 1] || '').trim();
+                        if (sig) {
+                            skel.push(exploreLineNumbersEnabled() ? `${lineNo}\t${sig}` : sig);
+                            sigCount++;
+                        }
+                    }
+                }
+                if (sigDropped > 0)
+                    skel.push(`… +${sigDropped} more (signatures elided)`);
+                if (skel.length > 0) {
+                    const names = [...new Set(group.nodes.filter(n => n.kind !== 'import' && n.kind !== 'export').map(n => n.name))]
+                        .slice(0, budget.maxSymbolsInFileHeader).join(', ');
+                    // Steer the agent to codegraph_explore for an elided body — NEVER to
+                    // Read. The old "Read for more" / "Read for a full body" tags invited
+                    // a Read of the very file just skeletonized; on a central, wanted file
+                    // (Session.swift, DataRequest.swift) that fired an over-investigation
+                    // spiral (the agent Read the skeletonized file, then kept digging).
+                    // CLAUDE.md: explore output must never tell the agent to Read.
+                    const tag = bodyIds.size > 0
+                        ? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
+                        : 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
+                    lines.push(`#### ${filePath} — ${names} · ${tag}`, '', '```' + lang, skel.join('\n'), '```', '');
+                    totalChars += skel.join('\n').length + 120;
+                    filesIncluded++;
+                    continue;
+                }
+            }
             // Whole-small-file rule: if a relevant file is small enough to afford,
             // return it ENTIRELY instead of clustering. Clustering exists to tame
             // god-files (App.tsx ~13k lines); on a ~134-line component a cluster is a
@@ -2205,14 +2552,33 @@ class ToolHandler {
             // Alamofire is the canonical case: the `Session` class spans ~1,400
             // lines). We want the granular symbols inside, not the envelope.
             const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
-            const ranges = group.nodes
-                .filter(n => n.startLine > 0 && n.endLine > 0)
+            // Cluster from this file's gathered nodes PLUS any callable the agent NAMED that
+            // lives here. Explore's relevance gather can miss a named method def in a huge
+            // non-sibling file — Django's query.py is 3,040 lines and `_fetch_all` (L2237)
+            // was gathered only as call-reference edges, never as a def, so it formed no
+            // cluster and the agent Read it back. Inject named defs directly and rank them
+            // ABOVE connected/glue nodes (importance 9) so their cluster wins the per-file
+            // budget — the agent explicitly asked for these symbols.
+            const rangeNodes = new Map();
+            for (const n of group.nodes)
+                if (n.startLine > 0 && n.endLine > 0)
+                    rangeNodes.set(n.id, n);
+            for (const id of flow.namedNodeIds) {
+                if (rangeNodes.has(id))
+                    continue;
+                const n = cg.getNode(id);
+                if (n && n.filePath === filePath && n.startLine > 0 && n.endLine > 0)
+                    rangeNodes.set(id, n);
+            }
+            const ranges = [...rangeNodes.values()]
                 // Drop whole-file envelope nodes (containers covering >50% of the file).
                 .filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
                 .map(n => {
                 let importance = 1;
                 if (entryNodeIds.has(n.id))
                     importance = 10;
+                else if (flow.namedNodeIds.has(n.id))
+                    importance = 9; // agent named it → keep its cluster
                 else if (glueNodeIds.has(n.id))
                     importance = 6; // bridging caller/callee of an entry
                 else if (connectedToEntry.has(n.id))
@@ -2313,6 +2679,13 @@ class ToolHandler {
                     return b.c.score - a.c.score;
                 return a.span - b.span;
             });
+            // Per-file budget is the SMALLER of the per-file cap and what's left of the
+            // total output cap (less a 200-char reserve, floored at 0) — so selection,
+            // which ranks by importance, keeps the high-importance clusters and drops peripheral
+            // ones, instead of the downstream source-order trim slicing off whatever comes last in the file.
+            // That source-order slice is what cut Django's `_fetch_all` (L2237, importance
+            // 9 — agent-named) when query.py was the last of four big files to be emitted.
+            const fileBudget = Math.min(budget.maxCharsPerFile, Math.max(0, budget.maxOutputChars - totalChars - 200));
             const chosenIndices = new Set();
             let projectedChars = 0;
             for (const rc of rankedClusters) {
@@ -2325,7 +2698,7 @@ class ToolHandler {
                     projectedChars += sectionLen;
                     continue;
                 }
-                if (projectedChars + sectionLen > budget.maxCharsPerFile)
+                if (projectedChars + sectionLen > fileBudget)
                     continue;
                 chosenIndices.add(rc.idx);
                 projectedChars += sectionLen;
@@ -2371,22 +2744,22 @@ class ToolHandler {
                 ? `${headerSymbols.join(', ')}, +${omittedCount} more`
                 : headerSymbols.join(', ');
             const fileHeader = `#### ${filePath} — ${headerSuffix}`;
-            // Respect the total output cap on a file-by-file basis.
-            if (totalChars + fileSection.length + 200 > budget.maxOutputChars) {
+            // The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
+            // the agent named (or that's on the flow spine) renders even when the
+            // nominal total is used up — it's the answer, and the set is bounded by
+            // maxFiles AND by true-spine/named-seeding having already trimmed each file
+            // to its necessary content. A file that merely REFERENCES the flow
+            // (Combine.swift name-drops request/task) is incidental → still capped, so
+            // freed budget never leaks into noise. This is the last god-file layer:
+            // build (Session, true-spined) + validators-exec (Request) + validate
+            // (DataRequest/Validation) all render, instead of the cap dropping whichever
+            // phase the file order happened to put last.
+            if (!fileNecessary && totalChars + fileSection.length + 200 > budget.maxOutputChars) {
                 const remaining = budget.maxOutputChars - totalChars - 200;
                 if (remaining < 500)
-                    break;
-                const trimmed = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
-                lines.push(fileHeader);
-                lines.push('');
-                lines.push('```' + lang);
-                lines.push(trimmed);
-                lines.push('```');
-                lines.push('');
-                totalChars += trimmed.length + 200;
-                filesIncluded++;
+                    continue; // incidental file, no room — skip it, keep scanning for necessary ones
+                fileSection = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
                 anyFileTrimmed = true;
-                break;
             }
             lines.push(fileHeader);
             lines.push('');
@@ -2449,11 +2822,20 @@ class ToolHandler {
         // maxOutputChars (observed 30k against a 28k tier cap). A fat explore
         // payload persists in the agent's context and is re-read as cache-input
         // on every subsequent turn, so the overrun is paid many times over.
-        const output = this.buildFlowFromNamedSymbols(cg, query) + lines.join('\n');
-        if (output.length > budget.maxOutputChars) {
-            const cut = output.slice(0, budget.maxOutputChars);
+        // Final ceiling. The render loop is now the authority on WHAT to emit — it
+        // renders necessary files (named/spine) even past maxOutputChars and caps
+        // only incidental ones, all bounded by maxFiles + per-file true-spine — so
+        // this is a SAFETY ceiling above that necessary content, not a hard cut
+        // through it. Cutting at a flat maxOutputChars here undid the whole point:
+        // Alamofire's loop assembles build+validators-exec+validate (~15K), and a 13K
+        // slice dropped the validate phase, which the agent then had to Read. Allow
+        // necessary overflow up to 1.5× maxOutputChars (still bounds a pathological monolith).
+        const output = flow.text + lines.join('\n');
+        const hardCeiling = Math.round(budget.maxOutputChars * 1.5);
+        if (output.length > hardCeiling) {
+            const cut = output.slice(0, hardCeiling);
             const lastNewline = cut.lastIndexOf('\n');
-            const safe = lastNewline > budget.maxOutputChars * 0.8 ? cut.slice(0, lastNewline) : cut;
+            const safe = lastNewline > hardCeiling * 0.8 ? cut.slice(0, lastNewline) : cut;
             return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
         }
         return this.textResult(output);