@stupidloud/codegraph 0.7.14 → 0.7.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/README.md +44 -10
  2. package/README.zh-CN.md +3 -8
  3. package/dist/bin/codegraph.js +102 -24
  4. package/dist/bin/codegraph.js.map +1 -1
  5. package/dist/bin/node-version-check.d.ts +3 -0
  6. package/dist/bin/node-version-check.d.ts.map +1 -1
  7. package/dist/bin/node-version-check.js +5 -2
  8. package/dist/bin/node-version-check.js.map +1 -1
  9. package/dist/bin/uninstall.d.ts +7 -7
  10. package/dist/bin/uninstall.d.ts.map +1 -1
  11. package/dist/bin/uninstall.js +23 -135
  12. package/dist/bin/uninstall.js.map +1 -1
  13. package/dist/config.d.ts.map +1 -1
  14. package/dist/config.js +0 -2
  15. package/dist/config.js.map +1 -1
  16. package/dist/context/index.d.ts.map +1 -1
  17. package/dist/context/index.js +4 -2
  18. package/dist/context/index.js.map +1 -1
  19. package/dist/db/migrations.d.ts +1 -1
  20. package/dist/db/migrations.d.ts.map +1 -1
  21. package/dist/db/migrations.js +22 -8
  22. package/dist/db/migrations.js.map +1 -1
  23. package/dist/db/queries.d.ts.map +1 -1
  24. package/dist/db/queries.js +7 -1
  25. package/dist/db/queries.js.map +1 -1
  26. package/dist/db/schema.sql +0 -1
  27. package/dist/extraction/index.d.ts +1 -1
  28. package/dist/extraction/index.d.ts.map +1 -1
  29. package/dist/index.d.ts +0 -1
  30. package/dist/index.d.ts.map +1 -1
  31. package/dist/index.js +3 -3
  32. package/dist/index.js.map +1 -1
  33. package/dist/installer/claude-md-template.d.ts +10 -6
  34. package/dist/installer/claude-md-template.d.ts.map +1 -1
  35. package/dist/installer/claude-md-template.js +15 -40
  36. package/dist/installer/claude-md-template.js.map +1 -1
  37. package/dist/installer/config-writer.d.ts +17 -24
  38. package/dist/installer/config-writer.d.ts.map +1 -1
  39. package/dist/installer/config-writer.js +44 -239
  40. package/dist/installer/config-writer.js.map +1 -1
  41. package/dist/installer/index.d.ts +45 -4
  42. package/dist/installer/index.d.ts.map +1 -1
  43. package/dist/installer/index.js +216 -79
  44. package/dist/installer/index.js.map +1 -1
  45. package/dist/installer/instructions-template.d.ts +28 -0
  46. package/dist/installer/instructions-template.d.ts.map +1 -0
  47. package/dist/installer/instructions-template.js +63 -0
  48. package/dist/installer/instructions-template.js.map +1 -0
  49. package/dist/installer/targets/claude.d.ts +27 -0
  50. package/dist/installer/targets/claude.d.ts.map +1 -0
  51. package/dist/installer/targets/claude.js +246 -0
  52. package/dist/installer/targets/claude.js.map +1 -0
  53. package/dist/installer/targets/codex.d.ts +18 -0
  54. package/dist/installer/targets/codex.d.ts.map +1 -0
  55. package/dist/installer/targets/codex.js +185 -0
  56. package/dist/installer/targets/codex.js.map +1 -0
  57. package/dist/installer/targets/cursor.d.ts +35 -0
  58. package/dist/installer/targets/cursor.d.ts.map +1 -0
  59. package/dist/installer/targets/cursor.js +229 -0
  60. package/dist/installer/targets/cursor.js.map +1 -0
  61. package/dist/installer/targets/opencode.d.ts +30 -0
  62. package/dist/installer/targets/opencode.d.ts.map +1 -0
  63. package/dist/installer/targets/opencode.js +235 -0
  64. package/dist/installer/targets/opencode.js.map +1 -0
  65. package/dist/installer/targets/registry.d.ts +35 -0
  66. package/dist/installer/targets/registry.d.ts.map +1 -0
  67. package/dist/installer/targets/registry.js +83 -0
  68. package/dist/installer/targets/registry.js.map +1 -0
  69. package/dist/installer/targets/shared.d.ts +77 -0
  70. package/dist/installer/targets/shared.d.ts.map +1 -0
  71. package/dist/installer/targets/shared.js +246 -0
  72. package/dist/installer/targets/shared.js.map +1 -0
  73. package/dist/installer/targets/toml.d.ts +52 -0
  74. package/dist/installer/targets/toml.d.ts.map +1 -0
  75. package/dist/installer/targets/toml.js +147 -0
  76. package/dist/installer/targets/toml.js.map +1 -0
  77. package/dist/installer/targets/types.d.ts +116 -0
  78. package/dist/installer/targets/types.d.ts.map +1 -0
  79. package/dist/installer/targets/types.js +16 -0
  80. package/dist/installer/targets/types.js.map +1 -0
  81. package/dist/mcp/index.d.ts +4 -0
  82. package/dist/mcp/index.d.ts.map +1 -1
  83. package/dist/mcp/index.js +34 -9
  84. package/dist/mcp/index.js.map +1 -1
  85. package/dist/mcp/server-instructions.d.ts +1 -1
  86. package/dist/mcp/server-instructions.d.ts.map +1 -1
  87. package/dist/mcp/server-instructions.js +6 -6
  88. package/dist/mcp/tools.d.ts +61 -5
  89. package/dist/mcp/tools.d.ts.map +1 -1
  90. package/dist/mcp/tools.js +389 -81
  91. package/dist/mcp/tools.js.map +1 -1
  92. package/dist/search/query-utils.d.ts.map +1 -1
  93. package/dist/search/query-utils.js +29 -26
  94. package/dist/search/query-utils.js.map +1 -1
  95. package/dist/semantic-config-prompt.d.ts.map +1 -1
  96. package/dist/semantic-config-prompt.js +2 -3
  97. package/dist/semantic-config-prompt.js.map +1 -1
  98. package/dist/types.d.ts +0 -2
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js +1 -2
  101. package/dist/types.js.map +1 -1
  102. package/dist/ui/glyphs.d.ts +42 -0
  103. package/dist/ui/glyphs.d.ts.map +1 -0
  104. package/dist/ui/glyphs.js +78 -0
  105. package/dist/ui/glyphs.js.map +1 -0
  106. package/dist/ui/shimmer-progress.d.ts +1 -0
  107. package/dist/ui/shimmer-progress.d.ts.map +1 -1
  108. package/dist/ui/shimmer-progress.js +7 -0
  109. package/dist/ui/shimmer-progress.js.map +1 -1
  110. package/dist/ui/shimmer-worker.js +20 -11
  111. package/dist/ui/shimmer-worker.js.map +1 -1
  112. package/dist/ui/types.d.ts +1 -0
  113. package/dist/ui/types.d.ts.map +1 -1
  114. package/dist/vectors/embedder.d.ts +15 -8
  115. package/dist/vectors/embedder.d.ts.map +1 -1
  116. package/dist/vectors/embedder.js +81 -53
  117. package/dist/vectors/embedder.js.map +1 -1
  118. package/dist/vectors/index.d.ts +1 -1
  119. package/dist/vectors/index.d.ts.map +1 -1
  120. package/dist/vectors/index.js.map +1 -1
  121. package/dist/vectors/manager.d.ts +5 -1
  122. package/dist/vectors/manager.d.ts.map +1 -1
  123. package/dist/vectors/manager.js +47 -28
  124. package/dist/vectors/manager.js.map +1 -1
  125. package/dist/vectors/search.d.ts +1 -1
  126. package/dist/vectors/search.d.ts.map +1 -1
  127. package/dist/vectors/search.js +9 -16
  128. package/dist/vectors/search.js.map +1 -1
  129. package/package.json +3 -2
  130. package/scripts/agent-eval/itrun.sh +107 -0
  131. package/scripts/agent-eval/parse-run.mjs +45 -0
  132. package/scripts/agent-eval/parse-session.mjs +93 -0
  133. package/scripts/agent-eval/run-agent.sh +34 -0
  134. package/scripts/extract-release-notes.mjs +130 -0
  135. package/scripts/local-install.sh +41 -0
  136. package/scripts/release.sh +68 -0
package/dist/mcp/tools.js CHANGED
@@ -40,6 +40,7 @@ var __importStar = (this && this.__importStar) || (function () {
40
40
  Object.defineProperty(exports, "__esModule", { value: true });
41
41
  exports.ToolHandler = exports.tools = void 0;
42
42
  exports.getExploreBudget = getExploreBudget;
43
+ exports.getExploreOutputBudget = getExploreOutputBudget;
43
44
  const index_1 = __importStar(require("../index"));
44
45
  const crypto_1 = require("crypto");
45
46
  const fs_1 = require("fs");
@@ -49,6 +50,19 @@ const path_1 = require("path");
49
50
  const db_1 = require("../db");
50
51
  /** Maximum output length to prevent context bloat (characters) */
51
52
  const MAX_OUTPUT_LENGTH = 15000;
53
+ /**
54
+ * Rust path roots that have no file-system equivalent — `crate` is the
55
+ * current crate, `super` is the parent module, `self` is the current
56
+ * module. Used by `matchesSymbol` to strip these before file-path
57
+ * matching so `crate::configurator::stage_apply::run` resolves the
58
+ * same as `configurator::stage_apply::run`.
59
+ */
60
+ const RUST_PATH_PREFIXES = new Set(['crate', 'super', 'self']);
61
+ /** Last `::` / `.` / `/`-separated segment of a qualified symbol. */
62
+ function lastQualifierPart(symbol) {
63
+ const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0);
64
+ return parts[parts.length - 1] ?? symbol;
65
+ }
52
66
  /**
53
67
  * Calculate the recommended number of codegraph_explore calls based on project size.
54
68
  * Larger codebases need more exploration calls to cover their surface area,
@@ -65,6 +79,92 @@ function getExploreBudget(fileCount) {
65
79
  return 4;
66
80
  return 5;
67
81
  }
82
+ function getExploreOutputBudget(fileCount) {
83
+ if (fileCount < 500) {
84
+ return {
85
+ maxOutputChars: 18000,
86
+ defaultMaxFiles: 5,
87
+ maxCharsPerFile: 3800,
88
+ gapThreshold: 8,
89
+ maxSymbolsInFileHeader: 6,
90
+ maxEdgesPerRelationshipKind: 6,
91
+ includeRelationships: true,
92
+ includeAdditionalFiles: false,
93
+ includeCompletenessSignal: false,
94
+ includeBudgetNote: false,
95
+ };
96
+ }
97
+ if (fileCount < 5000) {
98
+ return {
99
+ maxOutputChars: 28000,
100
+ defaultMaxFiles: 9,
101
+ maxCharsPerFile: 5000,
102
+ gapThreshold: 12,
103
+ maxSymbolsInFileHeader: 10,
104
+ maxEdgesPerRelationshipKind: 10,
105
+ includeRelationships: true,
106
+ includeAdditionalFiles: true,
107
+ includeCompletenessSignal: true,
108
+ includeBudgetNote: true,
109
+ };
110
+ }
111
+ if (fileCount < 15000) {
112
+ return {
113
+ maxOutputChars: 35000,
114
+ defaultMaxFiles: 12,
115
+ maxCharsPerFile: 7000,
116
+ gapThreshold: 15,
117
+ maxSymbolsInFileHeader: 15,
118
+ maxEdgesPerRelationshipKind: 15,
119
+ includeRelationships: true,
120
+ includeAdditionalFiles: true,
121
+ includeCompletenessSignal: true,
122
+ includeBudgetNote: true,
123
+ };
124
+ }
125
+ return {
126
+ maxOutputChars: 38000,
127
+ defaultMaxFiles: 14,
128
+ maxCharsPerFile: 7000,
129
+ gapThreshold: 15,
130
+ maxSymbolsInFileHeader: 15,
131
+ maxEdgesPerRelationshipKind: 15,
132
+ includeRelationships: true,
133
+ includeAdditionalFiles: true,
134
+ includeCompletenessSignal: true,
135
+ includeBudgetNote: true,
136
+ };
137
+ }
138
+ /**
139
+ * Whether `codegraph_explore` should prefix source lines with their line
140
+ * numbers (cat -n style: `<num>\t<code>`).
141
+ *
142
+ * Line numbers let the agent cite `file:line` straight from the explore
143
+ * payload instead of re-Reading the file just to find a line number — the
144
+ * dominant residual cost on precise-tracing questions (#185 follow-up).
145
+ *
146
+ * Defaults ON. Set `CODEGRAPH_EXPLORE_LINENUMS=0` to disable (used by the
147
+ * A/B harness to measure the payload-cost vs. read-savings tradeoff).
148
+ */
149
+ function exploreLineNumbersEnabled() {
150
+ return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0';
151
+ }
152
+ /**
153
+ * Prefix each line of a source slice with its 1-based line number, matching
154
+ * the Read tool's `cat -n` convention (number + tab) so the agent treats it
155
+ * the same way it treats Read output.
156
+ *
157
+ * @param slice contiguous source text (already extracted from the file)
158
+ * @param firstLineNumber the 1-based line number of the slice's first line
159
+ */
160
+ function numberSourceLines(slice, firstLineNumber) {
161
+ const out = [];
162
+ const split = slice.split('\n');
163
+ for (let i = 0; i < split.length; i++) {
164
+ out.push(`${firstLineNumber + i}\t${split[i]}`);
165
+ }
166
+ return out.join('\n');
167
+ }
68
168
  /**
69
169
  * Mark a Claude session as having consulted MCP tools.
70
170
  * This enables Grep/Glob/Bash commands that would otherwise be blocked.
@@ -97,7 +197,7 @@ const projectPathProperty = {
97
197
  exports.tools = [
98
198
  {
99
199
  name: 'codegraph_search',
100
- description: 'Quick symbol search by name. Returns locations only (no code). Use codegraph_context instead for comprehensive task context.',
200
+ description: 'Quick symbol search by name. Returns locations only (no code) — best for pinpoint "where is X defined / find the symbol named X" lookups. For understanding how something works or tracing a flow, lead with codegraph_explore instead of searching then reading.',
101
201
  inputSchema: {
102
202
  type: 'object',
103
203
  properties: {
@@ -227,13 +327,13 @@ exports.tools = [
227
327
  },
228
328
  {
229
329
  name: 'codegraph_explore',
230
- description: 'Deep exploration toolreturns comprehensive context for a topic in a SINGLE call. Groups all relevant source code by file (contiguous sections, not snippets), includes a relationship map, and uses deeper graph traversal. Designed to replace multiple codegraph_node + file Read calls. Use this instead of codegraph_context when you need thorough understanding. IMPORTANT: Use specific symbol names, file names, or short code terms in your query NOT natural language sentences. Before calling this, use codegraph_search to discover relevant symbol names, then include those names in your query. Bad: "how are agent prompts loaded and passed to the CLI". Good: "readAgentsFromDirectory createClaudeSession chat-manager agents.ts".',
330
+ description: 'PRIMARY TOOL for understanding questions "how does X work", "trace X end to end", "explain the Y system", architecture/onboarding. Returns comprehensive context in a SINGLE call: relevant source grouped by file (contiguous, line-numbered sections, not snippets) + a relationship map + deep graph traversal. It REPLACES the grep+Read exploration loop: feed it the key symbol/file names and read its output do NOT Read the files one by one. It works best when your query names the relevant symbols (e.g. "readAgentsFromDirectory createClaudeSession chat-manager agents.ts"); if the question is a plain sentence that names nothing concrete, do ONE quick codegraph_search or codegraph_context to surface the names, then call this with them. After exploring, use codegraph_node / Read only to fill specific gaps it did not cover. Prefer codegraph_search over this only for a pinpoint "where is X defined" lookup.',
231
331
  inputSchema: {
232
332
  type: 'object',
233
333
  properties: {
234
334
  query: {
235
335
  type: 'string',
236
- description: 'Symbol names, file names, or short code terms to explore (e.g., "AuthService loginUser session-manager", "GraphTraverser BFS impact traversal.ts"). Use codegraph_search first to find relevant names.',
336
+ description: 'What to explore. A short list of symbol/file/keyword terms works best (e.g., "AuthService loginUser session-manager", "GraphTraverser BFS impact traversal.ts"), but a plain-language phrase also works — the tool runs its own retrieval. No need to codegraph_search first.',
237
337
  },
238
338
  maxFiles: {
239
339
  type: 'number',
@@ -606,22 +706,34 @@ class ToolHandler {
606
706
  const formatted = this.formatImpact(symbol, mergedImpact) + allMatches.note;
607
707
  return this.textResult(this.truncateOutput(formatted));
608
708
  }
609
- /** Maximum output for explore tool — sized to stay under MCP client token limits (~10k tokens) */
610
- static EXPLORE_MAX_OUTPUT = 35000;
611
709
  /**
612
710
  * Handle codegraph_explore — deep exploration in a single call
613
711
  *
614
712
  * Strategy: find relevant symbols via graph traversal, group by file,
615
713
  * then read contiguous file sections covering all symbols per file.
616
714
  * This replaces multiple codegraph_node + Read calls.
715
+ *
716
+ * Output size is adaptive to project file count via
717
+ * `getExploreOutputBudget` — see #185 for why a fixed 35k cap was a
718
+ * tax on small projects while earning its keep on large ones.
617
719
  */
618
720
  async handleExplore(args) {
619
721
  const query = this.validateString(args.query, 'query');
620
722
  if (typeof query !== 'string')
621
723
  return query;
622
724
  const cg = this.getCodeGraph(args.projectPath);
623
- const maxFiles = (0, utils_1.clamp)(args.maxFiles || 12, 1, 20);
624
725
  const projectRoot = cg.getProjectRoot();
726
+ // Resolve adaptive output budget from project size. Falls back to the
727
+ // largest-tier defaults if stats aren't available, which preserves
728
+ // pre-#185 behavior for callers that hit the rare stats failure.
729
+ let budget;
730
+ try {
731
+ budget = getExploreOutputBudget(cg.getStats().fileCount);
732
+ }
733
+ catch {
734
+ budget = getExploreOutputBudget(Infinity);
735
+ }
736
+ const maxFiles = (0, utils_1.clamp)(args.maxFiles || budget.defaultMaxFiles, 1, 20);
625
737
  // Step 1: Find relevant context with generous parameters.
626
738
  // Use a large maxNodes budget — explore has its own 35k char output limit
627
739
  // that prevents context bloat, so more nodes just means better coverage
@@ -705,7 +817,7 @@ class ToolHandler {
705
817
  // Relationship map — show how symbols connect
706
818
  const significantEdges = subgraph.edges.filter(e => e.kind !== 'contains' // skip contains — it's implied by file grouping
707
819
  );
708
- if (significantEdges.length > 0) {
820
+ if (budget.includeRelationships && significantEdges.length > 0) {
709
821
  lines.push('### Relationships');
710
822
  lines.push('');
711
823
  // Group edges by kind for readability
@@ -720,14 +832,14 @@ class ToolHandler {
720
832
  byKind.set(edge.kind, group);
721
833
  }
722
834
  for (const [kind, edges] of byKind) {
723
- // Show up to 15 relationships per kind
724
- const shown = edges.slice(0, 15);
835
+ const cap = budget.maxEdgesPerRelationshipKind;
836
+ const shown = edges.slice(0, cap);
725
837
  lines.push(`**${kind}:**`);
726
838
  for (const e of shown) {
727
839
  lines.push(`- ${e.source} → ${e.target}`);
728
840
  }
729
- if (edges.length > 15) {
730
- lines.push(`- ... and ${edges.length - 15} more`);
841
+ if (edges.length > cap) {
842
+ lines.push(`- ... and ${edges.length - cap} more`);
731
843
  }
732
844
  lines.push('');
733
845
  }
@@ -737,10 +849,11 @@ class ToolHandler {
737
849
  lines.push('');
738
850
  let totalChars = lines.join('\n').length;
739
851
  let filesIncluded = 0;
852
+ let anyFileTrimmed = false;
740
853
  for (const [filePath, group] of sortedFiles) {
741
854
  if (filesIncluded >= maxFiles)
742
855
  break;
743
- if (totalChars > ToolHandler.EXPLORE_MAX_OUTPUT * 0.9)
856
+ if (totalChars > budget.maxOutputChars * 0.9)
744
857
  break;
745
858
  const absPath = (0, utils_1.validatePathWithinRoot)(projectRoot, filePath);
746
859
  if (!absPath || !(0, fs_1.existsSync)(absPath))
@@ -755,14 +868,37 @@ class ToolHandler {
755
868
  const fileLines = fileContent.split('\n');
756
869
  const lang = group.nodes[0]?.language || '';
757
870
  // Cluster nearby symbols to avoid reading huge gaps between distant symbols.
758
- // Sort by start line, then merge overlapping/adjacent ranges (within 15 lines).
759
- // Include both node ranges AND edge source locations so template sections
760
- // with component usages/calls are covered (not just script block symbols).
871
+ // Sort by start line, then merge overlapping/adjacent ranges (within the
872
+ // adaptive gap threshold). Include both node ranges AND edge source
873
+ // locations so template sections with component usages/calls are
874
+ // covered (not just script block symbols).
875
+ //
876
+ // Each range carries an `importance` score so we can rank clusters
877
+ // when the per-file budget forces us to drop some: entry-point nodes
878
+ // are worth 10, directly-connected nodes 3, peripheral nodes 1, and
879
+ // bare edge-source lines 2 (less than a connected node but more than
880
+ // a peripheral one — they hint at a reference but aren't a definition).
881
+ // Container kinds whose body can span most/all of a file. When such a
882
+ // node covers most of the file we drop it from the ranges: keeping it
883
+ // would merge every method inside it into one giant cluster spanning
884
+ // the whole file, which then tail-trims down to just the container's
885
+ // opening lines (its header/declarations) and buries the methods the
886
+ // query actually asked about (#185 follow-up — Session.swift in
887
+ // Alamofire is the canonical case: the `Session` class spans ~1,400
888
+ // lines). We want the granular symbols inside, not the envelope.
889
+ const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
761
890
  const ranges = group.nodes
762
891
  .filter(n => n.startLine > 0 && n.endLine > 0)
763
- // Skip file/component nodes that span the entire file — they'd create one giant cluster
764
- .filter(n => !(n.kind === 'component' && n.startLine === 1 && n.endLine >= fileLines.length - 1))
765
- .map(n => ({ start: n.startLine, end: n.endLine, name: n.name, kind: n.kind }));
892
+ // Drop whole-file envelope nodes (containers covering >50% of the file).
893
+ .filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
894
+ .map(n => {
895
+ let importance = 1;
896
+ if (entryNodeIds.has(n.id))
897
+ importance = 10;
898
+ else if (connectedToEntry.has(n.id))
899
+ importance = 3;
900
+ return { start: n.startLine, end: n.endLine, name: n.name, kind: n.kind, importance };
901
+ });
766
902
  // Add edge source locations in this file — captures template references
767
903
  // (component usages, event handlers) that aren't nodes themselves.
768
904
  // Query edges directly from the DB (not just the subgraph) because BFS
@@ -780,48 +916,148 @@ class ToolHandler {
780
916
  // Look up target name from subgraph first, fall back to edge kind
781
917
  const targetNode = subgraph.nodes.get(edge.target);
782
918
  const targetName = targetNode?.name ?? edge.kind;
783
- ranges.push({ start: edge.line, end: edge.line, name: targetName, kind: edge.kind });
919
+ ranges.push({ start: edge.line, end: edge.line, name: targetName, kind: edge.kind, importance: 2 });
784
920
  }
785
921
  }
786
922
  ranges.sort((a, b) => a.start - b.start);
787
923
  if (ranges.length === 0)
788
924
  continue;
789
- const GAP_THRESHOLD = 15; // merge sections within 15 lines of each other
925
+ const gapThreshold = budget.gapThreshold;
790
926
  const clusters = [];
791
- let current = { start: ranges[0].start, end: ranges[0].end, symbols: [`${ranges[0].name}(${ranges[0].kind})`] };
927
+ let current = {
928
+ start: ranges[0].start,
929
+ end: ranges[0].end,
930
+ symbols: [`${ranges[0].name}(${ranges[0].kind})`],
931
+ score: ranges[0].importance,
932
+ maxImportance: ranges[0].importance,
933
+ };
792
934
  for (let i = 1; i < ranges.length; i++) {
793
935
  const r = ranges[i];
794
- if (r.start <= current.end + GAP_THRESHOLD) {
936
+ if (r.start <= current.end + gapThreshold) {
795
937
  current.end = Math.max(current.end, r.end);
796
938
  current.symbols.push(`${r.name}(${r.kind})`);
939
+ current.score += r.importance;
940
+ current.maxImportance = Math.max(current.maxImportance, r.importance);
797
941
  }
798
942
  else {
799
943
  clusters.push(current);
800
- current = { start: r.start, end: r.end, symbols: [`${r.name}(${r.kind})`] };
944
+ current = {
945
+ start: r.start,
946
+ end: r.end,
947
+ symbols: [`${r.name}(${r.kind})`],
948
+ score: r.importance,
949
+ maxImportance: r.importance,
950
+ };
801
951
  }
802
952
  }
803
953
  clusters.push(current);
804
- // Build file section output from clusters
954
+ // Build file section output from clusters, capped by per-file budget.
955
+ // The pathological case (#185): a file like Session.swift where every
956
+ // method is adjacent collapses into one cluster spanning the whole
957
+ // file, and dumping that into the agent's context is most of the
958
+ // token cost on small projects. We pick clusters in priority order
959
+ // until the per-file char cap is hit. Truly enormous single clusters
960
+ // get tail-trimmed with a marker.
805
961
  const contextPadding = 3;
962
+ const withLineNumbers = exploreLineNumbersEnabled();
963
+ const buildSection = (c) => {
964
+ const startIdx = Math.max(0, c.start - 1 - contextPadding);
965
+ const endIdx = Math.min(fileLines.length, c.end + contextPadding);
966
+ const slice = fileLines.slice(startIdx, endIdx).join('\n');
967
+ // startIdx is 0-based, so the slice's first line is line startIdx + 1.
968
+ return withLineNumbers ? numberSourceLines(slice, startIdx + 1) : slice;
969
+ };
970
+ // Language-neutral separator (no `//` — not a comment in Python, Ruby,
971
+ // etc.). With line numbers on, the line-number jump also signals the gap.
972
+ const GAP_MARKER = '\n\n... (gap) ...\n\n';
973
+ // Rank clusters for inclusion under the per-file cap. Entry-point
974
+ // clusters come first: a cluster containing a query entry point
975
+ // (importance 10) must outrank a dense block of mere declarations,
976
+ // otherwise on a large file like Session.swift the top-of-file class
977
+ // header + property list (many adjacent low-importance nodes, high
978
+ // density) wins the budget and buries the actual methods the query
979
+ // asked about (perform/didCreateURLRequest/task live deep in the
980
+ // file). Within the same importance tier, prefer density (score per
981
+ // line) so we still favor focused clusters over sprawling ones, then
982
+ // smaller span as a cheap-to-include tiebreak.
983
+ const rankedClusters = clusters
984
+ .map((c, i) => ({ idx: i, span: c.end - c.start + 1, c }))
985
+ .sort((a, b) => {
986
+ if (b.c.maxImportance !== a.c.maxImportance)
987
+ return b.c.maxImportance - a.c.maxImportance;
988
+ const densityA = a.c.score / a.span;
989
+ const densityB = b.c.score / b.span;
990
+ if (densityB !== densityA)
991
+ return densityB - densityA;
992
+ if (b.c.score !== a.c.score)
993
+ return b.c.score - a.c.score;
994
+ return a.span - b.span;
995
+ });
996
+ const chosenIndices = new Set();
997
+ let projectedChars = 0;
998
+ for (const rc of rankedClusters) {
999
+ const sectionLen = buildSection(rc.c).length + (chosenIndices.size > 0 ? GAP_MARKER.length : 0);
1000
+ // Always take the top-ranked cluster, even if oversize, so we don't
1001
+ // return an empty file section (agent would then re-Read the file,
1002
+ // negating the savings).
1003
+ if (chosenIndices.size === 0) {
1004
+ chosenIndices.add(rc.idx);
1005
+ projectedChars += sectionLen;
1006
+ continue;
1007
+ }
1008
+ if (projectedChars + sectionLen > budget.maxCharsPerFile)
1009
+ continue;
1010
+ chosenIndices.add(rc.idx);
1011
+ projectedChars += sectionLen;
1012
+ }
1013
+ // Emit chosen clusters in source order so the file reads top-to-bottom.
806
1014
  let fileSection = '';
807
1015
  const allSymbols = [];
808
- for (const cluster of clusters) {
809
- const startIdx = Math.max(0, cluster.start - 1 - contextPadding);
810
- const endIdx = Math.min(fileLines.length, cluster.end + contextPadding);
811
- const section = fileLines.slice(startIdx, endIdx).join('\n');
812
- if (fileSection.length > 0) {
813
- fileSection += '\n\n// ... (gap) ...\n\n';
814
- }
1016
+ let fileTrimmed = false;
1017
+ for (let i = 0; i < clusters.length; i++) {
1018
+ if (!chosenIndices.has(i))
1019
+ continue;
1020
+ const cluster = clusters[i];
1021
+ const section = buildSection(cluster);
1022
+ if (fileSection.length > 0)
1023
+ fileSection += GAP_MARKER;
815
1024
  fileSection += section;
816
1025
  allSymbols.push(...cluster.symbols);
817
1026
  }
818
- // Skip if this section would blow the output limit
819
- if (totalChars + fileSection.length + 200 > ToolHandler.EXPLORE_MAX_OUTPUT) {
820
- const budget = ToolHandler.EXPLORE_MAX_OUTPUT - totalChars - 200;
821
- if (budget < 500)
1027
+ // If a single chosen cluster is still oversize (long monolithic
1028
+ // function), tail-trim it. Better one trimmed view than nothing.
1029
+ if (fileSection.length > budget.maxCharsPerFile) {
1030
+ fileSection = fileSection.slice(0, budget.maxCharsPerFile) + '\n... (trimmed) ...';
1031
+ fileTrimmed = true;
1032
+ }
1033
+ if (chosenIndices.size < clusters.length || fileTrimmed) {
1034
+ anyFileTrimmed = true;
1035
+ }
1036
+ // Dedupe + cap the symbols list shown in the per-file header. Some
1037
+ // files (Session.swift in Alamofire) produced 3.4KB symbol lists
1038
+ // from cluster scoring + edge-source lines, dwarfing the per-file
1039
+ // body cap. Show top names by frequency, with a "+N more" tail.
1040
+ const symbolCounts = new Map();
1041
+ for (const s of allSymbols) {
1042
+ symbolCounts.set(s, (symbolCounts.get(s) ?? 0) + 1);
1043
+ }
1044
+ const sortedSymbols = [...symbolCounts.entries()]
1045
+ .sort((a, b) => b[1] - a[1])
1046
+ .map(([name]) => name);
1047
+ const headerCap = budget.maxSymbolsInFileHeader;
1048
+ const headerSymbols = sortedSymbols.slice(0, headerCap);
1049
+ const omittedCount = sortedSymbols.length - headerSymbols.length;
1050
+ const headerSuffix = omittedCount > 0
1051
+ ? `${headerSymbols.join(', ')}, +${omittedCount} more`
1052
+ : headerSymbols.join(', ');
1053
+ const fileHeader = `#### ${filePath} — ${headerSuffix}`;
1054
+ // Respect the total output cap on a file-by-file basis.
1055
+ if (totalChars + fileSection.length + 200 > budget.maxOutputChars) {
1056
+ const remaining = budget.maxOutputChars - totalChars - 200;
1057
+ if (remaining < 500)
822
1058
  break;
823
- const trimmed = fileSection.slice(0, budget) + '\n// ... trimmed ...';
824
- lines.push(`#### ${filePath} — ${allSymbols.join(', ')}`);
1059
+ const trimmed = fileSection.slice(0, remaining) + '\n... (trimmed) ...';
1060
+ lines.push(fileHeader);
825
1061
  lines.push('');
826
1062
  lines.push('```' + lang);
827
1063
  lines.push(trimmed);
@@ -829,9 +1065,10 @@ class ToolHandler {
829
1065
  lines.push('');
830
1066
  totalChars += trimmed.length + 200;
831
1067
  filesIncluded++;
1068
+ anyFileTrimmed = true;
832
1069
  break;
833
1070
  }
834
- lines.push(`#### ${filePath} — ${allSymbols.join(', ')}`);
1071
+ lines.push(fileHeader);
835
1072
  lines.push('');
836
1073
  lines.push('```' + lang);
837
1074
  lines.push(fileSection);
@@ -840,36 +1077,51 @@ class ToolHandler {
840
1077
  totalChars += fileSection.length + 200;
841
1078
  filesIncluded++;
842
1079
  }
843
- // Add remaining files as references (from both relevant and peripheral files)
844
- const remainingRelevant = sortedFiles.slice(filesIncluded);
845
- const peripheralFiles = [...fileGroups.entries()]
846
- .filter(([, group]) => group.score < 3)
847
- .sort((a, b) => b[1].score - a[1].score);
848
- const remainingFiles = [...remainingRelevant, ...peripheralFiles];
849
- if (remainingFiles.length > 0) {
850
- lines.push('### Additional relevant files (not shown)');
851
- lines.push('');
852
- for (const [filePath, group] of remainingFiles.slice(0, 10)) {
853
- const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
854
- lines.push(`- ${filePath}: ${symbols}`);
855
- }
856
- if (remainingFiles.length > 10) {
857
- lines.push(`- ... and ${remainingFiles.length - 10} more files`);
1080
+ // Add remaining files as references (from both relevant and peripheral files).
1081
+ // Small projects (per budget) skip this — the relevant story already fits
1082
+ // in the source section, and a trailing pointer list is pure overhead.
1083
+ if (budget.includeAdditionalFiles) {
1084
+ const remainingRelevant = sortedFiles.slice(filesIncluded);
1085
+ const peripheralFiles = [...fileGroups.entries()]
1086
+ .filter(([, group]) => group.score < 3)
1087
+ .sort((a, b) => b[1].score - a[1].score);
1088
+ const remainingFiles = [...remainingRelevant, ...peripheralFiles];
1089
+ if (remainingFiles.length > 0) {
1090
+ lines.push('### Additional relevant files (not shown)');
1091
+ lines.push('');
1092
+ for (const [filePath, group] of remainingFiles.slice(0, 10)) {
1093
+ const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
1094
+ lines.push(`- ${filePath}: ${symbols}`);
1095
+ }
1096
+ if (remainingFiles.length > 10) {
1097
+ lines.push(`- ... and ${remainingFiles.length - 10} more files`);
1098
+ }
858
1099
  }
859
1100
  }
860
- // Add completeness signal so agents know they don't need to re-read these files
861
- lines.push('');
862
- lines.push('---');
863
- lines.push(`> **Complete source code is included above for ${filesIncluded} files.** You do NOT need to re-read these files — the relevant sections are already shown in full. Only use Read/Grep for files listed under "Additional relevant files" if you need more detail.`);
864
- // Add explore budget note based on project size
865
- try {
866
- const stats = cg.getStats();
867
- const budget = getExploreBudget(stats.fileCount);
1101
+ // Add completeness signal so agents know they don't need to re-read these files.
1102
+ // On small projects the budget gates this off — but if we actually had to
1103
+ // trim or drop clusters, surface a brief note so the agent knows it can
1104
+ // still Read for more detail.
1105
+ if (budget.includeCompletenessSignal) {
868
1106
  lines.push('');
869
- lines.push(`> **Explore budget: ${budget} calls max for this project (${stats.fileCount.toLocaleString()} files indexed).** Stop exploring and synthesize your answer once you've used ${budget} calls — do NOT make additional explore calls beyond this budget.`);
1107
+ lines.push('---');
1108
+ lines.push(`> **Complete source code is included above for ${filesIncluded} files.** You do NOT need to re-read these files — the relevant sections are already shown in full. Only use Read/Grep for files listed under "Additional relevant files" if you need more detail.`);
870
1109
  }
871
- catch {
872
- // Stats unavailable — skip budget note
1110
+ else if (anyFileTrimmed) {
1111
+ lines.push('');
1112
+ lines.push(`> Some file sections were trimmed for size. Use \`codegraph_node\` or Read for the full source if needed.`);
1113
+ }
1114
+ // Add explore budget note based on project size
1115
+ if (budget.includeBudgetNote) {
1116
+ try {
1117
+ const stats = cg.getStats();
1118
+ const callBudget = getExploreBudget(stats.fileCount);
1119
+ lines.push('');
1120
+ lines.push(`> **Explore budget: ${callBudget} calls max for this project (${stats.fileCount.toLocaleString()} files indexed).** Stop exploring and synthesize your answer once you've used ${callBudget} calls — do NOT make additional explore calls beyond this budget.`);
1121
+ }
1122
+ catch {
1123
+ // Stats unavailable — skip budget note
1124
+ }
873
1125
  }
874
1126
  return this.textResult(lines.join('\n'));
875
1127
  }
@@ -1092,9 +1344,22 @@ class ToolHandler {
1092
1344
  * Returns the best match and a note about alternatives if any.
1093
1345
  */
1094
1346
  /**
1095
- * Check if a node matches a symbol query, supporting both simple names and
1096
- * qualified "Parent.child" notation (e.g., "Session.request" matches a method
1097
- * named "request" inside a class named "Session").
1347
+ * Check if a node matches a symbol query.
1348
+ *
1349
+ * Accepts simple names (`run`) and three flavors of qualifier:
1350
+ * - dotted `Session.request` (TS/JS/Python)
1351
+ * - colon-pair `stage_apply::run` (Rust, C++, Ruby)
1352
+ * - slash `configurator/stage_apply` (path-ish)
1353
+ *
1354
+ * Multi-level qualifiers compose: `crate::configurator::stage_apply::run`
1355
+ * works. Rust path prefixes (`crate`, `super`, `self`) are stripped so
1356
+ * the canonical `crate::module::symbol` form resolves.
1357
+ *
1358
+ * Resolution order, last part must always equal `node.name`:
1359
+ * 1. Containment match against `qualifiedName` (handles class-scoped methods
1360
+ * where the extractor builds the qualified name from the AST stack)
1361
+ * 2. File-path containment (handles file-derived modules in Rust/
1362
+ * Python — `stage_apply::run` matches a `run` in `stage_apply.rs`)
1098
1363
  */
1099
1364
  matchesSymbol(node, symbol) {
1100
1365
  // Simple name match
@@ -1103,20 +1368,50 @@ class ToolHandler {
1103
1368
  // File basename match (e.g., "product-card" matches "product-card.liquid")
1104
1369
  if (node.kind === 'file' && node.name.replace(/\.[^.]+$/, '') === symbol)
1105
1370
  return true;
1106
- // Qualified name match: "Parent.child" look for "::Parent::child" in qualified_name
1107
- if (symbol.includes('.')) {
1108
- const parts = symbol.split('.');
1109
- const qualifiedSuffix = parts.join('::');
1110
- if (node.qualifiedName.includes(qualifiedSuffix))
1111
- return true;
1112
- }
1113
- return false;
1371
+ // Qualified-name lookups: split on the supported separators (`::`,
1372
+ // `.`, `/`). Identifier chars (incl. `_`) are never split on, and empty
1373
+ // parts from leading, trailing, or doubled separators are dropped.
1374
+ if (!/[.\/]|::/.test(symbol))
1375
+ return false;
1376
+ const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0);
1377
+ if (parts.length < 2)
1378
+ return false;
1379
+ const lastPart = parts[parts.length - 1];
1380
+ if (node.name !== lastPart)
1381
+ return false;
1382
+ // Stage 1: qualified-name containment match. The extractor joins the
1383
+ // semantic hierarchy with `::`, so `Session.request` and
1384
+ // `Session::request` both become `Session::request` here.
1385
+ const colonSuffix = parts.join('::');
1386
+ if (node.qualifiedName.includes(colonSuffix))
1387
+ return true;
1388
+ // Stage 2: file-path containment. Rust modules and Python packages
1389
+ // are not in `qualifiedName` — they're encoded in the file path. So
1390
+ // `stage_apply::run` matches a `run` in any file whose path
1391
+ // contains a `stage_apply` segment (with or without an extension).
1392
+ //
1393
+ // Filter out Rust path prefixes that have no file-system equivalent.
1394
+ const containerHints = parts.slice(0, -1).filter((p) => !RUST_PATH_PREFIXES.has(p));
1395
+ if (containerHints.length === 0)
1396
+ return false;
1397
+ const segments = node.filePath.split('/').filter((s) => s.length > 0);
1398
+ return containerHints.every((hint) => segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint));
1114
1399
  }
1115
1400
  findSymbol(cg, symbol) {
1116
- // Use higher limit for qualified lookups (e.g., "Session.request") since the
1117
- // target may rank lower in FTS when there are many partial matches
1118
- const limit = symbol.includes('.') ? 50 : 10;
1119
- const results = cg.searchNodes(symbol, { limit });
1401
+ // Use higher limit for qualified lookups (e.g., "Session.request",
1402
+ // "stage_apply::run") since the target may rank lower in FTS when
1403
+ // there are many partial matches across the qualifier parts.
1404
+ const isQualified = /[.\/]|::/.test(symbol);
1405
+ const limit = isQualified ? 50 : 10;
1406
+ let results = cg.searchNodes(symbol, { limit });
1407
+ // FTS strips colons as a special char, so `stage_apply::run` searches
1408
+ // for the literal `stage_applyrun` and finds nothing. Re-search by
1409
+ // the bare last part and let `matchesSymbol` filter by qualifier.
1410
+ if (isQualified && results.length === 0) {
1411
+ const tail = lastQualifierPart(symbol);
1412
+ if (tail && tail !== symbol)
1413
+ results = cg.searchNodes(tail, { limit });
1414
+ }
1120
1415
  if (results.length === 0 || !results[0]) {
1121
1416
  return null;
1122
1417
  }
@@ -1131,7 +1426,12 @@ class ToolHandler {
1131
1426
  const note = `\n\n> **Note:** ${exactMatches.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`;
1132
1427
  return { node: picked, note };
1133
1428
  }
1134
- // No exact match, use best fuzzy match
1429
+ // No exact match. For qualified lookups, don't silently fall back
1430
+ // to a fuzzy result — the user typed a specific qualifier, and
1431
+ // resolving `stage_apply::nonexistent_fn` to the unrelated
1432
+ // `stage_apply.rs` file would be actively misleading (#173).
1433
+ if (isQualified)
1434
+ return null;
1135
1435
  return { node: results[0].node, note: '' };
1136
1436
  }
1137
1437
  /**
@@ -1139,7 +1439,15 @@ class ToolHandler {
1139
1439
  * results across all matching symbols (e.g., multiple classes with an `execute` method).
1140
1440
  */
1141
1441
  findAllSymbols(cg, symbol) {
1142
- const results = cg.searchNodes(symbol, { limit: 50 });
1442
+ let results = cg.searchNodes(symbol, { limit: 50 });
1443
+ // Mirror the fallback in `findSymbol` for qualified queries — FTS
1444
+ // strips colons, so a module-qualified lookup needs a second pass
1445
+ // by the bare last part.
1446
+ if (results.length === 0 && /[.\/]|::/.test(symbol)) {
1447
+ const tail = lastQualifierPart(symbol);
1448
+ if (tail && tail !== symbol)
1449
+ results = cg.searchNodes(tail, { limit: 50 });
1450
+ }
1143
1451
  if (results.length === 0) {
1144
1452
  return { nodes: [], note: '' };
1145
1453
  }