mixdog 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (404) hide show
  1. package/.claude-plugin/marketplace.json +31 -0
  2. package/.claude-plugin/plugin.json +20 -0
  3. package/.gitattributes +34 -0
  4. package/.mcp.json +14 -0
  5. package/ARCHITECTURE.md +77 -0
  6. package/CHANGELOG.md +7 -0
  7. package/CONTRIBUTING.md +45 -0
  8. package/DATA-FLOW.md +79 -0
  9. package/LICENSE +21 -0
  10. package/README.md +389 -0
  11. package/SECURITY.md +138 -0
  12. package/UNINSTALL.md +112 -0
  13. package/agents/maintenance.md +5 -0
  14. package/agents/memory-classification.md +30 -0
  15. package/agents/scheduler-task.md +18 -0
  16. package/agents/webhook-handler.md +27 -0
  17. package/agents/worker.md +24 -0
  18. package/bin/bridge +133 -0
  19. package/bin/statusline-launcher.mjs +78 -0
  20. package/bin/statusline-lib.mjs +550 -0
  21. package/bin/statusline.mjs +607 -0
  22. package/bun.lock +802 -0
  23. package/commands/config.md +16 -0
  24. package/commands/doctor.md +13 -0
  25. package/commands/setup.md +17 -0
  26. package/defaults/cycle3-review-prompt.md +90 -0
  27. package/defaults/hidden-roles.json +65 -0
  28. package/defaults/memory-chunk-prompt.md +63 -0
  29. package/defaults/memory-promote-prompt.md +135 -0
  30. package/defaults/mixdog-config.template.json +27 -0
  31. package/defaults/user-workflow.json +8 -0
  32. package/defaults/user-workflow.md +12 -0
  33. package/hooks/hooks.json +73 -0
  34. package/hooks/lib/active-instance.cjs +77 -0
  35. package/hooks/lib/permission-evaluator.cjs +411 -0
  36. package/hooks/lib/permission-route.cjs +63 -0
  37. package/hooks/lib/permission-rules.cjs +170 -0
  38. package/hooks/lib/settings-loader.cjs +116 -0
  39. package/hooks/post-tool-use.cjs +84 -0
  40. package/hooks/pre-mcp-sandbox.cjs +158 -0
  41. package/hooks/pre-tool-subagent.cjs +253 -0
  42. package/hooks/session-start.cjs +1372 -0
  43. package/hooks/turn-timer.cjs +82 -0
  44. package/lib/claude-md-writer.cjs +386 -0
  45. package/lib/config-cjs.cjs +61 -0
  46. package/lib/hook-pipe-path.cjs +10 -0
  47. package/lib/keychain-cjs.cjs +263 -0
  48. package/lib/plugin-paths.cjs +61 -0
  49. package/lib/rules-builder.cjs +241 -0
  50. package/lib/text-utils.cjs +61 -0
  51. package/native/README.md +117 -0
  52. package/native/prebuilt/linux-aarch64/mixdog-shim +0 -0
  53. package/native/prebuilt/linux-x86_64/mixdog-shim +0 -0
  54. package/native/prebuilt/macos-aarch64/mixdog-shim +0 -0
  55. package/native/prebuilt/macos-x86_64/mixdog-shim +0 -0
  56. package/native/prebuilt/windows-x86_64/mixdog-shim.exe +0 -0
  57. package/package.json +107 -0
  58. package/prompts/code-review.txt +16 -0
  59. package/prompts/security-audit.txt +17 -0
  60. package/rules/bridge/00-common.md +39 -0
  61. package/rules/bridge/20-skip-protocol.md +18 -0
  62. package/rules/bridge/30-explorer.md +33 -0
  63. package/rules/bridge/40-cycle1-agent.md +52 -0
  64. package/rules/bridge/41-cycle2-agent.md +62 -0
  65. package/rules/bridge/42-cycle3-agent.md +44 -0
  66. package/rules/lead/00-tool-lead.md +61 -0
  67. package/rules/lead/01-general.md +23 -0
  68. package/rules/lead/02-channels.md +49 -0
  69. package/rules/lead/03-team.md +27 -0
  70. package/rules/lead/04-workflow.md +20 -0
  71. package/rules/shared/00-language.md +14 -0
  72. package/rules/shared/01-tool.md +138 -0
  73. package/scripts/bootstrap.mjs +184 -0
  74. package/scripts/bridge-unify-smoke.mjs +308 -0
  75. package/scripts/build-runtime-linux.sh +348 -0
  76. package/scripts/build-runtime-macos.sh +217 -0
  77. package/scripts/build-runtime-windows.ps1 +242 -0
  78. package/scripts/builtin-utils-smoke.mjs +392 -0
  79. package/scripts/check-json.mjs +45 -0
  80. package/scripts/check-syntax-changed.mjs +102 -0
  81. package/scripts/check-syntax.mjs +58 -0
  82. package/scripts/code-graph-batch.test.mjs +33 -0
  83. package/scripts/config-preserve-smoke.mjs +180 -0
  84. package/scripts/doctor.mjs +484 -0
  85. package/scripts/edit-normalize-fuzz.mjs +130 -0
  86. package/scripts/edit-normalize-smoke.mjs +401 -0
  87. package/scripts/edit-operation-smoke.mjs +369 -0
  88. package/scripts/edit2-smoke.mjs +63 -0
  89. package/scripts/fuzzy-e2e.mjs +28 -0
  90. package/scripts/fuzzy-smoke.mjs +26 -0
  91. package/scripts/generate-runtime-manifest.mjs +166 -0
  92. package/scripts/guard-smoke.mjs +66 -0
  93. package/scripts/hidden-role-schema-smoke.mjs +162 -0
  94. package/scripts/hook-routing-smoke.mjs +29 -0
  95. package/scripts/inject-input.ps1 +204 -0
  96. package/scripts/io-complex-smoke.mjs +667 -0
  97. package/scripts/io-explore-bench.mjs +424 -0
  98. package/scripts/io-guardrails-smoke.mjs +205 -0
  99. package/scripts/io-mini-bench-baseline.json +11 -0
  100. package/scripts/io-mini-bench.mjs +216 -0
  101. package/scripts/io-route-harness.mjs +933 -0
  102. package/scripts/io-telemetry-report.mjs +691 -0
  103. package/scripts/mutation-bench.mjs +564 -0
  104. package/scripts/mutation-io-smoke.mjs +1081 -0
  105. package/scripts/native-patch-bridge-smoke.mjs +288 -0
  106. package/scripts/native-patch-smoke.mjs +304 -0
  107. package/scripts/patch-interior-context-smoke.mjs +49 -0
  108. package/scripts/patch-newline-utf8-smoke.mjs +157 -0
  109. package/scripts/perf-hook-smoke.mjs +71 -0
  110. package/scripts/permission-eval-smoke.mjs +426 -0
  111. package/scripts/prep-patch.mjs +53 -0
  112. package/scripts/prep-shim.mjs +96 -0
  113. package/scripts/provider-cache-smoke.mjs +687 -0
  114. package/scripts/report-runtime-health.mjs +132 -0
  115. package/scripts/run-mcp.mjs +1547 -0
  116. package/scripts/salvage-v4a-shatter.test.mjs +58 -0
  117. package/scripts/scoped-cache-io-smoke.mjs +103 -0
  118. package/scripts/shell-policy-round3-smoke.mjs +46 -0
  119. package/scripts/smoke-runtime-negative.ps1 +100 -0
  120. package/scripts/smoke-runtime-negative.sh +95 -0
  121. package/scripts/stall-policy-smoke.mjs +50 -0
  122. package/scripts/start-memory-worker.mjs +23 -0
  123. package/scripts/statusline-launcher-smoke.mjs +82 -0
  124. package/scripts/stress-atomic-write.mjs +1028 -0
  125. package/scripts/test-config-rmw-restore.mjs +122 -0
  126. package/scripts/test-fault-inject.mjs +164 -0
  127. package/scripts/test-large-file.mjs +174 -0
  128. package/scripts/tool-edge-smoke.mjs +209 -0
  129. package/scripts/uninstall.mjs +201 -0
  130. package/scripts/webhook-selfheal-smoke.mjs +29 -0
  131. package/scripts/write-overwrite-guard-smoke.mjs +56 -0
  132. package/server-main.mjs +3055 -0
  133. package/server.mjs +468 -0
  134. package/setup/config-merge.mjs +254 -0
  135. package/setup/install.mjs +120 -0
  136. package/setup/launch-core.mjs +507 -0
  137. package/setup/launch.mjs +101 -0
  138. package/setup/setup-server.mjs +3206 -0
  139. package/setup/setup.html +3693 -0
  140. package/skills/retro-skill-proposer/SKILL.md +92 -0
  141. package/skills/schedule-add/SKILL.md +77 -0
  142. package/skills/setup/SKILL.md +346 -0
  143. package/skills/webhook-add/SKILL.md +81 -0
  144. package/src/agent/bridge-stall-watchdog.mjs +337 -0
  145. package/src/agent/index.mjs +2138 -0
  146. package/src/agent/orchestrator/activity-bus.mjs +38 -0
  147. package/src/agent/orchestrator/ai-wrapped-dispatch.mjs +1010 -0
  148. package/src/agent/orchestrator/bridge-retry.mjs +220 -0
  149. package/src/agent/orchestrator/bridge-trace.mjs +583 -0
  150. package/src/agent/orchestrator/cache-mtime.mjs +58 -0
  151. package/src/agent/orchestrator/config.mjs +358 -0
  152. package/src/agent/orchestrator/context/collect.mjs +651 -0
  153. package/src/agent/orchestrator/dispatch-persist.mjs +549 -0
  154. package/src/agent/orchestrator/drain-registry.mjs +50 -0
  155. package/src/agent/orchestrator/explore-validator.mjs +8 -0
  156. package/src/agent/orchestrator/internal-roles.mjs +118 -0
  157. package/src/agent/orchestrator/internal-tools.mjs +88 -0
  158. package/src/agent/orchestrator/jobs.mjs +116 -0
  159. package/src/agent/orchestrator/mcp/client.mjs +364 -0
  160. package/src/agent/orchestrator/providers/anthropic-betas.mjs +21 -0
  161. package/src/agent/orchestrator/providers/anthropic-oauth.mjs +1745 -0
  162. package/src/agent/orchestrator/providers/anthropic.mjs +437 -0
  163. package/src/agent/orchestrator/providers/gemini.mjs +1175 -0
  164. package/src/agent/orchestrator/providers/grok-oauth.mjs +782 -0
  165. package/src/agent/orchestrator/providers/model-catalog.mjs +241 -0
  166. package/src/agent/orchestrator/providers/openai-compat.mjs +1467 -0
  167. package/src/agent/orchestrator/providers/openai-oauth-ws.mjs +1890 -0
  168. package/src/agent/orchestrator/providers/openai-oauth.mjs +1307 -0
  169. package/src/agent/orchestrator/providers/openai-ws.mjs +104 -0
  170. package/src/agent/orchestrator/providers/registry.mjs +192 -0
  171. package/src/agent/orchestrator/providers/retry-classifier.mjs +325 -0
  172. package/src/agent/orchestrator/session/abort-lookup.mjs +13 -0
  173. package/src/agent/orchestrator/session/cache/post-edit-marks.mjs +42 -0
  174. package/src/agent/orchestrator/session/cache/prefetch-cache.mjs +142 -0
  175. package/src/agent/orchestrator/session/cache/read-cache.mjs +319 -0
  176. package/src/agent/orchestrator/session/cache/scoped-cache-outcome.mjs +11 -0
  177. package/src/agent/orchestrator/session/cache/scoped-cache.mjs +361 -0
  178. package/src/agent/orchestrator/session/cache/util.mjs +49 -0
  179. package/src/agent/orchestrator/session/loop.mjs +1478 -0
  180. package/src/agent/orchestrator/session/manager.mjs +1975 -0
  181. package/src/agent/orchestrator/session/read-dedup.mjs +6 -0
  182. package/src/agent/orchestrator/session/result-classification.mjs +65 -0
  183. package/src/agent/orchestrator/session/save-session-worker.mjs +18 -0
  184. package/src/agent/orchestrator/session/store.mjs +624 -0
  185. package/src/agent/orchestrator/session/stream-watchdog.mjs +130 -0
  186. package/src/agent/orchestrator/session/tool-result-offload.mjs +166 -0
  187. package/src/agent/orchestrator/session/trim.mjs +491 -0
  188. package/src/agent/orchestrator/smart-bridge/CACHE-SHARD.md +115 -0
  189. package/src/agent/orchestrator/smart-bridge/bridge-llm.mjs +327 -0
  190. package/src/agent/orchestrator/smart-bridge/cache-obs.mjs +150 -0
  191. package/src/agent/orchestrator/smart-bridge/cache-strategy.mjs +228 -0
  192. package/src/agent/orchestrator/smart-bridge/index.mjs +215 -0
  193. package/src/agent/orchestrator/smart-bridge/profiles.mjs +37 -0
  194. package/src/agent/orchestrator/smart-bridge/registry.mjs +348 -0
  195. package/src/agent/orchestrator/smart-bridge/session-builder.mjs +116 -0
  196. package/src/agent/orchestrator/stall-policy.mjs +195 -0
  197. package/src/agent/orchestrator/tool-loop-guard.mjs +75 -0
  198. package/src/agent/orchestrator/tools/bash-policy-scan.mjs +77 -0
  199. package/src/agent/orchestrator/tools/bash-session.mjs +721 -0
  200. package/src/agent/orchestrator/tools/builtin/advisory-lock.mjs +171 -0
  201. package/src/agent/orchestrator/tools/builtin/arg-guard.mjs +455 -0
  202. package/src/agent/orchestrator/tools/builtin/atomic-write.mjs +236 -0
  203. package/src/agent/orchestrator/tools/builtin/bash-tool.mjs +480 -0
  204. package/src/agent/orchestrator/tools/builtin/binary-file.mjs +76 -0
  205. package/src/agent/orchestrator/tools/builtin/builtin-tools.mjs +256 -0
  206. package/src/agent/orchestrator/tools/builtin/cache-layers.mjs +386 -0
  207. package/src/agent/orchestrator/tools/builtin/cwd-utils.mjs +37 -0
  208. package/src/agent/orchestrator/tools/builtin/device-paths.mjs +154 -0
  209. package/src/agent/orchestrator/tools/builtin/diagnostics-tool.mjs +292 -0
  210. package/src/agent/orchestrator/tools/builtin/diff-utils.mjs +109 -0
  211. package/src/agent/orchestrator/tools/builtin/edit-base-guard.mjs +58 -0
  212. package/src/agent/orchestrator/tools/builtin/edit-byte-plan.mjs +240 -0
  213. package/src/agent/orchestrator/tools/builtin/edit-byte-utils.mjs +113 -0
  214. package/src/agent/orchestrator/tools/builtin/edit-commit.mjs +74 -0
  215. package/src/agent/orchestrator/tools/builtin/edit-context-utils.mjs +242 -0
  216. package/src/agent/orchestrator/tools/builtin/edit-diagnostics.mjs +211 -0
  217. package/src/agent/orchestrator/tools/builtin/edit-engine.mjs +1364 -0
  218. package/src/agent/orchestrator/tools/builtin/edit-failure-context.mjs +126 -0
  219. package/src/agent/orchestrator/tools/builtin/edit-hint.mjs +141 -0
  220. package/src/agent/orchestrator/tools/builtin/edit-match-utils.mjs +194 -0
  221. package/src/agent/orchestrator/tools/builtin/edit-partial-write.mjs +60 -0
  222. package/src/agent/orchestrator/tools/builtin/edit-stale-refresh.mjs +168 -0
  223. package/src/agent/orchestrator/tools/builtin/edit-tool.mjs +173 -0
  224. package/src/agent/orchestrator/tools/builtin/edit-utf8-guard.mjs +48 -0
  225. package/src/agent/orchestrator/tools/builtin/fs-reachability.mjs +48 -0
  226. package/src/agent/orchestrator/tools/builtin/fuzzy-match.mjs +99 -0
  227. package/src/agent/orchestrator/tools/builtin/glob-walk.mjs +170 -0
  228. package/src/agent/orchestrator/tools/builtin/grep-formatting.mjs +113 -0
  229. package/src/agent/orchestrator/tools/builtin/hash-utils.mjs +6 -0
  230. package/src/agent/orchestrator/tools/builtin/list-formatting.mjs +7 -0
  231. package/src/agent/orchestrator/tools/builtin/list-tool.mjs +593 -0
  232. package/src/agent/orchestrator/tools/builtin/native-edit-runner.mjs +89 -0
  233. package/src/agent/orchestrator/tools/builtin/notebook-edit-tool.mjs +300 -0
  234. package/src/agent/orchestrator/tools/builtin/open-config-tool.mjs +26 -0
  235. package/src/agent/orchestrator/tools/builtin/path-diagnostics.mjs +152 -0
  236. package/src/agent/orchestrator/tools/builtin/path-locks.mjs +35 -0
  237. package/src/agent/orchestrator/tools/builtin/path-utils.mjs +201 -0
  238. package/src/agent/orchestrator/tools/builtin/read-args.mjs +103 -0
  239. package/src/agent/orchestrator/tools/builtin/read-batch.mjs +172 -0
  240. package/src/agent/orchestrator/tools/builtin/read-constants.mjs +40 -0
  241. package/src/agent/orchestrator/tools/builtin/read-formatting.mjs +118 -0
  242. package/src/agent/orchestrator/tools/builtin/read-image-resize.mjs +189 -0
  243. package/src/agent/orchestrator/tools/builtin/read-image.mjs +88 -0
  244. package/src/agent/orchestrator/tools/builtin/read-lines.mjs +12 -0
  245. package/src/agent/orchestrator/tools/builtin/read-mode-tool.mjs +455 -0
  246. package/src/agent/orchestrator/tools/builtin/read-open.mjs +190 -0
  247. package/src/agent/orchestrator/tools/builtin/read-range-index.mjs +271 -0
  248. package/src/agent/orchestrator/tools/builtin/read-ranges.mjs +26 -0
  249. package/src/agent/orchestrator/tools/builtin/read-single-tool.mjs +728 -0
  250. package/src/agent/orchestrator/tools/builtin/read-snapshot-runtime.mjs +173 -0
  251. package/src/agent/orchestrator/tools/builtin/read-special-files.mjs +268 -0
  252. package/src/agent/orchestrator/tools/builtin/read-streaming.mjs +602 -0
  253. package/src/agent/orchestrator/tools/builtin/read-tool.mjs +530 -0
  254. package/src/agent/orchestrator/tools/builtin/read-windows.mjs +107 -0
  255. package/src/agent/orchestrator/tools/builtin/rename-tool.mjs +196 -0
  256. package/src/agent/orchestrator/tools/builtin/rg-runner.mjs +422 -0
  257. package/src/agent/orchestrator/tools/builtin/search-builders.mjs +158 -0
  258. package/src/agent/orchestrator/tools/builtin/search-tool.mjs +869 -0
  259. package/src/agent/orchestrator/tools/builtin/shell-analysis.mjs +653 -0
  260. package/src/agent/orchestrator/tools/builtin/shell-jobs.mjs +936 -0
  261. package/src/agent/orchestrator/tools/builtin/shell-output.mjs +36 -0
  262. package/src/agent/orchestrator/tools/builtin/shell-runtime.mjs +214 -0
  263. package/src/agent/orchestrator/tools/builtin/snapshot-helpers.mjs +143 -0
  264. package/src/agent/orchestrator/tools/builtin/snapshot-store.mjs +206 -0
  265. package/src/agent/orchestrator/tools/builtin/snapshot-validation.mjs +98 -0
  266. package/src/agent/orchestrator/tools/builtin/text-stats.mjs +69 -0
  267. package/src/agent/orchestrator/tools/builtin/windows-roots.mjs +23 -0
  268. package/src/agent/orchestrator/tools/builtin/write-tool.mjs +401 -0
  269. package/src/agent/orchestrator/tools/builtin.mjs +500 -0
  270. package/src/agent/orchestrator/tools/code-graph-prewarm-worker.mjs +39 -0
  271. package/src/agent/orchestrator/tools/code-graph-tool-defs.mjs +24 -0
  272. package/src/agent/orchestrator/tools/code-graph.mjs +4095 -0
  273. package/src/agent/orchestrator/tools/cwd-tool.mjs +298 -0
  274. package/src/agent/orchestrator/tools/destructive-warning.mjs +323 -0
  275. package/src/agent/orchestrator/tools/edit-normalize.mjs +603 -0
  276. package/src/agent/orchestrator/tools/env-scrub.mjs +100 -0
  277. package/src/agent/orchestrator/tools/graph-binary-fetcher.mjs +144 -0
  278. package/src/agent/orchestrator/tools/graph-manifest.json +26 -0
  279. package/src/agent/orchestrator/tools/host-input.mjs +204 -0
  280. package/src/agent/orchestrator/tools/mutation-content-cache.mjs +67 -0
  281. package/src/agent/orchestrator/tools/mutation-planner.mjs +75 -0
  282. package/src/agent/orchestrator/tools/next-call-utils.mjs +48 -0
  283. package/src/agent/orchestrator/tools/patch-binary-fetcher.mjs +133 -0
  284. package/src/agent/orchestrator/tools/patch-manifest.json +26 -0
  285. package/src/agent/orchestrator/tools/patch-tool-defs.mjs +20 -0
  286. package/src/agent/orchestrator/tools/patch.mjs +2754 -0
  287. package/src/agent/orchestrator/tools/progress-message.mjs +118 -0
  288. package/src/agent/orchestrator/tools/result-compression.mjs +279 -0
  289. package/src/agent/orchestrator/tools/shell-command.mjs +865 -0
  290. package/src/agent/orchestrator/tools/shell-exec-policy.mjs +89 -0
  291. package/src/agent/orchestrator/tools/shell-policy-danger-target.mjs +27 -0
  292. package/src/agent/orchestrator/tools/shell-policy-imports.mjs +7 -0
  293. package/src/agent/orchestrator/tools/shell-policy.mjs +345 -0
  294. package/src/agent/orchestrator/tools/shell-snapshot.mjs +313 -0
  295. package/src/agent/orchestrator/workflow-store.mjs +93 -0
  296. package/src/agent/tool-defs.mjs +103 -0
  297. package/src/channels/backends/discord.mjs +784 -0
  298. package/src/channels/data/voice-runtime-manifest.json +138 -0
  299. package/src/channels/index.mjs +3229 -0
  300. package/src/channels/lib/cli-worker-host.mjs +12 -0
  301. package/src/channels/lib/config-lock.mjs +13 -0
  302. package/src/channels/lib/config.mjs +292 -0
  303. package/src/channels/lib/drop-trace.mjs +71 -0
  304. package/src/channels/lib/event-pipeline.mjs +81 -0
  305. package/src/channels/lib/event-queue.mjs +345 -0
  306. package/src/channels/lib/executor.mjs +168 -0
  307. package/src/channels/lib/format.mjs +188 -0
  308. package/src/channels/lib/holidays.mjs +138 -0
  309. package/src/channels/lib/hook-pipe-server.mjs +802 -0
  310. package/src/channels/lib/interaction-workflows.mjs +184 -0
  311. package/src/channels/lib/memory-client.mjs +149 -0
  312. package/src/channels/lib/output-forwarder.mjs +765 -0
  313. package/src/channels/lib/runtime-paths.mjs +479 -0
  314. package/src/channels/lib/scheduler.mjs +723 -0
  315. package/src/channels/lib/session-control.mjs +36 -0
  316. package/src/channels/lib/session-discovery.mjs +103 -0
  317. package/src/channels/lib/settings.mjs +11 -0
  318. package/src/channels/lib/state-file.mjs +68 -0
  319. package/src/channels/lib/status-snapshot.mjs +219 -0
  320. package/src/channels/lib/tool-format.mjs +140 -0
  321. package/src/channels/lib/transcript-discovery.mjs +195 -0
  322. package/src/channels/lib/voice-runtime-fetcher.mjs +734 -0
  323. package/src/channels/lib/webhook.mjs +1179 -0
  324. package/src/channels/lib/whisper-server.mjs +477 -0
  325. package/src/channels/tool-defs.mjs +170 -0
  326. package/src/daemon/host.mjs +118 -0
  327. package/src/daemon/mcp-transport.mjs +47 -0
  328. package/src/daemon/session.mjs +100 -0
  329. package/src/daemon/thin-client.mjs +71 -0
  330. package/src/daemon/transport.mjs +163 -0
  331. package/src/memory/data/runtime-manifest.json +40 -0
  332. package/src/memory/index.mjs +3305 -0
  333. package/src/memory/lib/agent-ipc.mjs +93 -0
  334. package/src/memory/lib/bridge-trace-queries.mjs +120 -0
  335. package/src/memory/lib/core-memory-store.mjs +330 -0
  336. package/src/memory/lib/embedding-provider.mjs +269 -0
  337. package/src/memory/lib/embedding-worker.mjs +323 -0
  338. package/src/memory/lib/llm-worker-host.mjs +17 -0
  339. package/src/memory/lib/memory-cycle.mjs +11 -0
  340. package/src/memory/lib/memory-cycle1.mjs +641 -0
  341. package/src/memory/lib/memory-cycle2.mjs +1284 -0
  342. package/src/memory/lib/memory-cycle3.mjs +540 -0
  343. package/src/memory/lib/memory-embed.mjs +299 -0
  344. package/src/memory/lib/memory-extraction.mjs +5 -0
  345. package/src/memory/lib/memory-maintenance-store.mjs +32 -0
  346. package/src/memory/lib/memory-ops-policy.mjs +190 -0
  347. package/src/memory/lib/memory-recall-id-patch.mjs +15 -0
  348. package/src/memory/lib/memory-recall-read-query.mjs +7 -0
  349. package/src/memory/lib/memory-recall-scope-filter.mjs +63 -0
  350. package/src/memory/lib/memory-recall-store.mjs +621 -0
  351. package/src/memory/lib/memory-retrievers.mjs +112 -0
  352. package/src/memory/lib/memory-score.mjs +71 -0
  353. package/src/memory/lib/memory-text-utils.mjs +58 -0
  354. package/src/memory/lib/memory.mjs +412 -0
  355. package/src/memory/lib/model-profile.mjs +85 -0
  356. package/src/memory/lib/pg/adapter.mjs +308 -0
  357. package/src/memory/lib/pg/process.mjs +360 -0
  358. package/src/memory/lib/pg/supervisor.mjs +396 -0
  359. package/src/memory/lib/project-id-resolver.mjs +86 -0
  360. package/src/memory/lib/runtime-fetcher.mjs +442 -0
  361. package/src/memory/lib/trace-store.mjs +728 -0
  362. package/src/memory/tool-defs.mjs +79 -0
  363. package/src/search/index.mjs +1173 -0
  364. package/src/search/lib/backends/anthropic-oauth.mjs +98 -0
  365. package/src/search/lib/backends/exa.mjs +50 -0
  366. package/src/search/lib/backends/firecrawl.mjs +61 -0
  367. package/src/search/lib/backends/gemini-api.mjs +83 -0
  368. package/src/search/lib/backends/grok-oauth.mjs +86 -0
  369. package/src/search/lib/backends/index.mjs +150 -0
  370. package/src/search/lib/backends/openai-api.mjs +144 -0
  371. package/src/search/lib/backends/openai-oauth.mjs +98 -0
  372. package/src/search/lib/backends/openai-web-search.mjs +76 -0
  373. package/src/search/lib/backends/tavily.mjs +55 -0
  374. package/src/search/lib/backends/xai-api.mjs +113 -0
  375. package/src/search/lib/cache.mjs +131 -0
  376. package/src/search/lib/config.mjs +192 -0
  377. package/src/search/lib/formatter.mjs +115 -0
  378. package/src/search/lib/provider-usage.mjs +67 -0
  379. package/src/search/lib/providers.mjs +47 -0
  380. package/src/search/lib/search-intent.mjs +109 -0
  381. package/src/search/lib/setup-handler.mjs +261 -0
  382. package/src/search/lib/state.mjs +201 -0
  383. package/src/search/lib/web-tools.mjs +1207 -0
  384. package/src/search/tool-defs.mjs +83 -0
  385. package/src/setup/defender-exclusion.mjs +183 -0
  386. package/src/shared/abort-controller.mjs +15 -0
  387. package/src/shared/atomic-file.mjs +420 -0
  388. package/src/shared/config.mjs +350 -0
  389. package/src/shared/daemon-recycle.mjs +108 -0
  390. package/src/shared/disable-claude-builtins.mjs +88 -0
  391. package/src/shared/err-text.mjs +12 -0
  392. package/src/shared/llm/cost.mjs +66 -0
  393. package/src/shared/llm/http-agent.mjs +123 -0
  394. package/src/shared/llm/index.mjs +41 -0
  395. package/src/shared/llm/pid-cleanup.mjs +27 -0
  396. package/src/shared/llm/usage-log.mjs +47 -0
  397. package/src/shared/plugin-paths.mjs +58 -0
  398. package/src/shared/schedules-store.mjs +70 -0
  399. package/src/shared/seed.mjs +119 -0
  400. package/src/shared/user-cwd.mjs +213 -0
  401. package/src/shared/user-data-guard.mjs +238 -0
  402. package/src/status/aggregator.mjs +584 -0
  403. package/src/status/server.mjs +413 -0
  404. package/tools.json +1653 -0
@@ -0,0 +1,4095 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { resolve as pathResolve, isAbsolute, dirname, relative as pathRelative, join } from 'node:path';
3
+ import { readdirSync, readFileSync, statSync, existsSync, mkdirSync, renameSync, unlinkSync } from 'node:fs';
4
+ import { Worker } from 'node:worker_threads';
5
+ import { fileURLToPath } from 'node:url';
6
+ import {
7
+ normalizeInputPath,
8
+ normalizeOutputPath,
9
+ toDisplayPath,
10
+ } from './builtin.mjs';
11
+ import { getPluginData } from '../config.mjs';
12
+ import { ensureGraphBinary, findCachedGraphBinary } from './graph-binary-fetcher.mjs';
13
+ import { getCapabilities } from '../../../shared/config.mjs';
14
+ import { writeJsonAtomicSync } from '../../../shared/atomic-file.mjs';
15
+ import { CODE_GRAPH_TOOL_DEFS } from './code-graph-tool-defs.mjs';
16
+ import { markScopedCacheIncomplete } from '../session/cache/scoped-cache-outcome.mjs';
17
+
18
const CODE_GRAPH_TTL_MS = 30 * 1000;
const CODE_GRAPH_MAX_FILES = 10 * 1000;
const CODE_GRAPH_WORKER_TIMEOUT_MS = 120 * 1000;
// File name of the legacy single-file cache. Retained only so the one-shot
// migration can locate it; new writes use the per-cwd directory layout.
const CODE_GRAPH_DISK_FILE = 'code-graph-cache.json';
// Directory of the per-cwd cache: <data>/code-graph-cache/manifest.json plus
// one <hash>.json per indexed root. Replaces the unbounded single-file blob
// (observed >50 MB on long-running workspaces) that had to be JSON.parsed in
// full on every fresh process startup.
const CODE_GRAPH_DISK_DIR = 'code-graph-cache';
const CODE_GRAPH_DISK_MAX_ENTRIES = 24;
// Disk budget in bytes: MIXDOG_CODE_GRAPH_CACHE_MAX_MB (default 80) converted
// to bytes and never allowed below 1 MiB.
const CODE_GRAPH_DISK_MAX_BYTES = (() => {
  const megabytes = Number(process.env.MIXDOG_CODE_GRAPH_CACHE_MAX_MB) || 80;
  return Math.max(1 * 1024 * 1024, Math.floor(megabytes * 1024 * 1024));
})();
// In-memory entry limit: MIXDOG_CODE_GRAPH_MEMORY_MAX_ENTRIES (default 6),
// rounded down and never allowed below 1.
const CODE_GRAPH_MEMORY_MAX_ENTRIES = (() => {
  const entries = Number(process.env.MIXDOG_CODE_GRAPH_MEMORY_MAX_ENTRIES) || 6;
  return Math.max(1, Math.floor(entries));
})();
// In-memory source budget in bytes: MIXDOG_CODE_GRAPH_MEMORY_MAX_MB
// (default 48) converted to bytes and never allowed below 1 MiB.
const CODE_GRAPH_MEMORY_MAX_SOURCE_BYTES = (() => {
  const megabytes = Number(process.env.MIXDOG_CODE_GRAPH_MEMORY_MAX_MB) || 48;
  return Math.max(1 * 1024 * 1024, Math.floor(megabytes * 1024 * 1024));
})();
42
// Module-level cache containers.
const _codeGraphCache = new Map();
const _diskCodeGraphCache = new Map();
const _codeGraphDirtyPaths = new Map();
// Async builds currently in flight, keyed by canonical graphCwd. Parallel
// callers for the same cwd (prewarm + cache-miss + multiple find_symbol)
// share a single Worker spawn rather than fanning out. The entry is removed
// on settle so a caller arriving after a failure can retry.
const _inflightAsyncBuilds = new Map();

// Disk-cache bookkeeping.
let _diskCodeGraphCacheLoaded = false;
let _diskCodeGraphCacheFlushTimer = null;
// The per-cwd manifest is read at boot; individual per-cwd entries load on
// demand via _ensureCwdLoaded(cwd). This avoids the cold-start I/O spike
// every fresh process paid when the legacy single-file cache grew unbounded.
let _diskManifest = null;
56
+ // Dirty-generation guard. markCodeGraphDirtyPaths() bumps the per-cwd
57
+ // counter every time a write invalidates a root. A build captures the
58
+ // generation at start and only commits its result (in-memory + disk) if
59
+ // the generation is unchanged on completion. An in-flight worker build
60
+ // that started BEFORE a write would otherwise repopulate _codeGraphCache
61
+ // with the stale pre-edit graph AFTER markCodeGraphDirtyPaths cleared it,
62
+ // and the TTL fast path would then serve that stale graph for up to
63
+ // CODE_GRAPH_TTL_MS.
64
const _codeGraphDirtyGen = new Map();

/**
 * Read the current dirty generation for a graph root.
 * @param {string} graphCwd - Key of the root in the generation map.
 * @returns {number} The stored counter, or 0 when the root has none yet.
 */
function _getCodeGraphGen(graphCwd) {
  const generation = _codeGraphDirtyGen.get(graphCwd);
  return generation ? generation : 0;
}

/**
 * Advance the dirty generation for a graph root by one.
 * @param {string} graphCwd - Key of the root in the generation map.
 */
function _bumpCodeGraphGen(graphCwd) {
  const nextGeneration = _getCodeGraphGen(graphCwd) + 1;
  _codeGraphDirtyGen.set(graphCwd, nextGeneration);
}
71
+
72
/**
 * Location of the per-cwd cache directory.
 * @returns {string} CODE_GRAPH_DISK_DIR joined onto the plugin data path.
 */
function _codeGraphDiskDir() {
  const dataRoot = getPluginData();
  return join(dataRoot, CODE_GRAPH_DISK_DIR);
}
75
+
76
// Upper bound on memoized cwd hashes, and the memo itself (canonical cwd ->
// 16-hex-character digest prefix).
const _HASH_CWD_CACHE_MAX = 50;
const _hashCwdCache = new Map();

/**
 * Short, stable identifier for a graph root: the first 16 hex characters of
 * SHA-256 over the canonical cwd.
 *
 * Results are memoized because the same canonical cwd is hashed repeatedly
 * on persist/sweep hot paths. The memo is capped so a long-lived process
 * cycling through many cwds cannot grow it without bound.
 *
 * @param {string} cwd - Working directory; canonicalized before hashing.
 * @returns {string} 16-character lowercase hex string.
 */
function _hashCwd(cwd) {
  const canonicalCwd = _canonicalGraphCwd(cwd);
  const memoized = _hashCwdCache.get(canonicalCwd);
  if (memoized !== undefined) return memoized;

  const fullDigest = createHash('sha256').update(canonicalCwd).digest('hex');
  const shortHash = fullDigest.slice(0, 16);

  if (_hashCwdCache.size >= _HASH_CWD_CACHE_MAX) {
    // Map iterates in insertion order, so the first key is the oldest entry.
    const oldestKey = _hashCwdCache.keys().next().value;
    _hashCwdCache.delete(oldestKey);
  }
  _hashCwdCache.set(canonicalCwd, shortHash);
  return shortHash;
}
94
+
95
/**
 * One-shot migration of the legacy single-file cache into the in-memory disk
 * cache map, followed by a flush into the per-cwd layout.
 *
 * Does nothing when the legacy file is absent. Otherwise:
 *   1. Parses the legacy JSON and copies every usable entry into
 *      _diskCodeGraphCache under its canonical cwd key.
 *   2. Renames the legacy file to `<legacy>.bak-<timestamp>` (rename rather
 *      than delete, so a rollback can recover the blob).
 *   3. Schedules a disk flush so the loaded entries are written out as
 *      per-cwd files right away instead of waiting for an unrelated rebuild.
 *
 * The rename is attempted even when step 1 fails. Previously an unparseable
 * file, or a single bad key, aborted before the rename, so the legacy blob
 * was re-read and re-parsed every time migration ran.
 *
 * Failures are reported on stderr and never thrown to the caller.
 */
function _migrateLegacyDiskCache() {
  const legacy = join(getPluginData(), CODE_GRAPH_DISK_FILE);
  if (!existsSync(legacy)) return;

  let imported = false;
  try {
    const parsed = JSON.parse(readFileSync(legacy, 'utf8'));
    // The legacy layout is a plain { cwd: entry } object. An array also has
    // typeof 'object' but would import entries under index-named roots.
    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
      for (const [cwd, entry] of Object.entries(parsed)) {
        // An empty key would make _canonicalGraphCwd throw and abort the
        // whole migration; skip it along with non-object entries.
        if (!cwd || !entry || typeof entry !== 'object') continue;
        _diskCodeGraphCache.set(_canonicalGraphCwd(cwd), entry);
      }
    }
    imported = true;
  } catch (err) {
    process.stderr.write(`[code-graph] legacy cache migration failed: ${err?.message || err}\n`);
  }

  try {
    // Move the legacy file aside regardless of the parse outcome, so this
    // function stays a one-shot even for a corrupt blob.
    renameSync(legacy, `${legacy}.bak-${Date.now()}`);
    // Without an immediate flush the layout transition would be half-done
    // (legacy renamed, new layout empty) until some later build persists.
    if (imported) _scheduleDiskCodeGraphCacheFlush();
  } catch (err) {
    process.stderr.write(`[code-graph] legacy cache migration failed: ${err?.message || err}\n`);
  }
}
120
+
121
/**
 * Canonical form of a graph root: an absolute path, lowercased on Windows.
 * @param {string} cwd - Working directory supplied by the caller.
 * @returns {string} Resolved (and, on win32, lowercased) path.
 * @throws {Error} When cwd is falsy.
 */
function _canonicalGraphCwd(cwd) {
  if (cwd) {
    const absolute = pathResolve(cwd);
    if (process.platform !== 'win32') return absolute;
    return absolute.toLowerCase();
  }
  throw new Error('code_graph requires cwd — caller did not provide a working directory');
}
126
+
127
/**
 * Canonical form of an arbitrary path: absolute, lowercased on Windows.
 * A falsy input is treated as the empty string before resolving.
 * @param {*} p - Path-like value; stringified before resolving.
 * @returns {string} Resolved (and, on win32, lowercased) path.
 */
function _canonicalGraphPath(p) {
  const raw = p ? String(p) : String('');
  const absolute = pathResolve(raw);
  if (process.platform === 'win32') return absolute.toLowerCase();
  return absolute;
}
131
+
132
+ // Bump when the per-symbol record SHAPE changes (e.g. adding endLine). The
133
+ // version is folded into the cache signature so graphs built by an older
134
+ // binary/schema (symbols without a finite endLine) no longer match and are
135
+ // rebuilt instead of served — otherwise a stale cache would feed endLine-less
136
+ // symbols and silently defeat body-span containment in _nearestEnclosingSymbol.
137
const SYMBOL_SCHEMA_VERSION = 'sym-range-v3-rustimports';

/**
 * Compute the cache signature for a set of files.
 *
 * The SHA-1 digest covers the symbol schema version followed by one
 * `<rel>\0<fp>` line per file, in iteration order. Including rel next to fp
 * means a rename or a path swap (the same bytes under a different rel)
 * changes the signature; hashing fp alone would collide across such pairs.
 *
 * @param {Iterable<{rel?: string, fp: *}>} fileMetas - Per-file metadata.
 * @returns {string} Hex-encoded SHA-1 digest.
 */
function _computeGraphSignature(fileMetas) {
  const digest = createHash('sha1');
  digest.update(`${SYMBOL_SCHEMA_VERSION}\n`);
  for (const { rel, fp } of fileMetas) {
    const relKey = rel || '';
    digest.update(`${relKey}\0${fp}\n`);
  }
  return digest.digest('hex');
}
150
+
151
// Convert a live graph into the plain object persisted on disk.
// The payload is deliberately compact: empty arrays and falsy scalars are
// left out of each node record, and _deserializeGraph fills the gaps back
// in with defaults. `resolvedImportsRel` is stored under the key
// `resolvedImports`.
function _serializeGraph(graph) {
  const hasItems = (value) => Array.isArray(value) && value.length > 0;

  const toRecord = (node) => {
    const record = { rel: node.rel, lang: node.lang };
    if (node.fingerprint) record.fingerprint = node.fingerprint;
    if (hasItems(node.rawImports)) record.rawImports = node.rawImports;
    if (hasItems(node.resolvedImportsRel)) record.resolvedImports = node.resolvedImportsRel;
    if (hasItems(node.importedBy)) record.importedBy = node.importedBy;
    for (const field of ['packageName', 'namespaceName', 'goPackageName']) {
      if (node[field]) record[field] = node[field];
    }
    if (hasItems(node.topLevelTypes)) record.topLevelTypes = node.topLevelTypes;
    if (hasItems(node.tokenSymbols)) record.tokenSymbols = node.tokenSymbols;
    if (hasItems(node.symbols)) record.symbols = node.symbols;
    return record;
  };

  // A graph without a usable `nodes.values()` serializes to an empty list.
  const liveNodes = [...(graph?.nodes?.values?.() || [])];
  return {
    schemaVersion: SYMBOL_SCHEMA_VERSION,
    builtAt: Number(graph?.builtAt || Date.now()),
    signature: String(graph?.signature || ''),
    truncated: Boolean(graph?.truncated),
    maxFiles: CODE_GRAPH_MAX_FILES,
    nodes: liveNodes.map((node) => toRecord(node)),
  };
}
192
+
193
// Rebuild a live graph object from a persisted payload.
// Returns null when the payload is not an object with a `nodes` array.
// Items lacking a string `rel` or `lang` are skipped. Persisted paths are
// repo-relative; absolute forms are re-derived against `cwd`.
function _deserializeGraph(cwd, payload) {
  const usable = payload && typeof payload === 'object' && Array.isArray(payload.nodes);
  if (!usable) return null;

  const onlyStrings = (value) => (Array.isArray(value) ? value.filter((v) => typeof v === 'string') : []);
  const arrayOr = (value, fallback) => (Array.isArray(value) ? value : fallback);

  const nodes = new Map();
  const reverse = new Map();
  for (const item of payload.nodes) {
    if (!item || typeof item.rel !== 'string' || typeof item.lang !== 'string') continue;
    const forwardRels = onlyStrings(item.resolvedImports);
    const node = {
      abs: pathResolve(cwd, item.rel),
      rel: item.rel,
      lang: item.lang,
      fingerprint: item.fingerprint || '',
      rawImports: arrayOr(item.rawImports, []),
      resolvedImportsRel: forwardRels,
      resolvedImports: forwardRels.map((rel) => pathResolve(cwd, rel)),
      importedBy: onlyStrings(item.importedBy),
      packageName: item.packageName || '',
      namespaceName: item.namespaceName || '',
      goPackageName: item.goPackageName || '',
      topLevelTypes: arrayOr(item.topLevelTypes, []),
      // null (not []) when absent: _getTokenSymbolsForNode treats a
      // non-array as "not extracted yet" and computes tokens on demand.
      tokenSymbols: arrayOr(item.tokenSymbols, null),
      symbols: arrayOr(item.symbols, []),
    };
    nodes.set(node.rel, node);
    // The reverse index comes from each node's FORWARD edges rather than
    // the persisted importedBy list, which can be stale for reused nodes;
    // deriving it here keeps reverse consistent with resolvedImportsRel.
    for (const target of forwardRels) {
      let importers = reverse.get(target);
      if (!importers) {
        importers = new Set();
        reverse.set(target, importers);
      }
      importers.add(node.rel);
    }
  }

  const graph = _attachGraphRuntimeCaches({
    cwd,
    nodes,
    reverse,
    // Payloads written before schemaVersion existed yield null here.
    schemaVersion: typeof payload.schemaVersion === 'string' ? payload.schemaVersion : null,
    builtAt: Number(payload.builtAt || Date.now()),
    signature: String(payload.signature || ''),
  });
  // Carry the persisted truncation flag over to the restored graph.
  if (graph && payload.truncated) graph.truncated = true;
  return graph;
}
247
+
248
// Make sure a graph object carries its per-process runtime caches.
// Any cache Map that is missing is created; ones already present are kept.
// The token index is flagged dirty unless the flag is already a boolean.
// Non-object inputs are returned untouched.
function _attachGraphRuntimeCaches(graph) {
  if (!graph || typeof graph !== 'object') return graph;
  const mapFields = [
    '_referenceSearchCache',
    '_maskedLinesCache',
    '_sourceLinesCache',
    '_sourceTextCache',
    '_symbolTokenIndex',
  ];
  for (const field of mapFields) {
    if (!graph[field]) graph[field] = new Map();
  }
  if (typeof graph._symbolTokenIndexDirty !== 'boolean') {
    graph._symbolTokenIndexDirty = true;
  }
  return graph;
}
258
+
259
// True for the languages whose identifiers may contain `$`: JavaScript,
// TypeScript and PHP. Every other language is excluded on purpose — for
// example bash's `$` is a variable-expansion sigil rather than part of the
// name, so treating it as an identifier character would mis-tokenize.
function _langUsesDollarInIdentifiers(lang) {
  switch (lang) {
    case 'javascript':
    case 'typescript':
    case 'php':
      return true;
    default:
      return false;
  }
}
269
+
270
// True for the languages where a name may end in `!` or `?`: ruby
// (`save!` / `empty?`), rust (`!` macros) and elixir (`valid?` / `save!`).
// Kotlin is intentionally absent: its `!!` is the not-null assertion
// operator, so folding it into the name would break reference matching.
function _langAllowsBangQuestionSuffix(lang) {
  switch (lang) {
    case 'ruby':
    case 'rust':
    case 'elixir':
      return true;
    default:
      return false;
  }
}
280
+
281
// Approximate, in UTF-8 bytes, how much text a graph's runtime caches hold:
// source text entries, masked lines, source lines and reference-search
// memos. Missing caches count as zero; non-array line entries are skipped.
function _estimateGraphRuntimeCacheBytes(graph) {
  if (!graph) return 0;

  const utf8Bytes = (value) => Buffer.byteLength(String(value || ''), 'utf8');
  const lineCacheBytes = (cache) => {
    let bytes = 0;
    for (const lines of cache?.values() || []) {
      if (!Array.isArray(lines)) continue;
      for (const line of lines) bytes += utf8Bytes(line);
    }
    return bytes;
  };

  let total = 0;
  for (const entry of graph._sourceTextCache?.values() || []) {
    total += utf8Bytes(entry?.text);
  }
  total += lineCacheBytes(graph._maskedLinesCache);
  total += lineCacheBytes(graph._sourceLinesCache);
  for (const memo of graph._referenceSearchCache?.values() || []) {
    total += utf8Bytes(memo);
  }
  return total;
}
300
+
301
// Empty every runtime cache on the graph (the Maps themselves are kept)
// and mark the symbol token index dirty so it is rebuilt on next use.
// A missing graph is a no-op; missing individual caches are skipped.
function _clearGraphRuntimeCaches(graph) {
  if (!graph) return;
  const cacheFields = [
    '_sourceTextCache',
    '_maskedLinesCache',
    '_sourceLinesCache',
    '_referenceSearchCache',
    '_symbolTokenIndex',
  ];
  for (const field of cacheFields) {
    graph[field]?.clear();
  }
  graph._symbolTokenIndexDirty = true;
}
310
+
311
// Mark a memory-cache entry as most recently used: refresh its lastAccess
// stamp and re-insert it so it moves to the end of the Map's iteration
// order (the eviction loop removes from the front). No-op when the cwd has
// no entry.
function _touchCodeGraphCache(graphCwd) {
  const cacheKey = _canonicalGraphCwd(graphCwd);
  const hit = _codeGraphCache.get(cacheKey);
  if (hit) {
    _codeGraphCache.delete(cacheKey);
    hit.lastAccess = Date.now();
    _codeGraphCache.set(cacheKey, hit);
  }
}
319
+
320
// Store (or replace) the memory-cache entry for a cwd. A shallow copy of
// `entry` stamped with a fresh lastAccess is inserted at the end of the
// Map's iteration order, then the cache is pruned back within its limits.
function _setCodeGraphCache(graphCwd, entry) {
  const cacheKey = _canonicalGraphCwd(graphCwd);
  const stamped = Object.assign({}, entry, { lastAccess: Date.now() });
  // Deleting first guarantees re-insertion at the tail even on replace.
  _codeGraphCache.delete(cacheKey);
  _codeGraphCache.set(cacheKey, stamped);
  _pruneCodeGraphMemoryCache();
}
327
+
328
/**
 * Bring the in-memory graph cache back within its limits.
 *
 * Two independent passes run in order:
 *  1. While the summed runtime-cache bytes exceed the byte limit, the
 *     runtime caches of the least recently accessed graphs are cleared
 *     (the graph entries themselves stay cached).
 *  2. While the cache holds more entries than allowed, the entry at the
 *     front of the Map's iteration order is removed.
 *
 * @param {{maxEntries?: number, maxBytes?: number}} [options] Overrides for
 *   the module defaults; non-finite values fall back to the defaults.
 *   maxEntries is clamped to at least 1, maxBytes to at least 0.
 * @returns {{evicted: Array<object>, totalRuntimeBytes: number, entries: number}}
 *   What was cleared/removed, the byte total after pass 1, and the number
 *   of entries left in the cache.
 */
export function _pruneCodeGraphMemoryCache(options = {}) {
  const entryLimit = Number.isFinite(options.maxEntries)
    ? Math.max(1, Math.floor(options.maxEntries))
    : CODE_GRAPH_MEMORY_MAX_ENTRIES;
  const byteLimit = Number.isFinite(options.maxBytes)
    ? Math.max(0, Math.floor(options.maxBytes))
    : CODE_GRAPH_MEMORY_MAX_SOURCE_BYTES;

  const rows = [];
  for (const [cwd, entry] of _codeGraphCache) {
    rows.push({
      cwd,
      entry,
      lastAccess: Number(entry?.lastAccess || entry?.ts || 0),
      runtimeBytes: _estimateGraphRuntimeCacheBytes(entry?.graph),
    });
  }
  // Oldest access first; ties broken by cwd so the order is deterministic.
  rows.sort((a, b) => (a.lastAccess - b.lastAccess) || String(a.cwd).localeCompare(String(b.cwd)));

  const evicted = [];
  let totalRuntimeBytes = 0;
  for (const row of rows) totalRuntimeBytes += row.runtimeBytes;

  for (const row of rows) {
    if (totalRuntimeBytes <= byteLimit) break;
    const nothingToFree = !row.entry?.graph || row.runtimeBytes <= 0;
    if (nothingToFree) continue;
    const freed = row.runtimeBytes;
    _clearGraphRuntimeCaches(row.entry.graph);
    row.runtimeBytes = 0;
    totalRuntimeBytes -= freed;
    evicted.push({ cwd: row.cwd, reason: 'max-bytes-runtime', freed });
  }

  while (_codeGraphCache.size > entryLimit) {
    const frontKey = _codeGraphCache.keys().next().value;
    if (!frontKey) break;
    _codeGraphCache.delete(frontKey);
    evicted.push({ cwd: frontKey, reason: 'max-entries' });
  }

  return {
    evicted,
    totalRuntimeBytes: Math.max(0, totalRuntimeBytes),
    entries: _codeGraphCache.size,
  };
}
361
+
362
// Tidy the in-memory mirror of the disk cache: drop malformed (non-object)
// entries, then trim from the front of the Map until it holds at most
// CODE_GRAPH_DISK_MAX_ENTRIES entries.
// `now` is accepted but not consulted: disk entries are not TTL-evicted.
// Their freshness and size are governed by signature validation on
// load/build and by _pruneCodeGraphManifestForBudget; only the memory cache
// applies CODE_GRAPH_TTL_MS.
function _pruneDiskCodeGraphEntries(now = Date.now()) {
  for (const [cwd, entry] of _diskCodeGraphCache) {
    const wellFormed = Boolean(entry) && typeof entry === 'object';
    if (!wellFormed) _diskCodeGraphCache.delete(cwd);
  }
  while (_diskCodeGraphCache.size > CODE_GRAPH_DISK_MAX_ENTRIES) {
    const frontKey = _diskCodeGraphCache.keys().next().value;
    if (!frontKey) break;
    _diskCodeGraphCache.delete(frontKey);
  }
}
378
+
379
// True when `value` (stringified; falsy becomes '') consists solely of
// 8 to 64 hexadecimal characters, in either letter case.
function _isCodeGraphCacheHash(value) {
  const text = String(value || '');
  const hexShape = /^[0-9a-f]{8,64}$/i;
  return hexShape.test(text);
}
382
+
383
/**
 * Trim a disk-cache manifest to an entry-count and byte budget.
 *
 * Manifest rows whose hash is not a valid cache hash, or whose
 * `<hash>.json` file cannot be stat'ed in `dir`, are dropped silently (they
 * appear neither in the result nor in `evicted`). The remaining rows are
 * ordered oldest `builtAt` first (ties by cwd) and evicted from the front,
 * first until the entry limit holds, then until the byte limit holds.
 * No files are touched; only the returned structures describe the outcome.
 *
 * @param {object} manifest Map of cwd -> { hash, builtAt }.
 * @param {string} dir Directory holding the per-cwd `<hash>.json` files.
 * @param {{maxEntries?: number, maxBytes?: number}} [options] Budget
 *   overrides; non-finite values fall back to the module defaults.
 * @returns {{manifest: object, evicted: Array<object>, totalBytes: number}}
 */
export function _pruneCodeGraphManifestForBudget(manifest, dir, options = {}) {
  const entryLimit = Number.isFinite(options.maxEntries)
    ? Math.max(0, Math.floor(options.maxEntries))
    : CODE_GRAPH_DISK_MAX_ENTRIES;
  const byteLimit = Number.isFinite(options.maxBytes)
    ? Math.max(0, Math.floor(options.maxBytes))
    : CODE_GRAPH_DISK_MAX_BYTES;

  const candidates = [];
  for (const [cwd, meta] of Object.entries(manifest || {})) {
    const hash = String(meta?.hash || '');
    if (!cwd || !_isCodeGraphCacheHash(hash)) continue;
    let bytesOnDisk = 0;
    try {
      bytesOnDisk = statSync(join(dir, `${hash}.json`)).size;
    } catch {
      // File missing or unreadable: the manifest row is stale, drop it.
      continue;
    }
    candidates.push({
      cwd,
      hash,
      builtAt: Number(meta?.builtAt) || 0,
      size: Math.max(0, Number(bytesOnDisk) || 0),
    });
  }
  candidates.sort((a, b) => (a.builtAt - b.builtAt) || a.cwd.localeCompare(b.cwd));

  const retained = new Set(candidates.map((row) => row.cwd));
  const evicted = [];
  let totalBytes = 0;
  for (const row of candidates) totalBytes += row.size;

  // Evict a row at most once, keeping the running byte total in step.
  const drop = (row, reason) => {
    if (!retained.delete(row.cwd)) return;
    totalBytes -= row.size;
    evicted.push({ ...row, reason });
  };

  for (const row of candidates) {
    if (retained.size <= entryLimit) break;
    drop(row, 'max-entries');
  }
  for (const row of candidates) {
    if (totalBytes <= byteLimit) break;
    drop(row, 'max-bytes');
  }

  const pruned = {};
  for (const { cwd, hash, builtAt } of candidates) {
    if (retained.has(cwd)) pruned[cwd] = { hash, builtAt };
  }
  return { manifest: pruned, evicted, totalBytes: Math.max(0, totalBytes) };
}
433
+
434
// One-time initialisation of the disk-cache state for this process.
// Runs the legacy single-file migration, then loads ONLY the manifest;
// per-cwd payloads are read later, on demand, by _ensureCwdLoaded(). Calls
// after the first return immediately. A manifest that fails to load is
// reported on stderr and treated as empty.
function _loadDiskCodeGraphCache(now = Date.now()) {
  if (_diskCodeGraphCacheLoaded) return;
  _diskCodeGraphCacheLoaded = true;

  _migrateLegacyDiskCache();

  try {
    const manifestPath = join(_codeGraphDiskDir(), 'manifest.json');
    const raw = existsSync(manifestPath) ? readFileSync(manifestPath, 'utf8') : null;
    if (raw !== null) {
      const manifest = JSON.parse(raw);
      if (manifest && typeof manifest === 'object') _diskManifest = manifest;
    }
  } catch (err) {
    process.stderr.write(`[code-graph] disk manifest load failed: ${err?.message || err}\n`);
  }
  // Guarantee a manifest object exists even when nothing could be loaded.
  _diskManifest = _diskManifest || {};
  _pruneDiskCodeGraphEntries(now);
}
457
+
458
// Demand-load one cwd's per-file entry. Callers invoke this right before
// reading `_diskCodeGraphCache.get(cwd)` so the in-memory cache stays
// populated only for cwds actually looked up in this process lifetime.
//
// No-op when the entry is already in memory, when no manifest is loaded,
// or when the manifest has no usable record for the cwd. The manifest's
// `hash` becomes part of a file name, so it is validated with
// _isCodeGraphCacheHash first — the same check the manifest pruner applies.
// A corrupt or tampered manifest (e.g. hash "../x") therefore cannot make
// this function read JSON from outside the cache directory.
function _ensureCwdLoaded(cwd) {
  const key = _canonicalGraphCwd(cwd);
  if (_diskCodeGraphCache.has(key)) return;
  if (!_diskManifest) return;
  const meta = _diskManifest[key];
  if (!meta || typeof meta !== 'object' || !meta.hash) return;
  if (!_isCodeGraphCacheHash(meta.hash)) return;
  try {
    const file = join(_codeGraphDiskDir(), `${meta.hash}.json`);
    if (!existsSync(file)) return;
    const entry = JSON.parse(readFileSync(file, 'utf8'));
    if (entry && typeof entry === 'object') _diskCodeGraphCache.set(key, entry);
  } catch { /* skip corrupt per-cwd file: best-effort, the graph is rebuilt instead */ }
}
474
+
475
// Synchronously write the in-memory disk-cache mirror out to the cache
// directory: one `<hash>.json` per cwd plus `manifest.json`. Any failure is
// reported on stderr and swallowed so persistence never breaks a caller.
function _persistDiskCodeGraphCacheNow() {
  try {
    _loadDiskCodeGraphCache();
    _pruneDiskCodeGraphEntries();
    const cacheDir = _codeGraphDiskDir();
    mkdirSync(cacheDir, { recursive: true });
    const manifestPath = join(cacheDir, 'manifest.json');

    // Start from whatever manifest is on disk right now so rows written by
    // other instances (MIXDOG_MULTI_INSTANCE=1) survive. Otherwise the
    // orphan sweep at the end would unlink their per-cwd files merely
    // because this process never loaded them.
    let onDisk = {};
    try {
      const parsed = JSON.parse(readFileSync(manifestPath, 'utf8'));
      if (parsed && typeof parsed === 'object') onDisk = parsed;
    } catch { /* no existing manifest yet */ }

    const merged = { ...onDisk };
    for (const [cwd, entry] of _diskCodeGraphCache) {
      const hash = _hashCwd(cwd);
      writeJsonAtomicSync(join(cacheDir, `${hash}.json`), entry, { compact: true, lock: true });
      merged[cwd] = { hash, builtAt: entry.builtAt || Date.now() };
    }

    // Apply the entry/byte budget; anything evicted also leaves memory.
    const { manifest: budgeted, evicted } = _pruneCodeGraphManifestForBudget(merged, cacheDir);
    for (const row of evicted) {
      _diskCodeGraphCache.delete(row.cwd);
    }

    const liveHashes = new Set();
    for (const meta of Object.values(budgeted)) {
      if (meta && typeof meta === 'object' && meta.hash) liveHashes.add(meta.hash);
    }

    writeJsonAtomicSync(manifestPath, budgeted, { compact: true, lock: true });
    _diskManifest = budgeted;

    // Sweep per-cwd files no manifest row points at. liveHashes covers the
    // merged manifest (preserved rows + ours), so files belonging to other
    // instances are left alone.
    try {
      for (const name of readdirSync(cacheDir)) {
        if (name === 'manifest.json' || !name.endsWith('.json')) continue;
        const hash = name.slice(0, -5);
        if (liveHashes.has(hash)) continue;
        try { unlinkSync(join(cacheDir, name)); } catch { /* best-effort */ }
      }
    } catch { /* sweep best-effort */ }
  } catch (err) {
    process.stderr.write(`[code-graph] disk cache persist failed (target: ${_codeGraphDiskDir()}): ${err?.message || err}\n`);
  }
}
530
+
531
// Debounced persistence: arm a single 250 ms timer that writes the disk
// cache when it fires. Calls made while a timer is already pending are
// ignored. The timer is unref'd so it never keeps the process alive.
function _scheduleDiskCodeGraphCacheFlush() {
  if (_diskCodeGraphCacheFlushTimer) return;
  const onFire = () => {
    _diskCodeGraphCacheFlushTimer = null;
    _persistDiskCodeGraphCacheNow();
  };
  const timer = setTimeout(onFire, 250);
  _diskCodeGraphCacheFlushTimer = timer;
  if (typeof timer.unref === 'function') timer.unref();
}
539
+
540
/**
 * Flush a pending code-graph disk cache write synchronously, intended for
 * use before process exit. When a scheduled flush timer is outstanding it
 * is cancelled and _persistDiskCodeGraphCacheNow runs immediately, so
 * freshly built graphs reach disk regardless of exit timing. When no flush
 * is pending this does nothing.
 */
export function drainCodeGraphCache() {
  const pending = _diskCodeGraphCacheFlushTimer;
  if (!pending) return;
  clearTimeout(pending);
  _diskCodeGraphCacheFlushTimer = null;
  _persistDiskCodeGraphCacheNow();
}
552
+
553
/**
 * Fire-and-forget prewarm: kick off a code-graph build for `cwd` so a later
 * find_symbol call can hit a warm cache instead of paying for a cold build.
 * The build goes through buildCodeGraphAsync, so it shares that function's
 * single-flight and TTL-cache behaviour rather than spawning a duplicate
 * worker. Failures are swallowed — prewarming must never affect the
 * caller. The prewarm only helps when `cwd` matches the cwd of the later
 * lookup. A falsy `cwd` is ignored.
 */
export function prewarmCodeGraph(cwd) {
  if (cwd) {
    // Intentionally not awaited; the rejection handler keeps a failed
    // build from surfacing as an unhandled rejection.
    buildCodeGraphAsync(cwd).catch(() => { /* best-effort */ });
  }
}
569
+
570
/**
 * Symbol-aware prewarm. Once the graph for `cwd` is available, run the
 * lazy per-symbol candidate lookup for every requested name so the first
 * real find_symbol query on those names finds its candidates already
 * cached. Fire-and-forget: nothing is returned and every failure (build or
 * individual lookup) is swallowed.
 *
 * @param {string} cwd Working directory whose graph should be warmed.
 * @param {string|string[]} symbols One name or a list; entries are
 *   stringified and trimmed, and empty ones are dropped.
 * @param {{language?: string|null}} [options] Optional language filter
 *   passed through to the candidate lookup.
 */
export function prewarmCodeGraphSymbols(cwd, symbols, { language = null } = {}) {
  if (!cwd) return;
  const requested = Array.isArray(symbols) ? symbols : [symbols];
  const names = [];
  for (const raw of requested) {
    const name = String(raw || '').trim();
    if (name) names.push(name);
  }
  const warmLookups = (graph) => {
    if (!graph) return;
    for (const name of names) {
      try { _lookupCandidateNodes(graph, name, language); } catch { /* best-effort */ }
    }
  };
  buildCodeGraphAsync(cwd).then(warmLookups).catch(() => { /* best-effort */ });
}
590
+
591
/**
 * Guarded directory prewarm. A build is scheduled only when
 * _findDirProjectRoot detects a project root for `cwd`, and it is that
 * detected ROOT — not the directory passed in — that gets prewarmed, so a
 * later query that re-roots to the same project root lands on a warm
 * cache. Directories with no detected project root are refused, which
 * keeps a stray cwd from triggering an index of an unrelated tree.
 * Fire-and-forget via prewarmCodeGraph.
 *
 * @param {string} cwd Directory to probe.
 * @returns {boolean} true when a prewarm was scheduled, false otherwise.
 */
export function prewarmCodeGraphIfProject(cwd) {
  const projectRoot = cwd ? _findDirProjectRoot(cwd) : null;
  if (!projectRoot) return false;
  prewarmCodeGraph(projectRoot);
  return true;
}
609
+
610
/**
 * Build (or fetch) the code graph for `cwd` off the main thread.
 *
 * Resolution order:
 *  1. A memory-cache entry younger than CODE_GRAPH_TTL_MS is returned
 *     directly (and marked recently used).
 *  2. If a build for the same canonical cwd is already in flight, the
 *     caller joins it (single-flight). With a `signal`, the caller's wait —
 *     not the shared build — is abandoned on abort.
 *  3. Otherwise a worker is spawned. Its result is cached and persisted
 *     only when no write invalidated this root while it was running.
 *
 * @param {string} cwd Working directory to index.
 * @param {AbortSignal|null} [signal] Optional cancellation signal. When
 *   this call owns the worker, aborting also terminates the worker.
 * @returns {Promise<object>} The graph object reported by the worker.
 * @throws {Error} 'aborted' on cancellation, a timeout error after
 *   CODE_GRAPH_WORKER_TIMEOUT_MS, or the worker's failure.
 */
export async function buildCodeGraphAsync(cwd, signal = null) {
  if (signal?.aborted) throw new Error('aborted');
  const graphCwd = _canonicalGraphCwd(cwd);
  // TTL-bounded cache hit. Signature re-validation requires a sync fs
  // walk (main-loop work we explicitly avoid), so the full re-check is
  // delegated to the worker. Entries past CODE_GRAPH_TTL_MS fall through
  // to a worker rebuild.
  const cached = _codeGraphCache.get(graphCwd);
  if (cached?.graph && Date.now() - cached.ts < CODE_GRAPH_TTL_MS) {
    _touchCodeGraphCache(graphCwd);
    return cached.graph;
  }
  // Single-flight: parallel callers for the same graphCwd collapse onto
  // one Worker spawn. The entry is removed when the build settles so later
  // callers can retry after a failure or after the next TTL expiry.
  const existing = _inflightAsyncBuilds.get(graphCwd);
  if (existing) {
    if (!signal) return existing;
    // Race the shared build against this caller's own abort signal; the
    // listener is always detached again so the signal does not leak it.
    let onAbort = null;
    const abortP = new Promise((_, reject) => {
      onAbort = () => reject(new Error('aborted'));
      signal.addEventListener('abort', onAbort, { once: true });
    });
    const cleanup = () => {
      if (onAbort) {
        try { signal.removeEventListener('abort', onAbort); } catch {}
        onAbort = null;
      }
    };
    return Promise.race([existing, abortP]).then(
      (v) => { cleanup(); return v; },
      (e) => { cleanup(); throw e; },
    );
  }
  // Capture the dirty generation at build start. If a write bumps it
  // before the worker returns, the result describes a pre-edit tree and
  // must be dropped rather than cached/persisted.
  const _genAtStart = _getCodeGraphGen(graphCwd);
  let _worker = null;
  // Hoisted out of the executor so the registration step below can tell
  // whether the build already failed synchronously.
  let settled = false;
  const promise = new Promise((resolve, reject) => {
    let timeout = null;
    let _onSignalAbort = null;
    // Single exit point: idempotent, releases the timer and abort
    // listener, and frees the single-flight slot.
    const settle = (val) => {
      if (settled) return;
      settled = true;
      if (timeout) { clearTimeout(timeout); timeout = null; }
      if (_onSignalAbort && signal) {
        try { signal.removeEventListener('abort', _onSignalAbort); } catch {}
        _onSignalAbort = null;
      }
      _inflightAsyncBuilds.delete(graphCwd);
      if (val instanceof Error) reject(val);
      else resolve(val);
    };
    try {
      const workerUrl = new URL('./code-graph-prewarm-worker.mjs', import.meta.url);
      _worker = new Worker(workerUrl, {
        workerData: { cwd },
        execArgv: [],
      });
      const w = _worker;
      timeout = setTimeout(() => {
        try { _worker?.terminate(); } catch {}
        settle(new Error(`code-graph worker timed out after ${CODE_GRAPH_WORKER_TIMEOUT_MS}ms for cwd=${graphCwd}`));
      }, CODE_GRAPH_WORKER_TIMEOUT_MS);
      timeout.unref?.();
      if (signal) {
        _onSignalAbort = () => {
          try { _worker?.terminate(); } catch {}
          settle(new Error('aborted'));
        };
        signal.addEventListener('abort', _onSignalAbort, { once: true });
      }
      w.once('message', (msg) => {
        try {
          if (msg && msg.ok && msg.graph && typeof msg.signature === 'string') {
            // Dirty-generation guard: only commit if no write invalidated
            // this root since build start. A stale result is still returned
            // to in-flight callers but never cached or persisted, so the TTL
            // fast path won't serve a pre-edit graph after the next rebuild.
            if (_getCodeGraphGen(graphCwd) === _genAtStart) {
              _setCodeGraphCache(graphCwd, { ts: Date.now(), signature: msg.signature, graph: msg.graph });
              // Persist to disk as well so the next process boot can hit
              // the cache cold instead of only the in-memory map.
              _setDiskCodeGraphEntry(graphCwd, msg.graph);
            }
            settle(msg.graph);
          } else {
            settle(new Error('code-graph prewarm worker failed'));
          }
        } catch (e) { settle(e instanceof Error ? e : new Error(String(e))); }
      });
      w.once('error', (e) => settle(e instanceof Error ? e : new Error(String(e))));
    } catch (e) { settle(e instanceof Error ? e : new Error(String(e))); }
  });
  // Register the build only while it is still pending. If worker setup
  // threw synchronously, settle() already ran — before any registration —
  // so registering now would pin a permanently rejected promise in the
  // single-flight map and every later call for this cwd would receive that
  // stale rejection instead of retrying.
  if (!settled) _inflightAsyncBuilds.set(graphCwd, promise);
  return promise;
}
714
+
715
// Queue a graph for persistence: serialize it, store it in the in-memory
// disk-cache mirror under the canonical cwd, prune the mirror and schedule
// a debounced flush.
// `builtAt` is overwritten with the time of persistence (not the build
// start) so manifest metadata reflects when the entry was written. Disk
// retention is governed by signature validation and the disk budget, not
// by CODE_GRAPH_TTL_MS, which applies to the memory cache only.
function _setDiskCodeGraphEntry(cwd, graph) {
  _loadDiskCodeGraphCache();
  const record = Object.assign(_serializeGraph(graph), { builtAt: Date.now() });
  const cacheKey = _canonicalGraphCwd(cwd);
  _diskCodeGraphCache.set(cacheKey, record);
  _pruneDiskCodeGraphEntries();
  _scheduleDiskCodeGraphCacheFlush();
}
727
+
728
/**
 * Record written files as dirty so the affected graphs are rebuilt.
 *
 * Callers pass absolute paths only (no cwd parameter); the affected
 * indexed roots are resolved here. For every written path:
 *  - each known root (one with a pending dirty set or a memory-cache
 *    entry) that contains the path is marked;
 *  - if no known root contains it, the nearest ancestor directory holding
 *    `package.json` or `.git` is used as the root instead.
 * Each marked root has its memory-cache entry dropped and its dirty
 * generation bumped, so the TTL fast path cannot serve a pre-edit graph and
 * an in-flight build started before the write discards its result.
 *
 * Only the paths that actually live under a root are added to that root's
 * dirty set, so a multi-file write spanning several roots does not leak
 * one root's files into another root's queue.
 *
 * @param {string|string[]} paths Absolute path(s) that were written.
 *   Falsy entries are ignored.
 */
export function markCodeGraphDirtyPaths(paths) {
  const values = Array.isArray(paths) ? paths : [paths];
  const cleaned = values
    .filter(Boolean)
    .map((p) => _canonicalGraphPath(p));
  if (cleaned.length === 0) return;

  // Canonicalise every known root once instead of once per written path.
  const knownRoots = [...new Set([..._codeGraphDirtyPaths.keys(), ..._codeGraphCache.keys()])]
    .map((root) => ({ root, canonRoot: _canonicalGraphPath(root) }));

  // root -> Set of written paths that belong to it.
  const pathsByRoot = new Map();
  const attribute = (root, absPath) => {
    if (!pathsByRoot.has(root)) pathsByRoot.set(root, new Set());
    pathsByRoot.get(root).add(absPath);
  };

  for (const absPath of cleaned) {
    // Tracked per path: an earlier path matching some root must not stop a
    // later, unmatched path from getting its own ancestor-chain lookup.
    let matchedThisPath = false;
    for (const { root, canonRoot } of knownRoots) {
      // Both separators are checked explicitly; a bare prefix test would
      // wrongly match sibling directories such as `/repo-old` for `/repo`.
      const inside = absPath === canonRoot
        || absPath.startsWith(canonRoot + '/')
        || absPath.startsWith(canonRoot + '\\');
      if (inside) {
        attribute(root, absPath);
        matchedThisPath = true;
      }
    }
    if (matchedThisPath) continue;
    // No indexed root contains this path: derive one from the nearest
    // ancestor that looks like a project root. The loop stops before the
    // filesystem root, where dirname() no longer changes the path.
    let dir = dirname(absPath);
    while (dir && dir !== dirname(dir)) {
      if (existsSync(join(dir, 'package.json')) || existsSync(join(dir, '.git'))) {
        attribute(_canonicalGraphCwd(dir), absPath);
        break;
      }
      dir = dirname(dir);
    }
  }

  for (const [root, rootPaths] of pathsByRoot) {
    if (!_codeGraphDirtyPaths.has(root)) _codeGraphDirtyPaths.set(root, new Set());
    const dirty = _codeGraphDirtyPaths.get(root);
    for (const p of rootPaths) dirty.add(p);
    // Invalidate the memory entry so the TTL fast path cannot keep
    // returning the pre-edit graph while dirty paths are queued.
    const canonRoot = _canonicalGraphCwd(root);
    _codeGraphCache.delete(canonRoot);
    // Bump the generation so a build that started before this write drops
    // its now-stale result instead of repopulating the cache.
    _bumpCodeGraphGen(canonRoot);
  }
}
779
+
780
// Take (and clear) the pending dirty paths recorded for a cwd. Returns
// them as an array, or an empty array when nothing is pending.
function _consumeCodeGraphDirtyPaths(cwd) {
  const rootKey = _canonicalGraphCwd(cwd);
  const pending = _codeGraphDirtyPaths.get(rootKey);
  const hasPending = Boolean(pending) && pending.size !== 0;
  if (!hasPending) return [];
  _codeGraphDirtyPaths.delete(rootKey);
  return Array.from(pending);
}
787
+
788
// Wrap an already-regex-escaped symbol in Unicode-aware word-boundary
// lookarounds and return the pattern SOURCE (to be compiled with the `u`
// flag). JS `\b` only fires at ASCII [A-Za-z0-9_] transitions, so it never
// matches around CJK / Cyrillic / Greek identifiers; \p{ID_Continue} does.
// `$` counts as an identifier character when no language is given or when
// _langUsesDollarInIdentifiers(lang) is true. For languages accepted by
// _langAllowsBangQuestionSuffix, a symbol that does not itself end in `!`
// or `?` must not be followed by one, which keeps `save` distinct from
// `save!`. That guard needs the raw `symbol` to be passed in.
function _unicodeBoundaryPattern(escaped, lang = null, symbol = null) {
  const dollarIsIdentChar = !lang || _langUsesDollarInIdentifiers(lang);
  const name = symbol == null ? '' : String(symbol);
  const guardSuffix = Boolean(
    lang && _langAllowsBangQuestionSuffix(lang) && name && !/[!?]$/.test(name),
  );

  let lookbehind;
  let lookahead;
  if (dollarIsIdentChar) {
    lookbehind = '(?<![\\p{ID_Continue}$])';
    lookahead = guardSuffix ? '(?![\\p{ID_Continue}$!?])' : '(?![\\p{ID_Continue}$])';
  } else {
    lookbehind = '(?<![\\p{ID_Continue}])';
    lookahead = guardSuffix ? '(?![\\p{ID_Continue}!?])' : '(?![\\p{ID_Continue}])';
  }
  return `${lookbehind}${escaped}${lookahead}`;
}
803
+
804
// Extract the distinct identifier-like tokens from `text`, in order of
// first appearance.
//
// A token is an optional `$`/`@` sigil, an identifier start character
// (\p{ID_Start} or `_`), then identifier continue characters, bounded by
// Unicode-aware lookarounds so non-ASCII identifiers are recognised.
// Language-specific rules:
//  - when no language is given or _langUsesDollarInIdentifiers(lang) is
//    true, `$` is an identifier character. It is accepted INSIDE and at
//    the END of a name as well as in the lookarounds — otherwise names such
//    as `foo$bar` or `x$` satisfy neither boundary and yield no token;
//  - for languages accepted by _langAllowsBangQuestionSuffix one trailing
//    `!` or `?` is kept as part of the token.
//
// @param {string} text Source text; null/undefined is treated as empty.
// @param {string|null} lang Language name, or null for the permissive default.
// @returns {string[]} Unique tokens.
function _extractIdentifierTokens(text, lang = null) {
  const allowDollar = !lang || _langUsesDollarInIdentifiers(lang);
  const before = allowDollar ? '(?<![\\p{ID_Continue}$])' : '(?<![\\p{ID_Continue}])';
  const body = allowDollar ? '[\\p{ID_Continue}$]*' : '[\\p{ID_Continue}]*';
  const suffix = lang && _langAllowsBangQuestionSuffix(lang) ? '[!?]?' : '';
  const after = allowDollar ? '(?![\\p{ID_Continue}$])' : '(?![\\p{ID_Continue}])';
  const re = new RegExp(`${before}[$@]?[\\p{ID_Start}_]${body}${suffix}${after}`, 'gu');
  const out = new Set();
  for (const match of String(text || '').matchAll(re)) {
    out.add(match[0]);
  }
  return [...out];
}
818
+
819
// Return the identifier tokens for a node. When the node already carries a
// tokenSymbols array it is returned as-is; otherwise the tokens are
// extracted from the node's source text and memoised on the node.
function _getTokenSymbolsForNode(graph, node) {
  const known = node?.tokenSymbols;
  if (Array.isArray(known)) return known;
  const sourceText = _getSourceTextForNode(graph, node);
  const extracted = _extractIdentifierTokens(sourceText, node.lang);
  node.tokenSymbols = extracted;
  return extracted;
}
826
+
827
// Copy a symbol token index (key -> array of rel paths) into a new Map
// with fresh arrays, so the copy can be mutated without touching the
// original. Non-array values become empty arrays; a missing index yields
// an empty Map.
function _cloneSymbolTokenIndex(index) {
  const pairs = [...(index || [])];
  return new Map(
    pairs.map(([key, rels]) => [key, Array.isArray(rels) ? [...rels] : []]),
  );
}
834
+
835
// Legacy full-index builder: fills graph._symbolTokenIndex with two
// buckets per (node, token) pair — `<lang>|<token>` and `*|<token>` — each
// listing the rel paths of nodes containing the token.
// Kept callable for explicit prewarms, but lookup paths no longer invoke
// it: they use _lookupCandidateNodes, which lazily builds only the
// requested (language, symbol) bucket, because the full sweep was the
// dominant cold-process cost and buys nothing for a handful of lookups.
// Returns early when the graph has no index Map, or when the index is
// already populated and not flagged dirty.
function _ensureSymbolTokenIndex(graph) {
  const index = graph?._symbolTokenIndex;
  if (!index) return;
  const upToDate = !graph._symbolTokenIndexDirty && index.size > 0;
  if (upToDate) return;

  index.clear();
  const append = (bucketKey, rel) => {
    const bucket = index.get(bucketKey);
    if (bucket) bucket.push(rel);
    else index.set(bucketKey, [rel]);
  };
  for (const node of graph.nodes.values()) {
    for (const token of _getTokenSymbolsForNode(graph, node)) {
      append(`${node.lang}|${token}`, node.rel);
      append(`*|${token}`, node.rel);
    }
  }
  graph._symbolTokenIndexDirty = false;
}
857
+
858
// Lazy per-symbol candidate lookup. A successful result is written back into
// `_symbolTokenIndex` so repeat lookups are O(1). Compared to a full
// _ensureSymbolTokenIndex sweep, the per-symbol scan is O(N) where N is
// the node count (~7000 on refs/), and each node's check is a cheap
// Array.includes on its pre-extracted tokenSymbols. Cold-process first
// lookup drops from ~1-2s to ~50ms.
//
// Returns the nodes whose token list contains `symbol` (restricted to
// `language` when given). When no node matches, returns every node in the
// language scope instead, uncached.
export function _lookupCandidateNodes(graph, symbol, language = null) {
  if (!graph?.nodes) return [];
  const index = graph._symbolTokenIndex;
  const cacheKey = `${language || '*'}|${symbol}`;
  if (index?.has(cacheKey)) {
    // Cached rels may point at nodes that no longer exist; drop those.
    const hits = [];
    for (const rel of index.get(cacheKey)) {
      const node = graph.nodes.get(rel);
      if (node) hits.push(node);
    }
    return hits;
  }
  const inScope = [];
  const matching = [];
  for (const node of graph.nodes.values()) {
    if (language && node.lang !== language) continue;
    inScope.push(node);
    if (_getTokenSymbolsForNode(graph, node).includes(symbol)) matching.push(node);
  }
  if (matching.length === 0) {
    // Token-index miss → fall back to the language-filtered full node set.
    // The token extractor can miss some identifiers (unicode such as
    // Korean/CJK, $-prefixed identifiers in some positions, Rust raw idents,
    // Go method receivers). The downstream search loop's
    // sourceText.includes() still catches these — it just needs the full
    // node set. Not cached: caching the fallback would mask token-extractor
    // improvements and would keep returning the heavy scan after a future
    // graph rebuild populated the token map for the symbol.
    return inScope;
  }
  index?.set(cacheKey, matching.map((n) => n.rel));
  return matching;
}
899
+
900
/**
 * Render the cheap (regex-collected) symbols of a text as one
 * `kind name (Lline)` entry per line, capped by `_capGraphList`.
 *
 * @param {string} text - source text
 * @param {string} lang - language key
 * @returns {string} newline-joined list, or `(no symbols)` when empty
 */
function _extractSymbolsCheap(text, lang) {
  const labels = [];
  for (const item of _collectCheapSymbols(text, lang)) {
    labels.push(`${item.kind} ${item.name} (L${item.line})`);
  }
  if (!labels.length) return '(no symbols)';
  return _capGraphList(labels).join('\n');
}
904
+
905
// Control-flow keywords that the bare `name(args) {?$` patterns below
// would otherwise mis-collect as function/method symbols (e.g. an
// `if (...) {` line). Excluding at the collection stage keeps the
// invariant out of every downstream label/summarizer.
const _CHEAP_SYMBOL_CONTROL_FLOW = new Set([
  'if', 'else', 'elif', 'for', 'foreach', 'while', 'do',
  'switch', 'case', 'default', 'when', 'select',
  'try', 'catch', 'finally', 'throw', 'throws',
  'return', 'yield', 'await', 'goto', 'break', 'continue',
  'with', 'using', 'lock', 'synchronized', 'unless',
]);

/**
 * Collect declaration-like symbols from source text using cheap per-line,
 * per-language regexes (at most one symbol per line).
 *
 * Comments are stripped line by line before matching: `/* *\/` and `//` for
 * slash-comment languages, whole-line `#` comments for hash-comment
 * languages. Names found in `_CHEAP_SYMBOL_CONTROL_FLOW` are never emitted.
 * Languages without a branch below yield no symbols.
 *
 * @param {string} text - source text (nullish is treated as empty)
 * @param {string} lang - language key, e.g. `javascript`, `python`, `php`
 * @returns {{kind: string, name: string, line: number}[]} symbols in source
 *   order; `line` is 1-based
 */
function _collectCheapSymbols(text, lang) {
  const lines = String(text || '').split(/\r?\n/);
  const out = [];
  const push = (kind, name, idx) => {
    if (!name) return;
    // Skip control-flow keywords so `if(...) {`, `for(...) {`,
    // `while(...) {`, `switch(...) {`, `catch(...) {` do not leak
    // as function/method symbols through the bare `name(args)` shapes.
    if (_CHEAP_SYMBOL_CONTROL_FLOW.has(name)) return;
    out.push({ kind, name, line: idx + 1 });
  };
  // Slash (`//` `/*`) comments: all C-family langs incl. kotlin/swift/scala
  // and dart/objc/zig. Excluded: python/ruby/bash/elixir/r (hash) and lua
  // (`--`; `//` is lua integer division, so slash-stripping would delete code).
  const supportsSlash = lang !== 'python' && lang !== 'ruby'
    && lang !== 'bash' && lang !== 'lua'
    && lang !== 'elixir' && lang !== 'r';
  // Hash (`#`) comments: python/ruby/php, bash, elixir and r.
  const supportsHash = lang === 'python' || lang === 'ruby' || lang === 'php'
    || lang === 'bash' || lang === 'elixir' || lang === 'r';
  let inBlockComment = false;
  for (let i = 0; i < lines.length; i++) {
    // Per-line comment stripping at the collection stage so header/JSDoc
    // words cannot bleed into the symbol list. An unclosed `/*` keeps the
    // code before it and flips the block flag so code-before-comment lines
    // still reach the per-language matchers below.
    let line = lines[i];
    if (supportsSlash) {
      if (inBlockComment) {
        const endIdx = line.indexOf('*/');
        if (endIdx < 0) continue;
        line = line.slice(endIdx + 2);
        inBlockComment = false;
      }
      while (true) {
        const startIdx = line.indexOf('/*');
        if (startIdx < 0) break;
        const endIdx = line.indexOf('*/', startIdx + 2);
        if (endIdx < 0) {
          line = line.slice(0, startIdx);
          inBlockComment = true;
          break;
        }
        line = line.slice(0, startIdx) + ' ' + line.slice(endIdx + 2);
      }
      const slashIdx = line.indexOf('//');
      if (slashIdx >= 0) line = line.slice(0, slashIdx);
    }
    if (supportsHash) {
      if (/^\s*#/.test(line)) continue;
    }
    if (!line.trim()) continue;
    let m = null;
    if (lang === 'typescript' || lang === 'javascript') {
      if ((m = /\b(class|interface|type|enum)\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push(m[1], m[2], i);
      else if ((m = /\bfunction\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(/.exec(line))) push('function', m[1], i);
      else if ((m = /\b(?:const|let|var)\s+([A-Za-z_][A-Za-z0-9_]*)\b/.exec(line))) push('binding', m[1], i);
      else if ((m = /^\s*(?:static\s+)?(?:async\s+)?(?:get\s+|set\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;]*\)\s*\{?$/.exec(line))) push('method', m[1], i);
    } else if (lang === 'python') {
      if ((m = /^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push('class', m[1], i);
      else if ((m = /^\s*def\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push('function', m[1], i);
    } else if (lang === 'go') {
      if ((m = /^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s+struct\b/.exec(line))) push('struct', m[1], i);
      else if ((m = /^\s*func(?:\s*\([^)]*\))?\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(/.exec(line))) push('function', m[1], i);
    } else if (lang === 'rust') {
      if ((m = /^\s*(?:pub\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push('struct', m[1], i);
      else if ((m = /^\s*(?:pub\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(/.exec(line))) push('function', m[1], i);
    } else if (lang === 'kotlin') {
      // Kotlin: `fun name(...)` is the canonical function declaration whether
      // the body is a `{` block or an `= expr` expression body. The shared
      // Java/C#-style `name(...) {` pattern misses expression bodies that
      // end with the expression itself (no trailing `{` or `;`).
      if ((m = /\b(class|interface|enum|object)\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push(m[1], m[2], i);
      else if ((m = /^\s*(?:public\s+|private\s+|protected\s+|internal\s+)?(?:open\s+|abstract\s+|final\s+)?(?:override\s+)?(?:suspend\s+)?(?:inline\s+)?fun\s+(?:<[^>]+>\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(/.exec(line))) push('function', m[1], i);
      else if ((m = /^\s*(?:public\s+|private\s+|protected\s+|internal\s+)?(?:const\s+)?(?:val|var)\s+([A-Za-z_][A-Za-z0-9_]*)\b/.exec(line))) push('binding', m[1], i);
    } else if (lang === 'java' || lang === 'csharp') {
      if ((m = /\b(class|interface|enum|record)\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push(m[1], m[2], i);
      else if ((m = /\b([A-Za-z_][A-Za-z0-9_]*)\s*\([^;]*\)\s*\{?$/.exec(line))) push('function', m[1], i);
    } else if (lang === 'c' || lang === 'cpp') {
      if ((m = /\b(class|struct|enum)\s+([A-Za-z_][A-Za-z0-9_]*)/.exec(line))) push(m[1], m[2], i);
      else if ((m = /^\s*[A-Za-z_][\w\s:*<>~]*\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^;]*\)\s*\{?$/.exec(line))) push('function', m[1], i);
    } else if (lang === 'ruby' || lang === 'php') {
      if ((m = /^\s*class\s+([A-Za-z_][A-Za-z0-9_:]*)/.exec(line))) push('class', m[1], i);
      else if ((m = /^\s*def\s+([A-Za-z_][A-Za-z0-9_!?=]*)/.exec(line))) push('function', m[1], i);
      // PHP declares functions/methods with `function`, not `def`; without
      // this matcher no PHP function would ever be collected. Optional
      // visibility/static/abstract/final modifiers and a by-reference `&`
      // may precede the name.
      else if (lang === 'php' && (m = /^\s*(?:(?:public|private|protected|static|abstract|final)\s+)*function\s+&?\s*([A-Za-z_][A-Za-z0-9_]*)\s*\(/.exec(line))) push('function', m[1], i);
    }
    // No cheap-regex matcher for swift/scala/bash/lua or dart/objc/elixir/
    // zig/r: they are deliberately left without a branch (yield no cheap
    // anchors) rather than guessing with a loose pattern; callers
    // (overview/anchors) fall back to native symbols.
  }
  return out;
}
1018
+
1019
// Default limit raised from 6 to 50 after HS-A6 surfaced that overview on a
// ~46KB file returned only the first 6 anchors (all within the first 87
// lines, 5% of the file). tail-trim still bounds output payload, so a higher
// cap surfaces full structure on large files without hurting small ones.
//
// Produces `kind name (Lline): <trimmed source line>` entries for the node's
// symbols (native `node.symbols` when present, otherwise the cheap regex
// collector). Blank source lines and repeated `name:line` pairs are skipped;
// each source line is cut to `maxLineChars`.
function _extractExplainerAnchorLines(node, graph, { limit = 50, maxLineChars = 180 } = {}) {
  const sourceLines = _getSourceTextForNode(graph, node).split(/\r?\n/);
  const hasNative = Array.isArray(node.symbols) && node.symbols.length;
  const symbols = hasNative
    ? node.symbols
    : _collectCheapSymbols(sourceLines.join('\n'), node.lang);
  const anchors = [];
  const emitted = new Set();
  for (const item of symbols) {
    if (anchors.length >= limit) break;
    const snippet = String(sourceLines[item.line - 1] || '').trim();
    if (!snippet) continue;
    const dedupeKey = `${item.name}:${item.line}`;
    if (emitted.has(dedupeKey)) continue;
    emitted.add(dedupeKey);
    anchors.push(`${item.kind} ${item.name} (L${item.line}): ${snippet.slice(0, maxLineChars)}`);
  }
  return anchors;
}
1042
+
1043
/**
 * Convert an absolute path into the graph's display path by delegating to
 * `toDisplayPath`.
 *
 * @param {string} absPath - absolute file path
 * @param {string} cwd - working directory passed through to `toDisplayPath`
 * @returns {*} whatever `toDisplayPath(absPath, cwd)` returns
 */
function _graphRel(absPath, cwd) {
  const displayPath = toDisplayPath(absPath, cwd);
  return displayPath;
}
1046
+
1047
+
1048
/**
 * Whether `#` starts a line comment in `lang`.
 *
 * True for python/ruby/php, bash, elixir and r. lua is NOT hash — it uses
 * `--` line and `--[[ ]]` block comments; kotlin/swift/scala and
 * dart/objc/zig are slash-comment languages.
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _supportsHashComments(lang) {
  switch (lang) {
    case 'python':
    case 'ruby':
    case 'php':
    case 'bash':
    case 'elixir':
    case 'r':
      return true;
    default:
      return false;
  }
}
1057
+
1058
/**
 * Whether `//` and `/* *\/` are comment syntax in `lang`.
 *
 * This is the default for every language except the ones listed here:
 * python/ruby/bash/elixir/r (hash comments) and lua (`--` comments; `//` is
 * lua integer division, so it must not be treated as a comment opener).
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _supportsSlashComments(lang) {
  const nonSlashLangs = ['python', 'ruby', 'bash', 'lua', 'elixir', 'r'];
  return !nonSlashLangs.includes(lang);
}
1068
+
1069
/**
 * Whether the masker should treat `'...'` as a string literal in `lang`.
 *
 * Included: typescript, javascript, python, ruby, php, bash, lua, dart, r
 * and objc. objc's `'x'` is a char literal but is included on purpose so its
 * contents are masked; this deliberately DIVERGES from c/cpp, which are not
 * listed and whose char literals are left unmasked.
 *
 * Excluded by design: swift and kotlin (double-quote forms only), scala
 * (single quotes are Char literals), elixir (`'...'` is a charlist; string
 * handling is kept to `"..."`/`"""`), zig (`'c'` is a char literal only).
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _supportsSingleQuoteStrings(lang) {
  switch (lang) {
    case 'typescript':
    case 'javascript':
    case 'python':
    case 'ruby':
    case 'php':
    case 'bash':
    case 'lua':
    case 'dart':
    case 'r':
    case 'objc':
      return true;
    default:
      return false;
  }
}
1099
+
1100
/**
 * Whether a backtick opens a string literal in `lang`
 * (typescript, javascript and go).
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _supportsBacktickStrings(lang) {
  return ['typescript', 'javascript', 'go'].includes(lang);
}
1103
+
1104
/**
 * Whether `'''` opens a triple-quoted string in `lang`.
 *
 * Only python and dart (dart supports BOTH `'''` and `"""`). kotlin/scala/
 * swift have `"""` but NOT `'''`; treating `'''` as an opener there would
 * mis-mask a single-quote char/string followed by an empty string.
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _supportsTripleSingleQuoteStrings(lang) {
  if (lang === 'python') return true;
  return lang === 'dart';
}
1111
+
1112
/**
 * Whether `"""` opens a triple-quoted string in `lang`.
 *
 * Included: python, kotlin, scala, swift, elixir (heredoc docstrings) and
 * dart (which also has `'''`). bash/lua have no triple-quote form (lua long
 * strings use `[[ ]]`), and neither do objc/zig/r.
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _supportsTripleDoubleQuoteStrings(lang) {
  switch (lang) {
    case 'python':
    case 'kotlin':
    case 'scala':
    case 'swift':
    case 'elixir':
    case 'dart':
      return true;
    default:
      return false;
  }
}
1123
+
1124
/**
 * Whether `lang` is javascript or typescript.
 *
 * @param {string} lang - language key
 * @returns {boolean}
 */
function _isJsLike(lang) {
  if (lang === 'javascript') return true;
  return lang === 'typescript';
}
1127
+
1128
/**
 * Whether `c` can start an identifier for the scanner: `_`, `$`, or an
 * ASCII letter.
 *
 * @param {string} c - character to classify
 * @returns {boolean}
 */
function _isWordStartChar(c) {
  if (c === '_' || c === '$') return true;
  const isLower = c >= 'a' && c <= 'z';
  const isUpper = c >= 'A' && c <= 'Z';
  return isLower || isUpper;
}
1132
+
1133
/**
 * Whether `c` can continue an identifier for the scanner: anything
 * `_isWordStartChar` accepts, or an ASCII digit.
 *
 * @param {string} c - character to classify
 * @returns {boolean}
 */
function _isWordChar(c) {
  if (_isWordStartChar(c)) return true;
  return c >= '0' && c <= '9';
}
1136
+
1137
// ECMAScript keywords after which the scanner is in expression context, so a
// following `/` opens a RegExp literal. After a value (identifier, number,
// `)`, `]`) a `/` is the division operator instead. Used to resolve the
// `/`-ambiguity when masking JS/TS source.
const REGEX_PRECEDENT_KEYWORDS = new Set([
  'return',
  'typeof',
  'delete',
  'void',
  'new',
  'throw',
  'await',
  'yield',
  'in',
  'of',
  'instanceof',
  'case',
  'do',
  'else',
  'if',
  'while',
]);

// Punctuators after which the scanner is in expression context (a following
// `/` opens a RegExp literal rather than dividing).
const REGEX_PRECEDENT_CHARS = new Set([
  '=', '(', ',', ';', ':', '?', '!',
  '~', '&', '|', '^', '+', '-', '*',
  '%', '<', '>', '{', '[',
]);
1150
+
1151
// Mask a JS regex literal starting at `start` (which points at the opening
// `/`). Handles `\` escapes and `[...]` character classes, inside which an
// unescaped `/` does not terminate the literal. The delimiters, body and
// lowercase flag letters are replaced with spaces in `out` so downstream
// identifier searches do not see them.
//
// Returns the index just past the closing `/flags`. A regex literal cannot
// contain a line terminator, so scanning stops at the first newline — also
// when that newline directly follows a backslash — and the newline's index
// is returned with the newline itself left intact. The result never exceeds
// `src.length`.
function _maskJsRegexLiteral(src, out, start) {
  // Blank a single position, never touching newlines or out-of-range slots.
  const blank = (idx) => {
    if (idx < src.length && src[idx] !== '\n') out[idx] = ' ';
  };
  blank(start);
  let j = start + 1;
  let inCharClass = false;
  while (j < src.length) {
    const c = src[j];
    if (c === '\n') return j;
    if (c === '\\') {
      blank(j);
      // `\` + newline is not a valid escape inside a regex literal; treating
      // it as one would carry the mask over onto the next source line.
      if (src[j + 1] === '\n') return j + 1;
      blank(j + 1);
      j += 2;
      continue;
    }
    if (c === '/' && !inCharClass) {
      blank(j);
      j++;
      // Consume trailing flag letters.
      while (j < src.length && src[j] >= 'a' && src[j] <= 'z') {
        blank(j);
        j++;
      }
      return j;
    }
    if (c === '[') inCharClass = true;
    else if (c === ']') inCharClass = false;
    blank(j);
    j++;
  }
  // A trailing backslash advances two positions; clamp to the end of input.
  return Math.min(j, src.length);
}
1195
+
1196
/**
 * Replace comments, string literals and JS regex literals in `text` with
 * spaces so identifier searches only see code. The result has the same
 * length as the input and every newline is preserved, so offsets and line
 * numbers stay aligned with the original text.
 *
 * Code inside JS/TS template-literal `${...}` interpolations is kept intact
 * so calls inside them remain visible. Go raw strings (backticks) have no
 * interpolation and no backslash escapes: their whole body is masked and
 * they close at the next backtick.
 *
 * @param {string} text - source text (nullish is treated as empty)
 * @param {string} lang - language key selecting comment/string syntax
 * @returns {string} masked text
 */
function _maskNonCodeText(text, lang) {
  const src = String(text || '');
  const out = src.split('');
  // Stack of scanner frames. Top describes current state:
  //   { kind: 'string', delim, lang } — inside single-line string literal (mask body)
  //   { kind: 'triple', delim }       — inside triple-quote string (mask body)
  //   { kind: 'luablock', close }     — inside lua `--[=*[ ... ]=*]` comment (mask body)
  //   { kind: 'interp', braceDepth }  — inside backtick `${...}` interpolation
  //                                     (code mode; bytes preserved)
  // Empty stack = top-level code.
  const stack = [];
  const top = () => (stack.length ? stack[stack.length - 1] : null);
  // Blank `count` positions from `from`, never touching newlines.
  const blank = (from, count = 1) => {
    const end = Math.min(from + count, src.length);
    for (let k = from; k < end; k++) {
      if (src[k] !== '\n') out[k] = ' ';
    }
  };
  // Blank from `from` up to (not including) the next newline; returns its index.
  const blankToLineEnd = (from) => {
    let k = from;
    while (k < src.length && src[k] !== '\n') {
      out[k] = ' ';
      k++;
    }
    return k;
  };
  // Language capabilities do not change during the scan — resolve them once.
  const slashComments = _supportsSlashComments(lang);
  const hashComments = _supportsHashComments(lang);
  const singleQuotes = _supportsSingleQuoteStrings(lang);
  const backticks = _supportsBacktickStrings(lang);
  const tripleSingle = _supportsTripleSingleQuoteStrings(lang);
  const tripleDouble = _supportsTripleDoubleQuoteStrings(lang);
  const jsLike = _isJsLike(lang);
  let i = 0;
  let blockComment = false;
  // prevToken tracks ECMAScript token context for the `/`-disambiguation:
  //   'expr'  = expression-start (regex literal may follow)
  //   'value' = value/operand (`/` is division)
  // Start of file = expression context.
  let prevToken = 'expr';
  while (i < src.length) {
    if (blockComment) {
      if (src.startsWith('*/', i)) {
        blank(i, 2);
        i += 2;
        blockComment = false;
      } else {
        blank(i);
        i++;
      }
      continue;
    }
    const t = top();
    if (t && (t.kind === 'triple' || t.kind === 'luablock')) {
      // Both frames mask everything up to their exact closing delimiter. For
      // lua that is `]` + the same number of `=` as the opener + `]`, so
      // `--[==[ ]] ]==]` closes only at `]==]`.
      const closer = t.kind === 'triple' ? t.delim : t.close;
      if (closer && src.startsWith(closer, i)) {
        blank(i, closer.length);
        i += closer.length;
        stack.pop();
        prevToken = 'value';
      } else {
        blank(i);
        i++;
      }
      continue;
    }
    if (t && t.kind === 'string') {
      const d = t.delim;
      // Go raw strings: backslash is literal and `${` has no meaning.
      const goRaw = t.lang === 'go' && d === '`';
      if (d === '`' && !goRaw && src.startsWith('${', i)) {
        // Enter interpolation. `${` itself is code-relevant — leave bytes intact.
        stack.push({ kind: 'interp', braceDepth: 1 });
        i += 2;
        prevToken = 'expr';
        continue;
      }
      // In bash single-quotes `'...'`, backslash is literal (no escape) — the
      // string closes at the first `'`. bash `"..."` and all other langs
      // (except go raw strings) keep backslash-escape handling.
      const bashLiteralSingle = t.lang === 'bash' && d === '\'';
      if (!bashLiteralSingle && !goRaw && src[i] === '\\') {
        blank(i, 2);
        i += 2;
        continue;
      }
      if (src[i] === d) {
        blank(i);
        i++;
        stack.pop();
        prevToken = 'value';
        continue;
      }
      // JS forbids a raw newline inside '...' or "..." — defensive reset. bash
      // quoted strings legally span newlines, so do NOT reset bash frames.
      if (src[i] === '\n' && t.lang !== 'bash' && (d === '\'' || d === '"')) {
        stack.pop();
        prevToken = 'value';
        i++;
        continue;
      }
      blank(i);
      i++;
      continue;
    }
    if (t && t.kind === 'interp') {
      // Code mode inside `${...}`: track brace depth so masking of the
      // enclosing template resumes once the interpolation closes. Anything
      // else falls through to the ordinary code scanner below.
      if (src[i] === '{') {
        t.braceDepth++;
        prevToken = 'expr';
        i++;
        continue;
      }
      if (src[i] === '}') {
        t.braceDepth--;
        i++;
        if (t.braceDepth === 0) stack.pop();
        prevToken = 'value';
        continue;
      }
    }
    // Code mode (top level or inside an interpolation).
    if (slashComments && src.startsWith('/*', i)) {
      blank(i, 2);
      i += 2;
      blockComment = true;
      continue;
    }
    if (slashComments && src.startsWith('//', i)) {
      i = blankToLineEnd(i);
      continue;
    }
    if (hashComments && src[i] === '#') {
      // Bash `#` is a comment ONLY at line start or after whitespace. When it
      // follows a non-space char it is part of `${var#pat}` / `${var##pat}`
      // parameter expansion (or `$#`, `arr[#]`, etc.), NOT a comment — masking
      // there would erase the rest of the line. `#!` shebang sits at file
      // start (a line start) so it is still masked.
      if (lang === 'bash') {
        const prev = i > 0 ? src[i - 1] : '\n';
        const atCommentPos = prev === '\n' || prev === ' ' || prev === '\t' || prev === '\r';
        if (!atCommentPos) {
          prevToken = 'value';
          i++;
          continue;
        }
      }
      i = blankToLineEnd(i);
      continue;
    }
    // Lua comments: `--[=*[ ... ]=*]` long-bracket block and `--` line. Checked
    // before number/operator handling so the leading `--` is consumed as a
    // comment, not as two minus operators.
    if (lang === 'lua' && src.startsWith('--', i)) {
      // Long-bracket opener: `--` then `[` + zero-or-more `=` + `[`. The level
      // (`=` count) selects the matching close `]` + same `=` + `]`.
      const lb = /^--\[(=*)\[/.exec(src.slice(i, i + 64));
      if (lb) {
        blank(i, lb[0].length);
        i += lb[0].length;
        stack.push({ kind: 'luablock', close: `]${lb[1]}]` });
        continue;
      }
      // Plain `--` line comment (no long-bracket opener follows).
      i = blankToLineEnd(i);
      continue;
    }
    if (tripleSingle && src.startsWith("'''", i)) {
      blank(i, 3);
      i += 3;
      stack.push({ kind: 'triple', delim: "'''" });
      continue;
    }
    if (tripleDouble && src.startsWith('"""', i)) {
      blank(i, 3);
      i += 3;
      stack.push({ kind: 'triple', delim: '"""' });
      continue;
    }
    const ch = src[i];
    if (ch === '/' && jsLike && prevToken === 'expr') {
      i = _maskJsRegexLiteral(src, out, i);
      prevToken = 'value';
      continue;
    }
    if (ch === '"' || (singleQuotes && ch === '\'') || (backticks && ch === '`')) {
      blank(i);
      stack.push({ kind: 'string', delim: ch, lang });
      i++;
      continue;
    }
    if (_isWordStartChar(ch)) {
      const start = i;
      while (i < src.length && _isWordChar(src[i])) i++;
      prevToken = REGEX_PRECEDENT_KEYWORDS.has(src.substring(start, i)) ? 'expr' : 'value';
      continue;
    }
    if (ch >= '0' && ch <= '9') {
      while (i < src.length && (src[i] === '.' || (src[i] >= '0' && src[i] <= '9'))) i++;
      prevToken = 'value';
      continue;
    }
    if (ch === ' ' || ch === '\t' || ch === '\r' || ch === '\n') {
      // Whitespace does not change the token context.
      i++;
      continue;
    }
    prevToken = REGEX_PRECEDENT_CHARS.has(ch) ? 'expr' : 'value';
    i++;
  }
  return out.join('');
}
1480
+
1481
/**
 * Find every offset at which `symbol` occurs in the code portion of `text`.
 *
 * The text is first passed through `_maskNonCodeText`, so occurrences inside
 * masked regions are not reported. The symbol is regex-escaped and wrapped by
 * `_unicodeBoundaryPattern` before matching.
 *
 * @param {string} text - source text
 * @param {string} symbol - identifier to look for
 * @param {string} lang - language key
 * @returns {number[]} match start offsets, ascending; `[]` for an empty symbol
 */
function _symbolMatchIndices(text, symbol, lang) {
  const escaped = String(symbol || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  if (!escaped) return [];
  const masked = _maskNonCodeText(text, lang);
  const pattern = new RegExp(_unicodeBoundaryPattern(escaped, lang, symbol), 'gu');
  return Array.from(masked.matchAll(pattern), (hit) => hit.index);
}
1493
+
1494
/**
 * Return the source text for a graph node, using `graph._sourceTextCache`
 * (keyed by `node.rel`, validated against `node.fingerprint`) when present.
 *
 * On a cache miss, a string `fallbackText` is cached and returned as-is;
 * otherwise the file at `node.abs` is read synchronously as UTF-8. A failed
 * read yields `''` and is not cached.
 *
 * @param {object} graph - graph; the cache is optional
 * @param {object} node - graph node (`rel`, `abs`, `fingerprint`)
 * @param {string|null} [fallbackText=null] - text to use instead of reading
 * @returns {string} source text, or `''` when the file cannot be read
 */
function _getSourceTextForNode(graph, node, fallbackText = null) {
  const cache = graph?._sourceTextCache;
  const hit = cache?.get(node.rel);
  if (hit && hit.fingerprint === (node.fingerprint || '')) {
    return hit.text;
  }
  const remember = (text) => {
    cache?.set(node.rel, { fingerprint: node.fingerprint || '', text });
  };
  if (typeof fallbackText === 'string') {
    remember(fallbackText);
    return fallbackText;
  }
  let text;
  try {
    text = readFileSync(node.abs, 'utf8');
  } catch {
    // Unreadable file: report empty text and leave the cache untouched.
    return '';
  }
  remember(text);
  return text;
}
1517
+
1518
/**
 * Build the plain-text overview for one file node: file path, language,
 * top-level types, symbol names, imports, anchor lines and the file head.
 *
 * Top-level types and imports are capped at 8 entries; symbols at 30 when
 * they come from native `node.symbols` and at 20 when they fall back to the
 * token list. A capped symbols/imports list ends with a "+N more" marker
 * pointing at the uncapped mode. The head is the first 6 source lines,
 * trimmed and cut to 420 characters.
 *
 * @param {object} node - graph node
 * @param {object} graph - graph the node belongs to
 * @param {string} cwd - base directory for displaying import paths
 * @returns {string} newline-joined summary
 */
function _buildExplainerFileSummary(node, graph, cwd) {
  const topTypes = Array.isArray(node?.topLevelTypes) ? node.topLevelTypes.slice(0, 8) : [];
  const importsAll = Array.isArray(node?.resolvedImports)
    ? node.resolvedImports.map((p) => _graphRel(p, cwd))
    : [];
  const imports = importsAll.slice(0, 8);
  const tokensAll = _getTokenSymbolsForNode(graph, node);
  // Prefer native tree-sitter symbol names (declarations only — no
  // comment/string/keyword leakage); use the regex token dump only when the
  // native graph path didn't populate node.symbols.
  const hasNativeSymbols = Array.isArray(node?.symbols) && node.symbols.length > 0;
  let symbolsAll = tokensAll;
  let symbolCap = 20;
  if (hasNativeSymbols) {
    symbolsAll = [...new Set(node.symbols.map((s) => s.name))];
    symbolCap = 30;
  }
  const symbolNames = symbolsAll.slice(0, symbolCap);
  const anchors = _extractExplainerAnchorLines(node, graph);
  const headLines = _getSourceTextForNode(graph, node).split(/\r?\n/).slice(0, 6);
  const sourceHead = headLines.join('\n').trim().slice(0, 420);

  const parts = [];
  parts.push(`file: ${node.rel}`);
  parts.push(`language: ${node.lang}`);
  if (topTypes.length) parts.push(`top-level: ${topTypes.join(', ')}`);
  // A capped list with no marker reads as "this is everything" — when cut,
  // say so and point at the uncapped mode.
  if (symbolNames.length) {
    const more = symbolsAll.length - symbolNames.length;
    const suffix = more > 0 ? `, … +${more} more (mode:symbols for full list)` : '';
    parts.push(`symbols: ${symbolNames.join(', ')}${suffix}`);
  }
  if (imports.length) {
    const more = importsAll.length - imports.length;
    const suffix = more > 0 ? `, … +${more} more (mode:imports for full list)` : '';
    parts.push(`imports: ${imports.join(', ')}${suffix}`);
  }
  if (anchors.length) parts.push(`anchors:\n${anchors.join('\n')}`);
  if (sourceHead) parts.push(`head:\n${sourceHead}`);
  return parts.join('\n');
}
1557
+
1558
/**
 * Return the node's source split into lines, cached in
 * `graph._sourceLinesCache` (keyed by `node.rel`, validated against
 * `node.fingerprint`).
 *
 * @param {object} graph - graph; the cache is optional
 * @param {object} node - graph node
 * @returns {string[]} source lines (split on `\n` / `\r\n`)
 */
function _getSourceLinesForNode(graph, node) {
  const cache = graph?._sourceLinesCache;
  const hit = cache?.get(node.rel);
  if (hit && hit.fingerprint === (node.fingerprint || '')) {
    return hit.lines;
  }
  const lines = _getSourceTextForNode(graph, node).split(/\r?\n/);
  cache?.set(node.rel, { fingerprint: node.fingerprint || '', lines });
  return lines;
}
1571
+
1572
/**
 * Return the node's source, masked by `_maskNonCodeText` and split into
 * lines, cached in `graph._maskedLinesCache` (keyed by `node.rel`, validated
 * against `node.fingerprint`).
 *
 * @param {object} graph - graph; the cache is optional
 * @param {object} node - graph node (reads `lang` for masking)
 * @returns {string[]} masked source lines
 */
function _getMaskedLinesForNode(graph, node) {
  const cache = graph?._maskedLinesCache;
  const hit = cache?.get(node.rel);
  if (hit && hit.fingerprint === (node.fingerprint || '')) {
    return hit.lines;
  }
  const sourceText = _getSourceTextForNode(graph, node);
  const lines = _maskNonCodeText(sourceText, node.lang).split(/\r?\n/);
  cache?.set(node.rel, { fingerprint: node.fingerprint || '', lines });
  return lines;
}
1585
+
1586
/**
 * Choose the hit that most likely declares a callee.
 *
 * A declaration-like hit in `preferRel` wins outright. Otherwise hits are
 * ranked by: declaration-like first, paths under `src/` first, shallower
 * path first, higher `matchCount` first, then `rel` and `line` ascending.
 * The best declaration-like hit is returned, or the top-ranked hit when none
 * is declaration-like.
 *
 * @param {object[]|null|undefined} hits - candidate hits
 * @param {string|null} preferRel - file to prefer (typically the caller's)
 * @returns {object|null} chosen hit, or `null` when there are no hits
 */
function _pickCalleeDeclHit(hits, preferRel) {
  if (!hits?.length) return null;
  if (preferRel) {
    const local = hits.find((h) => h.rel === preferRel && h.declarationLike);
    if (local) return local;
  }
  const depthOf = (rel) => String(rel || '').split('/').length;
  const isCanonicalSrc = (rel) => /^src\//.test(rel || '');
  // Tie-breakers in priority order; the first non-zero result decides.
  const tieBreakers = [
    (a, b) => Number(b.declarationLike) - Number(a.declarationLike),
    (a, b) => Number(isCanonicalSrc(b.rel)) - Number(isCanonicalSrc(a.rel)),
    (a, b) => depthOf(a.rel) - depthOf(b.rel),
    (a, b) => b.matchCount - a.matchCount,
    (a, b) => a.rel.localeCompare(b.rel),
    (a, b) => a.line - b.line,
  ];
  const ranked = [...hits].sort((a, b) => {
    for (const compare of tieBreakers) {
      const delta = compare(a, b);
      if (delta) return delta;
    }
    return 0;
  });
  return ranked.find((h) => h.declarationLike) || ranked[0];
}
1602
+
1603
/**
 * Look up every hit for `name` in the graph and reduce them to the single
 * preferred declaration candidate.
 *
 * @param {object} graph - symbol graph passed through to `_findSymbolHits`.
 * @param {string} name - callee name to resolve.
 * @param {{language?: string|null, preferRel?: string|null}} [options]
 * @returns {object|null} whatever `_pickCalleeDeclHit` selects.
 */
function _resolveCalleeDeclaration(graph, name, { language = null, preferRel = null } = {}) {
  const candidates = _findSymbolHits(graph, name, { language });
  return _pickCalleeDeclHit(candidates, preferRel);
}
1606
+
1607
/**
 * Read, in parallel, the source text of every candidate file for `symbol`
 * that is missing from (or stale in) `graph._sourceTextCache`.
 *
 * Without this, `_cheapReferenceSearch` performs ~200 sequential
 * `readFileSync` calls on warm-cache lookups, because the in-memory text
 * cache starts empty in each new process. For cross-codebase queries such as
 * `parseInt callers cwd=refs` that was the dominant cost (~3-5s of the ~6s
 * warm-lookup wall). Reads are dispatched concurrently via `fs/promises` so
 * the OS I/O scheduler can overlap them.
 *
 * The function is best-effort: unreadable files are skipped, and when the
 * graph has no `_sourceTextCache` to populate it returns before touching the
 * disk (previously every file was read and the text thrown away).
 *
 * @param {object} graph - graph whose `_sourceTextCache` (a Map) is filled.
 * @param {string} symbol - symbol used to select candidate files.
 * @param {string|null} language - optional language filter for the lookup.
 * @returns {Promise<void>} resolves once every read has settled.
 */
async function _prewarmReferenceSourceText(graph, symbol, language) {
  const candidateNodes = _lookupCandidateNodes(graph, symbol, language);
  if (!candidateNodes.length) return;

  // No cache means there is nowhere to store the text: skip the I/O entirely.
  const textCache = graph?._sourceTextCache;
  if (!textCache) return;

  const stale = candidateNodes.filter((node) => {
    const entry = textCache.get(node.rel);
    return !entry || entry.fingerprint !== (node.fingerprint || '');
  });
  if (stale.length === 0) return;

  const { readFile } = await import('fs/promises');
  const MAX_PARALLEL_READS = 64;
  let cursor = 0;

  // Each worker pulls the next unread node until the shared cursor runs out.
  const drain = async () => {
    while (cursor < stale.length) {
      const node = stale[cursor];
      cursor += 1;
      try {
        const text = await readFile(node.abs, 'utf8');
        // Re-read the property so a cache swapped in mid-flight still gets filled.
        graph._sourceTextCache?.set(node.rel, { fingerprint: node.fingerprint || '', text });
      } catch {
        // Deliberate best-effort: an unreadable file is simply left uncached.
      }
    }
  };

  const workerCount = Math.min(MAX_PARALLEL_READS, stale.length);
  await Promise.all(Array.from({ length: workerCount }, () => drain()));
}
1642
+
1643
/**
 * Text-level reference search: list every whole-identifier occurrence of
 * `symbol` in the candidate files as `rel:line:col snippet` rows.
 *
 * Matching runs against the MASKED lines (so hits inside strings/comments
 * are ignored) while the snippet is taken from the RAW line: masking blanks
 * string contents, which would mangle snippets containing template literals
 * or quoted paths. Masking is offset-preserving, so the line index and
 * `match.index` still address the raw text.
 *
 * Caps:
 *  - hits: the per-call `limit` when it is a positive finite number, clamped
 *    to the `REFERENCE_HIT_CAP` env default (200). The scan stops at the
 *    first hit beyond the cap, so no total is known past that point.
 *  - snippet width: `REFERENCE_LINE_CAP` env (default 80, minimum 20).
 *
 * Results are memoised in `graph._referenceSearchCache`; the key includes the
 * requested limit and the file scope so a later call with a larger limit or a
 * different file filter is not served the previously trimmed result.
 *
 * @param {object} graph - symbol graph.
 * @param {string} symbol - identifier to search for.
 * @param {string} cwd - unused here; kept for signature compatibility.
 * @param {{language?: string|null, limit?: number|null, fileRel?: string|null}} [options]
 * @returns {string} newline-joined rows, `(no references)`, or the rows
 *   followed by a `[truncated — …]` footer when the cap was hit.
 */
function _cheapReferenceSearch(graph, symbol, cwd, { language = null, limit = null, fileRel = null } = {}) {
  const escaped = String(symbol || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  if (!escaped) return '(no references)';

  const hasLimit = limit !== null && Number.isFinite(limit) && limit > 0;
  // `d` marks "no explicit limit" (the env-default cap applies).
  const limitKey = hasLimit ? String(Math.floor(limit)) : 'd';
  const cacheKey = `${language || '*'}|${symbol}|${limitKey}|${fileRel || '*'}`;
  const cached = graph?._referenceSearchCache?.get(cacheKey);
  if (typeof cached === 'string') return cached;

  let candidateNodes = _lookupCandidateNodes(graph, symbol, language);
  if (fileRel) candidateNodes = candidateNodes.filter((node) => node.rel === fileRel);

  const envCap = Math.max(1, Number(process.env.REFERENCE_HIT_CAP) || 200);
  const hitCap = hasLimit ? Math.min(Math.max(1, Math.floor(limit)), envCap) : envCap;
  const lineCap = Math.max(20, Number(process.env.REFERENCE_LINE_CAP) || 80);

  const rows = [];
  let cappedOut = false;
  outer: for (const node of candidateNodes) {
    const sourceText = _getSourceTextForNode(graph, node);
    // Cheap substring pre-filter before paying for masking + regex.
    if (!sourceText.includes(symbol)) continue;
    const maskedLines = _getMaskedLinesForNode(graph, node);
    const rawLines = _getSourceLinesForNode(graph, node);
    // The pattern only depends on the file's language: build it once per file.
    const re = new RegExp(_unicodeBoundaryPattern(escaped, language || node.lang, symbol), 'gu');
    for (let i = 0; i < maskedLines.length; i++) {
      const line = maskedLines[i];
      if (!line.trim()) continue;
      re.lastIndex = 0; // global regex is stateful; restart for every line
      let match = null;
      while ((match = re.exec(line))) {
        if (rows.length >= hitCap) {
          // First hit past the cap: stop scanning altogether.
          cappedOut = true;
          break outer;
        }
        const snippet = (rawLines[i] ?? line).trim().slice(0, lineCap);
        rows.push(`${node.rel}:${i + 1}:${match.index + 1} ${snippet}`);
      }
    }
  }

  const result = rows.length ? rows.join('\n') : '(no references)';
  // The footer reports only what is actually known: the cap was exceeded.
  const finalResult = cappedOut
    ? `${result}\n\n[truncated — more than ${hitCap} hits, showing first ${hitCap}; pass a higher limit or raise REFERENCE_HIT_CAP env var for more]`
    : result;
  graph?._referenceSearchCache?.set(cacheKey, finalResult);
  return finalResult;
}
1711
+
1712
/**
 * Find the end line recorded in `node.symbols` for the declaration of
 * `symbolName` located at (or within two lines of) `declLine`.
 *
 * Only records with finite start (`startLine`, else `line`) and `endLine`
 * values are considered. A record starting exactly on `declLine` whose end
 * is not before it wins; otherwise the closest record is used, provided it
 * starts no more than two lines away and its end is not before its start.
 *
 * @param {object|null|undefined} node - file node carrying `symbols`.
 * @param {string} symbolName - declared name to look for.
 * @param {number|string} declLine - 1-based declaration line.
 * @returns {number|null} the end line, or null when nothing suitable exists.
 */
function _nativeEndLineForDecl(node, symbolName, declLine) {
  if (!symbolName || !Array.isArray(node?.symbols) || node.symbols.length === 0) return null;
  const target = Number(declLine);
  if (!Number.isFinite(target)) return null;

  const spans = node.symbols
    .filter((sym) => sym && sym.name === symbolName)
    .map((sym) => ({ start: Number(sym.startLine ?? sym.line), end: Number(sym.endLine) }))
    .filter(({ start, end }) => Number.isFinite(start) && Number.isFinite(end));

  let exactEnd = null;
  let closest = null;
  for (const span of spans) {
    if (span.start === target && span.end >= target) exactEnd = span.end;
    // Strictly-closer only, so the earliest record wins a distance tie.
    if (closest === null || Math.abs(span.start - target) < Math.abs(closest.start - target)) {
      closest = span;
    }
  }

  if (exactEnd != null) return exactEnd;
  if (closest === null || Math.abs(closest.start - target) > 2) return null;
  return closest.end >= closest.start ? closest.end : null;
}
1735
+
1736
/**
 * Render a hit's location as `rel:line:col`, or `rel:line-endLine:col` when
 * the hit carries a finite `endLine` that is not before its start line.
 * A missing or zero column is shown as 1.
 *
 * @param {{rel: string, line: number, col?: number, endLine?: number}} hit
 * @returns {string} the formatted location.
 */
function _formatSymbolHitLocation(hit) {
  const startLine = Number(hit.line);
  const column = Number(hit.col) || 1;
  const endLine = Number(hit.endLine);
  const hasRange = Number.isFinite(endLine) && endLine >= startLine;
  const lineSpan = hasRange ? `${startLine}-${endLine}` : `${startLine}`;
  return `${hit.rel}:${lineSpan}:${column}`;
}
1743
+
1744
/**
 * Sort `hits` IN PLACE by declaration preference and return the same array.
 *
 * Order: declaration-like first, paths under `src/` first, shallower paths
 * first, higher match count first, then path and line order. When more than
 * one hit is declaration-like, the winning hit is tagged with
 * `ambiguousDeclaration` set to that count.
 *
 * @param {object[]|null|undefined} hits - hits to sort (mutated).
 * @returns {object[]|null|undefined} the same `hits` reference.
 */
function _sortSymbolHits(hits) {
  if (!hits?.length) return hits;

  const pathDepth = (rel) => String(rel || '').split('/').length;
  const underSrc = (rel) => /^src\//.test(rel || '');
  const byPreference = (a, b) => {
    const declDelta = Number(b.declarationLike) - Number(a.declarationLike);
    if (declDelta) return declDelta;
    const srcDelta = Number(underSrc(b.rel)) - Number(underSrc(a.rel));
    if (srcDelta) return srcDelta;
    const depthDelta = pathDepth(a.rel) - pathDepth(b.rel);
    if (depthDelta) return depthDelta;
    const countDelta = b.matchCount - a.matchCount;
    if (countDelta) return countDelta;
    const relDelta = a.rel.localeCompare(b.rel);
    if (relDelta) return relDelta;
    return a.line - b.line;
  };
  hits.sort(byPreference);

  let declCount = 0;
  for (const hit of hits) {
    if (hit.declarationLike) declCount += 1;
  }
  if (declCount > 1 && hits[0]) hits[0].ambiguousDeclaration = declCount;
  return hits;
}
1760
+
1761
/**
 * Find every file-level hit for `symbol` across the graph's candidate files.
 *
 * The symbol is stringified and trimmed first; an empty result short-circuits
 * to `[]` without touching the graph.
 *
 * @param {object} graph - symbol graph.
 * @param {string} symbol - symbol name to look up.
 * @param {{language?: string|null}} [options] - optional language filter.
 * @returns {object[]} hits as produced by `_findSymbolHitsOnNodes`.
 */
function _findSymbolHits(graph, symbol, { language = null } = {}) {
  const name = String(symbol || '').trim();
  if (name === '') return [];
  const nodes = _lookupCandidateNodes(graph, name, language);
  return _findSymbolHitsOnNodes(graph, name, nodes, { language });
}
1767
+
1768
/**
 * Scan `candidateNodes` for whole-identifier occurrences of `cleanSymbol`
 * and produce one hit per file that contains at least one.
 *
 * Matching runs on the masked lines (strings/comments blanked); snippets are
 * taken from the raw lines at the same index. Each hit reports the
 * declaration position when one was found (native symbol record preferred,
 * regex fallback otherwise) and the first occurrence in any case.
 *
 * @param {object} graph - symbol graph.
 * @param {string} cleanSymbol - already-trimmed symbol name.
 * @param {Iterable<object>} candidateNodes - file nodes to scan.
 * @param {{language?: string|null}} [options] - language override used for
 *   the identifier-boundary pattern (defaults to each node's own language).
 * @returns {object[]} hits `{ rel, lang, line, col, endLine?, declarationLike,
 *   matchCount, content, context, firstLine, firstCol, firstContent,
 *   firstContext }`, ordered by `_sortSymbolHits`.
 */
function _findSymbolHitsOnNodes(graph, cleanSymbol, candidateNodes, { language = null } = {}) {
  if (!cleanSymbol) return [];

  const escaped = cleanSymbol.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Declaration regex must anchor the symbol immediately after a declaration
  // keyword; a looser `keyword … X` pattern matched ordinary callsites like
  // `const result = doFoo(X)` as a declaration of X. Optional
  // `export [default]`, visibility/modifier keywords and `function*` are
  // allowed. The keyword set spans JS/TS, Python (def/class), Go (func/type),
  // Rust (fn/struct/enum/trait/mod), C/C++ (struct/union/typedef), C#/Java/
  // Kotlin (class/interface/record/object/struct) and Ruby/PHP (def/function).
  const declRe = new RegExp(
    `(?:^|[\\s;{(,])(?:export\\s+(?:default\\s+)?)?(?:public\\s+|private\\s+|protected\\s+|internal\\s+|static\\s+|abstract\\s+|final\\s+|sealed\\s+|virtual\\s+|override\\s+|async\\s+|pub\\s+(?:\\([^)]*\\)\\s+)?)*(?:const|let|var|function\\*?|class|interface|type|enum|def|func|fn|struct|union|trait|impl|mod|record|object|typedef|namespace|package)\\s+${escaped}\\b`
  );
  // Assignment-style declarations: `const|let|var NAME = (…) =>` and
  // `const|let|var NAME = function`. `declRe` is switched off whenever the
  // file has native symbols, which would leave these forms reported as plain
  // references; this regex is consulted regardless so a function value bound
  // to a name is still classified as a declaration.
  const assignDeclRe = new RegExp(
    `(?:^|[\\s;{(,])(?:export\\s+(?:default\\s+)?)?(?:const|let|var)\\s+${escaped}\\s*=\\s*(?:async\\s+)?(?:function\\b|(?:\\([^)]*\\)|[A-Za-z_$][\\w$]*)\\s*=>)`
  );
  const hits = [];

  for (const node of candidateNodes) {
    const sourceText = _getSourceTextForNode(graph, node);
    // Cheap substring pre-filter before masking + regex work.
    if (!sourceText.includes(cleanSymbol)) continue;
    const boundaryLang = language || node.lang;
    const re = new RegExp(_unicodeBoundaryPattern(escaped, boundaryLang, cleanSymbol), 'gu');
    const sourceLines = _getSourceLinesForNode(graph, node);
    const maskedLines = _getMaskedLinesForNode(graph, node);
    const trimmedAt = (idx) => String(sourceLines[idx] || '').trim();
    const contextAt = (idx) => sourceLines
      .slice(idx, idx + 3)
      .map((raw) => String(raw || '').trim())
      .filter(Boolean);

    // Native declaration lines for `cleanSymbol`. The regex cannot recognise
    // keyword-less declarations (Java/C#/C++ `[type] name(args)`), so the
    // node's own symbol records are consulted, falling back to the cheap
    // scanner only when the node carries none. `startLine` is accepted when
    // a record has no `line`, matching what _nativeEndLineForDecl reads.
    const hasNativeSymbols = Array.isArray(node.symbols) && node.symbols.length > 0;
    const symbolRecords = hasNativeSymbols ? node.symbols : _collectCheapSymbols(sourceText, node.lang);
    const nativeDeclLines = new Set();
    for (const sym of symbolRecords) {
      if (sym && sym.name === cleanSymbol) nativeDeclLines.add(sym.line ?? sym.startLine);
    }

    let matchCount = 0;
    let declarationLike = Array.isArray(node.topLevelTypes) && node.topLevelTypes.includes(cleanSymbol);
    let firstLine = null;
    let firstCol = null;
    let firstContent = '';
    let contextLines = [];
    let declLine = null;
    let declCol = null;
    let declContent = '';
    let declContext = [];
    let nativeDeclLine = null;
    let nativeDeclCol = null;
    let nativeDeclContent = '';
    let nativeDeclContext = [];

    for (let i = 0; i < maskedLines.length; i++) {
      const line = maskedLines[i];
      if (!line.trim()) continue;
      re.lastIndex = 0; // global regex is stateful; restart for every line
      let match = re.exec(line);
      if (!match) continue;

      const lineNo = i + 1;
      // Every position recorded below uses the FIRST match on the line.
      const col = match.index + 1;
      // Line-level facts: evaluated once instead of once per match.
      const isNativeDeclLine = nativeDeclLines.has(lineNo);
      const isRegexDeclLine = assignDeclRe.test(line) || (!hasNativeSymbols && declRe.test(line));

      do {
        matchCount += 1;
        match = re.exec(line);
      } while (match);

      if (firstLine == null) {
        firstLine = lineNo;
        firstCol = col;
        firstContent = trimmedAt(i);
        contextLines = contextAt(i);
      }
      if (declLine == null && isRegexDeclLine) {
        declLine = lineNo;
        declCol = col;
        declContent = trimmedAt(i);
        declContext = contextAt(i);
      }
      if (nativeDeclLine == null && isNativeDeclLine) {
        nativeDeclLine = lineNo;
        nativeDeclCol = col;
        nativeDeclContent = trimmedAt(i);
        nativeDeclContext = contextAt(i);
      }
      if (isNativeDeclLine || isRegexDeclLine) declarationLike = true;
    }
    if (firstLine == null) continue;

    // Prefer the native record over the regex-derived position when both
    // exist: it also covers keyword-less / method declarations.
    if (nativeDeclLine != null) {
      declLine = nativeDeclLine;
      declCol = nativeDeclCol;
      declContent = nativeDeclContent;
      declContext = nativeDeclContext;
    }
    const hasDeclPos = declLine != null;
    const declLineForEnd = hasDeclPos ? declLine : firstLine;
    const endLine = _nativeEndLineForDecl(node, cleanSymbol, declLineForEnd);
    hits.push({
      rel: node.rel,
      lang: node.lang,
      line: declLineForEnd,
      col: hasDeclPos ? declCol : (firstCol || 1),
      ...(Number.isFinite(endLine) && endLine >= declLineForEnd ? { endLine } : {}),
      declarationLike,
      matchCount,
      content: hasDeclPos ? declContent : firstContent,
      context: hasDeclPos ? declContext : contextLines,
      firstLine,
      firstCol: firstCol || 1,
      firstContent,
      firstContext: contextLines,
    });
  }

  if (!hits.length) return [];
  return _sortSymbolHits(hits);
}
1894
+
1895
// Languages whose function bodies the callee body scanner can walk, i.e.
// C-style `{ }`-delimited bodies. Anything not listed gets the deterministic
// `(callees unsupported for <lang>)` skip downstream.
//
// Deliberately excluded:
//   - python, ruby, elixir: bodies are not brace-delimited (`do`/`end` etc.).
//   - bash: `do`/`done`/`fi`/`}` function bodies, not `{ }` in the C sense.
//   - lua: `function` / `end` bodies.
//   - r: bodies ARE braces (`f <- function(x) { ... }`), but the body scanner
//     only understands `//` and `/*` comments. A `}` or an unbalanced quote
//     inside an r `#` line comment would corrupt brace/quote tracking.
//
// kotlin/swift/scala and dart/objc/zig are C-style brace-bodied and stay in.
const _CALLEES_BRACE_LANGS = new Set([
  'javascript',
  'typescript',
  'java',
  'csharp',
  'kotlin',
  'go',
  'rust',
  'c',
  'cpp',
  'php',
  'swift',
  'scala',
  'dart',
  'objc',
  'zig',
]);
1914
+
1915
// Reserved words / syntactic keywords that can be followed by `(` and so look
// like call expressions without being function invocations. Mostly JS/TS,
// plus a few C-family words (`sizeof`, `using`, `namespace`).
const _CALLEES_JS_KEYWORDS = new Set([
  // Branching and loops
  'if', 'else', 'for', 'while', 'do', 'switch', 'case', 'default',
  // Flow transfer and exception handling
  'return', 'yield', 'await', 'throw', 'try', 'catch', 'finally',
  // Loop control and operators
  'break', 'continue', 'with', 'in', 'of', 'new', 'delete', 'typeof',
  'void', 'instanceof',
  // Declarations
  'function', 'class', 'const', 'let', 'var',
  // Object model and modules
  'this', 'super', 'extends', 'import', 'export', 'from', 'as',
  // Modifiers and literals
  'static', 'async', 'true', 'false', 'null', 'undefined',
  // C-family / TypeScript extras
  'sizeof', 'using', 'namespace', 'interface', 'type', 'enum',
]);
1926
+
1927
// Global names built into JS/TS runtimes: constructors, coercion helpers,
// namespaces and common web/Node globals. They are filtered ONLY while
// scanning JS/TS bodies, so a Go/Rust/etc. callee that happens to be named
// Map/Set/parse/get is not suppressed.
const _CALLEES_JS_BUILTINS = new Set([
  // Error constructors
  'Error', 'TypeError', 'RangeError', 'SyntaxError', 'ReferenceError',
  'EvalError', 'URIError', 'AggregateError',
  // Primitive wrappers and core constructors
  'String', 'Number', 'Boolean', 'Array', 'Object', 'Function',
  // Collections and weak references
  'Set', 'Map', 'WeakSet', 'WeakMap', 'WeakRef', 'FinalizationRegistry',
  // Other core constructors
  'Promise', 'Symbol', 'BigInt', 'Date', 'RegExp', 'Proxy',
  // Binary data
  'ArrayBuffer', 'SharedArrayBuffer', 'DataView',
  'Int8Array', 'Uint8Array', 'Uint8ClampedArray',
  'Int16Array', 'Uint16Array', 'Int32Array', 'Uint32Array',
  'Float32Array', 'Float64Array', 'BigInt64Array', 'BigUint64Array',
  // Coercion / parsing / URI helpers
  'parseInt', 'parseFloat', 'isNaN', 'isFinite',
  'encodeURI', 'encodeURIComponent', 'decodeURI', 'decodeURIComponent',
  'eval',
  // Global value names
  'globalThis', 'NaN', 'Infinity',
  // Namespaces. Normally seen as `Math.floor(` (where the bare name is not
  // the call target); listed for safety against direct-call shapes.
  'JSON', 'Math', 'Reflect', 'Atomics', 'Intl', 'console', 'process',
  // Web / DOM globals commonly invoked
  'fetch', 'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
  'queueMicrotask', 'structuredClone',
  'requestAnimationFrame', 'cancelAnimationFrame',
  'alert', 'confirm', 'prompt',
  // Module loading
  'require',
]);
1955
+
1956
// Builtin prototype/static method names. A member call `x.method(` whose
// method is one of these is a JS builtin (Array/String/Object/Promise/Map/
// Set/Math/JSON/Number/EventTarget), not a navigable user edge: listing it
// adds noise and resolves to a bogus same-named decl. Applied ONLY to the
// member-call pass so real library edges (send / on / emit /
// setRequestHandler) survive and a bare user `parse(` / `map(` is kept.
// Kept at module level so the Set is built once rather than on every call.
const _CALLEES_JS_BUILTIN_METHODS = new Set([
  'trim', 'trimStart', 'trimEnd', 'slice', 'splice', 'substring', 'substr', 'split',
  'join', 'concat', 'includes', 'indexOf', 'lastIndexOf', 'startsWith', 'endsWith',
  'padStart', 'padEnd', 'repeat', 'charAt', 'charCodeAt', 'codePointAt', 'at',
  'toUpperCase', 'toLowerCase', 'normalize', 'match', 'matchAll', 'search',
  'replace', 'replaceAll', 'push', 'pop', 'shift', 'unshift', 'reverse', 'sort',
  'flat', 'flatMap', 'forEach', 'map', 'filter', 'every', 'some', 'reduce',
  'reduceRight', 'find', 'findIndex', 'findLast', 'findLastIndex', 'fill',
  'copyWithin', 'toString', 'valueOf', 'hasOwnProperty', 'keys', 'values',
  'entries', 'assign', 'freeze', 'then', 'catch', 'finally', 'resolve', 'reject',
  'all', 'allSettled', 'race', 'any', 'get', 'set', 'has', 'add', 'delete', 'clear',
  'max', 'min', 'floor', 'ceil', 'round', 'abs', 'sqrt', 'pow', 'log', 'sign', 'trunc',
  'random', 'hypot', 'parse', 'stringify', 'parseInt', 'parseFloat', 'isInteger',
  'isFinite', 'isNaN', 'toFixed', 'isArray', 'from', 'of', 'addEventListener',
  'removeEventListener', 'dispatchEvent', 'bind', 'call', 'apply',
]);

/**
 * Walk `text` from `start`, skipping `//` line comments, block comments and
 * quoted runs (`"`, `'`, backtick; a backslash escapes the next character),
 * and hand every remaining character to `visit(ch, pos)`.
 *
 * @param {string} text - source text to walk.
 * @param {number} start - offset to begin at.
 * @param {(ch: string, pos: number) => boolean} visit - return true to stop.
 * @returns {number} the offset at which `visit` returned true, or -1 when the
 *   end of the text was reached first.
 */
function _scanCalleeCode(text, start, visit) {
  let inLineComment = false;
  let inBlockComment = false;
  let quote = '';
  let pos = start;
  while (pos < text.length) {
    const ch = text[pos];
    const next = text[pos + 1];
    if (inLineComment) {
      if (ch === '\n') inLineComment = false;
      pos += 1;
      continue;
    }
    if (inBlockComment) {
      if (ch === '*' && next === '/') { inBlockComment = false; pos += 2; continue; }
      pos += 1;
      continue;
    }
    if (quote) {
      if (ch === '\\') { pos += 2; continue; }
      if (ch === quote) quote = '';
      pos += 1;
      continue;
    }
    if (ch === '/' && next === '/') { inLineComment = true; pos += 2; continue; }
    if (ch === '/' && next === '*') { inBlockComment = true; pos += 2; continue; }
    if (ch === '"' || ch === "'" || ch === '`') { quote = ch; pos += 1; continue; }
    if (visit(ch, pos)) return pos;
    pos += 1;
  }
  return -1;
}

/**
 * Extract forward callees from a symbol's declaration: locate the body by
 * brace-depth scan, mask non-code text, harvest `identifier(` and
 * `obj.method(` calls, filter keywords + JS builtins, resolve each callee
 * against the graph (preferring same-file decls), and return structured rows
 * enriched for flow-tracing.
 *
 * Rows are unique per callee name, in discovery order: all bare calls in
 * source order first, then member calls. The caller's own name is skipped.
 *
 * @param {object} graph - symbol graph; `graph.nodes` maps rel path to node.
 * @param {object} declHit - declaration hit (`rel`, `lang`, `line`, `col`).
 * @param {string} _cwd - working directory, used to relativise import paths.
 * @param {{cap?: number, callerSymbol?: string|null, language?: string|null}} [options]
 *   `cap` bounds the number of callee rows; `callerSymbol` names the
 *   declaration being scanned; `language` is forwarded to callee resolution.
 * @returns {object[]} rows `{ name, callsitePath, callsiteLine, declPath,
 *   declLine, external, enclosing, snippet }`. `callsite*` points at the
 *   invocation in declHit's file; `decl*` points at the callee's declaration
 *   when the graph resolved one (else ''/0 with `external: true`);
 *   `enclosing` is the nearest enclosing symbol at the call site. When the
 *   cap trims the list, a final `{ name: '...', truncationFooter: true }`
 *   row is appended. Returns `[]` for unsupported languages or when no body
 *   or no calls are found.
 */
function _extractCallees(graph, declHit, _cwd, { cap = 200, callerSymbol = null, language = null } = {}) {
  if (!declHit || !_CALLEES_BRACE_LANGS.has(declHit.lang)) return [];
  const declNode = graph.nodes.get(declHit.rel);
  if (!declNode) return [];
  const sourceText = _getSourceTextForNode(graph, declNode);
  if (!sourceText) return [];

  // Anchor body discovery at the DECLARATION's start. Prefer the native
  // symbol record for callerSymbol over declHit's regex position: declHit.col
  // can land on an earlier same-line REFERENCE of the symbol (e.g.
  // `function a(){ if (b()){...} } function b(){...}`, where the `b()` call
  // precedes `function b`), which would lock body discovery onto the wrong
  // function. Native columns are UTF-8 byte columns and are converted to
  // code units below; declHit's regex line/col are already code-unit based.
  let declLineIdx = Math.max(0, (declHit.line || 1) - 1);
  let nativeStartCol = null;
  if (callerSymbol && Array.isArray(declNode.symbols)) {
    const wantedLine = declHit.line || 1;
    const rec = declNode.symbols
      .filter((s) => s && s.name === callerSymbol
        && Number.isFinite(Number(s.startLine)) && Number.isFinite(Number(s.startCol)))
      .sort((a, b) => Math.abs(Number(a.startLine) - wantedLine)
        - Math.abs(Number(b.startLine) - wantedLine))[0];
    if (rec) {
      declLineIdx = Math.max(0, Number(rec.startLine) - 1);
      nativeStartCol = Number(rec.startCol);
    }
  }

  // Offset of the first character of the declaration line.
  let i = 0;
  for (let ln = 0; i < sourceText.length && ln < declLineIdx; i += 1) {
    if (sourceText[i] === '\n') ln += 1;
  }
  const newlineAt = sourceText.indexOf('\n', i);
  const lineEnd = newlineAt < 0 ? sourceText.length : newlineAt;

  // Declaration column in code units: from the native byte column (converted
  // against this line's text) or declHit's regex char column.
  let declColChar;
  if (nativeStartCol != null) {
    declColChar = _byteColToCharCol(sourceText.slice(i, lineEnd), nativeStartCol);
  } else {
    declColChar = (Number.isFinite(declHit.col) && declHit.col > 1) ? declHit.col : 1;
  }
  // Advance to the declaration column (skips earlier same-line siblings /
  // references). Clamp to line end defensively.
  if (declColChar > 1) i = Math.min(i + (declColChar - 1), lineEnd);

  // Body start: the first `{` outside the parameter parens. A top-level `;`
  // first means the declaration has no brace body.
  let parenDepth = 0;
  let bodyStart = -1;
  _scanCalleeCode(sourceText, i, (ch, pos) => {
    if (ch === '(') { parenDepth += 1; return false; }
    if (ch === ')') { if (parenDepth > 0) parenDepth -= 1; return false; }
    if (parenDepth !== 0) return false;
    if (ch === '{') { bodyStart = pos; return true; }
    return ch === ';';
  });
  if (bodyStart < 0) return [];

  // Body end: the `}` that brings brace depth back to zero. An unterminated
  // body runs to the end of the file.
  let depth = 0;
  const closeAt = _scanCalleeCode(sourceText, bodyStart, (ch) => {
    if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      return depth === 0;
    }
    return false;
  });
  const bodyEnd = closeAt < 0 ? sourceText.length : closeAt;

  const rawBody = sourceText.slice(bodyStart + 1, bodyEnd);
  const maskedBody = _maskNonCodeText(rawBody, declNode.lang);
  // 1-based line of the opening `{`.
  const bodyStartLine = sourceText.slice(0, bodyStart + 1).split('\n').length;

  // Two passes:
  //   1) Bare identifier calls. The lookbehind excludes `.`-preceded names,
  //      so member-call methods are missed by this pass.
  //   2) Member-call methods: `obj.method(` / `obj?.method(`. Only the
  //      `method` token is captured. Real edges like `proc.send(`,
  //      `server.setRequestHandler(`, `emitter.on(` flow through this pass.
  const callRe = /(?<![\p{ID_Continue}$.])([\p{ID_Start}_][\p{ID_Continue}]*)(?=\s*\()/gu;
  const memberCallRe = /\.\s*\??\.?\s*([\p{ID_Start}_][\p{ID_Continue}]*)(?=\s*\()/gu;
  const seen = new Map(); // name -> { line, col, isMember }
  const selfName = callerSymbol || null;
  const recordHit = (name, index, isMember) => {
    if (!name) return;
    if (_CALLEES_JS_KEYWORDS.has(name)) return;
    if (_isJsLike(declHit.lang)) {
      if (_CALLEES_JS_BUILTINS.has(name)) return;
      if (isMember && _CALLEES_JS_BUILTIN_METHODS.has(name)) return;
    }
    if (selfName && name === selfName) return;
    if (seen.has(name)) return;
    const lineInBody = maskedBody.slice(0, index).split('\n').length - 1;
    const absLine = bodyStartLine + lineInBody;
    // 1-based char column of the call in its physical line, for
    // column-precise enclosing resolution on same-line / minified bodies.
    const absIndex = bodyStart + 1 + index;
    const lineStart = sourceText.lastIndexOf('\n', absIndex - 1) + 1;
    const charCol = absIndex - lineStart + 1;
    seen.set(name, { line: absLine, col: charCol, isMember });
  };
  let m = null;
  while ((m = callRe.exec(maskedBody))) recordHit(m[1], m.index, false);
  let mm = null;
  while ((mm = memberCallRe.exec(maskedBody))) {
    // mm.index points at the `.`; use the capture group's own offset so the
    // call-site line/column is that of the method name.
    const methodStart = mm.index + mm[0].length - mm[1].length;
    recordHit(mm[1], methodStart, true);
  }
  if (seen.size === 0) return [];

  const allUnique = [...seen.entries()];
  const sliced = allUnique.slice(0, cap);
  const sourceLines = sourceText.split(/\r?\n/);
  const rows = [];
  for (const [name, info] of sliced) {
    // Resolve the callee declaration via the same graph machinery used by
    // find_symbol, preferring a same-file declaration so local helpers like
    // `fail`/`ok` bind to the local copy instead of an unrelated file's.
    let resolvedPath = '';
    let resolvedLine = 0;
    let resolvedDecl = false;
    try {
      const calleeDecl = _resolveCalleeDeclaration(graph, name, { language, preferRel: declHit.rel });
      // INVARIANT: only treat the callee as resolved when the graph bound it
      // to a GENUINE declaration (declarationLike). The resolver can fall
      // back to a mere usage/import of the same name (readdirSync, join,
      // statSync); rejecting that keeps the row rendered as external.
      if (calleeDecl && calleeDecl.declarationLike) {
        // MEMBER calls (`x.write(`) carry no receiver identity, so a
        // same-named free function elsewhere is NOT evidence of an edge.
        // Accept the decl only when it lives in the caller's own file or a
        // file the caller DIRECTLY imports. Bare calls keep plain name
        // resolution: their decl may arrive via a re-export chain that the
        // import-edge check cannot see.
        const memberOk = !info.isMember
          || calleeDecl.rel === declHit.rel
          || (Array.isArray(declNode.resolvedImports)
            && declNode.resolvedImports.some((p) => _graphRel(p, _cwd) === calleeDecl.rel));
        if (memberOk) {
          resolvedPath = calleeDecl.rel;
          resolvedLine = calleeDecl.line || 0;
          resolvedDecl = true;
        }
      }
    } catch {
      // Identifier shapes that trip the lookup regex stay unresolved.
    }
    const callLineText = sourceLines[info.line - 1] || '';
    const snippet = String(callLineText).trim().slice(0, 80);
    // Enclosing-symbol lookup at the call site, via the same scanner the
    // callers/references formatter uses, so output is consistent across modes.
    let enclosing = '';
    try {
      const byteCol = _toByteColumn(callLineText, info.col);
      const enc = _nearestEnclosingSymbol(declNode, sourceText, info.line, byteCol);
      enclosing = enc?.name || '';
    } catch {
      // Non-fatal: the row simply reports no enclosing symbol.
    }
    rows.push({
      name,
      callsitePath: declHit.rel,
      callsiteLine: info.line,
      declPath: resolvedPath,
      declLine: resolvedLine,
      external: !resolvedDecl,
      enclosing,
      snippet,
    });
  }
  if (allUnique.length > sliced.length) {
    rows.push({
      name: '...',
      callsitePath: '',
      callsiteLine: 0,
      declPath: '',
      declLine: 0,
      enclosing: '',
      snippet: `+${allUnique.length - sliced.length} more callees (cap=${cap})`,
      truncationFooter: true,
    });
  }
  return rows;
}
2240
+
2241
// Render one callee row as a single tab-separated, flow-traceable line:
//   `name\tcallsite <path:line>\tdecl <path:line>\t(in <enclosing>)\tnext: find_symbol({symbol:"name"})`
//
// Cell rules:
//   - truncation footer rows render as `... <snippet>` and nothing else;
//   - `callsite (unknown)` when the row carries no call-site path;
//   - `decl (unresolved)` when the graph could not bind the callee to a
//     declaration;
//   - `(in ?)` when no enclosing symbol was found;
//   - `row.external` rows (a Node builtin or external-module name whose only
//     graph match is an import/usage of the same name) render
//     `decl (external/builtin)` and OMIT the `next:` hint, so the caller is
//     not sent on a wasted find_symbol.
function _formatCalleeRow(row) {
  if (row.truncationFooter) {
    return `... ${row.snippet}`;
  }

  const label = `${row.name}`;
  const site = row.callsitePath
    ? `callsite ${row.callsitePath}:${row.callsiteLine}`
    : 'callsite (unknown)';
  const where = row.enclosing ? `(in ${row.enclosing})` : '(in ?)';

  const cells = [label, site];
  if (row.external) {
    cells.push('decl (external/builtin)', where);
  } else {
    const declCell = row.declPath
      ? `decl ${row.declPath}:${row.declLine}`
      : 'decl (unresolved)';
    cells.push(declCell, where, `next: find_symbol({symbol:"${label}"})`);
  }
  return cells.join('\t');
}
2261
+
2262
// Ranking key for a symbol that contains `keyword` as a case-insensitive
// substring, or null when it does not. The key is
// `[nameLength, 0-if-match-at-start-else-1, matchIndex, originalName]`, so
// callers comparing slot by slot prefer shorter names, then prefix matches,
// then earlier matches.
function _keywordSymbolSortKey(symbolName, keyword) {
  const haystack = String(symbolName || '').toLowerCase();
  const needle = String(keyword || '').toLowerCase();
  const position = haystack.indexOf(needle);
  if (position === -1) return null;
  const prefixRank = position === 0 ? 0 : 1;
  return [haystack.length, prefixRank, position, symbolName];
}
2270
+
2271
// Split a search keyword into lower-cased tokens. Boundaries are a
// lower/digit -> Upper camelCase transition and any run of non-alphanumeric
// characters: "capOutput" -> ["cap","output"], "smart_read" -> ["smart","read"].
function _tokenizeKeyword(s) {
  const spaced = String(s || '').replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  const tokens = [];
  for (const piece of spaced.split(/[^a-zA-Z0-9]+/)) {
    if (piece) tokens.push(piece.toLowerCase());
  }
  return tokens;
}
2280
+
2281
// Offsets within `sym` at which a token starts, using the SAME boundary rules
// as _tokenizeKeyword: a token starts at the beginning of the string, right
// after any non-alphanumeric separator, and at a lower/digit -> Upper
// camelCase transition. Only ASCII letters and digits count as alphanumeric.
// Returns a Set of 0-based indices.
function _tokenStartOffsets(sym) {
  const offsets = new Set();
  // Class of the previous character: 'sep' (separator or string start),
  // 'upper' (ASCII A-Z) or 'lower' (ASCII a-z / 0-9).
  let previous = 'sep';
  for (let pos = 0; pos < sym.length; pos += 1) {
    const ch = sym[pos];
    if (!/[A-Za-z0-9]/.test(ch)) {
      previous = 'sep';
      continue;
    }
    const upper = /[A-Z]/.test(ch);
    const afterSeparator = previous === 'sep';
    const camelBoundary = upper && previous !== 'upper';
    if (afterSeparator || camelBoundary) offsets.add(pos);
    previous = upper ? 'upper' : 'lower';
  }
  return offsets;
}
2300
+
2301
// True when `lowerKey` occurs in `sym` (case-insensitively) as a TOKEN-ALIGNED
// contiguous substring: some occurrence either starts at a token boundary, or
// lies entirely within a single token (no token boundary strictly inside the
// matched span). This rejects raw substring noise that straddles a camelCase
// boundary — e.g. "redact" inside "sharedActual" ("sha|red" + "act|ual") —
// while keeping genuine hits such as "redact" in "redactString" and
// within-token partials such as "edact" in "redactString".
function _contiguousMatchTokenAligned(sym, lowerKey) {
  const keyLength = lowerKey.length;
  if (keyLength === 0) return false;

  const haystack = sym.toLowerCase();
  const boundaries = [..._tokenStartOffsets(sym)];

  // A keyword may carry leading separators (e.g. "_redact") which
  // _tokenStartOffsets never reports as token starts, so the boundary test is
  // anchored on the keyword's first alphanumeric character instead.
  const firstAlnum = lowerKey.search(/[a-z0-9]/);
  const lead = firstAlnum === -1 ? keyLength : firstAlnum;

  for (
    let at = haystack.indexOf(lowerKey, 0);
    at >= 0;
    at = haystack.indexOf(lowerKey, at + 1)
  ) {
    const anchor = at + lead;
    const stop = at + keyLength;
    if (boundaries.includes(anchor)) return true;
    const crossesBoundary = boundaries.some((b) => b > anchor && b < stop);
    if (!crossesBoundary) return true; // wholly inside one token
  }
  return false;
}
2338
+
2339
// Ordered multi-token match: every token must be found in `symLower`, in
// order, each one starting at or after the end of the previous match. This is
// a deliberate widening of keyword matching — NOT an error-recovery fallback —
// so a keyword that drops a middle camelCase token ("capOutput") still
// resolves the full symbol ("capToolOutput", in which "capoutput" is not a
// contiguous substring). The caller tries the precise contiguous check first
// and only reaches this for multi-token keywords, which bounds false positives.
function _orderedTokenMatch(symLower, tokens) {
  let cursor = 0;
  return Array.from(tokens).every((token) => {
    const found = symLower.indexOf(token, cursor);
    if (found < 0) return false;
    cursor = found + token.length;
    return true;
  });
}
2355
+
2356
// Collect every distinct symbol name in the graph's token index that matches
// `keyword`, ranked best-first.
//
// Only `*|<name>` index keys are scanned; when `language` is given a name is
// kept only if a `<language>|<name>` key also exists. A name matches when the
// keyword is a token-aligned contiguous substring of it (a raw substring that
// crosses a camelCase boundary — "redact" inside "sharedActual" — is noise),
// or, for keywords of two or more tokens only, when all keyword tokens occur
// in order. Single-token keywords never use the looser ordered match.
//
// Ranking: names with a contiguous sort key come before token-only matches so
// a cap never hides a better contiguous hit; ties fall back to localeCompare.
function _collectKeywordSymbolNames(graph, keyword, { language = null } = {}) {
  _ensureSymbolTokenIndex(graph);
  const needle = String(keyword || '').toLowerCase();
  if (!needle) return [];
  const needleTokens = _tokenizeKeyword(keyword);
  const tokenIndex = graph?._symbolTokenIndex;
  if (!tokenIndex) return [];

  const isMatch = (name) => _contiguousMatchTokenAligned(name, needle)
    || (needleTokens.length >= 2 && _orderedTokenMatch(name.toLowerCase(), needleTokens));

  // A Set keeps insertion order, so it doubles as the de-duplicated result list.
  const picked = new Set();
  for (const indexKey of tokenIndex.keys()) {
    if (!indexKey.startsWith('*|')) continue;
    const name = indexKey.slice(2);
    if (!name || picked.has(name)) continue;
    if (!isMatch(name)) continue;
    if (language && !tokenIndex.has(`${language}|${name}`)) continue;
    picked.add(name);
  }

  // Decorate with the sort key once per name, sort, then strip the key again.
  const ranked = [...picked].map((name) => ({
    name,
    key: _keywordSymbolSortKey(name, keyword),
  }));
  ranked.sort((left, right) => {
    if (left.key && !right.key) return -1;
    if (!left.key && right.key) return 1;
    if (left.key && right.key) {
      for (let slot = 0; slot < 3; slot += 1) {
        const delta = left.key[slot] - right.key[slot];
        if (delta !== 0) return delta;
      }
    }
    return left.name.localeCompare(right.name);
  });
  return ranked.map((item) => item.name);
}
2401
+
2402
// One keyword-search result row: `name\t<location>\tnext: find_symbol(...)`.
// The location cell is `(unresolved)` when no hit is available for the name.
function _formatSearchSymbolRow(name, hit) {
  let location = '(unresolved)';
  if (hit) {
    location = _formatSymbolHitLocation(hit);
  }
  return `${name}\t${location}\tnext: find_symbol({symbol:"${name}"})`;
}
2407
+
2408
// Keyword search over the symbol index, rendered as a text listing.
//
// Parameters:
//   graph    - code graph; `graph.nodes.size` and `graph.truncated` are read
//              here, the symbol index is read by the helpers this calls.
//   keyword  - search text; surrounding whitespace is ignored.
//   cwd      - only echoed in the "no matches" message.
//   language - optional hard language scope passed through to the helpers.
//   limit    - max resolved rows to show; clamped to 1..100, default 30.
//
// Returns a newline-joined string: a `# search ...` header, one row per
// resolved name (best first), an overflow line when the cap hides rows, a
// line listing names that have no locatable hit, an optional truncation
// warning, and a closing NEXT steer. Returns `(no keyword)` for a blank
// keyword and a two-line miss report when nothing matches.
function _searchSymbolsByKeyword(graph, keyword, cwd, { language = null, limit = 30 } = {}) {
  const clean = String(keyword || '').trim();
  if (!clean) return '(no keyword)';
  const cap = Math.max(1, Math.min(100, Math.floor(Number(limit) || 30)));
  const allNames = _collectKeywordSymbolNames(graph, clean, { language });
  if (!allNames.length) {
    const nodeCount = graph?.nodes?.size ?? 0;
    return `(no symbol keyword matches in cwd=${cwd})\ngraph: nodes=${nodeCount}${language ? `, language=${language}` : ''}`;
  }
  const entries = allNames.map((name) => {
    const hits = _sortSymbolHits(_findSymbolHits(graph, name, { language }));
    const hit = _pickCalleeDeclHit(hits) || hits[0] || null;
    // Rank with the SAME trimmed keyword the names were collected with. Using
    // the raw `keyword` here made every sort key null whenever the input had
    // surrounding whitespace, silently degrading the ranking to alphabetical.
    return { name, hit, resolved: Boolean(hit), sortKey: _keywordSymbolSortKey(name, clean) };
  });
  entries.sort((a, b) => {
    // Resolved names always come before unresolved ones.
    const rank = Number(b.resolved) - Number(a.resolved);
    if (rank !== 0) return rank;
    const ka = a.sortKey;
    const kb = b.sortKey;
    // Contiguous matches rank before token-only matches (see _collectKeywordSymbolNames).
    if (ka && !kb) return -1;
    if (!ka && kb) return 1;
    if (!ka && !kb) return a.name.localeCompare(b.name);
    for (let i = 0; i < 3; i += 1) {
      if (ka[i] !== kb[i]) return ka[i] - kb[i];
    }
    return a.name.localeCompare(b.name);
  });
  const resolvedEntries = entries.filter((e) => e.resolved);
  const unresolvedNames = entries.filter((e) => !e.resolved).map((e) => e.name);
  const shownResolved = resolvedEntries.slice(0, cap);
  const lines = [`# search keyword=${clean} matches=${allNames.length} shown=${shownResolved.length}`];
  for (const { name, hit } of shownResolved) {
    lines.push(_formatSearchSymbolRow(name, hit));
  }
  if (resolvedEntries.length > shownResolved.length) {
    lines.push(`...+${resolvedEntries.length - shownResolved.length} more resolved (cap=${cap})`);
  }
  if (unresolvedNames.length) {
    lines.push(`+${unresolvedNames.length} unresolved name variants (token-only, no declaration — find_symbol will miss these; grep to locate): ${unresolvedNames.join(', ')}`);
  }
  if (graph?.truncated) {
    lines.push(`WARN: graph truncated at CODE_GRAPH_MAX_FILES=${CODE_GRAPH_MAX_FILES} — matches may be incomplete. Re-run with a narrower cwd.`);
  }
  // Sufficiency steer: this listing IS the symbol index. The next step is a
  // STRUCTURED find_symbol on the best match (body:true → location + full body
  // in ONE call), NOT a grep/read sweep across the matches. Once find_symbol
  // answers the question, report.
  lines.push(`# NEXT — pick the row that best fits and find_symbol it (body:true returns its location + full body in one call). These are the index; do NOT grep/read to hunt. If find_symbol answers it, report.`);
  return lines.join('\n');
}
2459
+
2460
// Build the "no match" report for find_symbol. A silent miss was burning
// iterations — the caller had no signal whether to retry with a different cwd
// or accept the miss — so this surfaces graph stats, a truncation warning and
// a case-insensitive "did you mean" suggestion (callworker → callWorker).
function _describeSymbolMiss(graph, symbol, cwd, { language = null, fileRel = null } = {}) {
  const nodeCount = graph?.nodes?.size ?? 0;
  const scopeNote = fileRel ? ` file=${fileRel}` : '';
  const lines = [`(no symbol matches in cwd=${cwd}${scopeNote})`];
  lines.push(`graph: nodes=${nodeCount}${language ? `, language=${language}` : ''}`);
  if (graph?.truncated) {
    lines.push(`WARN: graph truncated at CODE_GRAPH_MAX_FILES=${CODE_GRAPH_MAX_FILES} — symbol may exist in an un-indexed file. Re-run with a narrower cwd.`);
  }
  // Coerce so a non-string symbol yields a clean miss instead of a TypeError.
  const wanted = String(symbol ?? '');
  const lowerSym = wanted.toLowerCase();
  const ciHits = [];
  if (wanted && graph?._symbolTokenIndex && nodeCount > 0) {
    for (const key of graph._symbolTokenIndex.keys()) {
      const idx = key.indexOf('|');
      if (idx < 0) continue;
      // Index keys are `<language>|<name>` plus a language-agnostic `*|<name>`.
      // `language` is a hard scope, so only suggest names indexed under it.
      if (language && key.slice(0, idx) !== language) continue;
      const symPart = key.slice(idx + 1);
      if (symPart === wanted || symPart.toLowerCase() !== lowerSym) continue;
      if (!ciHits.includes(symPart)) ciHits.push(symPart);
      if (ciHits.length >= 3) break;
    }
  }
  // The suggestions used to be collected and then dropped; emit them.
  if (ciHits.length) lines.push(`did you mean: ${ciHits.join(', ')}?`);
  return lines.join('\n');
}

// Render the declaration span of `primary` as numbered source lines, or return
// null when no body can be produced (no finite line, no source text, or the
// declaration line lies beyond the end of the file).
function _renderDeclarationBody(graph, primary, symbol) {
  if (!Number.isFinite(Number(primary.line))) return null;
  const node = graph.nodes.get(primary.rel);
  const srcText = node ? _getSourceTextForNode(graph, node) : null;
  if (!srcText) return null;
  const all = srcText.split('\n');
  const start = Math.max(1, Number(primary.line));
  let end = Number(primary.endLine);
  // Assignment-style declarations (`const f = (…) => {`) carry no endLine in
  // the graph; falling back to the bare declaration line emits a 1-line body.
  // Recover the span from indentation first.
  if (!Number.isFinite(end) || end < start) {
    end = _inferSpanEndByIndent(all, start) ?? start;
  }
  // Cap at 300 lines AND at the real file length: a stale endLine past EOF
  // would otherwise print `N: undefined` rows in the elided branch.
  end = Math.min(end, start + 299, all.length);
  if (end < start) return null;
  // Large bodies flood context when the caller only needed location+callees,
  // so above a threshold emit head+tail with an elision marker; small spans
  // stay whole to keep the one-call utility.
  const BODY_FULL_MAX = 120; // spans ≤ this emit verbatim
  const BODY_HEAD = 90; // leading lines kept when eliding
  const BODY_TAIL = 20; // trailing lines kept when eliding
  const fmt = (i) => `${start + i}: ${all[start - 1 + i]}`;
  const span = end - start + 1;
  if (span <= BODY_FULL_MAX) {
    return Array.from({ length: span }, (_, i) => fmt(i)).join('\n');
  }
  const head = Array.from({ length: BODY_HEAD }, (_, i) => fmt(i));
  const tail = Array.from({ length: BODY_TAIL }, (_, i) => fmt(span - BODY_TAIL + i));
  const elided = span - BODY_HEAD - BODY_TAIL;
  head.push(`... [${elided} lines elided — full body: read ${primary.rel} symbol=${symbol}]`);
  return [...head, ...tail].join('\n');
}

// find_symbol: locate `symbol` in the graph and render a text report.
//
// Options:
//   language - hard scope; results never widen to other languages on a miss
//              (returning a different-language hit produced misleading output).
//   limit    - number of candidate rows listed (at least 1), default 5.
//   fileRel  - SCOPE ISOLATION: keep only hits in this file rather than every
//              same-named symbol across the project.
//   body     - when exactly `true`, include the declaration's source span.
//
// The report contains, in order: an ambiguity warning when 2+ declarations
// share the name, the best declaration candidate (with body or a content
// excerpt), the candidate list, a builtin-collision note when every hit is a
// reference, the declaration's callees, and a scope footer. A miss returns the
// report built by _describeSymbolMiss.
function _findSymbolAcrossGraph(graph, symbol, cwd, { language = null, limit = 5, fileRel = null, body = true } = {}) {
  const allHits = _findSymbolHits(graph, symbol, { language });
  const hits = fileRel ? allHits.filter((h) => h.rel === fileRel) : allHits;
  if (!hits.length) {
    return _describeSymbolMiss(graph, symbol, cwd, { language, fileRel });
  }

  const topHits = hits.slice(0, Math.max(1, limit));
  const primary = topHits[0];
  const declHits = hits.filter((h) => h.declarationLike);
  const declCount = declHits.length;
  const lines = [];
  // Ambiguity guard: with 2+ genuine declarations of the same name, a caller
  // acting on the "best candidate" alone may patch the wrong definition.
  // Prepend an explicit warning listing every declaration's file:line.
  if (declCount > 1) {
    lines.push(`⚠ ${declCount} declarations found — verify which one you intend`);
    for (const h of declHits) lines.push(` ${_formatSymbolHitLocation(h)} [${h.lang}]`);
    lines.push('');
  }
  if (primary?.declarationLike) {
    // Under truncation the "best" candidate is only best AMONG indexed files,
    // so flag the caveat inline at the prominent claim, not just in the footer.
    lines.push(graph?.truncated
      ? '# best declaration candidate (GRAPH TRUNCATED — may not be canonical; re-run with a narrower cwd to confirm)'
      : '# best declaration candidate');
    const multi = declCount > 1 ? `, declarations=${declCount}` : '';
    lines.push(`${_formatSymbolHitLocation(primary)} (${primary.lang}, matches=${primary.matchCount}${multi})`);
    // body:true → emit the declaration span so review/debug gets the function
    // in ONE call instead of find_symbol + a follow-up read.
    const bodyText = body === true ? _renderDeclarationBody(graph, primary, symbol) : null;
    if (bodyText !== null) {
      lines.push(bodyText);
    } else {
      if (primary.content) lines.push(primary.content.slice(0, 100));
      if (Array.isArray(primary.context) && primary.context.length > 1) {
        lines.push(`context: ${primary.context.slice(0, 2).join(' | ').slice(0, 120)}`);
      }
    }
    if (declCount > 1) {
      const others = declHits.slice(1, 3).map((h) => `${_formatSymbolHitLocation(h)} [${h.lang}]`);
      if (others.length) lines.push(`other declarations: ${others.join(', ')}`);
    }
    lines.push('');
  }
  lines.push('# candidates');
  lines.push(...topHits.map((hit, idx) => {
    const kind = hit.declarationLike ? 'decl' : 'ref';
    const suffix = hit.content ? ` — ${hit.content.slice(0, 100)}` : '';
    return `${idx + 1}. ${_formatSymbolHitLocation(hit)} [${kind}, ${hit.lang}, matches=${hit.matchCount}]${suffix}`;
  }));
  // BUILTIN-COLLISION NOTE: every hit is a `[ref]` with no `[decl]` — surface
  // that the user has no declaration of this name so the caller can stop
  // hunting for one (e.g. global `fetch`/`read`/`console`).
  if (declCount === 0 && hits.length > 0) {
    lines.push('');
    lines.push(`(no user declaration found; likely a global/builtin identifier — all ${hits.length} hits are references)`);
  }
  // STRUCTURAL FORWARD GRAPH: append the symbol's callees inline so a single
  // find_symbol returns declaration + what it calls. Unconditional; the cap is
  // smaller than the explicit `callees` mode to keep the response compact.
  if (primary?.declarationLike) {
    const calleeRows = _extractCallees(graph, primary, cwd, {
      cap: 25,
      callerSymbol: symbol,
      language,
    });
    lines.push('');
    lines.push('# callees');
    if (calleeRows.length) {
      for (const row of calleeRows) {
        lines.push(_formatCalleeRow(row));
      }
    } else {
      lines.push('(no callees)');
    }
  }
  // Footer: surface the active scope so the caller sees which graph answered,
  // avoiding the wrong-cwd retry when the answer came from an unintended tree.
  const _nodeCount = graph?.nodes?.size ?? 0;
  const truncatedSuffix = graph?.truncated
    ? ` [WARN: graph truncated at CODE_GRAPH_MAX_FILES=${CODE_GRAPH_MAX_FILES} — some files not indexed]`
    : '';
  const fileScopeSuffix = fileRel ? ` file=${fileRel}` : '';
  lines.push(`\n# scope: cwd=${cwd} graph=${_nodeCount}-nodes${language ? ` language=${language}` : ''}${fileScopeSuffix}${truncatedSuffix}`);
  return lines.join('\n');
}
2618
+
2619
// Pick the graph node whose language should scope a reference search for
// `symbol`. Returns `{ kind, node, file }` where `kind` is one of:
//   'ok'                 - `node` is usable;
//   'file-not-found'     - `rel` was supplied but the graph has no node for
//                          it (typo, unsupported extension, or outside cwd);
//                          kept distinct so callers can render a precise error;
//   'symbol-not-present' - the file is indexed but never mentions the symbol,
//                          or no hit exists anywhere in the graph.
//
// With `rel`, the node is accepted only when the symbol actually appears in
// that file. Accepting it merely because the path was indexed caused language
// bleed: `references(symbol=Foo, file=bar.py)` adopted bar.py's language even
// when Foo never appears there. Without `rel`, the first declaration-like hit
// (else the first hit) decides; `language` is a hard filter that is never
// widened on a miss. `cwd` is accepted for signature compatibility and unused.
function _resolveReferenceLanguageNode(graph, symbol, rel, cwd, language = null) {
  const absent = (file) => ({ kind: 'symbol-not-present', node: null, file });

  if (!rel) {
    const hits = _findSymbolHits(graph, symbol, { language });
    if (!hits.length) return absent(null);
    const best = hits.find((hit) => hit.declarationLike) || hits[0];
    const owner = best?.rel ? graph.nodes.get(best.rel) || null : null;
    return owner ? { kind: 'ok', node: owner, file: owner.rel } : absent(null);
  }

  const node = graph.nodes.get(rel);
  if (!node) {
    return { kind: 'file-not-found', node: null, file: rel };
  }

  const wanted = String(symbol || '');
  const found = { kind: 'ok', node, file: rel };

  // Preferred evidence: the symbol is one of the file's indexed tokens.
  const tokens = _getTokenSymbolsForNode(graph, node);
  if (Array.isArray(tokens) && tokens.includes(wanted)) return found;

  // Fallback: substring scan over the source for non-identifier shapes
  // (e.g. method calls on values whose token list misses the name).
  const text = _getSourceTextForNode(graph, node);
  if (typeof text === 'string' && text.includes(wanted)) return found;

  return absent(rel);
}
2656
+
2657
// Reduce reference-search output (`file:line:col[ <rest>]` per line) to the
// sorted, de-duplicated list of file paths, one per line. Lines that do not
// look like a location are ignored. Returns `(no callers)` for non-string or
// blank input, for the `(no references)` sentinel, and when no line parses.
function _collapseReferenceLinesToCallers(referenceText) {
  const NONE = '(no callers)';
  if (typeof referenceText !== 'string') return NONE;
  if (!referenceText.trim() || referenceText === '(no references)') return NONE;

  const locationLine = /^(.+?):\d+:\d+(?:[\s\t]+.*)?$/;
  const paths = referenceText
    .split('\n')
    .map((raw) => raw.trim())
    .filter(Boolean)
    .map((row) => locationLine.exec(row))
    .filter(Boolean)
    .map((match) => match[1]);

  const unique = [...new Set(paths)];
  return unique.length > 0 ? unique.sort().join('\n') : NONE;
}
2671
+
2672
// Classify one source line that mentions `symbol` as 'declaration', 'import',
// 'call' or 'reference' (checked in that order). An empty symbol is always
// 'reference'.
//
// The declaration keywords span every language _collectCheapSymbols supports
// (JS/TS, Python, Go, Rust, Java/Kotlin/C#, C/C++, Ruby/PHP). A line where one
// of them introduces the target symbol is the declaration site itself, not a
// call site, and must be excluded from find_callers. With JS-only keywords,
// Python `def`, Go `func`, Rust `fn`, Kotlin `fun`, C/C++
// `struct/union/typedef` and Ruby `module` declaration lines were classified
// as `call` (a self-match), making caller counts inconsistent across languages.
function _referenceKind(line, symbol, lang = null) {
  const escaped = String(symbol || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  if (!escaped) return 'reference';
  const text = String(line || '');

  const DECLARATION_KEYWORDS = [
    // type-like declarations
    'function', 'class', 'interface', 'type', 'enum', 'record', 'struct', 'union',
    // scope-like declarations
    'namespace', 'module', 'package', 'trait', 'impl', 'object',
    // binding declarations
    'const', 'let', 'var', 'val', 'typedef',
    // single-word function declarations
    'def', 'fn', 'fun',
  ].join('|');

  const keywordDecl = new RegExp(`\\b(?:${DECLARATION_KEYWORDS})\\s+${escaped}\\b`);
  // Go `func name(...)` or `func (recv) name(...)` with an optional receiver.
  const goFuncDecl = new RegExp(`\\bfunc(?:\\s*\\([^)]*\\))?\\s+${escaped}\\b`);
  if (keywordDecl.test(text) || goFuncDecl.test(text)) return 'declaration';

  const bounded = _unicodeBoundaryPattern(escaped, lang, symbol);
  if (new RegExp(`\\bimport\\b[\\s\\S]*${bounded}`, 'u').test(text)) return 'import';
  if (new RegExp(`${bounded}\\s*\\(`, 'u').test(text)) return 'call';
  return 'reference';
}
2702
+
2703
// Translate a 1-based UTF-16 code-unit column (the reference scanner emits
// `match.index + 1`, a JS string index) into a 1-based UTF-8 byte column, the
// unit used by the native symbol records' tree-sitter columns. Without this, a
// non-ASCII prefix earlier on the same line misaligns JS columns against
// native byte columns and could exclude the correct same-line sibling.
// A non-finite or sub-1 `charCol` is returned unchanged.
function _toByteColumn(lineText, charCol) {
  const usable = Number.isFinite(charCol) && charCol >= 1;
  if (!usable) return charCol;
  const leading = String(lineText || '').substring(0, charCol - 1);
  return 1 + Buffer.byteLength(leading, 'utf8');
}
2713
+
2714
// Inverse of _toByteColumn: map a 1-based UTF-8 byte column (as emitted by the
// native tree-sitter symbol records) back to a 1-based UTF-16 code-unit column
// usable as a JS string index. Iterates by code point so a surrogate pair
// (e.g. an emoji) advances the code-unit column by 2 while contributing its
// real byte width. Non-finite or sub-1 input yields column 1; a byte column
// past the end of the line yields `length + 1`.
function _byteColToCharCol(lineText, byteCol) {
  if (!Number.isFinite(byteCol) || byteCol < 1) return 1;
  const targetBytes = byteCol - 1;
  let consumedBytes = 0;
  let codeUnits = 0;
  for (const glyph of String(lineText || '')) {
    if (consumedBytes >= targetBytes) break;
    consumedBytes += Buffer.byteLength(glyph, 'utf8');
    codeUnits += glyph.length;
  }
  return codeUnits + 1;
}
2730
+
2731
// Find the symbol record in `node.symbols` that most tightly encloses the
// position (`lineNumber`, optional `col`), or null when none does.
//
// SINGLE SOURCE OF TRUTH: only the node's per-symbol records are consulted.
// The cheap regex scanner is intentionally NOT used for enclosing resolution —
// its loose `name(args){` shapes carry no end-of-body span, so using them
// would reintroduce the endLine-less nearest-declaration mis-attribution. A
// node with no symbol records resolves to null rather than a guess.
// `sourceText` is accepted for signature compatibility and is not read.
//
// Containment is by [line, endLine]. When `col` is known it refines ONLY the
// boundary lines, so several declarations sharing one physical line
// (minified / compact code) are told apart: a declaration that opens after
// `col` on its start line, or closes before `col` on its end line, is
// excluded. Columns are 1-based.
//
// Selection: the latest start line wins, then the rightmost start column on
// a tie (innermost of same-line siblings). A function-like kind is preferred;
// otherwise the nearest containing symbol of any kind is returned.
function _nearestEnclosingSymbol(node, sourceText, lineNumber, col = null) {
  const BODY_BEARING_KINDS = new Set([
    'function', 'method', 'arrow', 'class', 'generator', 'fn', 'async-function',
    // Body-bearing constructs reported by the native extractor.
    'constructor', 'record', 'local-function',
  ]);
  const records = Array.isArray(node?.symbols) ? node.symbols : [];

  const encloses = (sym) => {
    const lastLine = Number(sym.endLine);
    if (sym.line > lineNumber || lastLine < lineNumber) return false;
    if (col == null) return true;
    const firstLine = Number(sym.startLine);
    const firstCol = Number(sym.startCol);
    const lastCol = Number(sym.endCol);
    const opensAfter = Number.isFinite(firstLine)
      && firstLine === lineNumber
      && Number.isFinite(firstCol)
      && col < firstCol;
    const closesBefore = lastLine === lineNumber
      && Number.isFinite(lastCol)
      && col > lastCol;
    return !opensAfter && !closesBefore;
  };

  const innermostFirst = (a, b) => {
    const byLine = b.line - a.line;
    if (byLine) return byLine;
    return (Number(b.startCol) || 0) - (Number(a.startCol) || 0);
  };

  const enclosing = records.filter(encloses).sort(innermostFirst);
  const isBodyBearing = (sym) => BODY_BEARING_KINDS.has(String(sym.kind || '').toLowerCase());
  return enclosing.find(isBodyBearing) || enclosing[0] || null;
}
2777
+
2778
// NOTE on the caller cap (applies to the default `limit` of
// _formatCallerReferences): raised from 40 to 200 after HS-A5 surfaced that
// callers of a cross-codebase symbol (`parseInt` across refs/) were silently
// truncated at 40, hiding all codex/ and warp/ matches. Tail-trim still bounds
// the payload; a higher cap is the invariant-correct fix vs. asking every
// caller to pass an explicit limit.
//
// Classify each reference of `symbol` into a structured entry
// `{ ...parsedEntry, kind, caller, lineText }`. `kind` comes from
// _referenceKind (with the promotion described below), `caller` is the name of
// the enclosing symbol for `call` entries (else ''), and `lineText` is the
// trimmed source line. Shared by the string formatter and the
// transitive-callers walker so the latter never has to re-parse formatted text.
//
// Entries are skipped when their file has no graph node, when the node's
// source text is unavailable, or when the referenced line is blank.
function _collectCallerEntries(graph, symbol, referenceText) {
  const entries = _parseReferenceEntries(referenceText);
  const detailed = [];
  // Per-file cache so a file referenced many times is fetched and split once.
  // `declLines` is filled lazily: files that never need the
  // keyword-less-method fallback pay nothing for it.
  const fileCache = new Map();
  for (const entry of entries) {
    const node = graph.nodes.get(entry.file);
    if (!node) continue;
    let cached = fileCache.get(entry.file);
    if (!cached) {
      const sourceText = _getSourceTextForNode(graph, node);
      cached = {
        sourceText,
        // The source text can be unavailable (other callers in this file
        // guard for a non-string result); record null instead of throwing.
        sourceLines: typeof sourceText === 'string' ? sourceText.split(/\r?\n/) : null,
        declLines: null,
      };
      fileCache.set(entry.file, cached);
    }
    if (!cached.sourceLines) continue;
    const { sourceText, sourceLines } = cached;
    const rawLine = sourceLines[entry.line - 1] || '';
    const line = String(rawLine).trim();
    if (!line) continue;
    let kind = _referenceKind(line, symbol, node.lang);
    // Keyword-less method declaration guard. The keyword-based regex in
    // _referenceKind cannot recognise Java/C#/C++ method declarations shaped
    // `[modifier] [type] name(args) [{|;]` because they introduce the symbol
    // with no declaration keyword. If a `call` line coincides with a symbol
    // record of the same name, promote it to `declaration` so it drops out of
    // the call sites.
    if (kind === 'call') {
      if (!cached.declLines) {
        const records = Array.isArray(node.symbols) && node.symbols.length
          ? node.symbols
          : _collectCheapSymbols(sourceText, node.lang);
        cached.declLines = new Set();
        for (const sym of records) {
          if (sym && sym.name === symbol) cached.declLines.add(sym.line);
        }
      }
      if (cached.declLines.has(entry.line)) kind = 'declaration';
    }
    const _encByteCol = _toByteColumn(rawLine, entry.col);
    const enclosing = _nearestEnclosingSymbol(node, sourceText, entry.line, _encByteCol);
    detailed.push({
      ...entry,
      kind,
      caller: kind === 'call' ? (enclosing?.name || '') : '',
      lineText: line,
    });
  }
  return detailed;
}
2831
+
2832
// Render the callers of `symbol` as text.
//   - No classified entries at all          -> `(no callers)`.
//   - At least one `call` entry             -> `# call sites` followed by up
//     to `limit` rows and a `... +N more call sites` line on overflow.
//   - Entries exist but none is a call      -> `(no call sites)` followed by a
//     `# non-call references` section capped at 40 rows, with an overflow line.
// Row shape: `file:line:col\tkind[\tcaller=<name>]\t<first 80 chars of line>`.
function _formatCallerReferences(graph, symbol, referenceText, { limit = 200 } = {}) {
  const entries = _collectCallerEntries(graph, symbol, referenceText);
  if (entries.length === 0) return '(no callers)';

  const renderRow = ({ file, line, col, kind, caller, lineText }) => {
    const callerCell = caller ? `\tcaller=${caller}` : '';
    return `${file}:${line}:${col}\t${kind}${callerCell}\t${lineText.slice(0, 80)}`;
  };

  const calls = entries.filter((item) => item.kind === 'call');
  if (calls.length > 0) {
    const output = ['# call sites', ...calls.slice(0, limit).map(renderRow)];
    if (calls.length > limit) {
      output.push(`... +${calls.length - limit} more call sites`);
    }
    return output.join('\n');
  }

  const NON_CALL_CAP = 40;
  const output = [
    '(no call sites)',
    '# non-call references',
    ...entries.slice(0, NON_CALL_CAP).map(renderRow),
  ];
  if (entries.length > NON_CALL_CAP) {
    output.push(`... +${entries.length - NON_CALL_CAP} more non-call references`);
  }
  return output.join('\n');
}
2858
+
2859
+ // Distinct enclosing-function names that call `symbol` (the recursion frontier
2860
+ // for transitive callers). Reads STRUCTURED entries (not formatted text), so a
2861
+ // stray "caller=" inside raw source lineText can never be misread as a caller.
2862
+ // Name-based, like callers mode.
2863
/**
 * List the distinct callers of `symbol` as tree-node descriptors.
 *
 * Works on the structured entries from `_collectCallerEntries`, never on
 * formatted text. Two kinds of descriptor are produced:
 *  - named callers `{ name, loc, leaf: false }`, one per enclosing-function
 *    name, located at the first call site seen for that name;
 *  - anonymous call sites `{ name: «snippet», loc, leaf: true }`, one per
 *    `file:line`, for calls with no enclosing named function.
 * Calls whose enclosing function is `symbol` itself are ignored.
 *
 * @param {object} graph - Code graph to search.
 * @param {string} symbol - Callee name.
 * @param {string} cwd - Project root forwarded to the reference search.
 * @param {string|null} language - Optional language filter for the search.
 * @returns {Array<{name: string, loc: string, leaf: boolean}>} Named callers
 *   first, then the anonymous leaves (omitted when there are more than 6).
 */
function _callerNamesOf(graph, symbol, cwd, language) {
  const referenceText = _cheapReferenceSearch(graph, symbol, cwd, { language });
  const namedCallers = new Map();
  const anonymousSites = new Map();

  for (const entry of _collectCallerEntries(graph, symbol, referenceText)) {
    if (entry.kind !== 'call') continue;
    const loc = `${entry.file}:${entry.line}`;

    if (!entry.caller) {
      // No enclosing named function (timer/event callback, top-level code):
      // keep it as a terminal leaf labelled with its own source line.
      if (anonymousSites.has(loc)) continue;
      const snippet = String(entry.lineText || 'call').replace(/\s+/g, ' ').trim().slice(0, 48);
      anonymousSites.set(loc, { name: `«${snippet}»`, loc, leaf: true });
      continue;
    }

    // A call made from inside `symbol` itself is not an upstream caller.
    if (entry.caller === symbol) continue;
    if (!namedCallers.has(entry.caller)) {
      namedCallers.set(entry.caller, { name: entry.caller, loc, leaf: false });
    }
  }

  // Hub guard: many anonymous call sites mean a generic hub where listing
  // them is noise, so above the threshold they are all dropped.
  const ANON_LEAF_MAX = 6;
  const named = [...namedCallers.values()];
  return anonymousSites.size > ANON_LEAF_MAX ? named : [...named, ...anonymousSites.values()];
}
2898
+
2899
+ // Transitive upstream caller TREE: BFS over enclosing-function names up to
2900
+ // `depth` levels, returned as an indented tree in ONE call (replaces the
2901
+ // manual per-level callers batching). NAME-BASED like callers mode — two
2902
+ // different functions sharing a name are merged, so this is an upstream-chain
2903
+ // OVERVIEW, not a precise per-declaration graph. Bounded by nodeCap; a symbol
2904
+ // whose callers were already listed is shown once and marked (shared-caller /
2905
+ // cycle guard) so the payload can't blow up.
2906
/**
 * Render the transitive upstream callers of `rootSymbol` as an indented,
 * paginated tree.
 *
 * The whole tree is walked once into a flat ordered list of
 * `{ indent, label }` rows, then one page of that list is returned. A symbol
 * whose callers were already expanded is shown once more with a marker
 * instead of being expanded again (shared-caller / cycle guard). The walk
 * stops once `hardMax` rows have been collected.
 *
 * @param {object} graph - Code graph to search.
 * @param {string} rootSymbol - Symbol whose callers are requested.
 * @param {string} cwd - Project root forwarded to the caller lookup.
 * @param {object} [options]
 * @param {string|null} [options.language=null] - Optional language filter.
 * @param {number} [options.depth=2] - Number of caller levels to expand.
 * @param {number} [options.pageSize=100] - Rows per page (minimum 1).
 * @param {number} [options.page=1] - 1-based page to return.
 * @param {number} [options.hardMax=1000] - Ceiling on collected rows.
 * @returns {string} The page text, a "past the end" notice, or the no-hit
 *   diagnostic when the root has no callers.
 */
function _formatTransitiveCallers(graph, rootSymbol, cwd, { language = null, depth = 2, pageSize = 100, page = 1, hardMax = 1000 } = {}) {
  const expanded = new Set();
  const collected = [];
  let overflow = false;
  const walk = (symbol, level) => {
    if (overflow || level >= depth) return;
    if (expanded.has(symbol)) {
      collected.push({ indent: level + 1, label: `${symbol} … (callers expanded above)` });
      return;
    }
    expanded.add(symbol);
    for (const entry of _callerNamesOf(graph, symbol, cwd, language)) {
      if (collected.length >= hardMax) { overflow = true; return; }
      // Each row carries its call-site file:line so the tree locates every
      // node without a follow-up lookup.
      collected.push({ indent: level + 1, label: `${entry.name}\t${entry.loc}` });
      // Leaves are anonymous call sites: there is no name to walk through.
      if (!entry.leaf) walk(entry.name, level + 1);
    }
  };
  walk(rootSymbol, 0);
  if (collected.length === 0) return _augmentNoHitDiagnostic('(no callers)', '(no callers)', graph, cwd, rootSymbol);

  const size = Math.max(1, Math.floor(Number(pageSize) || 100));
  const pg = Math.max(1, Math.floor(Number(page) || 1));
  const total = collected.length;
  const lastPage = Math.ceil(total / size);
  const start = (pg - 1) * size;
  if (start >= total) {
    return `# transitive callers of ${rootSymbol} (depth=${depth}) — page ${pg} is past the end (total ${total}${overflow ? '+' : ''} node(s); last page is ${lastPage}).`;
  }
  const slice = collected.slice(start, start + size);
  // Only rows that were actually collected can be paged to. Rows dropped by
  // the hardMax ceiling are NOT reachable on a later page, so an overflowed
  // walk must not advertise page N+1 once the collected rows are exhausted.
  const morePages = (start + slice.length) < total;
  const lines = [
    `# transitive callers of ${rootSymbol} (depth=${depth}) — page ${pg}, nodes ${start + 1}-${start + slice.length} of ${total}${overflow ? '+' : ''}; INDENTED children are ITS callers`,
    rootSymbol,
    ...slice.map((e) => `${' '.repeat(e.indent)}${e.label}`),
  ];
  if (morePages) {
    // More collected rows remain: steer the continuation into the SAME tool.
    lines.push(`# NEXT — more callers remain; re-run callers with the SAME symbol + depth + page:${pg + 1} for the next ${size} node(s). Every node carries file:line — do NOT grep/read.`);
  } else if (overflow) {
    // Last reachable page of a walk that hit the ceiling: say so honestly
    // instead of pointing at a page that would come back "past the end".
    lines.push(`# TRUNCATED — the walk stopped at the ${hardMax}-node ceiling, so this is the last page (page ${pg} of ${lastPage}) but NOT the complete caller set. Re-run callers with a smaller depth, or start from one of the callers listed above.`);
  } else {
    // Final page of a complete walk.
    lines.push(`# END — complete caller set delivered (page ${pg} of ${lastPage}): named callers PLUS timer/event/module-level call sites (the «…» leaves), each with file:line. No further callers/grep/read is needed.`);
  }
  return lines.join('\n');
}
2961
+
2962
/**
 * Extract the distinct file paths from reference-search output.
 *
 * Only lines shaped `file:line:col[ <text>]` contribute; everything else is
 * ignored.
 *
 * @param {string} referenceText - Newline-separated reference hits.
 * @returns {string[]} Sorted unique file paths; empty for non-string, blank,
 *   or `(no references)` input.
 */
function _referenceFiles(referenceText) {
  const usable = typeof referenceText === 'string'
    && Boolean(referenceText.trim())
    && referenceText !== '(no references)';
  if (!usable) return [];

  const uniqueFiles = new Set();
  referenceText.split('\n').forEach((rawLine) => {
    const candidate = rawLine.trim();
    if (!candidate) return;
    const hit = /^(.+?):\d+:\d+(?:[\s\t]+.*)?$/.exec(candidate);
    if (hit) uniqueFiles.add(hit[1]);
  });
  return Array.from(uniqueFiles).sort();
}
2975
+
2976
/**
 * Parse reference-search output into structured entries.
 *
 * Each line shaped `file:line:col[ <text>]` becomes one entry; lines that do
 * not match are skipped.
 *
 * @param {string} referenceText - Newline-separated reference hits.
 * @returns {Array<{file: string, line: number, col: number, text: string}>}
 *   Entries in input order; empty for non-string, blank, or
 *   `(no references)` input.
 */
function _parseReferenceEntries(referenceText) {
  if (typeof referenceText !== 'string') return [];
  if (!referenceText.trim() || referenceText === '(no references)') return [];

  const entries = [];
  for (const rawLine of referenceText.split('\n')) {
    const candidate = rawLine.trim();
    if (!candidate) continue;
    const hit = /^(.+?):(\d+):(\d+)(?:[\s\t]+(.*))?$/.exec(candidate);
    if (!hit) continue;
    const [, file, lineNo, colNo, rest] = hit;
    entries.push({
      file,
      line: Number(lineNo),
      col: Number(colNo),
      text: rest ? rest.trim() : '',
    });
  }
  return entries;
}
2995
+
2996
/**
 * Format one symbol-impact record as a tab-separated row.
 *
 * @param {{symbol: string, references: number, callers: string[]}} item
 * @returns {string} `symbol\trefs=N\tcallers=M`, followed by ` -> a, b` when
 *   the caller list is non-empty.
 */
function _formatSymbolImpactLine(item) {
  const { symbol, references, callers } = item;
  const row = `${symbol}\trefs=${references}\tcallers=${callers.length}`;
  if (!callers.length) return row;
  return `${row} -> ${callers.join(', ')}`;
}
3000
+
3001
/**
 * Gather the candidate symbol names of a file for impact analysis.
 *
 * Combines the node's recorded `topLevelTypes` with the names found by the
 * cheap symbol scanner over the node's source text.
 *
 * @param {object} node - Graph node for the file.
 * @param {object} graph - Code graph used to fetch the node's source text.
 * @returns {string[]} Unique names: top-level types first, then scanner hits.
 */
function _collectImpactSymbols(node, graph) {
  const declaredTypes = Array.isArray(node?.topLevelTypes) ? node.topLevelTypes : [];
  const unique = new Set(declaredTypes);
  const sourceText = _getSourceTextForNode(graph, node);
  for (const scanned of _collectCheapSymbols(sourceText, node.lang)) {
    unique.add(scanned.name);
  }
  return Array.from(unique);
}
3008
+
3009
/**
 * Compute the impact summary of a file (or of one symbol in it).
 *
 * Structural part: the file's resolved imports (made repo-relative), its
 * dependents from `graph.reverse`, and their sorted union. Symbol part: for
 * each scanned symbol, the references found in OTHER files and the files that
 * contain them.
 *
 * @param {object} node - Graph node of the file under analysis.
 * @param {object} graph - Code graph.
 * @param {string} cwd - Project root.
 * @param {string} [targetSymbol=''] - When set, only this symbol is scanned;
 *   otherwise the first 8 symbols collected from the file.
 * @returns {{imports: string[], dependents: string[], related: string[],
 *   symbolImpact: Array<{symbol: string, references: number, callers: string[]}>,
 *   externalCallers: string[], externalReferences: number,
 *   scannedSymbols: number}}
 */
function _buildImpactSummary(node, graph, cwd, targetSymbol = '') {
  const imports = node.resolvedImports.map((absPath) => _graphRel(absPath, cwd));
  const dependents = [...(graph.reverse.get(node.rel) || [])].sort();
  const related = [...new Set([...imports, ...dependents])].sort();
  const symbols = targetSymbol ? [targetSymbol] : _collectImpactSymbols(node, graph).slice(0, 8);

  const callerFiles = new Set();
  const symbolImpact = [];
  let externalReferences = 0;
  for (const symbol of symbols) {
    const referenceText = _cheapReferenceSearch(graph, symbol, cwd, { language: node.lang });
    // Hits inside the file itself are not "impact"; keep only outside ones.
    const outside = _parseReferenceEntries(referenceText).filter(({ file }) => file !== node.rel);
    if (outside.length === 0) continue;
    const callers = [...new Set(outside.map(({ file }) => file))].sort();
    callers.forEach((file) => callerFiles.add(file));
    externalReferences += outside.length;
    symbolImpact.push({ symbol, references: outside.length, callers });
  }

  // Most-referenced first; ties broken alphabetically.
  symbolImpact.sort((left, right) => (right.references - left.references) || left.symbol.localeCompare(right.symbol));

  return {
    imports,
    dependents,
    related,
    symbolImpact,
    externalCallers: [...callerFiles].sort(),
    externalReferences,
    scannedSymbols: symbols.length,
  };
}
3041
+
3042
+ // Bound model-facing structural list output (imports/dependents/related,
3043
+ // symbols, external callers) so a high fan-in/fan-out or symbol-dense file
3044
+ // cannot inject an unbounded result — mirrors the find_imports/find_dependents
3045
+ // cap. Function declaration is hoisted, so callers earlier in the file resolve.
3046
/**
 * Bound a list destined for model-facing output.
 *
 * @param {Array} arr - List to bound.
 * @param {number} [cap=200] - Maximum number of original items to keep.
 * @returns {Array} `arr` itself when it fits; otherwise a new array holding
 *   the first `cap` items plus a trailing truncation notice string.
 */
function _capGraphList(arr, cap = 200) {
  if (!(arr.length > cap)) return arr;
  const bounded = arr.slice(0, cap);
  bounded.push(`[truncated — showing first ${cap} of ${arr.length}]`);
  return bounded;
}
3051
+
3052
/**
 * Render the structural neighbourhood of a file: its imports, its dependents,
 * and their union.
 *
 * The header fields (file/language/imports/dependents/related) come first,
 * followed by the capped `# imports`, `# dependents` and, when non-empty,
 * `# related` sections.
 *
 * @param {object} node - Graph node of the file.
 * @param {object} graph - Code graph (its `reverse` map supplies dependents).
 * @param {string} cwd - Project root used to relativize import paths.
 * @returns {string} Newline-joined report.
 */
function _formatRelated(node, graph, cwd) {
  const imports = node.resolvedImports.map((absPath) => _graphRel(absPath, cwd));
  const dependents = [...(graph.reverse.get(node.rel) || [])].sort();
  const related = [...new Set([...imports, ...dependents])].sort();

  // A section body is either the capped list or a "(none)" placeholder.
  const sectionBody = (items) => (items.length ? _capGraphList(items).join('\n') : '(none)');

  const report = [
    `file\t${node.rel}`,
    `language\t${node.lang}`,
    `imports\t${imports.length}`,
    `dependents\t${dependents.length}`,
    `related\t${related.length}`,
    '',
    '# imports',
    sectionBody(imports),
    '',
    '# dependents',
    sectionBody(dependents),
  ];
  if (related.length) {
    report.push('', '# related', ..._capGraphList(related));
  }
  return report.join('\n');
}
3079
+
3080
/**
 * Render the impact report of a file, optionally narrowed to one symbol.
 *
 * Emits the header counts, then, each only when non-empty, the `# structural`
 * list, the top five symbol-impact rows, and the `# external callers` list.
 *
 * @param {object} node - Graph node of the file.
 * @param {object} graph - Code graph.
 * @param {string} cwd - Project root.
 * @param {string} [targetSymbol=''] - Restrict the symbol scan to this name.
 * @returns {string} Newline-joined report.
 */
function _formatImpact(node, graph, cwd, targetSymbol = '') {
  const summary = _buildImpactSummary(node, graph, cwd, targetSymbol);
  const report = [
    `file\t${node.rel}`,
    `language\t${node.lang}`,
    `imports\t${summary.imports.length}`,
    `dependents\t${summary.dependents.length}`,
    `related\t${summary.related.length}`,
    `scanned_symbols\t${summary.scannedSymbols}`,
    `external_references\t${summary.externalReferences}`,
    `external_callers\t${summary.externalCallers.length}`,
  ];
  if (targetSymbol) report.push(`symbol\t${targetSymbol}`);

  // Append a blank separator, a heading, and the rows of one section.
  const addSection = (heading, rows) => {
    report.push('', heading, ...rows);
  };

  if (summary.related.length) {
    addSection('# structural', _capGraphList(summary.related));
  }
  if (summary.symbolImpact.length) {
    const heading = targetSymbol ? '# symbol impact' : '# top symbol impact';
    addSection(heading, summary.symbolImpact.slice(0, 5).map((item) => _formatSymbolImpactLine(item)));
  }
  if (summary.externalCallers.length) {
    addSection('# external callers', _capGraphList(summary.externalCallers));
  }
  return report.join('\n');
}
3110
+
3111
+ // ── Native graph binary (mixdog-graph) — single source of truth for
3112
+ // per-file parsing. There is NO JS parsing fallback: if the binary is
3113
+ // absent the build throws so the caller surfaces a clear, fixable error
3114
+ // instead of silently degrading to a slow path.
3115
/**
 * Locate the native `mixdog-graph` binary without downloading anything.
 *
 * Lookup order:
 *  1. `MIXDOG_GRAPH_BIN`, when set and the path exists;
 *  2. a local cargo release build relative to this module;
 *  3. whatever `findCachedGraphBinary` reports for the plugin data directory.
 *
 * @returns {string|null} The binary path; `null` when the cached lookup
 *   throws (its own return value is passed through as-is).
 */
function _graphBinaryPath() {
  const envOverride = process.env.MIXDOG_GRAPH_BIN;
  if (envOverride && existsSync(envOverride)) return envOverride;

  // fileURLToPath (not URL.pathname) so percent-encoded characters and the
  // Windows drive-letter form are decoded into a usable filesystem path.
  const here = dirname(fileURLToPath(import.meta.url));
  const exeName = process.platform === 'win32' ? 'mixdog-graph.exe' : 'mixdog-graph';

  // A local cargo build wins over a previously fetched/cached prebuilt.
  const cargoBuild = pathResolve(here, '../../../../native/mixdog-graph/target/release', exeName);
  if (existsSync(cargoBuild)) return cargoBuild;

  try {
    return findCachedGraphBinary(getPluginData());
  } catch {
    return null;
  }
}
3129
+
3130
/**
 * Run the native `mixdog-graph` binary and collect its JSONL output.
 *
 * The binary is located with `_graphBinaryPath()`; when none is found it is
 * fetched through `ensureGraphBinary`. There is no JS parsing fallback: if
 * the binary cannot be obtained, this throws.
 *
 * @param {string} absRoot - Absolute project root, passed as the first argv.
 * @param {string[]} extraArgs - Additional argv entries (mode flags, rels).
 * @param {string[]|null} [stdinLines=null] - When an array, each entry is
 *   written to the child's stdin as one line; otherwise stdin is not opened.
 * @returns {Promise<object[]>} Parsed records that carry a string `rel`;
 *   malformed lines and records without `rel` are skipped.
 * @throws {Error} When the binary is unavailable (original failure attached
 *   as `cause`), fails to spawn, or exits with a non-zero / signal status.
 */
async function _runGraphBinaryJsonl(absRoot, extraArgs, stdinLines = null) {
  let binPath = _graphBinaryPath();
  if (!binPath) {
    // No local build or cached binary: fetch the prebuilt one.
    try {
      binPath = await ensureGraphBinary(getPluginData());
    } catch (err) {
      // Keep the original failure reachable for debugging via `cause`.
      throw new Error(
        `[code-graph] mixdog-graph binary unavailable and could not be fetched: ${err?.message || err}. `
          + 'Build it (cargo build --release in native/mixdog-graph) or check network/release manifest.',
        { cause: err },
      );
    }
  }
  const { spawn } = await import('node:child_process');
  return await new Promise((resolve, reject) => {
    // With stdinLines (--files mode) the reused nodes' metadata is streamed
    // to the child, one JSON object per line.
    const wantsStdin = Array.isArray(stdinLines);
    const proc = spawn(binPath, [absRoot, ...extraArgs], {
      stdio: [wantsStdin ? 'pipe' : 'ignore', 'pipe', 'pipe'],
      // windowsHide: the binary is a console exe; without this each call
      // flashes a console window when spawned under the detached daemon.
      windowsHide: true,
    });
    const chunks = [];
    let stderrText = '';
    const STDERR_CAP = 8 * 1024;
    proc.stdout.on('data', (c) => chunks.push(c));
    // Let the stream decode UTF-8 so a multi-byte character split across two
    // chunks is not turned into replacement characters.
    proc.stderr.setEncoding('utf8');
    proc.stderr.on('data', (piece) => {
      if (stderrText.length >= STDERR_CAP) return;
      const room = STDERR_CAP - stderrText.length;
      stderrText += piece.length > room ? piece.slice(0, room) : piece;
    });
    proc.on('error', (err) => reject(err));
    if (wantsStdin) {
      proc.stdin.on('error', () => { /* child may close stdin early; ignore EPIPE */ });
      proc.stdin.write(stdinLines.length ? `${stdinLines.join('\n')}\n` : '');
      proc.stdin.end();
    }
    proc.on('close', (code, signal) => {
      if (code !== 0) {
        // A child killed by a signal closes with code === null; name the
        // signal so the failure is diagnosable instead of "exited null".
        const signalNote = signal ? ` (signal ${signal})` : '';
        reject(new Error(`[code-graph] mixdog-graph exited ${code}${signalNote}: ${stderrText.trim().slice(0, 200)}`));
        return;
      }
      const out = [];
      const buf = Buffer.concat(chunks).toString('utf8');
      for (const line of buf.split('\n')) {
        const trimmed = line.trim();
        if (!trimmed) continue;
        try {
          const rec = JSON.parse(trimmed);
          if (rec && typeof rec.rel === 'string') out.push(rec);
        } catch { /* skip malformed line */ }
      }
      resolve(out);
    });
  });
}
3193
+
3194
/**
 * Run the graph binary in `--manifest` mode for `absRoot`.
 *
 * @param {string} absRoot - Absolute project root.
 * @returns {Promise<object[]>} The binary's parsed JSONL records.
 */
function _runGraphManifest(absRoot) {
  const manifestArgs = ['--manifest'];
  return _runGraphBinaryJsonl(absRoot, manifestArgs);
}
3195
/**
 * Run the graph binary with no mode flag (full walk) for `absRoot`.
 *
 * @param {string} absRoot - Absolute project root.
 * @returns {Promise<object[]>} The binary's parsed JSONL records.
 */
function _runGraphWalk(absRoot) {
  const noExtraArgs = [];
  return _runGraphBinaryJsonl(absRoot, noExtraArgs);
}
3196
+ // --files (design A: full-graph resolution) full-parses only `rels` (argv) but
3197
+ // resolves imports across the WHOLE tree. The reused nodes' metas are streamed
3198
+ // to the child via STDIN as JSONL — one JSON object per line:
3199
+ // {rel, lang, rawImports, packageName, namespaceName, goPackageName,
3200
+ // topLevelTypes}. Rust builds the index + resolves over ALL nodes (fresh +
3201
+ // reused) and emits fresh rels as full records, reused rels as lightweight
3202
+ // {rel, resolvedImports, importedBy}.
3203
/**
 * Run the graph binary in `--files` mode: `rels` go on argv, and the metadata
 * of the reused nodes is streamed to the child's stdin as JSONL.
 *
 * Each stdin line carries exactly `rel`, `lang`, `rawImports`, `packageName`,
 * `namespaceName`, `goPackageName` and `topLevelTypes`; missing values are
 * normalized to `[]` / `''`.
 *
 * @param {string} absRoot - Absolute project root.
 * @param {string[]} rels - Repo-relative paths passed after `--files`.
 * @param {object[]} reusedMetas - Reused node infos; a non-array yields no
 *   stdin lines (stdin is still opened and closed empty).
 * @returns {Promise<object[]>} The binary's parsed JSONL records.
 */
function _runGraphFiles(absRoot, rels, reusedMetas) {
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  const toStdinLine = (meta) => JSON.stringify({
    rel: meta.rel,
    lang: meta.lang,
    rawImports: listOrEmpty(meta.rawImports),
    packageName: meta.packageName || '',
    namespaceName: meta.namespaceName || '',
    goPackageName: meta.goPackageName || '',
    topLevelTypes: listOrEmpty(meta.topLevelTypes),
  });
  const stdinLines = Array.isArray(reusedMetas)
    ? reusedMetas.map((meta) => toStdinLine(meta))
    : [];
  return _runGraphBinaryJsonl(absRoot, ['--files', ...rels], stdinLines);
}
3217
+
3218
+ // Map a Rust FileRecord (rel/lang/fp/tokens/rawImports/resolvedImports/
3219
+ // importedBy/...) onto the JS fileInfo shape the graph assembler expects.
3220
+ // Import resolution — including Go module paths — now happens entirely in
3221
+ // Rust; resolvedImports/importedBy are repo-relative path lists passed
3222
+ // straight through.
3223
/**
 * Convert one record emitted by the graph binary into the fileInfo shape used
 * when assembling graph nodes.
 *
 * Fields of the wrong type are normalized: strings fall back to `''`, lists
 * to `[]` (`tokens` to `null`), and the resolved-edge lists keep only their
 * string members. `sourceText` is always `null` here.
 *
 * @param {object} rec - Record from the binary (`rel`, `lang`, `fp`, ...).
 * @param {string} absRoot - Absolute project root used to build `abs`.
 * @returns {object} fileInfo with abs/rel/lang/fingerprint/sourceText/
 *   rawImports/resolvedImports/importedBy/package names/topLevelTypes/
 *   tokenSymbols/symbols.
 */
function _fileInfoFromRustRecord(rec, absRoot) {
  const textOrEmpty = (value) => (typeof value === 'string' ? value : '');
  const listOr = (value, fallback) => (Array.isArray(value) ? value : fallback);
  const stringList = (value) => (Array.isArray(value)
    ? value.filter((member) => typeof member === 'string')
    : []);

  return {
    abs: pathResolve(absRoot, rec.rel),
    rel: rec.rel,
    lang: rec.lang,
    fingerprint: textOrEmpty(rec.fp),
    sourceText: null,
    rawImports: listOr(rec.rawImports, []),
    resolvedImports: stringList(rec.resolvedImports),
    importedBy: stringList(rec.importedBy),
    packageName: textOrEmpty(rec.packageName),
    namespaceName: textOrEmpty(rec.namespaceName),
    goPackageName: textOrEmpty(rec.goPackageName),
    topLevelTypes: listOr(rec.topLevelTypes, []),
    tokenSymbols: listOr(rec.tokens, null),
    symbols: listOr(rec.symbols, []),
  };
}
3248
+
3249
+ // Reuse a node from the previous graph whose fp is unchanged — skips both
3250
+ // the Rust call and re-parsing for files that did not change.
3251
/**
 * Rebuild a fileInfo from a node of the previous graph.
 *
 * The cached source text is carried over only when the cache entry's
 * fingerprint equals the node's fingerprint; otherwise `sourceText` is `null`.
 * The node's repo-relative edge list (`resolvedImportsRel`) becomes the
 * fileInfo's `resolvedImports`.
 *
 * @param {object} prevNode - Node taken from the previous graph.
 * @param {object|null} previousGraph - Previous graph (source-text cache owner).
 * @param {string} absRoot - Absolute project root, used when the node has no `abs`.
 * @returns {object} fileInfo in the same shape `_fileInfoFromRustRecord` yields.
 */
function _reuseFileInfo(prevNode, previousGraph, absRoot) {
  const { rel } = prevNode;
  const fingerprint = prevNode.fingerprint || '';
  const listOr = (value, fallback) => (Array.isArray(value) ? value : fallback);

  const cacheEntry = previousGraph?._sourceTextCache?.get(rel);
  const sourceText = cacheEntry?.fingerprint === fingerprint ? cacheEntry.text : null;

  return {
    abs: prevNode.abs || pathResolve(absRoot, rel),
    rel,
    lang: prevNode.lang,
    fingerprint,
    sourceText,
    rawImports: listOr(prevNode.rawImports, []),
    resolvedImports: listOr(prevNode.resolvedImportsRel, []),
    importedBy: listOr(prevNode.importedBy, []),
    packageName: prevNode.packageName || '',
    namespaceName: prevNode.namespaceName || '',
    goPackageName: prevNode.goPackageName || '',
    topLevelTypes: listOr(prevNode.topLevelTypes, []),
    tokenSymbols: listOr(prevNode.tokenSymbols, null),
    symbols: listOr(prevNode.symbols, []),
  };
}
3272
+
3273
/**
 * Internal — exported solely for `code-graph-prewarm-worker.mjs` to import.
 * NOT part of the public API. External callers should use `buildCodeGraphAsync`
 * or the `code_graph` / `find_symbol` tools rather than invoking this build
 * directly.
 *
 * Builds the code graph for `cwd`, or returns a cached one:
 *  1. run the binary's manifest and derive a signature from it;
 *  2. serve the in-memory cache when the signature matches and the entry is
 *     younger than `CODE_GRAPH_TTL_MS`;
 *  3. serve the disk cache when its signature matches;
 *  4. otherwise rebuild, reusing unchanged nodes of the previous graph by
 *     fingerprint and parsing the rest through the binary.
 *
 * A result is stored in the caches only when the dirty generation captured
 * at the start is still current; it is returned to the caller either way.
 *
 * @param {string} cwd - Project root to index.
 * @returns {Promise<object>} The graph (`nodes`, `reverse`, `truncated`, ...).
 */
export async function _buildCodeGraph(cwd) {
  const startedAt = Date.now();
  let lastMark = performance.now();
  // Opt-in phase timing, written to stderr when MIXDOG_GRAPH_TRACE is set.
  const _trace = (label) => {
    if (!process.env.MIXDOG_GRAPH_TRACE) return;
    const mark = performance.now();
    process.stderr.write(`[cg-trace] ${label}=${(mark - lastMark).toFixed(0)}ms\n`);
    lastMark = mark;
  };
  const stringsOnly = (list) => list.filter((value) => typeof value === 'string');

  const graphCwd = _canonicalGraphCwd(cwd);
  const absRoot = graphCwd;
  // Generation at build start: a write landing while this build awaits bumps
  // it, and the result must then not be cached or persisted.
  const genAtStart = _getCodeGraphGen(graphCwd);
  const memoryEntry = _codeGraphCache.get(graphCwd);
  let previousGraph = memoryEntry?.graph || null;
  // The manifest fingerprint diff below subsumes the dirty-path set; drain it
  // so it does not grow between builds.
  _consumeCodeGraphDirtyPaths(graphCwd);

  // 1. Change detection. The signature covers the FULL manifest, so a change
  //    beyond CODE_GRAPH_MAX_FILES still invalidates; only `indexed` is built.
  const manifest = await _runGraphManifest(absRoot);
  const signature = _computeGraphSignature(manifest);
  _trace('manifest+sig');
  const truncated = manifest.length > CODE_GRAPH_MAX_FILES;
  const indexed = truncated ? manifest.slice(0, CODE_GRAPH_MAX_FILES) : manifest;

  // 2. Memory cache hit.
  const memoryFresh = memoryEntry
    && memoryEntry.signature === signature
    && startedAt - memoryEntry.ts < CODE_GRAPH_TTL_MS;
  if (memoryFresh) {
    _touchCodeGraphCache(graphCwd);
    return memoryEntry.graph;
  }

  // 3. Disk cache hit.
  _loadDiskCodeGraphCache(startedAt);
  _ensureCwdLoaded(graphCwd);
  const diskEntry = _diskCodeGraphCache.get(graphCwd);
  if (diskEntry?.signature === signature) {
    const restored = _deserializeGraph(graphCwd, diskEntry);
    if (restored) {
      // Promote to the memory cache only if no write invalidated this root
      // since the build started; the caller still receives the graph.
      if (_getCodeGraphGen(graphCwd) === genAtStart) {
        _setCodeGraphCache(graphCwd, { ts: startedAt, signature, graph: restored });
      }
      return restored;
    }
  }
  if (!previousGraph && diskEntry) previousGraph = _deserializeGraph(graphCwd, diskEntry);
  // Schema guard: a graph from another symbol schema must not seed reuse,
  // otherwise its unchanged nodes would be copied verbatim into the rebuild.
  if (previousGraph && previousGraph.schemaVersion !== SYMBOL_SCHEMA_VERSION) {
    previousGraph = null;
  }

  // 4. Split the indexed files into reusable (same fingerprint) and fresh.
  const reusable = [];
  const freshRels = [];
  for (const meta of indexed) {
    const priorNode = previousGraph?.nodes?.get(meta.rel) || null;
    if (priorNode && priorNode.fingerprint === meta.fp) {
      reusable.push(_reuseFileInfo(priorNode, previousGraph, absRoot));
    } else {
      freshRels.push(meta.rel);
    }
  }

  let fileInfos;
  if (freshRels.length === 0) {
    fileInfos = reusable;
  } else if (reusable.length > 0 && freshRels.length <= 256) {
    // Incremental: parse only the fresh subset, but hand the reused nodes'
    // metadata to the binary so edges are resolved over all nodes. Records
    // for fresh rels become new nodes; records for reused rels only refresh
    // that node's resolved edges in place.
    const records = await _runGraphFiles(absRoot, freshRels, reusable);
    const reusedByRel = new Map(reusable.map((info) => [info.rel, info]));
    const freshSet = new Set(freshRels);
    fileInfos = [...reusable];
    for (const rec of records) {
      if (freshSet.has(rec.rel)) {
        fileInfos.push(_fileInfoFromRustRecord(rec, absRoot));
        continue;
      }
      const reusedInfo = reusedByRel.get(rec.rel);
      if (!reusedInfo) continue;
      reusedInfo.resolvedImports = Array.isArray(rec.resolvedImports)
        ? stringsOnly(rec.resolvedImports)
        : [];
      if (Array.isArray(rec.importedBy)) {
        reusedInfo.importedBy = stringsOnly(rec.importedBy);
      }
    }
  } else {
    // Cold walk. The slice is a defensive bound; `truncated` was already
    // derived from the full manifest above.
    const walked = await _runGraphWalk(absRoot);
    const bounded = walked.length > CODE_GRAPH_MAX_FILES
      ? walked.slice(0, CODE_GRAPH_MAX_FILES)
      : walked;
    fileInfos = bounded.map((rec) => _fileInfoFromRustRecord(rec, absRoot));
  }
  _trace('walk+parse');

  const nodes = new Map();
  const reverse = new Map();
  for (const info of fileInfos) {
    // fileInfo edges are repo-relative. The node keeps them as
    // resolvedImportsRel and exposes absolute paths as resolvedImports.
    const resolvedImportsRel = Array.isArray(info.resolvedImports) ? info.resolvedImports : [];
    const importedBy = Array.isArray(info.importedBy) ? info.importedBy : [];
    const node = {
      abs: info.abs,
      rel: info.rel,
      lang: info.lang,
      fingerprint: info.fingerprint,
      rawImports: info.rawImports,
      resolvedImportsRel,
      resolvedImports: resolvedImportsRel.map((target) => pathResolve(absRoot, target)),
      importedBy,
      packageName: info.packageName,
      namespaceName: info.namespaceName,
      goPackageName: info.goPackageName,
      topLevelTypes: info.topLevelTypes,
      tokenSymbols: info.tokenSymbols,
      symbols: Array.isArray(info.symbols) ? info.symbols : [],
    };
    nodes.set(info.rel, node);
    // reverse is derived from the forward edges of every node rather than
    // from importedBy, which keeps it consistent with resolvedImportsRel.
    for (const target of resolvedImportsRel) {
      let importers = reverse.get(target);
      if (!importers) {
        importers = new Set();
        reverse.set(target, importers);
      }
      importers.add(node.rel);
    }
  }
  _trace('assemble');

  const graph = _attachGraphRuntimeCaches({
    cwd: graphCwd,
    nodes,
    reverse,
    schemaVersion: SYMBOL_SCHEMA_VERSION,
    builtAt: startedAt,
    signature,
  });
  // Surface truncation so downstream output can warn that the graph stopped
  // at CODE_GRAPH_MAX_FILES.
  graph.truncated = Boolean(truncated);
  for (const info of fileInfos) {
    if (typeof info.sourceText !== 'string') continue;
    graph._sourceTextCache.set(info.rel, {
      fingerprint: info.fingerprint || '',
      text: info.sourceText,
    });
  }
  graph._symbolTokenIndexDirty = true;

  // Dirty-generation guard: if a write landed during the build, skip both
  // caches and hand the result only to the awaiting caller.
  if (_getCodeGraphGen(graphCwd) === genAtStart) {
    _setCodeGraphCache(graphCwd, { ts: startedAt, signature, graph });
    _setDiskCodeGraphEntry(graphCwd, graph);
    _trace('cache+disk');
  }
  return graph;
}
3452
+
3453
+ // Modes that operate on a single named symbol and can be looped to serve a
3454
+ // multi-symbol request in one call (the graph is cwd-cached, so per-symbol
3455
+ // re-entry is cheap). impact is excluded — it is file-scoped, not symbol-list.
3456
const CODE_GRAPH_BATCHABLE_MODES = new Set([
  'symbol',
  'find_symbol',
  'callers',
  'callees',
  'references',
]);
3457
+ // Collect requested symbol names from symbols[] (array), symbols (comma/space
3458
+ // string), or symbol (single name OR comma/space-separated multi), de-duped in
3459
+ // request order.
3460
/**
 * Collect the requested symbol names from a tool-call argument object.
 *
 * Sources, in order: `args.symbols` as an array (each entry trimmed, not
 * split), `args.symbols` as a comma/whitespace-separated string, then
 * `args.symbol` as a comma/whitespace-separated string.
 *
 * @param {object} [args] - Tool arguments.
 * @returns {string[]} Non-empty names, de-duplicated in first-seen order.
 */
function _collectGraphSymbolList(args) {
  const tokenize = (value) => String(value || '')
    .split(/[,\s]+/)
    .map((token) => token.trim())
    .filter(Boolean);

  const ordered = new Set();
  if (Array.isArray(args?.symbols)) {
    for (const raw of args.symbols) {
      const name = String(raw || '').trim();
      if (name) ordered.add(name);
    }
  }
  if (typeof args?.symbols === 'string') {
    for (const name of tokenize(args.symbols)) ordered.add(name);
  }
  if (typeof args?.symbol === 'string') {
    for (const name of tokenize(args.symbol)) ordered.add(name);
  }
  return Array.from(ordered);
}
3468
+
3469
/**
 * Dispatch one `code_graph` request to the handler selected by `args.mode`.
 *
 * Modes handled here: prewarm, overview, imports, dependents, related, impact,
 * callees, symbols, find_symbol, search, references, callers.
 *
 * @param {object} args    Request arguments. Fields read here: `mode`, `file`,
 *                         `symbol`, `symbols`, `language`, `limit`, `depth`,
 *                         `page`, `body`.
 * @param {string} cwd     Root the graph is built for; relative `file` values
 *                         are resolved against it.
 * @param {AbortSignal|null} [signal]  Forwarded to the graph build.
 * @param {object} [options]  When `options.scopedCacheOutcome` is set and the
 *                         graph is truncated, it is passed to
 *                         `markScopedCacheIncomplete`.
 * @returns {Promise<*>}   The selected mode's result. Every branch written in
 *                         this function yields text; results delegated to
 *                         helpers are presumably text too — verify against them.
 * @throws {Error} When `mode` is missing or unknown, when the graph is missing
 *                 or empty, when a required `file`/`symbol` is absent, or when
 *                 an explicit `language` has no files in the graph.
 */
async function codeGraph(args, cwd, signal = null, options = {}) {
  const mode = String(args?.mode || '').trim();
  if (!mode) throw new Error('code_graph: "mode" is required');

  if (mode === 'prewarm') {
    // R5-③: TRUE fire-and-forget. Prewarm is handled BEFORE the graph build
    // is awaited, so the caller returns immediately and can pipeline its real
    // lookups while the build runs in the background.
    //
    // Symbols may arrive as args.symbols (array or comma/space string) or via
    // args.symbol (single or comma/space multi). Client-side mcp schema caches
    // sometimes strip unknown fields, so the multi-form through the
    // always-known `symbol` field is the most portable. The shared collector
    // applies exactly that precedence and de-duplication.
    const symbols = _collectGraphSymbolList(args);
    if (symbols.length > 0) prewarmCodeGraphSymbols(cwd, symbols);
    else prewarmCodeGraph(cwd);
    return `prewarm scheduled: cwd=${cwd} symbols=${symbols.length}${symbols.length ? ` (${symbols.slice(0, 5).join(',')}${symbols.length > 5 ? `,+${symbols.length - 5}` : ''})` : ''}`;
  }

  const graph = await buildCodeGraphAsync(cwd, signal);
  if (!graph || graph.nodes.size === 0) {
    throw new Error(`code_graph: cwd '${cwd}' is not an indexed/known project root or contains zero eligible files`);
  }
  if (options?.scopedCacheOutcome && graph.truncated) {
    markScopedCacheIncomplete(options.scopedCacheOutcome);
  }
  const normFile = normalizeInputPath(args?.file);
  const abs = normFile ? (isAbsolute(normFile) ? pathResolve(normFile) : pathResolve(cwd, normFile)) : null;
  const rel = abs ? _graphRel(abs, cwd) : null;
  const node = rel ? graph.nodes.get(rel) : null;

  // Maximum rows emitted by the list-style modes (imports / dependents).
  const GRAPH_LIST_CAP = 200;

  // Clamp a caller-supplied limit into [1, max]. Missing/zero falls back to
  // `fallback`; a non-numeric value (NaN) also falls back instead of leaking
  // NaN downstream, where it would compare false against everything.
  const clampLimit = (raw, fallback, max) => {
    const parsed = Number(raw || fallback);
    return Math.max(1, Math.min(max, Number.isNaN(parsed) ? fallback : parsed));
  };

  if (mode === 'overview') {
    if (rel && !node) return `Error: code_graph overview: file not found in graph: ${normFile}`;
    if (node) return _buildExplainerFileSummary(node, graph, cwd);
    // No file given: summarise the whole graph (file count, edge count, and a
    // per-language file tally sorted by descending count).
    const byLang = new Map();
    let edgeCount = 0;
    for (const fileNode of graph.nodes.values()) {
      byLang.set(fileNode.lang, (byLang.get(fileNode.lang) || 0) + 1);
      edgeCount += fileNode.resolvedImports.length;
    }
    const lines = [
      `files\t${graph.nodes.size}`,
      `edges\t${edgeCount}`,
    ];
    for (const [lang, count] of [...byLang.entries()].sort((a, b) => b[1] - a[1])) {
      lines.push(`${lang}\t${count}`);
    }
    if (graph?.truncated) {
      lines.push(`WARN: graph truncated at CODE_GRAPH_MAX_FILES=${CODE_GRAPH_MAX_FILES} — some files under cwd were not indexed`);
    }
    return lines.join('\n');
  }

  if (mode === 'imports') {
    if (!node) return `Error: code_graph imports: file not found in graph: ${normFile || '(missing file)'}`;
    const resolvedAll = node.resolvedImports.map((p) => _graphRel(p, cwd));
    const rawAll = node.rawImports;
    const resolved = resolvedAll.slice(0, GRAPH_LIST_CAP);
    const raw = rawAll.slice(0, GRAPH_LIST_CAP);
    const parts = [];
    if (resolved.length) parts.push(resolved.join('\n'));
    if (raw.length) parts.push(`# raw\n${raw.join('\n')}`);
    if (resolvedAll.length > resolved.length || rawAll.length > raw.length) {
      parts.push(`[truncated — showing first ${GRAPH_LIST_CAP} of ${resolvedAll.length} resolved / ${rawAll.length} raw imports]`);
    }
    return parts.join('\n\n') || '(no imports)';
  }

  if (mode === 'dependents') {
    if (!rel) throw new Error('code_graph dependents: "file" is required');
    // Validate the path is actually indexed before answering. Without this
    // check, a typo or unsupported extension silently returns
    // "(no dependents)" — indistinguishable from a real zero-dependent file.
    if (!node) return `Error: code_graph dependents: file not found in graph: ${normFile || '(missing file)'}`;
    const depsAll = [...(graph.reverse.get(rel) || [])].sort();
    if (!depsAll.length) return '(no dependents)';
    const deps = depsAll.slice(0, GRAPH_LIST_CAP);
    // Enrich each dependent with the import line so callers do not need a
    // follow-up grep for `file:line`. Best-effort: if the importer cannot be
    // read or no matching import line is found, fall back to the bare path.
    const basename = rel.split('/').pop();
    const stem = basename.replace(/\.[^/.]+$/, '');
    const enriched = deps.map((dep) => {
      const depNode = graph.nodes.get(dep);
      if (!depNode) return dep;
      let text;
      try { text = readFileSync(depNode.abs, 'utf8'); } catch { return dep; }
      const linesArr = text.split(/\r?\n/);
      for (let i = 0; i < linesArr.length; i++) {
        const ln = linesArr[i];
        // The specifier line of a MULTI-LINE import (`} from './x.mjs';`) and
        // re-exports (`export ... from`) carry no import/require keyword on
        // that line — match the `from '...'` tail too, or those dependents
        // lose their :line.
        if (!/(?:^|\W)(?:import|require)\b|\bfrom\s*['"]/.test(ln)) continue;
        if (ln.includes(`/${basename}`) || ln.includes(`/${stem}`) || ln.includes(`'${basename}'`) || ln.includes(`"${basename}"`)) {
          return `${dep}:${i + 1}`;
        }
      }
      return dep;
    });
    const out = enriched.join('\n');
    return depsAll.length > deps.length
      ? `${out}\n[truncated — showing first ${GRAPH_LIST_CAP} of ${depsAll.length} dependents]`
      : out;
  }

  if (mode === 'related') {
    if (!node) return `Error: code_graph related: file not found in graph: ${normFile || '(missing file)'}`;
    return _formatRelated(node, graph, cwd);
  }

  if (mode === 'impact') {
    if (!node) return `Error: code_graph impact: file not found in graph: ${normFile || '(missing file)'}`;
    const targetSymbol = String(args?.symbol || '').trim();
    return _formatImpact(node, graph, cwd, targetSymbol);
  }

  if (mode === 'callees') {
    // FORWARD call navigation: mirror of `callers` (reverse). Locate the
    // symbol's declaration through the find_symbol hit list, then delegate
    // body extraction + callee resolution to the shared `_extractCallees`.
    const symbol = String(args?.symbol || '').trim();
    if (!symbol) throw new Error('code_graph callees: "symbol" is required.');
    const explicitLanguage = String(args?.language || '').trim() || null;
    if (rel && !node) return `Error: code_graph callees: file not found in graph: ${normFile || '(missing file)'}`;
    const allHits = _findSymbolHits(graph, symbol, { language: explicitLanguage });
    const hits = rel ? allHits.filter((h) => h.rel === rel) : allHits;
    // Prefer a declaration-like hit; otherwise take the first hit of any kind.
    const declHit = hits.find((h) => h.declarationLike) || hits[0];
    if (!declHit) {
      const scopeNote = rel ? ` file=${rel}` : '';
      return `(no symbol matches in cwd=${cwd}${scopeNote})`;
    }
    if (!_CALLEES_BRACE_LANGS.has(declHit.lang)) {
      return `(callees unsupported for ${declHit.lang})`;
    }
    const rows = _extractCallees(graph, declHit, cwd, {
      cap: 200,
      callerSymbol: symbol,
      language: explicitLanguage,
    });
    if (!rows.length) return `(no callees)`;
    const out = ['# callees'];
    for (const row of rows) out.push(_formatCalleeRow(row));
    return out.join('\n');
  }

  if (mode === 'symbols') {
    if (!node) return `Error: code_graph symbols: file not found in graph: ${normFile || '(missing file)'}`;
    let text = '';
    // An unreadable file is reported as having no symbols rather than failing.
    try { text = readFileSync(node.abs, 'utf8'); } catch { return '(no symbols)'; }
    return _extractSymbolsCheap(text, node.lang);
  }

  if (mode === 'find_symbol') {
    const symbol = String(args?.symbol || '').trim();
    if (!symbol) throw new Error('code_graph find_symbol: "symbol" is required.');
    const language = String(args?.language || '').trim() || null;
    const limit = clampLimit(args?.limit, 20, 50);
    // SCOPE ISOLATION: if caller narrowed by `file`, validate it's indexed
    // then restrict hits to that file only (drop same-named symbols in
    // unrelated files).
    if (rel && !node) return `Error: code_graph find_symbol: file not found in graph: ${normFile || '(missing file)'}`;
    return _findSymbolAcrossGraph(graph, symbol, cwd, { language, limit, fileRel: rel, body: args?.body !== false });
  }

  if (mode === 'search') {
    const keyword = String(args?.symbol || '').trim();
    if (!keyword) throw new Error('code_graph search: "symbol" is required.');
    const language = String(args?.language || '').trim() || null;
    const limit = clampLimit(args?.limit, 30, 100);
    return _searchSymbolsByKeyword(graph, keyword, cwd, { language, limit });
  }

  if (mode === 'references') {
    const symbol = String(args?.symbol || '').trim();
    if (!symbol) throw new Error('code_graph references: "symbol" is required.');
    const explicitLanguage = String(args?.language || '').trim() || null;
    if (explicitLanguage) {
      const langHasFiles = [...graph.nodes.values()].some((n) => n.lang === explicitLanguage);
      if (!langHasFiles) {
        throw new Error(`code_graph references: language '${explicitLanguage}' has no adapter topLevelTypes and is not in supportedRegexLangs for this project`);
      }
    }
    const narrowedByCaller = Boolean(rel || explicitLanguage);
    const resolved = _resolveReferenceLanguageNode(graph, symbol, rel, cwd, explicitLanguage);
    // Distinguish "file path was never indexed" from "file is indexed but the
    // symbol never appears in it". Both terminate a file-narrowed request,
    // but the message lets the caller pick the right recovery (fix the path
    // vs. drop the file filter / widen the search).
    if (rel && resolved.kind === 'file-not-found') {
      return `Error: code_graph references: file not found in graph: ${normFile || '(missing file)'}`;
    }
    if (rel && resolved.kind === 'symbol-not-present') {
      return `Error: code_graph references: symbol "${symbol}" not found in ${normFile || rel}`;
    }
    const resolvedNode = resolved.kind === 'ok' ? resolved.node : null;
    // Explicit language is a hard scope — preserve it even when the resolver
    // failed to land on a node, so the search doesn't silently widen to every
    // language. Bare refs with no file/language narrow search all languages.
    const lang = explicitLanguage
      || ((narrowedByCaller && resolvedNode) ? resolvedNode.lang : null);
    // Only use args.limit when it's a positive finite number; 0/negative/
    // missing fall back to null so the search applies its own default cap.
    // The upper bound is clamped at 500 to keep payloads sane.
    const rawLimit = Number(args?.limit);
    const userLimit = Number.isFinite(rawLimit) && rawLimit > 0
      ? Math.min(500, Math.floor(rawLimit))
      : null;
    // Parallel pre-read so the sync search below hits the in-memory text
    // cache instead of paying serial disk reads.
    await _prewarmReferenceSourceText(graph, symbol, lang);
    // SCOPE ISOLATION: when `file` is set, restrict the reference search to
    // that single file.
    const refResult = _cheapReferenceSearch(graph, symbol, cwd, { language: lang, limit: userLimit, fileRel: rel });
    return narrowedByCaller ? refResult : _augmentNoHitDiagnostic(refResult, '(no references)', graph, cwd, symbol);
  }

  if (mode === 'callers') {
    const symbol = String(args?.symbol || '').trim();
    if (!symbol) throw new Error('code_graph callers: "symbol" is required.');
    const explicitLanguage = String(args?.language || '').trim() || null;
    // Validate explicit-language scope up front so callers mode mirrors the
    // references-mode contract: an unrecognised/unindexed language is a hard
    // scope error, not a silent fall-through to a broader search.
    if (explicitLanguage) {
      const langHasFiles = [...graph.nodes.values()].some((n) => n.lang === explicitLanguage);
      if (!langHasFiles) {
        throw new Error(`code_graph callers: language '${explicitLanguage}' has no adapter topLevelTypes and is not in supportedRegexLangs for this project`);
      }
    }
    const narrowedByCaller = Boolean(rel || explicitLanguage);
    const resolved = _resolveReferenceLanguageNode(graph, symbol, rel, cwd, explicitLanguage);
    if (rel && resolved.kind === 'file-not-found') {
      return `Error: code_graph callers: file not found in graph: ${normFile || '(missing file)'}`;
    }
    if (rel && resolved.kind === 'symbol-not-present') {
      return `Error: code_graph callers: symbol "${symbol}" not found in ${normFile || rel}`;
    }
    const resolvedNode = resolved.kind === 'ok' ? resolved.node : null;
    // Explicit language is a hard scope — keep it even when the resolver
    // failed to land on a node, so the downstream cheap reference search
    // doesn't silently widen to every language.
    const lang = explicitLanguage
      || ((narrowedByCaller && resolvedNode) ? resolvedNode.lang : null);
    // Only positive finite limits propagate; 0/negative/missing become null.
    const rawLimit = Number(args?.limit);
    const userLimit = Number.isFinite(rawLimit) && rawLimit > 0
      ? Math.min(500, Math.floor(rawLimit))
      : null;
    // Parallel pre-read so the sync search hits the in-memory text cache.
    await _prewarmReferenceSourceText(graph, symbol, lang);
    // Transitive upstream tree: depth>1 walks caller-of-caller up to `depth`
    // levels (clamped to 1..5) in ONE call. depth<=1 keeps the single-level
    // path. Graph-wide by design; `file` scope is ignored for the walk.
    const depth = Math.max(1, Math.min(5, Math.floor(Number(args?.depth) || 1)));
    if (depth > 1) {
      return _formatTransitiveCallers(graph, symbol, cwd, { language: lang, depth, page: args?.page });
    }
    // SCOPE ISOLATION: file-narrowed callers stays within that file too.
    const refs = _cheapReferenceSearch(graph, symbol, cwd, { language: lang, limit: userLimit, fileRel: rel });
    const callerResult = _formatCallerReferences(graph, symbol, refs, userLimit ? { limit: userLimit } : undefined);
    return narrowedByCaller ? callerResult : _augmentNoHitDiagnostic(callerResult, '(no callers)', graph, cwd, symbol);
  }

  throw new Error(`code_graph: unknown mode "${mode}"`);
}
3766
+
3767
/**
 * Plain declaration lookup: locate `args.symbol`, or list a file's symbols
 * when only `args.file` is given.
 *
 * @param {object} args    Fields read here: `mode` (only `prewarm` is special),
 *                         `symbol`, `symbols`, `language`, `limit`, `file`,
 *                         `body`.
 * @param {string} cwd     Root the graph is built for; relative `file` values
 *                         are resolved against it.
 * @param {AbortSignal|null} [signal]  Forwarded to the graph build.
 * @param {object} [options]  When `options.scopedCacheOutcome` is set and the
 *                         graph is truncated, it is passed to
 *                         `markScopedCacheIncomplete`.
 * @returns {Promise<*>}   Prewarm acknowledgement text, an `Error: ...` text for
 *                         an un-indexed file, the file's symbol listing, or the
 *                         result of `_findSymbolAcrossGraph`.
 * @throws {Error} When no graph can be built, or when neither `symbol` nor
 *                 `file` is provided.
 */
async function findSymbolTool(args, cwd, signal = null, options = {}) {
  // Prewarm short-circuit: no graph build await, fire-and-forget. Returns
  // immediately so the caller can issue prewarm at session start and then
  // pipeline real find_symbol calls without blocking on the cold scan.
  if (args?.mode === 'prewarm') {
    // Shared collector: symbols[] array, symbols string, or symbol string,
    // de-duplicated in request order.
    const symbols = _collectGraphSymbolList(args);
    if (symbols.length > 0) prewarmCodeGraphSymbols(cwd, symbols);
    else prewarmCodeGraph(cwd);
    return `prewarm scheduled: cwd=${cwd} symbols=${symbols.length}${symbols.length ? ` (${symbols.slice(0, 5).join(',')}${symbols.length > 5 ? `,+${symbols.length - 5}` : ''})` : ''}`;
  }
  const graph = await buildCodeGraphAsync(cwd, signal);
  if (!graph) throw new Error(`find_symbol: cwd '${cwd}' is not an indexed/known project root or contains zero eligible files`);
  if (options?.scopedCacheOutcome && graph.truncated) {
    markScopedCacheIncomplete(options.scopedCacheOutcome);
  }
  const symbol = String(args?.symbol || '').trim();
  const language = String(args?.language || '').trim() || null;
  // Clamp to [1, 50]; missing/zero defaults to 20. A non-numeric value would
  // otherwise become NaN and survive Math.max/Math.min, so it also falls back.
  const rawLimit = Number(args?.limit || 20);
  const limit = Math.max(1, Math.min(50, Number.isNaN(rawLimit) ? 20 : rawLimit));
  // SCOPE ISOLATION: when `file` is supplied, restrict hits to that file's
  // declaration + refs (don't return every same-named symbol across the
  // tree). Validates the path is actually indexed so a typo surfaces a
  // clear error instead of a silent "(no symbol matches)".
  const normFile = normalizeInputPath(args?.file);
  const abs = normFile ? (isAbsolute(normFile) ? pathResolve(normFile) : pathResolve(cwd, normFile)) : null;
  const fileRel = abs ? _graphRel(abs, cwd) : null;
  const fileNode = fileRel ? graph.nodes.get(fileRel) : null;
  if (fileRel && !fileNode) {
    return `Error: find_symbol: file not found in graph: ${normFile}`;
  }
  // FILE-OVERVIEW MODE: `symbol` omitted but `file` given → list that file's
  // symbols (mirrors the dispatcher's `symbols` mode). The tool spec allows
  // "symbol (to locate) OR file (to list its symbols)".
  if (!symbol) {
    if (fileRel) {
      let text = '';
      // An unreadable file is reported as having no symbols rather than failing.
      try { text = readFileSync(fileNode.abs, 'utf8'); } catch { return '(no symbols)'; }
      return _extractSymbolsCheap(text, fileNode.lang);
    }
    throw new Error('find_symbol: provide "symbol" (to locate) or "file" (to list its symbols).');
  }
  return _findSymbolAcrossGraph(graph, symbol, cwd, { language, limit, fileRel, body: args?.body !== false });
}
3816
+
3817
+
3818
+
3819
+ export { CODE_GRAPH_TOOL_DEFS } from './code-graph-tool-defs.mjs';
3820
+
3821
// Recover the end line of a brace-delimited declaration whose endLine the
// graph does not record (assignment-style decls): the body closes at the
// first line that starts with a closing bracket and is indented at or left of
// the declaration line. Exact for conventionally-indented code; returns null
// (caller falls back) when the declaration line does not end with an opening
// bracket, or no closing line exists within the scan window — e.g. minified
// or single-line code.
const SYMBOL_SPAN_SCAN_MAX_LINES = 400;
function _inferSpanEndByIndent(allLines, startLine) {
  // `startLine` is 1-based; `allLines` is the file split into lines.
  const decl = allLines[startLine - 1];
  // Only declarations that end on an opening `{`, `(` or `[` have a body to scan.
  if (typeof decl !== 'string' || !/[{([]\s*$/.test(decl.trimEnd())) return null;
  const declIndent = decl.match(/^[ \t]*/)[0].length;
  const last = Math.min(allLines.length, startLine - 1 + SYMBOL_SPAN_SCAN_MAX_LINES);
  for (let i = startLine; i < last; i++) {
    const line = allLines[i];
    if (!/^[ \t]*[})\]]/.test(line)) continue;
    const indent = line.match(/^[ \t]*/)[0].length;
    // Convert the 0-based index back to a 1-based line number.
    if (indent <= declIndent) return i + 1;
  }
  return null;
}

/**
 * Resolve a symbol name to a 1-based [startLine, endLine] declaration span for read().
 *
 * @param {string} cwd  Root the code graph is built for.
 * @param {object} [query]
 * @param {string} query.symbol        Symbol name to resolve (required).
 * @param {string|null} [query.path]   Restrict the lookup to this file.
 * @param {string|null} [query.language]  Passed to the hit lookup as a filter.
 * @param {number|null} [query.line]   Prefer the hit on this 1-based line.
 * @returns {Promise<object>} `{ rel, startLine, endLine, offset, limit,
 *   approximate, note? }` on success, or `{ error }`. `offset` is the 0-based
 *   start (`startLine - 1`) and `limit` the line count of the span.
 *   `approximate` is true when the end line had to be guessed with a fixed
 *   window.
 */
export async function resolveSymbolReadSpan(cwd, { symbol, path = null, language = null, line = null } = {}) {
  const cleanSymbol = String(symbol || '').trim();
  if (!cleanSymbol) return { error: 'symbol is required' };
  let graph;
  try {
    graph = await buildCodeGraphAsync(cwd);
  } catch (err) {
    // Failures are reported as a result object, not thrown.
    return { error: `symbol read: code graph unavailable (${err?.message || err})` };
  }
  if (!graph) return { error: 'symbol read: code graph could not be built for cwd' };

  const normFile = path ? normalizeInputPath(path) : null;
  const abs = normFile ? (isAbsolute(normFile) ? pathResolve(normFile) : pathResolve(cwd, normFile)) : null;
  const fileRel = abs ? _graphRel(abs, cwd) : null;
  if (fileRel && !graph.nodes.get(fileRel)) {
    return { error: `symbol '${cleanSymbol}' not found — file not indexed: ${path}; use find_symbol` };
  }

  let hits = _findSymbolHits(graph, cleanSymbol, { language });
  if (fileRel) hits = hits.filter((h) => h.rel === fileRel);
  if (!hits.length) {
    const scope = fileRel ? ` in ${fileRel}` : '';
    return { error: `symbol '${cleanSymbol}' not found${scope}; use find_symbol to locate it` };
  }

  // `line` disambiguates between same-named hits; when nothing sits on that
  // line the full hit list is used instead.
  const disambigLine = Number(line);
  const wantsLine = Number.isFinite(disambigLine) && disambigLine > 0;
  const onLine = wantsLine ? hits.filter((h) => h.line === disambigLine) : [];
  const pickedByLine = onLine.length > 0;
  const primary = _pickCalleeDeclHit(pickedByLine ? onLine : hits, fileRel);
  if (!primary) return { error: `symbol '${cleanSymbol}' not found; use find_symbol` };

  const startLine = Number(primary.line);
  let endLine = Number(primary.endLine);
  let approximate = false;
  if (!Number.isFinite(startLine) || startLine < 1) {
    return { error: `symbol '${cleanSymbol}' has no valid declaration line; use find_symbol` };
  }
  if (!Number.isFinite(endLine) || endLine < startLine) {
    // Assignment-style decls record no endLine; a fixed +79 window over-reads
    // short arrows ~4x and truncates longer ones. Recover the real span from
    // indentation; fall back to the fixed window only when the scan fails.
    const node = graph.nodes.get(primary.rel);
    const srcText = node ? _getSourceTextForNode(graph, node) : null;
    const inferred = srcText ? _inferSpanEndByIndent(srcText.split('\n'), startLine) : null;
    if (inferred) {
      endLine = inferred;
    } else {
      endLine = startLine + 79;
      approximate = true;
    }
  }
  const declCount = hits.filter((h) => h.declarationLike).length;
  const notes = [];
  if (approximate) notes.push('end line unknown — approximate range from declaration line');
  if (!fileRel && (hits.length > 1 || declCount > 1)) {
    notes.push('other matches exist — pass path= (and line= to disambiguate) to scope');
  } else if (fileRel && declCount > 1) {
    // Say "the first" only when no line= selection took effect; otherwise the
    // note would contradict the declaration that was actually returned.
    notes.push(
      pickedByLine
        ? `${declCount} declarations of '${cleanSymbol}' in this file — reading the one at line ${startLine}; pass line= to pick another`
        : `${declCount} declarations of '${cleanSymbol}' in this file — reading the first; pass line= to pick another`,
    );
  }

  return {
    rel: primary.rel,
    startLine,
    endLine,
    offset: startLine - 1,
    limit: endLine - startLine + 1,
    approximate,
    note: notes.length ? notes.join('; ') : undefined,
  };
}
3921
+
3922
// MCP clients sometimes inject empty-string defaults for optional schema
// fields (e.g. `file: ""`). Left in place, such a value can be treated as a
// real path and trip the "file not found in graph" early-return in
// callers/references modes even though the caller wanted a bare-symbol
// search. This returns a shallow copy of `args` with `file` / `language`
// removed when they are '' or null; the input object is never mutated.
function _stripEmptyArgs(args) {
  const { ...cleaned } = args || {};
  const optionalFields = ['file', 'language'];
  optionalFields.forEach((field) => {
    const value = cleaned[field];
    const isBlank = value === '' || value === null;
    if (isBlank) delete cleaned[field];
  });
  return cleaned;
}
3935
+
3936
// P1: project-root sentinels. A directory that contains any of these entries
// (or has one at an ancestor) counts as a real project we may index. They are
// used to (a) re-root a file that sits outside cwd to its own project, and
// (b) refuse to index an arbitrary non-project tree (home dir, multi-repo
// container, plugin cache) on an implicit cwd.
const _PROJECT_ROOT_SENTINELS = [
  'package.json',
  '.git',
  'Cargo.toml',
  'go.mod',
  'pyproject.toml',
  'setup.py',
  'pom.xml',
  'build.gradle',
  'build.gradle.kts',
  'build.sbt',
  'Package.swift',
];
3942
+
3943
// P1: map a file path to its nearest project root — the closest ancestor
// directory (starting at the file's own directory) that holds a sentinel.
// Returns null for a falsy path or when no such ancestor exists below the
// filesystem root; the caller throws rather than falling back silently.
function _resolveFileProjectRoot(file) {
  if (!file) return null;
  const holdsSentinel = (candidate) => _PROJECT_ROOT_SENTINELS
    .some((marker) => existsSync(join(candidate, marker)));
  for (let current = dirname(pathResolve(file)); current; current = dirname(current)) {
    // dirname() is a fixed point at the filesystem root: stop before probing it.
    if (current === dirname(current)) break;
    if (holdsSentinel(current)) return current;
  }
  return null;
}
3955
+
3956
// P1: nearest project root for a DIRECTORY — the directory itself or the
// closest ancestor holding a sentinel. Returns null for a falsy input or when
// the directory sits in no project, which is the signal to refuse an
// unscoped, implicit-cwd index of an arbitrary tree.
function _findDirProjectRoot(dir) {
  if (!dir) return null;
  let cursor = pathResolve(dir);
  for (;;) {
    const parent = dirname(cursor);
    // Reaching the filesystem root (its own parent) ends the walk unprobed.
    if (!cursor || cursor === parent) return null;
    const isProjectRoot = _PROJECT_ROOT_SENTINELS
      .some((marker) => existsSync(join(cursor, marker)));
    if (isProjectRoot) return cursor;
    cursor = parent;
  }
}
3968
+
3969
// #4: explain an EMPTY result of an unscoped refs/callers query. `result` is
// returned untouched unless it is a string that trims to exactly `emptyToken`.
// For an empty result one of two hints is appended:
//   - the symbol has a declaration hit in this graph → it is genuinely
//     unreferenced here, so no re-scope / grep is needed;
//   - otherwise → report the graph root and indexed-file count, since the
//     usual cause is a cwd pointing at the wrong tree.
// File/language-scoped empty results are real scoped answers; callers skip
// this function for them.
function _augmentNoHitDiagnostic(result, emptyToken, graph, cwd, symbol) {
  const isEmptyAnswer = typeof result === 'string' && result.trim() === emptyToken;
  if (!isEmptyAnswer) return result;

  const indexedCount = graph?.nodes?.size || 0;
  const truncationNote = graph?.truncated
    ? `, graph truncated at ${CODE_GRAPH_MAX_FILES} files`
    : '';

  // Best-effort declaration probe: any failure is treated as "no declaration".
  let declHit = null;
  try {
    const ranked = _sortSymbolHits(_findSymbolHits(graph, symbol, {})) || [];
    declHit = ranked[0] || null;
  } catch {}

  if (declHit) {
    return `${emptyToken}\n# '${symbol}' IS defined (${_formatSymbolHitLocation(declHit)}) but is genuinely unreferenced in this graph — present, not missing. No re-scope / grep needed.`;
  }

  return [
    `${emptyToken}\n# '${symbol}' not present in graph rooted at ${cwd} (${indexedCount} files indexed${truncationNote}). `,
    `If it should exist, the target is likely outside this cwd — pass an explicit 'cwd' (repo root) or 'file' anchor, or run 'cwd set <repo>'.`,
  ].join('');
}
3991
+
3992
/**
 * Entry point for code-graph tool calls: pick the effective graph root, then
 * dispatch to `findSymbolTool` / `codeGraph`, optionally racing an abort signal.
 *
 * Root selection:
 *  - `args.cwd` (non-blank string) is trusted as-is.
 *  - Otherwise, a `file` anchor outside the session cwd re-roots to that
 *    file's own project root (throws when none is detectable).
 *  - Otherwise, with no `file` anchor, the session cwd must sit inside a
 *    project and is re-rooted up to that project's root.
 *
 * @param {string} name   Tool name; only `code_graph` is dispatched.
 * @param {object} args   Tool arguments (`mode`, `file`, `cwd`, `symbol`,
 *                        `symbols`, plus mode-specific fields).
 * @param {string} cwd    Session working directory (required).
 * @param {AbortSignal|null} [signal]  When given, the returned promise rejects
 *                        with `Error('aborted')` as soon as it fires.
 * @param {object} [options]  Forwarded unchanged to the mode handlers.
 * @returns {Promise<*>}  The handler's result; for a multi-symbol batch, one
 *                        `# <mode> <symbol>` section per name joined by blank
 *                        lines, with per-symbol failures rendered as
 *                        `Error: ...` text.
 * @throws {Error} Missing cwd, un-rootable file or cwd, abort, unknown tool
 *                 name, or any error raised by a non-batched handler.
 */
export async function executeCodeGraphTool(name, args, cwd, signal = null, options = {}) {
  if (!cwd) throw new Error('find_symbol/code_graph requires cwd — caller did not provide a working directory');
  const fileArg = (args && typeof args.file === 'string' && args.file.trim()) ? args.file.trim() : '';
  // '' means "no explicit cwd supplied by the caller".
  const explicitCwd = (args && typeof args.cwd === 'string' && args.cwd.trim()) ? args.cwd.trim() : '';
  const baseCwd = explicitCwd || cwd;
  let effectiveCwd = baseCwd;
  if (fileArg) {
    const abs = isAbsolute(fileArg) ? pathResolve(fileArg) : pathResolve(baseCwd, fileArg);
    const rel = pathRelative(pathResolve(baseCwd), abs);
    // Only a genuine `..` path SEGMENT escapes cwd. A plain prefix test would
    // also reject in-cwd entries whose name merely begins with two dots
    // (e.g. `..cache/x.js`).
    const escapesCwd = /^\.\.(?:[\\/]|$)/.test(rel);
    const insideCwd = rel && !escapesCwd && !isAbsolute(rel);
    // P1: file outside cwd — require explicit cwd arg or detectable project root; throw otherwise.
    if (!insideCwd && !explicitCwd) {
      const fileRoot = _resolveFileProjectRoot(abs);
      if (!fileRoot) {
        throw new Error(`find_symbol: file '${fileArg}' is outside cwd '${baseCwd}' and has no detectable project root (no package.json/.git ancestor). Provide an explicit cwd.`);
      }
      effectiveCwd = fileRoot;
    }
  }
  // P1 (fail-loud root): an UNSCOPED query (no file anchor) on an IMPLICIT cwd
  // must sit inside a real project. Otherwise we would index whatever giant tree
  // the session cwd points at (home dir, a multi-repo container, a plugin cache)
  // and then silently answer refs/callers from the wrong graph. An explicit
  // `cwd` arg is trusted (the caller opted in). Refuse loudly otherwise.
  if (!fileArg && !explicitCwd) {
    const projectRoot = _findDirProjectRoot(effectiveCwd);
    if (!projectRoot) {
      throw new Error(
        `${name}: cwd '${effectiveCwd}' is not inside a project (no `
        + `${_PROJECT_ROOT_SENTINELS.join('/')} at it or any ancestor). Refusing to `
        + `index an arbitrary tree. Run 'cwd set <repo>', or pass an explicit `
        + `'cwd' (repo root) or a 'file' anchor.`);
    }
    // ② Re-root an implicit SUBDIR cwd up to its project root so an unscoped
    // query covers the whole repo, not just the subtree under cwd. A cwd that
    // is already its own project root re-roots to itself (no-op).
    effectiveCwd = projectRoot;
  }
  if (signal?.aborted) throw new Error('aborted');
  const _work = (() => {
    switch (name) {
      case 'code_graph': {
        // `symbol` / `find_symbol` modes keep the plain-declaration lookup
        // (prewarm + file-overview without a symbol) provided by
        // findSymbolTool. All other modes flow through codeGraph().
        const rawMode = String(args?.mode || '').trim();
        const declModes = new Set(['symbol', 'find_symbol']);
        // Empty-string / null `file` and `language` are stripped on BOTH
        // paths: client-injected empty defaults must not be mistaken for a
        // real file or language scope by any mode.
        const dispatchOne = (a) => {
          const cleaned = _stripEmptyArgs(a);
          return declModes.has(rawMode)
            ? findSymbolTool(cleaned, effectiveCwd, signal, options)
            : codeGraph(cleaned, effectiveCwd, signal, options);
        };
        // Multi-symbol batch: run a batchable mode once per requested name and
        // concatenate, so N lookups cost ONE call. A single name falls through
        // unchanged (no header).
        if (CODE_GRAPH_BATCHABLE_MODES.has(rawMode)) {
          const symbolList = _collectGraphSymbolList(args);
          if (symbolList.length > 1) {
            return (async () => {
              const sections = [];
              for (const sym of symbolList) {
                // Stop scheduling further lookups once the caller has aborted.
                if (signal?.aborted) throw new Error('aborted');
                let body;
                // One failing symbol must not sink the rest of the batch.
                try { body = await dispatchOne({ ...args, symbol: sym, symbols: undefined }); }
                catch (e) { body = `Error: ${e?.message || String(e)}`; }
                sections.push(`# ${rawMode} ${sym}\n${body}`);
              }
              return sections.join('\n\n');
            })();
          }
          // Exactly one name that came from `symbols` (or needed trimming):
          // normalise it into `symbol` before dispatch.
          if (symbolList.length === 1 && args?.symbol !== symbolList[0]) {
            return dispatchOne({ ...args, symbol: symbolList[0], symbols: undefined });
          }
        }
        return dispatchOne(args);
      }
      default: throw new Error(`Unknown code-graph tool: ${name}`);
    }
  })();
  if (!signal) return _work;
  // Race the work against the abort signal. The listener is always removed
  // once the race settles so it does not outlive the call.
  let onAbort = null;
  const abortP = new Promise((_, reject) => {
    if (signal.aborted) { reject(new Error('aborted')); return; }
    onAbort = () => reject(new Error('aborted'));
    signal.addEventListener('abort', onAbort, { once: true });
  });
  const cleanup = () => {
    if (onAbort) {
      try { signal.removeEventListener('abort', onAbort); } catch {}
      onAbort = null;
    }
  };
  return Promise.race([_work, abortP]).then(
    (v) => { cleanup(); return v; },
    (e) => { cleanup(); throw e; },
  );
}
4092
+
4093
/**
 * Report whether `name` matches the `name` of any entry in
 * `CODE_GRAPH_TOOL_DEFS`.
 *
 * @param {string} name  Tool name to look up.
 * @returns {boolean} True when a tool definition with that exact name exists.
 */
export function isCodeGraphTool(name) {
  const matchesName = ({ name: toolName }) => toolName === name;
  return CODE_GRAPH_TOOL_DEFS.some(matchesName);
}