@gajae-code/coding-agent 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +1 -1
  3. package/dist/types/async/job-manager.d.ts +145 -2
  4. package/dist/types/commands/harness.d.ts +37 -0
  5. package/dist/types/config/settings-schema.d.ts +13 -3
  6. package/dist/types/config/settings.d.ts +3 -1
  7. package/dist/types/deep-interview/render-middleware.d.ts +5 -0
  8. package/dist/types/discovery/helpers.d.ts +1 -0
  9. package/dist/types/exec/bash-executor.d.ts +8 -1
  10. package/dist/types/extensibility/custom-tools/types.d.ts +1 -0
  11. package/dist/types/extensibility/extensions/types.d.ts +6 -0
  12. package/dist/types/extensibility/shared-events.d.ts +1 -0
  13. package/dist/types/gjc-runtime/restricted-role-agent-bash.d.ts +2 -0
  14. package/dist/types/gjc-runtime/state-graph.d.ts +4 -0
  15. package/dist/types/gjc-runtime/state-migrations.d.ts +24 -0
  16. package/dist/types/gjc-runtime/state-renderer.d.ts +65 -0
  17. package/dist/types/gjc-runtime/state-runtime.d.ts +2 -0
  18. package/dist/types/gjc-runtime/state-validation.d.ts +6 -0
  19. package/dist/types/gjc-runtime/state-writer.d.ts +137 -0
  20. package/dist/types/gjc-runtime/team-runtime.d.ts +81 -7
  21. package/dist/types/gjc-runtime/workflow-manifest.d.ts +54 -0
  22. package/dist/types/harness-control-plane/classifier.d.ts +13 -0
  23. package/dist/types/harness-control-plane/control-endpoint.d.ts +30 -0
  24. package/dist/types/harness-control-plane/finalize.d.ts +47 -0
  25. package/dist/types/harness-control-plane/frame-mapper.d.ts +29 -0
  26. package/dist/types/harness-control-plane/operate.d.ts +35 -0
  27. package/dist/types/harness-control-plane/owner.d.ts +46 -0
  28. package/dist/types/harness-control-plane/preserve.d.ts +19 -0
  29. package/dist/types/harness-control-plane/receipts.d.ts +88 -0
  30. package/dist/types/harness-control-plane/rpc-adapter.d.ts +66 -0
  31. package/dist/types/harness-control-plane/seams.d.ts +21 -0
  32. package/dist/types/harness-control-plane/session-lease.d.ts +65 -0
  33. package/dist/types/harness-control-plane/state-machine.d.ts +19 -0
  34. package/dist/types/harness-control-plane/storage.d.ts +53 -0
  35. package/dist/types/harness-control-plane/types.d.ts +162 -0
  36. package/dist/types/hooks/skill-keywords.d.ts +2 -1
  37. package/dist/types/hooks/skill-state.d.ts +2 -29
  38. package/dist/types/modes/acp/acp-client-bridge.d.ts +1 -1
  39. package/dist/types/modes/components/hook-selector.d.ts +1 -0
  40. package/dist/types/modes/components/skill-hud/render.d.ts +1 -1
  41. package/dist/types/modes/interactive-mode.d.ts +2 -0
  42. package/dist/types/modes/theme/defaults/index.d.ts +45 -9477
  43. package/dist/types/modes/theme/theme.d.ts +1 -5
  44. package/dist/types/modes/types.d.ts +2 -0
  45. package/dist/types/sdk.d.ts +4 -0
  46. package/dist/types/session/agent-session.d.ts +8 -0
  47. package/dist/types/session/streaming-output.d.ts +11 -0
  48. package/dist/types/skill-state/active-state.d.ts +3 -0
  49. package/dist/types/skill-state/deep-interview-mutation-guard.d.ts +1 -1
  50. package/dist/types/skill-state/workflow-state-contract.d.ts +24 -0
  51. package/dist/types/task/executor.d.ts +3 -0
  52. package/dist/types/task/types.d.ts +56 -3
  53. package/dist/types/tools/bash-allowed-prefixes.d.ts +5 -0
  54. package/dist/types/tools/bash.d.ts +24 -0
  55. package/dist/types/tools/cron.d.ts +110 -0
  56. package/dist/types/tools/index.d.ts +4 -0
  57. package/dist/types/tools/monitor.d.ts +54 -0
  58. package/dist/types/tools/subagent.d.ts +11 -1
  59. package/dist/types/web/search/index.d.ts +1 -0
  60. package/dist/types/web/search/provider.d.ts +11 -4
  61. package/dist/types/web/search/providers/duckduckgo.d.ts +57 -0
  62. package/dist/types/web/search/types.d.ts +1 -1
  63. package/package.json +7 -7
  64. package/src/async/job-manager.ts +522 -6
  65. package/src/cli/agents-cli.ts +3 -0
  66. package/src/cli/auth-broker-cli.ts +1 -0
  67. package/src/cli/config-cli.ts +10 -2
  68. package/src/cli.ts +2 -0
  69. package/src/commands/harness.ts +592 -0
  70. package/src/commands/team.ts +36 -39
  71. package/src/config/settings-schema.ts +15 -2
  72. package/src/config/settings.ts +49 -7
  73. package/src/deep-interview/render-middleware.ts +366 -0
  74. package/src/defaults/gjc/skills/deep-interview/SKILL.md +9 -2
  75. package/src/defaults/gjc/skills/ralplan/SKILL.md +8 -4
  76. package/src/defaults/gjc/skills/team/SKILL.md +47 -21
  77. package/src/defaults/gjc/skills/ultragoal/SKILL.md +78 -11
  78. package/src/discovery/helpers.ts +5 -0
  79. package/src/eval/js/shared/rewrite-imports.ts +1 -2
  80. package/src/exec/bash-executor.ts +20 -9
  81. package/src/extensibility/custom-tools/types.ts +1 -0
  82. package/src/extensibility/extensions/types.ts +6 -0
  83. package/src/extensibility/shared-events.ts +1 -0
  84. package/src/gjc-runtime/deep-interview-runtime.ts +40 -21
  85. package/src/gjc-runtime/goal-mode-request.ts +11 -3
  86. package/src/gjc-runtime/ralplan-runtime.ts +27 -10
  87. package/src/gjc-runtime/restricted-role-agent-bash.ts +5 -0
  88. package/src/gjc-runtime/state-graph.ts +86 -0
  89. package/src/gjc-runtime/state-migrations.ts +132 -0
  90. package/src/gjc-runtime/state-renderer.ts +345 -0
  91. package/src/gjc-runtime/state-runtime.ts +733 -21
  92. package/src/gjc-runtime/state-validation.ts +49 -0
  93. package/src/gjc-runtime/state-writer.ts +718 -0
  94. package/src/gjc-runtime/team-runtime.ts +1083 -89
  95. package/src/gjc-runtime/ultragoal-runtime.ts +348 -19
  96. package/src/gjc-runtime/workflow-manifest.generated.json +1497 -0
  97. package/src/gjc-runtime/workflow-manifest.ts +425 -0
  98. package/src/harness-control-plane/classifier.ts +128 -0
  99. package/src/harness-control-plane/control-endpoint.ts +137 -0
  100. package/src/harness-control-plane/finalize.ts +222 -0
  101. package/src/harness-control-plane/frame-mapper.ts +286 -0
  102. package/src/harness-control-plane/operate.ts +225 -0
  103. package/src/harness-control-plane/owner.ts +553 -0
  104. package/src/harness-control-plane/preserve.ts +102 -0
  105. package/src/harness-control-plane/receipts.ts +216 -0
  106. package/src/harness-control-plane/rpc-adapter.ts +276 -0
  107. package/src/harness-control-plane/seams.ts +39 -0
  108. package/src/harness-control-plane/session-lease.ts +388 -0
  109. package/src/harness-control-plane/state-machine.ts +97 -0
  110. package/src/harness-control-plane/storage.ts +257 -0
  111. package/src/harness-control-plane/types.ts +214 -0
  112. package/src/hooks/skill-keywords.ts +4 -2
  113. package/src/hooks/skill-state.ts +25 -42
  114. package/src/internal-urls/docs-index.generated.ts +6 -4
  115. package/src/lsp/render.ts +1 -1
  116. package/src/modes/acp/acp-agent.ts +1 -1
  117. package/src/modes/acp/acp-client-bridge.ts +1 -1
  118. package/src/modes/components/agent-dashboard.ts +1 -1
  119. package/src/modes/components/assistant-message.ts +5 -1
  120. package/src/modes/components/diff.ts +2 -2
  121. package/src/modes/components/hook-selector.ts +72 -2
  122. package/src/modes/components/skill-hud/render.ts +7 -2
  123. package/src/modes/controllers/event-controller.ts +71 -6
  124. package/src/modes/controllers/extension-ui-controller.ts +6 -0
  125. package/src/modes/controllers/input-controller.ts +19 -3
  126. package/src/modes/controllers/selector-controller.ts +3 -2
  127. package/src/modes/interactive-mode.ts +21 -2
  128. package/src/modes/theme/defaults/index.ts +0 -196
  129. package/src/modes/theme/theme.ts +35 -35
  130. package/src/modes/types.ts +2 -0
  131. package/src/prompts/agents/architect.md +5 -1
  132. package/src/prompts/agents/critic.md +5 -1
  133. package/src/prompts/agents/executor.md +13 -0
  134. package/src/prompts/agents/frontmatter.md +1 -0
  135. package/src/prompts/agents/planner.md +5 -1
  136. package/src/prompts/tools/bash.md +9 -0
  137. package/src/prompts/tools/cron.md +25 -0
  138. package/src/prompts/tools/monitor.md +30 -0
  139. package/src/prompts/tools/subagent.md +33 -3
  140. package/src/runtime-mcp/oauth-flow.ts +4 -2
  141. package/src/sdk.ts +7 -0
  142. package/src/session/agent-session.ts +247 -38
  143. package/src/session/session-manager.ts +13 -1
  144. package/src/session/streaming-output.ts +21 -0
  145. package/src/skill-state/active-state.ts +222 -78
  146. package/src/skill-state/deep-interview-mutation-guard.ts +91 -13
  147. package/src/skill-state/initial-phase.ts +2 -0
  148. package/src/skill-state/workflow-state-contract.ts +26 -0
  149. package/src/task/agents.ts +1 -0
  150. package/src/task/executor.ts +51 -8
  151. package/src/task/index.ts +120 -8
  152. package/src/task/render.ts +6 -3
  153. package/src/task/types.ts +57 -3
  154. package/src/tools/ask.ts +28 -7
  155. package/src/tools/bash-allowed-prefixes.ts +169 -0
  156. package/src/tools/bash.ts +190 -29
  157. package/src/tools/browser/tab-worker.ts +1 -1
  158. package/src/tools/cron.ts +665 -0
  159. package/src/tools/index.ts +20 -2
  160. package/src/tools/monitor.ts +136 -0
  161. package/src/tools/subagent.ts +255 -64
  162. package/src/vim/engine.ts +3 -3
  163. package/src/web/search/index.ts +31 -18
  164. package/src/web/search/provider.ts +57 -12
  165. package/src/web/search/providers/duckduckgo.ts +279 -0
  166. package/src/web/search/types.ts +2 -0
  167. package/src/modes/theme/dark.json +0 -95
  168. package/src/modes/theme/defaults/alabaster.json +0 -93
  169. package/src/modes/theme/defaults/amethyst.json +0 -96
  170. package/src/modes/theme/defaults/anthracite.json +0 -93
  171. package/src/modes/theme/defaults/basalt.json +0 -91
  172. package/src/modes/theme/defaults/birch.json +0 -95
  173. package/src/modes/theme/defaults/dark-abyss.json +0 -91
  174. package/src/modes/theme/defaults/dark-arctic.json +0 -104
  175. package/src/modes/theme/defaults/dark-aurora.json +0 -95
  176. package/src/modes/theme/defaults/dark-catppuccin.json +0 -107
  177. package/src/modes/theme/defaults/dark-cavern.json +0 -91
  178. package/src/modes/theme/defaults/dark-copper.json +0 -95
  179. package/src/modes/theme/defaults/dark-cosmos.json +0 -90
  180. package/src/modes/theme/defaults/dark-cyberpunk.json +0 -102
  181. package/src/modes/theme/defaults/dark-dracula.json +0 -98
  182. package/src/modes/theme/defaults/dark-eclipse.json +0 -91
  183. package/src/modes/theme/defaults/dark-ember.json +0 -95
  184. package/src/modes/theme/defaults/dark-equinox.json +0 -90
  185. package/src/modes/theme/defaults/dark-forest.json +0 -96
  186. package/src/modes/theme/defaults/dark-github.json +0 -105
  187. package/src/modes/theme/defaults/dark-gruvbox.json +0 -112
  188. package/src/modes/theme/defaults/dark-lavender.json +0 -95
  189. package/src/modes/theme/defaults/dark-lunar.json +0 -89
  190. package/src/modes/theme/defaults/dark-midnight.json +0 -95
  191. package/src/modes/theme/defaults/dark-monochrome.json +0 -94
  192. package/src/modes/theme/defaults/dark-monokai.json +0 -98
  193. package/src/modes/theme/defaults/dark-nebula.json +0 -90
  194. package/src/modes/theme/defaults/dark-nord.json +0 -97
  195. package/src/modes/theme/defaults/dark-ocean.json +0 -101
  196. package/src/modes/theme/defaults/dark-one.json +0 -100
  197. package/src/modes/theme/defaults/dark-poimandres.json +0 -141
  198. package/src/modes/theme/defaults/dark-rainforest.json +0 -91
  199. package/src/modes/theme/defaults/dark-reef.json +0 -91
  200. package/src/modes/theme/defaults/dark-retro.json +0 -92
  201. package/src/modes/theme/defaults/dark-rose-pine.json +0 -96
  202. package/src/modes/theme/defaults/dark-sakura.json +0 -95
  203. package/src/modes/theme/defaults/dark-slate.json +0 -95
  204. package/src/modes/theme/defaults/dark-solarized.json +0 -97
  205. package/src/modes/theme/defaults/dark-solstice.json +0 -90
  206. package/src/modes/theme/defaults/dark-starfall.json +0 -91
  207. package/src/modes/theme/defaults/dark-sunset.json +0 -99
  208. package/src/modes/theme/defaults/dark-swamp.json +0 -90
  209. package/src/modes/theme/defaults/dark-synthwave.json +0 -103
  210. package/src/modes/theme/defaults/dark-taiga.json +0 -91
  211. package/src/modes/theme/defaults/dark-terminal.json +0 -95
  212. package/src/modes/theme/defaults/dark-tokyo-night.json +0 -101
  213. package/src/modes/theme/defaults/dark-tundra.json +0 -91
  214. package/src/modes/theme/defaults/dark-twilight.json +0 -91
  215. package/src/modes/theme/defaults/dark-volcanic.json +0 -91
  216. package/src/modes/theme/defaults/graphite.json +0 -92
  217. package/src/modes/theme/defaults/light-arctic.json +0 -107
  218. package/src/modes/theme/defaults/light-aurora-day.json +0 -91
  219. package/src/modes/theme/defaults/light-canyon.json +0 -91
  220. package/src/modes/theme/defaults/light-catppuccin.json +0 -106
  221. package/src/modes/theme/defaults/light-cirrus.json +0 -90
  222. package/src/modes/theme/defaults/light-coral.json +0 -95
  223. package/src/modes/theme/defaults/light-cyberpunk.json +0 -96
  224. package/src/modes/theme/defaults/light-dawn.json +0 -90
  225. package/src/modes/theme/defaults/light-dunes.json +0 -91
  226. package/src/modes/theme/defaults/light-eucalyptus.json +0 -95
  227. package/src/modes/theme/defaults/light-forest.json +0 -100
  228. package/src/modes/theme/defaults/light-frost.json +0 -95
  229. package/src/modes/theme/defaults/light-github.json +0 -115
  230. package/src/modes/theme/defaults/light-glacier.json +0 -91
  231. package/src/modes/theme/defaults/light-gruvbox.json +0 -108
  232. package/src/modes/theme/defaults/light-haze.json +0 -90
  233. package/src/modes/theme/defaults/light-honeycomb.json +0 -95
  234. package/src/modes/theme/defaults/light-lagoon.json +0 -91
  235. package/src/modes/theme/defaults/light-lavender.json +0 -95
  236. package/src/modes/theme/defaults/light-meadow.json +0 -91
  237. package/src/modes/theme/defaults/light-mint.json +0 -95
  238. package/src/modes/theme/defaults/light-monochrome.json +0 -101
  239. package/src/modes/theme/defaults/light-ocean.json +0 -99
  240. package/src/modes/theme/defaults/light-one.json +0 -99
  241. package/src/modes/theme/defaults/light-opal.json +0 -91
  242. package/src/modes/theme/defaults/light-orchard.json +0 -91
  243. package/src/modes/theme/defaults/light-paper.json +0 -95
  244. package/src/modes/theme/defaults/light-poimandres.json +0 -141
  245. package/src/modes/theme/defaults/light-prism.json +0 -90
  246. package/src/modes/theme/defaults/light-retro.json +0 -98
  247. package/src/modes/theme/defaults/light-sand.json +0 -95
  248. package/src/modes/theme/defaults/light-savanna.json +0 -91
  249. package/src/modes/theme/defaults/light-solarized.json +0 -102
  250. package/src/modes/theme/defaults/light-soleil.json +0 -90
  251. package/src/modes/theme/defaults/light-sunset.json +0 -99
  252. package/src/modes/theme/defaults/light-synthwave.json +0 -98
  253. package/src/modes/theme/defaults/light-tokyo-night.json +0 -111
  254. package/src/modes/theme/defaults/light-wetland.json +0 -91
  255. package/src/modes/theme/defaults/light-zenith.json +0 -89
  256. package/src/modes/theme/defaults/limestone.json +0 -94
  257. package/src/modes/theme/defaults/mahogany.json +0 -97
  258. package/src/modes/theme/defaults/marble.json +0 -93
  259. package/src/modes/theme/defaults/obsidian.json +0 -91
  260. package/src/modes/theme/defaults/onyx.json +0 -91
  261. package/src/modes/theme/defaults/pearl.json +0 -93
  262. package/src/modes/theme/defaults/porcelain.json +0 -91
  263. package/src/modes/theme/defaults/quartz.json +0 -96
  264. package/src/modes/theme/defaults/sandstone.json +0 -95
  265. package/src/modes/theme/defaults/titanium.json +0 -90
  266. package/src/modes/theme/light.json +0 -93
@@ -1,6 +1,6 @@
1
1
  // Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
2
2
 
3
- export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","REBRANDING_PLAN_260525.md","ai-schema-normalize.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","brand-assets.md","codebase-overview.md","compaction.md","environment-variables.md","fs-scan-cache-architecture.md","gjc-dogfood-skill-template.md","handoff-generation-pipeline.md","keybindings.md","lsp-config.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","onboarding-packet.md","onboarding-receipt.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/read.md","tools/recall.md","tools/recipe.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md"];
3
+ export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["ERRATA-GPT5-HARMONY.md","REBRANDING_PLAN_260525.md","ai-schema-normalize.md","auth-broker-gateway.md","bash-tool-runtime.md","blob-artifact-architecture.md","brand-assets.md","codebase-overview.md","compaction.md","environment-variables.md","fs-scan-cache-architecture.md","gjc-dogfood-skill-template.md","handoff-generation-pipeline.md","keybindings.md","lsp-config.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","onboarding-packet.md","onboarding-receipt.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","render-mermaid.md","resolve-tool-runtime.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","theme.md","tools/ask.md","tools/ast-edit.md","tools/ast-grep.md","tools/bash.md","tools/browser.md","tools/calc.md","tools/checkpoint.md","tools/cron.md","tools/debug.md","tools/edit.md","tools/eval.md","tools/find.md","tools/github.md","tools/inspect_image.md","tools/irc.md","tools/job.md","tools/lsp.md","tools/monitor.md","tools/read.md","tools/recall.md","tools/recipe.md","tools/reflect.md","tools/render_mermaid.md","tools/resolve.md","tools/retain.md","tools/rewind.md","tools/search.md","tools/search_tool_bm25.md","tools/ssh.md","tools/task.md","tools/todo_write.md","tools/web_search.md","tools/write.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md"];
4
4
 
5
5
  export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
6
6
  "ERRATA-GPT5-HARMONY.md": "# ERRATA — GPT-5 Harmony-Header Leakage\n\n## 1. The problem\n\nOpenAI frames tool calls in the Harmony chat protocol:\n\n```\n<|start|>assistant<|channel|>commentary to=functions.<NAME><|message|>{ARGS}<|call|>\n```\n\n`<|channel|>commentary to=functions.NAME` is the **routing header** —\ncontrol tokens consumed by the runtime to dispatch the call. These\ntokens never appear as content under normal operation; the runtime\nstrips them.\n\nThe defect: gpt-5 models occasionally emit, **as ordinary content\ninside `{ARGS}`**, the **plain-text shadow** of these routing tokens —\nthe same characters without the `<|…|>` brackets — and continue\nproducing more pseudo-routing structure (channel name, body marker,\nmultilingual spam, fake tool-result framing). The contamination lives\ninside the visible tool argument and is dispatched to the tool as if it\nwere intended content.\n\n**Critical detail.** The actual `<|start|>` / `<|channel|>` /\n`<|message|>` / `<|call|>` special tokens almost never appear in tool\nargs. What leaks is the bracket-less spelling — `analysis to=functions.X\ncode …` — because OpenAI applies a logit mask suppressing the\ncontrol-token IDs inside the args region. The mass that would have gone\nto those special tokens redistributes onto the un-bracketed plain-text\nrepresentation the model also learned. This makes the leak structurally\ninvisible to the routing parser and lands it in the tool input verbatim.\n\nManifestation in tool args (real corpus example):\n\n```\n~ add_function(iso, ctx, ns, \"installSystemChangeObserver\",\n os_install_system_change_observer);】【\"】【analysis to=functions.edit\n code above เงินไทยฟรีuser to=functions.edit code …\n```\n\nThe leading code is real and intended. Everything after the first\nnon-Latin token through the next clean structural boundary is corruption.\n\n---\n\n## 2. 
Observed statistics & failure modes\n\nSource: `~/.gjc/stats.db` (`ss_tool_calls`, `ss_assistant_msgs`), through\n2026-05-10. 1.05M tool calls scanned.\n\n### 2.1 Rate\n\n| Model | Leaks in tool args | Calls | per million |\n|------------------|-------------------:|--------:|------------:|\n| gpt-5.4 | 37 | 226,957 | 163 |\n| gpt-5.3-openai-code | 17 | 112,243 | 151 |\n| gpt-5.5 | 2 | 80,750 | 25 |\n| gpt-5.2-openai-code | 0 | — | — |\n\nPlus 15 hits in assistant visible text / thinking blobs.\n\n### 2.2 Tool distribution\n\n| Tool | Hits |\n|---------------------|-----:|\n| `edit` | 38 |\n| `eval` | 11 |\n| `report_tool_issue` | 3 |\n| `grep`/`read`/`search`/`yield` | 1 each |\n\nConcentrated in tools with free-form (non-JSON-schema) argument formats.\n\n### 2.3 Leak shape (deterministic)\n\n```\nLEAK ::= JUNK_PREFIX MARKER CHANNEL_BODY (LEAK)?\nMARKER ::= \"to=functions.\" TOOL_NAME\nCHANNEL_BODY ::= \" code \" (SPAM | reasoning_prose | fake_tool_output)*\nJUNK_PREFIX ::= (GLITCH_TOKEN | CHANNEL_WORD | NON_LATIN_RUN | \"}\" | \"】【\")+\n```\n\n**Cascading is common.** Of 96 marker occurrences across 71 contaminated\nrecords, 39 contain ≥2 markers and 7 contain ≥3 — the model emits\nmultiple fake `to=functions.X code …` blocks back-to-back, often with\nfake `code_output\\nCell N:\\n…` framing between them. Once the\nplain-text scaffolding is in the residual stream, the prefix now *looks\nlike* a fresh tool envelope start, so the macro prior over continuations\nkeeps voting for more scaffolding. Self-amplifying.\n\n### 2.4 Glitch tokens\n\nSingle-token identifiers in `o200k_base` whose embeddings appear to be\nnear-init from underrepresentation in post-training. 
ASCII residue\nimmediately before the marker in the natural corpus:\n\n| Surface string | Single-token | Token ID | Hits in corpus |\n|-------------------|:-:|---------:|---:|\n| `Japgolly` | ✅ | 199,745 | 1 |\n| `Jsii` | ✅ | 114,318 | (subtoken of `Jsii_commentary`) |\n| `Jsii_commentary` | — (3 toks) | — | 2 |\n| `changedFiles` | — (2 toks) | — | 8 |\n| `RTLU` | — (2 toks) | — | 3 |\n\n`Japgolly` is in the last 0.13% of the vocabulary — the same family of\nGitHub-corpus residue that produced `SolidGoldMagikarp` in the 2023\nGPT-2 vocabulary (Rumbelow & Watkins). `SolidGoldMagikarp` itself\ntokenizes to 5 tokens in `o200k_base` — that specific token was retired,\nbut the class wasn't.\n\nFor the multi-token entries, the corpus-level signature is the surface\nstring; the underlying glitch trigger is a sub-token (e.g. `Jsii` inside\n`Jsii_commentary`). The detector list (`G` signal) keys on the surface\nstrings.\n\nStable across unrelated sessions. Treated as a high-precision detector\nsignal.\n\n### 2.5 Channel-word leakage\n\n`analysis` (5), `assistant` (5), `commentary` (3), `user` (1) appear\ndirectly preceding `to=`. Always bare words; never `<|channel|>analysis`\nor any other bracketed form. Consistent with §1 — the brackets are\nmasked, the words are not.\n\n### 2.6 Non-Latin spam residue\n\n96 marker hits, by script: CJK 40, Cyrillic 12, Telugu/Kannada/Malayalam\n18, Thai 8, Georgian 7, Armenian 7, Arabic 1. 
Recurring fragments are\nChinese gambling SEO (`大发时时彩`, `天天中彩票`), Georgian/Abkhaz junk,\nand Thai casino spam — well-known low-quality crawl residue.\n\nThis is the same script distribution observed in the controlled\nreproduction (§7.3), independent of the prompt's natural language.\n\n### 2.7 Failure-mode breakdown for the `edit` tool\n\nThe `edit` tool exists in two variants in the corpus:\n\n| Variant | Calls | Recovery |\n|--------------------------|------:|----------|\n| Patch-DSL (`§PATH`/anchor/`«»≔` ops) | 27 | **Recoverable** by op-truncation (§3.3) |\n| JSON-schema (`{path,edits:[…]}`) | 11 | **Not recoverable** — contamination is escaped *inside* JSON strings, parser accepts it cleanly, content would be written verbatim into source files |\n\nFor Patch-DSL leaks specifically:\n\n- 20/27 cases: contamination on the last input line; nothing follows.\n- 7/27 cases: contamination mid-input; what follows is one of: a\n duplicate replay of an earlier file/anchor, intended content for a\n *different* tool call (the model started its next call inline), or\n pure hallucination. Post-contamination content is never trustworthy.\n\n### 2.8 Mechanism (confirmed)\n\n**Prior collapse from null-embedding glitch tokens, into a\ncontrol-token-masked basin whose mass redistributes onto the\nplain-text shadow of the Harmony protocol.**\n\nStep by step:\n\n1. The model is mid-`{ARGS}` of a Harmony tool call. The runtime applies\n a logit mask suppressing structural control tokens (`<|channel|>`,\n `<|message|>`, `<|call|>`, `<|start|>`, `<|end|>`) inside the args\n region. Without this mask, normal generation would constantly\n hallucinate envelope-closes; with it, those token IDs have logit\n `-∞` in args.\n2. A glitch token `g` is sampled. By construction `g` was in the BPE\n merge corpus but barely in LM/RL training, so its **input embedding\n `e_g` ≈ near-init noise of small norm**.\n3. 
At position t+1, the residual update `h_{t+1} ≈ LN(h_t + e_g + Attn +\n MLP)` is dominated by the prefix-derived terms; the just-emitted-token\n signal is effectively absent. Generation diversity normally comes\n from `e_x` steering the residual into different sub-regions —\n stripped here.\n4. The next-token distribution therefore collapses onto the **conditional\n prior over continuations of the prefix, with local conditioning\n removed**. In a tool-calling rollout context, that prior is sharply\n peaked on Harmony scaffolding (control tokens + routing tokens) —\n that's what RL trained.\n5. The mask zeros the control-token IDs. Mass redistributes onto the\n **next-best continuation**: the un-bracketed surface-form spelling of\n the same protocol (`analysis`, `commentary`, ` to=functions.X`,\n ` code `). This spelling is unmasked because those characters are\n ordinary tokens.\n6. Once a few tokens of plain-text scaffolding land in the residual\n stream, the prefix now resembles a fresh envelope start. The macro\n prior keeps voting for more scaffolding. Cascading (§2.3) follows.\n7. Multilingual spam after the marker is the same prior-collapse\n continuation, drawn from the training neighborhood of the glitch\n token (often ESL/auto-generated multilingual web junk — exactly the\n crawl residue in §2.6).\n\n**Two corollaries the corpus data demanded but only the experiment\nexplained:**\n\n- **The brackets never appear** (§1, §2.5). The mask is what makes the\n leak land in plain text instead of as a real envelope-close.\n- **Counterintuitive grammar dependency** (§7.4). The leak is *worse* in\n formats closest to OpenAI's training distribution. Off-distribution\n custom grammars dampen the macro-prior basin; the official\n `*** Begin Patch` format is the strongest collapse target.\n\nThe 2023 SolidGoldMagikarp paper documented mechanism (1)+(2)+(4). 
The\nnew piece is (5): when constrained decoding masks the natural collapse\ntarget, the mass laundered through the un-masked plain-text shadow\nbecomes a structurally-invisible exfiltration channel.",
@@ -12,7 +12,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
12
12
  "brand-assets.md": "# Brand assets\n\nGajae-Code uses the current GJC character and hero images in `assets/` for README and documentation surfaces.\n\n| Asset | Purpose |\n| --- | --- |\n| [`assets/hero.png`](../assets/hero.png) | Wide README/docs hero image for Gajae-Code. |\n| [`assets/character.png`](../assets/character.png) | Standalone Gajae-Code character mascot. |\n| [`assets/tool-image-fixture.webp`](../assets/tool-image-fixture.webp) | Minimal WebP fixture for terminal image rendering tests. Not a product brand asset. |\n\nThe old legacy demo artwork has been removed from the active asset set; new public surfaces should reference the Gajae-Code assets above.\n",
13
13
  "codebase-overview.md": "# Codebase Overview\n\nThis document maps the main parts of the `gajae-code` repository. The root README stays intentionally small; this file is the architecture-oriented companion.\n\n## Product shape\n\nGajae-Code (`gjc`) is centered on `packages/coding-agent/`. The public workflow surface is intentionally fixed at four source-bundled skills and four public role subagents. Runtime state, specs, plans, goals, team state, and local overrides live under `.gjc/`.\n\nDefault workflow skills are embedded from:\n\n```text\npackages/coding-agent/src/defaults/gjc/skills/<name>/SKILL.md\n```\n\nPublic role subagent prompts are embedded from:\n\n```text\npackages/coding-agent/src/prompts/agents/<role>.md\n```\n\nThe runtime can still discover project/user overrides, but the bundled defaults are loaded from source so a missing project `.gjc` directory does not remove the default workflow surface.\n\n## Packages\n\n### `packages/coding-agent/`\n\nMain `gjc` CLI and product runtime.\n\n- `packages/coding-agent/package.json` exposes the `gjc` binary at `src/cli.ts` and the SDK/barrel entrypoint at `src/index.ts`.\n- `packages/coding-agent/src/cli.ts` is the executable bootstrap. 
It registers CLI commands such as `setup`, `deep-interview`, `ralplan`, `ultragoal`, `team`, and the default launch path.\n- `packages/coding-agent/src/main.ts` adapts CLI options into session creation and dispatches interactive, print, RPC, RPC-UI, and ACP modes.\n- `packages/coding-agent/src/sdk.ts` assembles settings, model registry, auth, workspace/context discovery, skills, rules, tools, system prompt, and the underlying `@gajae-code/agent-core` agent.\n- `packages/coding-agent/src/tools/index.ts` is the built-in tool registry for file/code/runtime tools such as read, bash, edit, AST tools, eval, find/search, LSP, browser, task/subagent, recipe, IRC, todo, web search, write, and memory tools.\n- `packages/coding-agent/src/defaults/gjc-defaults.ts` embeds and installs the default workflow skills.\n- `packages/coding-agent/src/task/agents.ts` embeds bundled task-agent prompts. The public contract is `executor`, `architect`, `planner`, and `critic`; other bundled prompts are internal/runtime utilities.\n\n### `packages/ai/`\n\nProvider/model boundary for LLM access.\n\n- `packages/ai/src/index.ts` exports model registry/resolution, provider implementations, auth broker/gateway/storage, streaming, usage, retry/overflow utilities, OAuth, discovery, and validation helpers.\n- `packages/ai/src/types.ts` defines provider, model, context, message, tool, usage, reasoning, and stream-event contracts.\n- `packages/ai/src/stream.ts` dispatches model-driven streams to the right provider/API implementation and normalizes streaming events.\n- `packages/ai/src/model-manager.ts` merges static, cached, dynamic, and remote model sources.\n- `packages/ai/README.md` documents tool calling, partial streaming tool calls, thinking/reasoning, provider configuration, context handoff, and OAuth flows.\n\n### `packages/agent/`\n\nStateful agent runtime built on `@gajae-code/ai`.\n\n- `packages/agent/src/index.ts` exports the `Agent`, loop APIs, append-only context, compaction, telemetry, 
proxy utilities, thinking helpers, and shared types.\n- `packages/agent/src/agent-loop.ts` owns the turn loop: transform context, call the model stream, execute tool calls, append tool results, and emit lifecycle events.\n- `packages/agent/src/agent.ts` wraps the loop with mutable state, subscriptions, prompt/continue/abort APIs, queues, provider session state, telemetry, and state mutation helpers.\n- `packages/agent/src/types.ts` defines `AgentMessage`, `AgentTool`, loop config, event, and runtime state contracts.\n\n### `packages/tui/`\n\nTerminal UI framework used by the CLI.\n\n- `packages/tui/src/index.ts` exports components, keybindings, autocomplete, terminal abstractions, image support, TUI core, and utilities.\n- `packages/tui/src/tui.ts` manages component rendering, focus, overlays, terminal dimensions, diff state, and synchronized output.\n- `packages/tui/src/terminal.ts` abstracts terminal lifecycle, dimensions, cursor controls, title/progress, Kitty protocol state, and appearance notifications.\n- `packages/tui/README.md` documents the component model and built-in components such as text, input, editor, markdown, loaders, select/settings lists, spacer, image, box, and container.\n\n### `packages/natives/` and Rust crates\n\nNative helper layer exposed through N-API.\n\n- `packages/natives/package.json` exports `native/index.js` and generated TypeScript definitions.\n- `packages/natives/native/loader-state.js` resolves platform/CPU-specific native binaries and validates package/native version alignment.\n- `crates/pi-natives/src/lib.rs` is the N-API root for appearance, AST search/editing, clipboard, filesystem scan/cache, grep/glob, syntax highlighting, HTML-to-Markdown, keyboard parsing, process/PTY/shell support, SIXEL, code summarization, token counting, text measurement/wrapping/truncation, workspace scanning, power assertions, and isolation helpers.\n- `crates/pi-shell/src/lib.rs` exposes brush-based shell execution primitives used by the native 
shell adapter.\n- `crates/pi-shell/src/shell.rs` implements persistent and one-shot shell execution, streaming, environment handling, cancellation, and output minimizer telemetry.\n- `crates/pi-shell/src/fixup.rs` performs conservative AST-based bash command fixups.\n- `crates/pi-natives/src/pty.rs` implements interactive PTY sessions.\n\n### `packages/utils/`\n\nShared TypeScript utilities.\n\n- `packages/utils/src/index.ts` exports abortable/async helpers, color/env/dir utilities, fetch retry, formatting, frontmatter, glob helpers, JSON helpers, logging, MIME detection, prompt rendering, process-tree helpers, sanitization, streams, temp files, tab spacing, type guards, and executable lookup.\n- `packages/utils/src/ptree.ts` and `packages/utils/src/procmgr.ts` wrap native process helpers for ergonomic TypeScript use.\n\n### `packages/stats/`\n\nLocal observability dashboard for session and model usage.\n\n- `packages/stats/src/index.ts` exposes the `gjc-stats` CLI entrypoint and exports aggregation/server APIs.\n- `packages/stats/src/aggregator.ts` parses session-derived request metrics and writes aggregated data through SQLite.\n- `packages/stats/src/server.ts` serves local dashboard API routes and static SPA assets.\n- `packages/stats/src/types.ts` and `packages/stats/src/shared-types.ts` define dashboard and aggregate metric shapes.\n\n### `packages/swarm-extension/`\n\nOptional YAML/DAG multi-agent extension outside the fixed default workflow surface.\n\n- `packages/swarm-extension/README.md` documents standalone `gjc-swarm` execution and in-TUI `/swarm` commands.\n- Swarm workflows define agents, tasks, dependency edges, waves, and shared workspace state under `.swarm_<name>/`.\n\n### `packages/typescript-edit-benchmark/`\n\nPrivate benchmark package for TypeScript edit tasks.\n\n- `packages/typescript-edit-benchmark/package.json` exposes `typescript-edit-benchmark` and depends on the coding-agent, agent-core, ai, tui, utils, diff, prettier, and Babel 
tooling.\n- `packages/typescript-edit-benchmark/src/index.ts` is the benchmark CLI: it resolves fixtures, loads tasks, runs edit attempts, records progress, and writes reports/conversation dumps under `runs/`.\n\n## Python packages\n\n### `python/gjc-rpc/`\n\nTyped Python client for `gjc --mode rpc`.\n\n- `python/gjc-rpc/pyproject.toml` packages `gjc-rpc` for Python 3.11+.\n- `python/gjc-rpc/README.md` documents the process-backed stdio client, typed command methods, startup flags, event listeners, todo seeding, host-owned tools, and host-owned URI schemes.\n\n### `python/robogjc/`\n\nSelf-hosted GitHub triage/fix bot that drives `gjc --mode rpc`.\n\n- `python/robogjc/AGENTS.md` is the authoritative local contract for this subtree.\n- `python/robogjc/pyproject.toml` packages `robogjc` for Python 3.11+ with FastAPI, httpx, pydantic settings, Click, and `gjc-rpc`.\n- `python/robogjc/README.md` documents the webhook-to-worktree-to-gjc flow, GitHub sidecar trust boundary, persistent per-issue sessions, and audit trail.\n- Important modules include `src/server.py`, `src/queue.py`, `src/tasks.py`, `src/worker.py`, `src/host_tools.py`, `src/sandbox.py`, `src/github_client.py`, `src/github_events.py`, `src/db.py`, and `src/config.py`.\n\n## Runtime flow\n\nA normal CLI session starts in `packages/coding-agent/src/cli.ts`, routes through command handling, then reaches `packages/coding-agent/src/main.ts`. `main.ts` converts CLI/runtime settings into `CreateAgentSessionOptions` and calls `createAgentSession()` in `packages/coding-agent/src/sdk.ts`.\n\nThe SDK builds the session context, loads the default skills, creates built-in tools, resolves model/auth state through `@gajae-code/ai`, constructs the system prompt, and instantiates `@gajae-code/agent-core`. 
The agent loop streams model events, executes tools, records tool results, and hands state back to the selected mode: interactive TUI, print, RPC, RPC-UI, or ACP.\n\n## Verification and gates\n\nPackage-local checks are defined in each `package.json`. For workflow-definition or default-surface changes, the focused gates are:\n\n```sh\nbun scripts/check-visible-definitions.ts\nbun scripts/verify-g002-gates.ts\nbun scripts/rebrand-inventory.ts --strict\nbun test packages/coding-agent/test/default-gjc-definitions.test.ts\n```\n\nFor broader TypeScript verification, use the root script:\n\n```sh\nbun run check:ts\n```\n\nDo not use `tsc` or `npx tsc` directly in this repository.\n",
14
14
  "compaction.md": "# Compaction and Branch Summaries\n\nCompaction and branch summaries are the two mechanisms that keep long sessions usable without losing prior work context.\n\n- **Compaction** rewrites old history into a summary on the current branch.\n- **Branch summary** captures abandoned branch context during `/tree` navigation.\n\nBoth are persisted as session entries and converted back into user-context messages when rebuilding LLM input.\n\n## Key implementation files\n\n- `packages/agent/src/compaction/compaction.ts` (context-full summarization and handoff generation)\n- `packages/agent/src/compaction/branch-summarization.ts`\n- `packages/agent/src/compaction/pruning.ts`\n- `packages/agent/src/compaction/utils.ts`\n- `packages/agent/src/compaction/openai.ts`\n- `packages/coding-agent/src/session/session-manager.ts`\n- `packages/coding-agent/src/session/agent-session.ts`\n- `packages/coding-agent/src/session/messages.ts`\n- `packages/coding-agent/src/extensibility/hooks/types.ts`\n- `packages/coding-agent/src/config/settings-schema.ts`\n\n## Session entry model\n\nCompaction and branch summaries are first-class session entries, not plain assistant/user messages.\n\n- `CompactionEntry`\n - `type: \"compaction\"`\n - `summary`, optional `shortSummary`\n - `firstKeptEntryId` (compaction boundary)\n - `tokensBefore`\n - optional `details`, `preserveData`, `fromExtension`\n- `BranchSummaryEntry`\n - `type: \"branch_summary\"`\n - `fromId`, `summary`\n - optional `details`, `fromExtension`\n\nWhen context is rebuilt (`buildSessionContext`):\n\n1. Latest compaction on the active path is converted to one `compactionSummary` message.\n2. Kept entries from `firstKeptEntryId` to the compaction point are re-included.\n3. Later entries on the path are appended.\n4. `branch_summary` entries are converted to `branchSummary` messages.\n5. 
`custom_message` entries are converted to `custom` messages.\n\nThose custom roles are then transformed into LLM-facing user messages in `convertToLlm()` using the static templates:\n\n- `packages/agent/src/compaction/prompts/compaction-summary-context.md`\n- `packages/agent/src/compaction/prompts/branch-summary-context.md`\n- `packages/agent/src/compaction/prompts/handoff-document.md`\n\n## Compaction pipeline\n\n### Triggers\n\nCompaction/context maintenance can run in four ways:\n\n1. **Manual context compaction**: `/compact [instructions]` calls `AgentSession.compact(...)`.\n2. **Automatic overflow recovery**: after a same-model assistant error that matches context overflow.\n3. **Automatic threshold maintenance**: after a successful turn when context exceeds the resolved threshold.\n4. **Idle maintenance**: `runIdleCompaction()` can invoke the same auto-maintenance path with reason `\"idle\"`.\n\n### Compaction shape (visual)\n\n```text\nBefore compaction:\n\n entry: 0 1 2 3 4 5 6 7 8 9\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┘\n └────────┬───────┘ └──────────────┬──────────────┘\n messagesToSummarize kept messages\n ↑\n firstKeptEntryId (entry 4)\n\nAfter compaction (new entry appended):\n\n entry: 0 1 2 3 4 5 6 7 8 9 10\n ┌─────┬─────┬─────┬──────┬─────┬─────┬──────┬──────┬─────┬──────┬─────┐\n │ hdr │ usr │ ass │ tool │ usr │ ass │ tool │ tool │ ass │ tool │ cmp │\n └─────┴─────┴─────┴──────┴─────┴─────┴──────┴──────┴─────┴──────┴─────┘\n └──────────┬──────┘ └──────────────────────┬───────────────────┘\n not sent to LLM sent to LLM\n ↑\n starts from firstKeptEntryId\n\nWhat the LLM sees:\n\n ┌────────┬─────────┬─────┬─────┬──────┬──────┬─────┬──────┐\n │ system │ summary │ usr │ ass │ tool │ tool │ ass │ tool │\n └────────┴─────────┴─────┴─────┴──────┴──────┴─────┴──────┘\n ↑ ↑ 
└─────────────────┬────────────────┘\n prompt from cmp messages from firstKeptEntryId\n```\n\n### Overflow-retry vs threshold/idle maintenance\n\nThe automatic paths are intentionally different:\n\n- **Overflow recovery**\n - Trigger: current-model assistant error is detected as context overflow and the error is not older than the latest compaction.\n - The failing assistant error message is removed from active agent state before retry.\n - Context promotion is tried first; if a configured larger model is available, the agent switches model and retries without compacting.\n - If promotion is unavailable and compaction is enabled, context-full compaction runs with `reason: \"overflow\"` and `willRetry: true`; handoff strategy is not used for overflow.\n - On success, agent auto-continues (`agent.continue()`) after compaction.\n\n- **Threshold maintenance**\n - Trigger: successful, non-error assistant message whose adjusted context tokens exceed `resolveThresholdTokens(...)`.\n - Tool-output pruning can reduce the measured token count before threshold comparison.\n - Context promotion is tried before compaction.\n - If promotion is unavailable, auto maintenance runs with `reason: \"threshold\"` and `willRetry: false`.\n - With `compaction.strategy: \"handoff\"`, threshold maintenance starts a new handoff session instead of writing a compaction entry; if handoff returns no document without aborting, it falls back to context-full compaction.\n - On success, if `compaction.autoContinue !== false`, schedules an agent-authored developer auto-continue prompt from `prompts/system/auto-continue.md`.\n\n- **Idle maintenance**\n - Trigger: `runIdleCompaction()` when the session is neither streaming nor already compacting.\n - Uses `reason: \"idle\"` and does not auto-continue afterward.\n\n### Pre-compaction pruning\n\nBefore compaction checks, tool-result pruning may run (`pruneToolOutputs`).\n\nDefault prune policy:\n\n- Protect newest `40_000` tool-output tokens.\n- Require at least `20_000` 
total estimated savings.\n- Never prune tool results from `skill` or `read`.\n\nPruned tool results are replaced with:\n\n- `[Output truncated - N tokens]`\n\nIf pruning changes entries, session storage is rewritten and agent message state is refreshed before compaction decisions.\n\n### Boundary and cut-point logic\n\n`prepareCompaction()` only considers entries since the last compaction entry (if any).\n\n1. Find previous compaction index.\n2. Compute `boundaryStart = prevCompactionIndex + 1`.\n3. Adapt `keepRecentTokens` using measured usage ratio when available.\n4. Run `findCutPoint()` over the boundary window.\n\nValid cut points include:\n\n- message entries with roles: `user`, `assistant`, `bashExecution`, `hookMessage`, `branchSummary`, `compactionSummary`\n- `custom_message` entries\n- `branch_summary` entries\n\nHard rule: never cut at `toolResult`.\n\nIf there are non-message metadata entries immediately before the cut point (`model_change`, `thinking_level_change`, labels, etc.), they are pulled into the kept region by moving cut index backward until a message or compaction boundary is hit.\n\n### Split-turn handling\n\nIf cut point is not at a user-turn start, compaction treats it as a split turn.\n\nTurn start detection treats these as user-turn boundaries:\n\n- `message.role === \"user\"`\n- `message.role === \"bashExecution\"`\n- `custom_message` entry\n- `branch_summary` entry\n\nSplit-turn compaction generates two summaries:\n\n1. History summary (`messagesToSummarize`)\n2. Turn-prefix summary (`turnPrefixMessages`)\n\nFinal stored summary is merged as:\n\n```markdown\n<history summary>\n\n---\n\n**Turn Context (split turn):**\n\n<turn prefix summary>\n```\n\n### Summary generation\n\n`compact(...)` builds summaries from serialized conversation text:\n\n1. Convert messages via `convertToLlm()`.\n2. Serialize with `serializeConversation()`.\n3. Wrap in `<conversation>...</conversation>`.\n4. 
Optionally include `<previous-summary>...</previous-summary>`.\n5. Optionally inject hook context as `<additional-context>` list.\n6. Execute summarization prompt with `SUMMARIZATION_SYSTEM_PROMPT`.\n\nPrompt selection:\n\n- first compaction: `compaction-summary.md`\n- iterative compaction with prior summary: `compaction-update-summary.md`\n- split-turn second pass: `compaction-turn-prefix.md`\n- short UI summary: `compaction-short-summary.md`\n- handoff document: `handoff-document.md` (used by `generateHandoff(...)`, not serialized compaction)\n\nRemote summarization modes:\n\n- If `compaction.remoteEndpoint` is set and remote compaction is enabled, local summary generation POSTs:\n - `{ systemPrompt, prompt }`\n- Expects JSON containing at least `{ summary }`.\n- For OpenAI/OpenAI code provider models, compaction first tries the provider-native `/responses/compact` endpoint when remote compaction is enabled. It preserves provider replacement history in `preserveData.openaiRemoteCompaction` and falls back to local summarization if that native request fails.\n\n### Handoff generation\n\n`packages/agent/src/compaction/compaction.ts` also exports `generateHandoff(...)`. Handoff generation uses the same `completeSimple(...)` oneshot style as summarization, but it preserves the live agent cache prefix by sending the active system prompt, tool array, and real LLM message history, then appending one agent-attributed `user` message containing the handoff prompt. It forces `toolChoice: \"none\"` and returns joined text blocks directly.\n\nHandoff does not write a `CompactionEntry`. 
`AgentSession.handoff()` owns the session transition: it starts a new session, injects the generated document as a visible `custom_message` with `customType: \"handoff\"`, and rebuilds agent messages from that new session.\n\n### File-operation context in summaries\n\nCompaction tracks cumulative file activity using assistant tool calls:\n\n- `read(path)` → read set\n- `write(path)` → modified set\n- `edit(path)` → modified set\n\nCumulative behavior:\n\n- Includes prior compaction details only when prior entry is pi-generated (`fromExtension !== true`).\n- In split turns, includes turn-prefix file ops too.\n- `readFiles` excludes files also modified.\n\nSummary text gets file tags appended via prompt template:\n\n```xml\n<read-files>\n...\n</read-files>\n<modified-files>\n...\n</modified-files>\n```\n\n### Persist and reload\n\nAfter summary generation (or hook-provided summary), agent session:\n\n1. Appends `CompactionEntry` with `appendCompaction(...)` for context-full maintenance; handoff strategy creates a new session and injects a handoff `custom_message` instead.\n2. Rebuilds display context from the active leaf via `buildDisplaySessionContext()`.\n3. Replaces live agent messages with rebuilt context.\n4. Emits `session_compact` hook event.\n\n## Branch summarization pipeline\n\nBranch summarization is tied to tree navigation, not token overflow.\n\n### Trigger\n\nDuring `navigateTree(...)`:\n\n1. Compute abandoned entries from old leaf to common ancestor using `collectEntriesForBranchSummary(...)`.\n2. If caller requested summary (`options.summarize`), generate summary before switching leaf.\n3. 
If summary exists, attach it at the navigation target using `branchWithSummary(...)`.\n\nOperationally this is commonly driven by the `/tree` flow when `branchSummary.enabled` is `true`.\n\n### Branch switch shape (visual)\n\n```text\nTree before navigation:\n\n ┌─ B ─ C ─ D (old leaf, being abandoned)\n A ───┤\n └─ E ─ F (target)\n\nCommon ancestor: A\nEntries to summarize: B, C, D\n\nAfter navigation with summary:\n\n ┌─ B ─ C ─ D ─ [summary of B,C,D]\n A ───┤\n └─ E ─ F (new leaf)\n```\n\n### Preparation and token budget\n\n`generateBranchSummary(...)` computes budget as:\n\n- `tokenBudget = model.contextWindow - branchSummary.reserveTokens`\n\n`prepareBranchEntries(...)` then:\n\n1. First pass: collect cumulative file ops from all summarized entries, including prior pi-generated `branch_summary` details.\n2. Second pass: walk newest → oldest, adding messages until token budget is reached.\n3. Prefer preserving recent context.\n4. May still include large summary entries near budget edge for continuity.\n\nCompaction entries are included as `compactionSummary` messages in the branch summarization input.\n\n### Summary generation and persistence\n\nBranch summarization:\n\n1. Converts and serializes selected messages.\n2. Wraps in `<conversation>`.\n3. Uses custom instructions if supplied, otherwise `branch-summary.md`.\n4. Calls summarization model with `SUMMARIZATION_SYSTEM_PROMPT`.\n5. Prepends `branch-summary-preamble.md`.\n6. 
Appends file-operation tags.\n\nResult is stored as `BranchSummaryEntry` with optional details (`readFiles`, `modifiedFiles`).\n\n## Extension and hook touchpoints\n\n### `session_before_compact`\n\nPre-compaction hook.\n\nCan:\n\n- cancel compaction (`{ cancel: true }`)\n- provide full custom compaction payload (`{ compaction: CompactionResult }`)\n\n### `session.compacting`\n\nPrompt/context customization hook for default compaction.\n\nCan return:\n\n- `prompt` (override base summary prompt)\n- `context` (extra context lines injected into `<additional-context>`)\n- `preserveData` (stored on compaction entry)\n\n### `session_compact`\n\nPost-compaction notification with saved `compactionEntry` and `fromExtension` flag.\n\n### `session_before_tree`\n\nRuns on tree navigation before default branch summary generation.\n\nCan:\n\n- cancel navigation\n- provide custom `{ summary: { summary, details } }` used when user requested summarization\n\n### `session_tree`\n\nPost-navigation event exposing new/old leaf and optional summary entry.\n\n## Runtime behavior and failure semantics\n\n- Manual compaction aborts current agent operation first.\n- `abortCompaction()` cancels both manual and auto-compaction controllers.\n- Auto compaction emits start/end session events for UI/state updates.\n- Auto compaction can try multiple model candidates and retry transient failures; long retry delays prefer the next candidate when one is available.\n- Overflow errors are excluded from generic retry path because they are handled by context promotion/compaction.\n- If auto-compaction fails:\n - overflow path emits `Context overflow recovery failed: ...`\n - threshold path emits `Auto-compaction failed: ...`\n- Branch summarization can be cancelled via abort signal (e.g., Escape), returning canceled/aborted navigation result.\n\n## Settings and defaults\n\nFrom `settings-schema.ts`:\n\n- `compaction.enabled` = `true`\n- `compaction.strategy` = `\"context-full\"` (`\"handoff\"` and 
`\"off\"` are also supported)\n- `compaction.reserveTokens` = `16384`\n- `compaction.keepRecentTokens` = `20000`\n- `compaction.autoContinue` = `true`\n- `compaction.remoteEnabled` = `true`\n- `compaction.remoteEndpoint` = `undefined`\n- `compaction.thresholdPercent` = `-1` and `compaction.thresholdTokens` = `-1`; when no positive override is set, the threshold is `contextWindow - max(15% of contextWindow, reserveTokens)`\n- `compaction.idleEnabled` = `true`\n- `branchSummary.enabled` = `false`\n- `branchSummary.reserveTokens` = `16384`\n\nThese values are consumed at runtime by `AgentSession` and compaction/branch summarization modules.\n",
15
- "environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@gajae-code/utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.gjc/agent/.env`, respecting `GJC_CONFIG_DIR` / `GJC_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.gjc/.env`, respecting `GJC_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `GJC_*` keys are mirrored to `GJC_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and 
`ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | 
|\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `AZURE_OPENAI_API_KEY` | Azure OpenAI auth | Using `azure-openai` / `azure-openai-responses` models | Pair with `AZURE_OPENAI_BASE_URL` or `AZURE_OPENAI_RESOURCE_NAME` |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| 
`DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `GJC_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `gjc auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. 
When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `GJC_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `GJC_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.gjc/` (respecting `GJC_CONFIG_DIR`). |\n\nThe gateway has no dedicated env vars — it inherits `GJC_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `gjc auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| 
`ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `ANTHROPIC_MODEL_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `ANTHROPIC_MODEL_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\nCredential fallback order is static env (`AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` plus optional `AWS_SESSION_TOKEN`), named profile / SSO / `credential_process`, then EC2 IMDSv2. 
`models.yml` Bedrock entries use `api: bedrock-converse-stream` and do not require `apiKey` or `apiKeyEnv` because the provider signs requests from this AWS chain.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Model provider base URL overrides\n\nBuilt-in model provider base URLs resolve with this precedence:\n\n1. `models.yml` / model config provider `baseUrl`\n2. provider-specific base URL environment variable\n3. bundled provider default\n\nSupported aliases:\n\n| Provider | Variables |\n| --- | --- |\n| OpenAI | `OPENAI_BASE_URL` |\n| Anthropic | `ANTHROPIC_BASE_URL` |\n| Google Gemini | `GOOGLE_BASE_URL`, `GEMINI_BASE_URL` |\n| Google Antigravity | `GOOGLE_ANTIGRAVITY_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Gemini CLI | `GOOGLE_GEMINI_CLI_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Vertex | `GOOGLE_VERTEX_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Any provider id | derived `<PROVIDER_ID>_BASE_URL`, uppercased with non-alphanumerics converted to `_` (for example `my-proxy` → `MY_PROXY_BASE_URL`) |\n\nOpenAI-compatible proxy note: the built-in `openai` provider keeps its bundled API transport (`openai-responses`). Setting `OPENAI_BASE_URL` changes the host but still calls `<baseUrl>/responses`. 
If your proxy only supports Chat Completions, configure a custom `models.yml` provider with `api: openai-completions` instead of using the built-in OpenAI provider override:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKey: OPENAI_API_KEY\n api: openai-completions\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n api: openai-completions\n```\n\nFor OpenRouter traffic, GJC explicitly sends `User-Agent: Gajae-Code/<package version>` plus OpenRouter attribution headers. For the built-in OpenAI Responses transport and generic OpenAI-compatible Chat Completions transport, GJC passes model/provider headers through the OpenAI JavaScript SDK and does not set a GJC user-agent unless the provider-specific code adds one.\n\n### OpenAI-compatible proxy provider config\n\nFor OpenAI-compatible proxies that only implement Chat Completions, prefer a custom `models.yml` provider over `OPENAI_BASE_URL`:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n reasoning: false\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n```\n\n`models.yml` is strict: unsupported provider/model keys fail validation before the provider request is dispatched.\n\n### GJC workflow bridge commands\n\n`gjc ralplan`, `gjc deep-interview`, and `gjc state` are private runtime bridge commands. 
They require `GJC_RUNTIME_BINARY` (or legacy `GJC_LEGACY_RUNTIME_BINARY`) to point at the private runtime executable; public bundled workflow use remains through `/skill:ralplan` and `/skill:deep-interview` inside a GJC session.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_RUNTIME_BINARY` | Private runtime bridge binary for `gjc ralplan`, `gjc deep-interview`, and `gjc state` |\n| `GJC_LEGACY_RUNTIME_BINARY` | Legacy fallback bridge binary name |\n\n### Team dry-run and state paths\n\n`gjc team ... --dry-run --json` creates the same machine-readable state tree as a team launch without starting tmux panes. By default that state is written under `<cwd>/.gjc/state/team/<team>/`; treat it as ephemeral smoke-test/review state. Do not commit generated `.gjc/state/team` contents. Remove the generated team directory after a dry-run when the harness no longer needs it.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_TEAM_STATE_ROOT` | Overrides the team state root (default `<cwd>/.gjc/state/team`) |\n| `GJC_TEAM_TMUX_COMMAND` | tmux binary/command override for team launch |\n| `GJC_TEAM_WORKER_COMMAND` | Worker GJC command override |\n| `GJC_TEAM_WORKER_CLI` | Team worker CLI selector; accepted values are `auto` or `gjc` |\n| `GJC_TEAM_WORKER_CLI_MAP` | Comma-separated worker CLI selector map; entries must be `auto` or `gjc` |\n\n### Google Vertex AI\n\n| Variable | Required? 
| Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `GJC_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI code provider responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `GJC_OPENAI_CODE_DEBUG` | `1`/`true` enables OpenAI code provider debug logging |\n| `GJC_OPENAI_CODE_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `GJC_OPENAI_CODE_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path 
|\n| `GJC_OPENAI_CODE_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `GJC_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `GJC_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / OpenAI code OAuth in DB | OpenAI code search provider availability/auth |\n| `GJC_OPENAI_CODE_WEB_SEARCH_MODEL` | OpenAI code search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override 
|\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.gjc/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `anthropic-model-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-5 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `GJC_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| 
Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `GJC_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `GJC_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `GJC_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `GJC_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `GJC_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `GJC_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `GJC_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `GJC_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `GJC_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `GJC_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `GJC_SUBPROCESS_CMD` | Overrides subagent spawn command (`gjc` / `gjc.cmd` resolution bypass) |\n| `GJC_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| 
`GJC_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `GJC_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `GJC_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `GJC_TIMING=full` lists every module-load entry instead of just the top N. |\n| `GJC_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `GJC_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `GJC_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `GJC_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `GJC_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `GJC_FORCE_IMAGE_PROTOCOL=sixel` |\n| `GJC_NO_PTY` | If `1`, disables interactive PTY 
path for bash tool |\n\n`GJC_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@gajae-code/utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `GJC_CONFIG_DIR` | Config root dirname under home (default `.gjc`) |\n| `GJC_CODING_AGENT_DIR` | Full override for agent directory (default `~/<GJC_CONFIG_DIR or .gjc>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `GJC_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `ANTHROPIC_MODEL_BASH_NO_CI` | Legacy alias fallback for `GJC_BASH_NO_CI` |\n| `GJC_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `ANTHROPIC_MODEL_BASH_NO_LOGIN` | Legacy alias fallback for `GJC_BASH_NO_LOGIN` |\n| `GJC_SHELL_PREFIX` | Optional command prefix wrapper |\n| `ANTHROPIC_MODEL_CODE_SHELL_PREFIX` | Legacy alias fallback for `GJC_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `GJC_BASH_NO_LOGIN`/`ANTHROPIC_MODEL_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | 
--------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `GJC_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `GJC_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `GJC_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `GJC_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `GJC_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `GJC_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `GJC_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `GJC_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `GJC_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path 
|\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
15
+ "environment-variables.md": "# Environment Variables (Current Runtime Reference)\n\nThis reference is derived from current code paths in:\n\n- `packages/coding-agent/src/**`\n- `packages/ai/src/**` (provider/auth resolution used by coding-agent)\n- `packages/utils/src/**` and `packages/tui/src/**` where those vars directly affect coding-agent runtime\n\nIt documents only active behavior.\n\n## Resolution model and precedence\n\nMost runtime lookups use `$env` from `@gajae-code/utils` (`packages/utils/src/env.ts`).\n\n`$env` loading order:\n\n1. Existing process environment (`Bun.env`)\n2. Project `.env` (`$PWD/.env`) for keys not already set\n3. Agent `.env` (`~/.gjc/agent/.env`, respecting `GJC_CONFIG_DIR` / `GJC_CODING_AGENT_DIR`) for keys not already set\n4. Config-root `.env` (`~/.gjc/.env`, respecting `GJC_CONFIG_DIR`) for keys not already set\n5. Home `.env` (`~/.env`) for keys not already set\n\nAdditional rule inside each `.env` file: `GJC_*` keys are mirrored to `GJC_*` keys in that parsed file.\n\n---\n\n## 1) Model/provider authentication\n\nThese are consumed via `getEnvApiKey()` (`packages/ai/src/stream.ts`) unless noted otherwise.\n\n### Core provider credentials\n\n| Variable | Used for | Required when | Notes / precedence |\n| ------------------------------- | ------------------------------------------------ | -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |\n| `ANTHROPIC_OAUTH_TOKEN` | Anthropic API auth | Using Anthropic with OAuth token auth | Takes precedence over `ANTHROPIC_API_KEY` for provider auth resolution |\n| `ANTHROPIC_API_KEY` | Anthropic API auth | Using Anthropic without OAuth token | Fallback after `ANTHROPIC_OAUTH_TOKEN` |\n| `ANTHROPIC_FOUNDRY_API_KEY` | Anthropic via Azure Foundry / enterprise gateway | `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` enabled | Takes precedence over `ANTHROPIC_OAUTH_TOKEN` and 
`ANTHROPIC_API_KEY` when Foundry mode is enabled |\n| `OPENAI_API_KEY` | OpenAI auth | Using OpenAI-family providers without explicit apiKey argument | Used by OpenAI Completions/Responses providers |\n| `GEMINI_API_KEY` | Google Gemini auth | Using `google` provider models | Primary key for Gemini provider mapping |\n| `GOOGLE_API_KEY` | Gemini image tool auth fallback | Using `gemini_image` tool without `GEMINI_API_KEY` | Used by coding-agent image tool fallback path |\n| `GROQ_API_KEY` | Groq auth | Using Groq models | |\n| `CEREBRAS_API_KEY` | Cerebras auth | Using Cerebras models | |\n| `FIREWORKS_API_KEY` | Fireworks auth | Using Fireworks models | |\n| `TOGETHER_API_KEY` | Together auth | Using `together` provider | |\n| `HUGGINGFACE_HUB_TOKEN` | Hugging Face auth | Using `huggingface` provider | Primary Hugging Face token env var |\n| `HF_TOKEN` | Hugging Face auth | Using `huggingface` provider | Fallback when `HUGGINGFACE_HUB_TOKEN` is unset |\n| `SYNTHETIC_API_KEY` | Synthetic auth | Using Synthetic models | |\n| `NVIDIA_API_KEY` | NVIDIA auth | Using `nvidia` provider | |\n| `NANO_GPT_API_KEY` | NanoGPT auth | Using `nanogpt` provider | |\n| `VENICE_API_KEY` | Venice auth | Using `venice` provider | |\n| `LITELLM_API_KEY` | LiteLLM auth | Using `litellm` provider | OpenAI-compatible LiteLLM proxy key |\n| `LM_STUDIO_API_KEY` | LM Studio auth (optional) | Using `lm-studio` provider with authenticated hosts | Local LM Studio usually runs without auth; any non-empty token works when a key is required |\n| `OLLAMA_API_KEY` | Ollama auth (optional) | Using `ollama` provider with authenticated hosts | Local Ollama usually runs without auth; any non-empty token works when a key is required |\n| `LLAMA_CPP_API_KEY` | llama.cpp auth (optional) | Using `llama.cpp` provider with authenticated hosts | Local llama.cpp usually runs without auth; any non-empty token works when a key is configured |\n| `XIAOMI_API_KEY` | Xiaomi MiMo auth | Using `xiaomi` provider | 
|\n| `MOONSHOT_API_KEY` | Moonshot auth | Using `moonshot` provider | |\n| `XAI_API_KEY` | xAI auth | Using xAI models | |\n| `OPENROUTER_API_KEY` | OpenRouter auth | Using OpenRouter models | Also used by image tool when preferred/auto provider is OpenRouter |\n| `MISTRAL_API_KEY` | Mistral auth | Using Mistral models | |\n| `ZAI_API_KEY` | z.ai auth | Using z.ai models | Also used by z.ai web search provider |\n| `MINIMAX_API_KEY` | MiniMax auth | Using `minimax` provider | |\n| `AZURE_OPENAI_API_KEY` | Azure OpenAI auth | Using `azure-openai` / `azure-openai-responses` models | Pair with `AZURE_OPENAI_BASE_URL` or `AZURE_OPENAI_RESOURCE_NAME` |\n| `MINIMAX_CODE_API_KEY` | MiniMax Code auth | Using `minimax-code` provider | |\n| `MINIMAX_CODE_CN_API_KEY` | MiniMax Code CN auth | Using `minimax-code-cn` provider | |\n| `OPENCODE_API_KEY` | OpenCode auth | Using `opencode-go` / `opencode-zen` models | |\n| `QIANFAN_API_KEY` | Qianfan auth | Using `qianfan` provider | |\n| `QWEN_OAUTH_TOKEN` | Qwen Portal auth | Using `qwen-portal` with OAuth token | Takes precedence over `QWEN_PORTAL_API_KEY` |\n| `QWEN_PORTAL_API_KEY` | Qwen Portal auth | Using `qwen-portal` with API key | Fallback after `QWEN_OAUTH_TOKEN` |\n| `ZENMUX_API_KEY` | ZenMux auth | Using `zenmux` provider | Used for ZenMux OpenAI and Anthropic-compatible routes |\n| `VLLM_API_KEY` | vLLM auth/discovery opt-in | Using `vllm` provider (local OpenAI-compatible servers) | Any non-empty value works for no-auth local servers |\n| `CURSOR_ACCESS_TOKEN` | Cursor provider auth | Using Cursor provider | |\n| `AI_GATEWAY_API_KEY` | Vercel AI Gateway auth | Using `vercel-ai-gateway` provider | |\n| `CLOUDFLARE_AI_GATEWAY_API_KEY` | Cloudflare AI Gateway auth | Using `cloudflare-ai-gateway` provider | Base URL must be configured as `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic` |\n| `ALIBABA_CODING_PLAN_API_KEY` | Alibaba Coding Plan auth | Using `alibaba-coding-plan` provider | |\n| 
`DEEPSEEK_API_KEY` | DeepSeek auth | Using DeepSeek models | |\n| `KILO_API_KEY` | Kilo auth | Using Kilo models | |\n| `OLLAMA_CLOUD_API_KEY` | Ollama Cloud auth | Using `ollama-cloud` provider | |\n| `GITLAB_TOKEN` | GitLab Duo auth | Using `gitlab-duo` provider | |\n\n### GitHub/Copilot token chains\n\n| Variable | Used for | Chain |\n| ---------------------- | ------------------------------------------------ | ---------------------------------------------------- |\n| `COPILOT_GITHUB_TOKEN` | GitHub Copilot provider auth | `COPILOT_GITHUB_TOKEN` → `GH_TOKEN` → `GITHUB_TOKEN` |\n| `GH_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: `GITHUB_TOKEN` → `GH_TOKEN` |\n| `GITHUB_TOKEN` | Copilot fallback; GitHub API auth in web scraper | In web scraper: checked before `GH_TOKEN` |\n\n### Auth broker / auth gateway (remote credential vault)\n\nWhen the broker is enabled, the local SQLite credential store is bypassed and all OAuth refresh / access tokens live on the broker host. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full protocol, CLI surface, and 5-min/15-s usage cache layering.\n\n| Variable | Used for | Required when | Notes / precedence |\n| ----------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `GJC_AUTH_BROKER_URL` | Base URL of the remote auth-broker (e.g. `https://broker.tailnet:8765`); selects broker mode | Resolving credentials through a broker; also required by `gjc auth-gateway serve` (the gateway is itself a broker client) | Wins over `auth.broker.url` in `config.yml`. 
When set with no resolvable token, `resolveAuthBrokerConfig()` hard-errors instead of falling back to local SQLite. |\n| `GJC_AUTH_BROKER_TOKEN` | Bearer token sent on every broker endpoint except `/v1/healthz` | `GJC_AUTH_BROKER_URL` is set and no token is available from `auth.broker.token` or `<config-dir>/auth-broker.token` | Resolution: this env → `auth.broker.token` (`$ENV_NAME` indirection supported) → `<config-dir>/auth-broker.token` (mode `0600`). `<config-dir>` is `~/.gjc/` (respecting `GJC_CONFIG_DIR`). |\n\nThe gateway has no dedicated env vars — it inherits `GJC_AUTH_BROKER_*`. Its own inbound bearer token lives at `<config-dir>/auth-gateway.token` and is managed via `gjc auth-gateway token`.\n\n---\n\n## 2) Provider-specific runtime configuration\n\n### Anthropic Foundry Gateway (Azure / enterprise proxy)\n\nWhen `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled, Anthropic requests switch to Foundry mode:\n\n- Base URL resolves from `FOUNDRY_BASE_URL` (fallback remains model/default base URL if unset).\n- API key resolution for provider `anthropic` becomes:\n `ANTHROPIC_FOUNDRY_API_KEY` → `ANTHROPIC_OAUTH_TOKEN` → `ANTHROPIC_API_KEY`.\n- `ANTHROPIC_CUSTOM_HEADERS` is parsed as comma/newline-separated `key: value` pairs and merged into request headers.\n- TLS client/server material can be injected from env values:\n `NODE_EXTRA_CA_CERTS`, `ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`.\n Each accepts either:\n - a filesystem path to PEM content, or\n - inline PEM (including escaped `\\n` sequences).\n\n| Variable | Value type | Behavior |\n| --------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------- |\n| `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` | Boolean-like string (`1`, `true`, `yes`, `on`) | Enables Foundry mode for Anthropic provider |\n| `FOUNDRY_BASE_URL` | URL string | Anthropic endpoint base URL in Foundry mode |\n| 
`ANTHROPIC_FOUNDRY_API_KEY` | Token string | Used for `Authorization: Bearer <token>` |\n| `ANTHROPIC_CUSTOM_HEADERS` | Header list string | Extra headers; format `header-a: value, header-b: value` or newline-separated |\n| `NODE_EXTRA_CA_CERTS` | PEM path or inline PEM | Extra CA chain for server certificate validation |\n| `ANTHROPIC_MODEL_CODE_CLIENT_CERT` | PEM path or inline PEM | mTLS client certificate |\n| `ANTHROPIC_MODEL_CODE_CLIENT_KEY` | PEM path or inline PEM | mTLS client private key (must be paired with cert) |\n\n### Amazon Bedrock\n\n| Variable | Default / behavior |\n| ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- |\n| `AWS_REGION` | Primary region source |\n| `AWS_DEFAULT_REGION` | Fallback if `AWS_REGION` unset |\n| `AWS_PROFILE` | Enables named profile auth path |\n| `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | Enables IAM key auth path |\n| `AWS_BEARER_TOKEN_BEDROCK` | Enables bearer token auth path |\n| `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` / `AWS_CONTAINER_CREDENTIALS_FULL_URI` | Enables ECS task credential path |\n| `AWS_WEB_IDENTITY_TOKEN_FILE` + `AWS_ROLE_ARN` | Enables web identity auth path |\n| `AWS_BEDROCK_SKIP_AUTH` | If `1`, injects dummy credentials (proxy/non-auth scenarios) |\n| `AWS_BEDROCK_FORCE_HTTP1` | If `1`, forces Node HTTP/1 request handler |\n| `HTTPS_PROXY` / `HTTP_PROXY` / `ALL_PROXY` | Routes Bedrock runtime and AWS SSO credential calls through the configured proxy using HTTP/1 |\n| `NO_PROXY` | Excludes matching hosts from proxy routing when a proxy variable is configured |\n\nRegion fallback in provider code: `options.region` → `AWS_REGION` → `AWS_DEFAULT_REGION` → `us-east-1`.\n\nCredential fallback order is static env (`AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` plus optional `AWS_SESSION_TOKEN`), named profile / SSO / `credential_process`, then EC2 IMDSv2. 
`models.yml` Bedrock entries use `api: bedrock-converse-stream` and do not require `apiKey` or `apiKeyEnv` because the provider signs requests from this AWS chain.\n\n### Azure OpenAI Responses\n\n| Variable | Default / behavior |\n| ---------------------------------- | --------------------------------------------------------------------------- |\n| `AZURE_OPENAI_API_KEY` | Required unless API key passed as option |\n| `AZURE_OPENAI_API_VERSION` | Default `v1` |\n| `AZURE_OPENAI_BASE_URL` | Direct base URL override |\n| `AZURE_OPENAI_RESOURCE_NAME` | Used to construct base URL: `https://<resource>.openai.azure.com/openai/v1` |\n| `AZURE_OPENAI_DEPLOYMENT_NAME_MAP` | Optional mapping string: `modelId=deploymentName,model2=deployment2` |\n\nBase URL resolution: option `azureBaseUrl` → env `AZURE_OPENAI_BASE_URL` → option/env resource name → `model.baseUrl`.\n\n### Model provider base URL overrides\n\nBuilt-in model provider base URLs resolve with this precedence:\n\n1. `models.yml` / model config provider `baseUrl`\n2. provider-specific base URL environment variable\n3. bundled provider default\n\nSupported aliases:\n\n| Provider | Variables |\n| --- | --- |\n| OpenAI | `OPENAI_BASE_URL` |\n| Anthropic | `ANTHROPIC_BASE_URL` |\n| Google Gemini | `GOOGLE_BASE_URL`, `GEMINI_BASE_URL` |\n| Google Antigravity | `GOOGLE_ANTIGRAVITY_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Gemini CLI | `GOOGLE_GEMINI_CLI_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Google Vertex | `GOOGLE_VERTEX_BASE_URL`, then `GOOGLE_BASE_URL`, then `GEMINI_BASE_URL` |\n| Any provider id | derived `<PROVIDER_ID>_BASE_URL`, uppercased with non-alphanumerics converted to `_` (for example `my-proxy` → `MY_PROXY_BASE_URL`) |\n\nOpenAI-compatible proxy note: the built-in `openai` provider keeps its bundled API transport (`openai-responses`). Setting `OPENAI_BASE_URL` changes the host but still calls `<baseUrl>/responses`. 
If your proxy only supports Chat Completions, configure a custom `models.yml` provider with `api: openai-completions` instead of using the built-in OpenAI provider override:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKey: OPENAI_API_KEY\n api: openai-completions\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n api: openai-completions\n```\n\nFor OpenRouter traffic, GJC explicitly sends `User-Agent: Gajae-Code/<package version>` plus OpenRouter attribution headers. For the built-in OpenAI Responses transport and generic OpenAI-compatible Chat Completions transport, GJC passes model/provider headers through the OpenAI JavaScript SDK and does not set a GJC user-agent unless the provider-specific code adds one.\n\n### OpenAI-compatible proxy provider config\n\nFor OpenAI-compatible proxies that only implement Chat Completions, prefer a custom `models.yml` provider over `OPENAI_BASE_URL`:\n\n```yaml\nproviders:\n openai-compatible:\n baseUrl: https://proxy.example.com/v1\n apiKeyEnv: OPENAI_API_KEY\n api: openai-completions\n auth: apiKey\n headers:\n User-Agent: curl/8.7.1\n models:\n - id: gpt-4o\n name: GPT-4o via proxy\n reasoning: false\n input: [text]\n cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }\n```\n\n`models.yml` is strict: unsupported provider/model keys fail validation before the provider request is dispatched.\n\n### GJC workflow bridge commands\n\n`gjc ralplan`, `gjc deep-interview`, and `gjc state` are private runtime bridge commands. 
They require `GJC_RUNTIME_BINARY` (or legacy `GJC_LEGACY_RUNTIME_BINARY`) to point at the private runtime executable; public bundled workflow use remains through `/skill:ralplan` and `/skill:deep-interview` inside a GJC session.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_RUNTIME_BINARY` | Private runtime bridge binary for `gjc ralplan`, `gjc deep-interview`, and `gjc state` |\n| `GJC_LEGACY_RUNTIME_BINARY` | Legacy fallback bridge binary name |\n\n### Team tmux backend, dry-run, and state paths\n\n`gjc team ...` starts tmux worker panes from the current tmux-backed leader session. Start that leader with `gjc --tmux` first; `gjc team` intentionally does not create or attach the leader session itself.\n\n`gjc team ... --dry-run --json` creates the same machine-readable state tree as a team launch without starting tmux panes. By default that state is written under `<cwd>/.gjc/state/team/<team>/`; treat it as ephemeral smoke-test/review state. Do not commit generated `.gjc/state/team` contents. Remove the generated team directory after a dry-run when the harness no longer needs it.\n\n| Variable | Behavior |\n| --- | --- |\n| `GJC_TEAM_STATE_ROOT` | Overrides the team state root (default `<cwd>/.gjc/state/team`) |\n| `GJC_TEAM_TMUX_COMMAND` | tmux binary/command override for team launch |\n| `GJC_TEAM_WORKER_COMMAND` | Worker GJC command override |\n| `GJC_TEAM_WORKER_CLI` | Team worker CLI selector; accepted values are `auto` or `gjc` |\n| `GJC_TEAM_WORKER_CLI_MAP` | Comma-separated worker CLI selector map; entries must be `auto` or `gjc` |\n\n### Google Vertex AI\n\n| Variable | Required? 
| Notes |\n| -------------------------------- | ------------------------------ | ------------------------------------------------------------------------------------------------------------------------- |\n| `GOOGLE_CLOUD_PROJECT` | Yes (unless passed in options) | Fallback: `GCLOUD_PROJECT` |\n| `GCLOUD_PROJECT` | Fallback | Used as alternate project ID source |\n| `GOOGLE_CLOUD_PROJECT_ID` | OAuth login helper only | Used by Gemini CLI OAuth project discovery |\n| `GOOGLE_CLOUD_LOCATION` | Yes (unless passed in options) | No default in provider |\n| `GOOGLE_CLOUD_API_KEY` | Conditional | Direct Vertex API-key auth; otherwise ADC fallback can authenticate when project and location are set |\n| `GOOGLE_APPLICATION_CREDENTIALS` | Conditional | If set, file must exist; otherwise ADC fallback path is checked (`~/.config/gcloud/application_default_credentials.json`) |\n\n### Kimi\n\n| Variable | Default / behavior |\n| ---------------------- | -------------------------------------------------------- |\n| `KIMI_CODE_OAUTH_HOST` | Primary OAuth host override |\n| `KIMI_OAUTH_HOST` | Fallback OAuth host override |\n| `KIMI_CODE_BASE_URL` | Overrides Kimi usage endpoint base URL (`usage/kimi.ts`) |\n\nOAuth host chain: `KIMI_CODE_OAUTH_HOST` → `KIMI_OAUTH_HOST` → `https://auth.kimi.com`.\n\n### Gemini CLI compatibility\n\n| Variable | Default / behavior |\n| -------------------------- | --------------------------------------------------------------- |\n| `GJC_AI_GEMINI_CLI_VERSION` | Overrides Gemini CLI user-agent version tag (`0.35.3` if unset) |\n\n### OpenAI code provider responses (feature/debug controls)\n\n| Variable | Behavior |\n| ------------------------------------ | ---------------------------------------------------- |\n| `GJC_OPENAI_CODE_DEBUG` | `1`/`true` enables OpenAI code provider debug logging |\n| `GJC_OPENAI_CODE_WEBSOCKET` | `1`/`true` enables websocket transport preference |\n| `GJC_OPENAI_CODE_WEBSOCKET_V2` | `1`/`true` enables websocket v2 path 
|\n| `GJC_OPENAI_CODE_WEBSOCKET_IDLE_TIMEOUT_MS` | Positive integer override (default 300000) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_BUDGET` | Non-negative integer override (default 5) |\n| `GJC_OPENAI_CODE_WEBSOCKET_RETRY_DELAY_MS` | Positive integer base backoff override (default 500) |\n| `GJC_OPENAI_STREAM_IDLE_TIMEOUT_MS` | Positive integer OpenAI stream idle timeout override |\n\n### Cursor provider debug\n\n| Variable | Behavior |\n| ------------------ | ------------------------------------------------------------------------ |\n| `DEBUG_CURSOR` | Enables provider debug logs; `2`/`verbose` for detailed payload snippets |\n| `DEBUG_CURSOR_LOG` | Optional file path for JSONL debug log output |\n\n### Prompt cache compatibility switch\n\n| Variable | Behavior |\n| -------------------- | ----------------------------------------------------------------------------------------------------------------- |\n| `GJC_CACHE_RETENTION` | If `long`, enables long retention where supported (`anthropic`, `openai-responses`, Bedrock retention resolution) |\n\n---\n\n## 3) Web search subsystem\n\n### Search provider credentials\n\n| Variable | Used by |\n| --------------------------------------------------- | ------------------------------------------------------------- |\n| `EXA_API_KEY` | Exa search provider |\n| `BRAVE_API_KEY` | Brave search provider |\n| `PERPLEXITY_API_KEY` | Perplexity search provider API-key mode |\n| `PERPLEXITY_COOKIES` | Perplexity cookie-auth search mode |\n| `TAVILY_API_KEY` | Tavily search provider |\n| `ZAI_API_KEY` | z.ai search provider (also checks stored OAuth in `agent.db`) |\n| `OPENAI_API_KEY` / OpenAI code OAuth in DB | OpenAI code search provider availability/auth |\n| `GJC_OPENAI_CODE_WEB_SEARCH_MODEL` | OpenAI code search provider model override |\n| `MOONSHOT_SEARCH_API_KEY` / `KIMI_SEARCH_API_KEY` | Kimi/Moonshot search provider env auth |\n| `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` | Kimi/Moonshot search endpoint override 
|\n| `KAGI_API_KEY` | Kagi search provider |\n| `JINA_API_KEY` | Jina search provider |\n| `PARALLEL_API_KEY` | Parallel search provider |\n| `SEARXNG_ENDPOINT`, `SEARXNG_TOKEN` | SearXNG endpoint and optional bearer token |\n| `SEARXNG_BASIC_USERNAME`, `SEARXNG_BASIC_PASSWORD` | SearXNG HTTP Basic Auth credentials |\n\nSearXNG also reads the equivalent `searxng.endpoint`, `searxng.token`, `searxng.basicUsername`, and `searxng.basicPassword` settings from `~/.gjc/agent/config.yml`; environment variables are fallbacks.\n\n### Anthropic web search auth chain\n\nAnthropic web search uses `findAnthropicAuth()` from `packages/ai/src/utils/anthropic-auth.ts` in this order:\n\n1. `ANTHROPIC_SEARCH_API_KEY` (+ optional `ANTHROPIC_SEARCH_BASE_URL`)\n2. `ANTHROPIC_FOUNDRY_API_KEY` when `ANTHROPIC_MODEL_CODE_USE_FOUNDRY` is enabled\n3. Anthropic OAuth credentials from `agent.db` (must not expire within 5-minute buffer)\n4. Anthropic API-key credentials from `agent.db`\n5. Generic Anthropic env fallback: provider key (`ANTHROPIC_FOUNDRY_API_KEY` in Foundry mode, otherwise `ANTHROPIC_OAUTH_TOKEN`/`ANTHROPIC_API_KEY`) + optional `ANTHROPIC_BASE_URL` (`FOUNDRY_BASE_URL` when Foundry mode is enabled)\n\nRelated vars:\n\n| Variable | Default / behavior |\n| --------------------------- | ---------------------------------------------------- |\n| `ANTHROPIC_SEARCH_API_KEY` | Highest-priority explicit search key |\n| `ANTHROPIC_SEARCH_BASE_URL` | Defaults to `https://api.anthropic.com` when omitted |\n| `ANTHROPIC_SEARCH_MODEL` | Defaults to `anthropic-model-haiku-4-5` |\n| `ANTHROPIC_BASE_URL` | Generic fallback base URL for tier-5 auth path |\n\n### Perplexity OAuth flow behavior flag\n\n| Variable | Behavior |\n| ------------------- | ------------------------------------------------------------------------------- |\n| `GJC_AUTH_NO_BORROW` | If set, disables macOS native-app token borrowing path in Perplexity login flow |\n\n---\n\n## 4) Python tooling and kernel runtime\n\n| 
Variable | Default / behavior |\n| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |\n| `GJC_PY` | Eval backend override: `0`/`bash`=JavaScript only, `1`/`py`=Python only, `mix`/`both`=both; invalid values ignored |\n| `GJC_PYTHON_SKIP_CHECK` | If `1`, skips Python interpreter availability checks (subprocess runner still starts on demand) |\n| `GJC_PYTHON_INTEGRATION` | If `1`, opts gated integration tests in (e.g. `python-runner.integration.test.ts`) into running against real Python |\n| `GJC_PYTHON_IPC_TRACE` | If `1`, logs NDJSON frames exchanged with the Python runner subprocess |\n| `VIRTUAL_ENV` | Highest-priority venv path for Python runtime resolution |\n\nExtra conditional behavior:\n\n- If `BUN_ENV=test` or `NODE_ENV=test`, Python availability checks are treated as OK and warming is skipped.\n- Python env filtering denies common API keys and allows safe base vars + `LC_`, `XDG_`, `GJC_` prefixes.\n\n---\n\n## 5) Agent/runtime behavior toggles\n\n| Variable | Default / behavior |\n| ---------------------------- | -------------------------------------------------------------------------------------------------- |\n| `GJC_SMOL_MODEL` | Ephemeral model-role override for `smol` (CLI `--smol` takes precedence) |\n| `GJC_SLOW_MODEL` | Ephemeral model-role override for `slow` (CLI `--slow` takes precedence) |\n| `GJC_PLAN_MODEL` | Ephemeral model-role override for `plan` (CLI `--plan` takes precedence) |\n| `GJC_NO_TITLE` | If set (any non-empty value), disables auto session title generation on first user message |\n| `NULL_PROMPT` | If `true`, system prompt builder returns empty string |\n| `GJC_BLOCKED_AGENT` | Blocks a specific subagent type in task tool |\n| `GJC_SUBPROCESS_CMD` | Overrides subagent spawn command (`gjc` / `gjc.cmd` resolution bypass) |\n| `GJC_TASK_MAX_OUTPUT_BYTES` | Max captured output bytes per subagent (default `500000`) |\n| 
`GJC_TASK_MAX_OUTPUT_LINES` | Max captured output lines per subagent (default `5000`) |\n| `GJC_TIMING` | If set (any non-empty value), prints a hierarchical timing-span tree to **stderr** via `logger.printTimings()`. In interactive mode the tree prints once the agent is ready (before the TUI starts); in print mode it prints after the whole prompt batch completes. Print-mode prompts are wrapped in `print:prompt:initial` / `print:prompt:next` spans so each user message shows up as its own row. `GJC_TIMING=x` exits the process with code 0 right after printing in interactive mode (use to measure cold startup only). `GJC_TIMING=full` lists every module-load entry instead of just the top N. |\n| `GJC_PACKAGE_DIR` | Overrides package asset base dir resolution (docs/examples/changelog path lookup) |\n| `GJC_DISABLE_LSPMUX` | If `1`, disables lspmux detection/integration and forces direct LSP server spawning |\n| `GJC_RPC_EMIT_TITLE` | Boolean-like flag enabling title events in RPC mode |\n| `SMITHERY_URL` | Smithery web URL override (default `https://smithery.ai`) |\n| `SMITHERY_API_URL` | Smithery API base URL override (default `https://api.smithery.ai`) |\n| `PUPPETEER_EXECUTABLE_PATH` | Browser tool Chromium executable override |\n| `LM_STUDIO_BASE_URL` | Default implicit LM Studio discovery base URL override (`http://127.0.0.1:1234/v1` if unset) |\n| `OLLAMA_BASE_URL` | Default implicit Ollama discovery base URL override (`http://127.0.0.1:11434` if unset) |\n| `LLAMA_CPP_BASE_URL` | Default implicit Llama.cpp discovery base URL override (`http://127.0.0.1:8080` if unset) |\n| `GJC_EDIT_VARIANT` | Forces edit tool variant when valid (`patch`, `replace`, `hashline`, `atom`, `vim`, `apply_patch`) |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces supported image protocol (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) where used |\n| `GJC_ALLOW_SIXEL_PASSTHROUGH` | Allows SIXEL passthrough when `GJC_FORCE_IMAGE_PROTOCOL=sixel` |\n| `GJC_NO_PTY` | If `1`, disables interactive PTY 
path for bash tool |\n\n`GJC_NO_PTY` is also set internally when CLI `--no-pty` is used.\n\n---\n\n## 6) Storage and config root paths\n\nThese are consumed via `@gajae-code/utils/dirs` and affect where coding-agent stores data.\n\n| Variable | Default / behavior |\n| --------------------- | ----------------------------------------------------------------------------- |\n| `GJC_CONFIG_DIR` | Config root dirname under home (default `.gjc`) |\n| `GJC_CODING_AGENT_DIR` | Full override for agent directory (default `~/<GJC_CONFIG_DIR or .gjc>/agent`) |\n| `PWD` | Used when matching canonical current working directory in path helpers |\n\n---\n\n## 7) Shell/tool execution environment\n\n(From `packages/utils/src/procmgr.ts` and coding-agent bash tool integration.)\n\n| Variable | Behavior |\n| -------------------------- | ------------------------------------------------------------------------------ |\n| `GJC_BASH_NO_CI` | Suppresses automatic `CI=true` injection into spawned shell env |\n| `ANTHROPIC_MODEL_BASH_NO_CI` | Legacy alias fallback for `GJC_BASH_NO_CI` |\n| `GJC_BASH_NO_LOGIN` | Disables login-shell mode; shell args become `['-c']` instead of `['-l','-c']` |\n| `ANTHROPIC_MODEL_BASH_NO_LOGIN` | Legacy alias fallback for `GJC_BASH_NO_LOGIN` |\n| `GJC_SHELL_PREFIX` | Optional command prefix wrapper |\n| `ANTHROPIC_MODEL_CODE_SHELL_PREFIX` | Legacy alias fallback for `GJC_SHELL_PREFIX` |\n| `VISUAL` | Preferred external editor command |\n| `EDITOR` | Fallback external editor command |\n\nCurrent implementation: `GJC_BASH_NO_LOGIN`/`ANTHROPIC_MODEL_BASH_NO_LOGIN` are active; when either is set, `getShellArgs()` returns `['-c']`.\n\n---\n\n## 8) UI/theme/session detection (auto-detected env)\n\nThese are read as runtime signals; they are usually set by the terminal/OS rather than manually configured.\n\n| Variable | Used for |\n| ------------------------------------------------------------------------------------------------------------------ | 
--------------------------------------------------------- |\n| `COLORTERM`, `TERM`, `WT_SESSION` | Color capability detection (theme color mode) |\n| `COLORFGBG` | Terminal background light/dark auto-detection |\n| `TERM_PROGRAM`, `TERM_PROGRAM_VERSION`, `TERMINAL_EMULATOR` | Terminal identity in system prompt/context |\n| `KDE_FULL_SESSION`, `XDG_CURRENT_DESKTOP`, `DESKTOP_SESSION`, `XDG_SESSION_DESKTOP`, `GDMSESSION`, `WINDOWMANAGER` | Desktop/window-manager detection in system prompt/context |\n| `KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION` | Stable per-terminal session breadcrumb IDs |\n| `SHELL`, `ComSpec`, `TERM_PROGRAM`, `TERM` | System info diagnostics |\n| `APPDATA`, `XDG_CONFIG_HOME` | lspmux config path resolution |\n| `HOME` | Path shortening in command UI |\n\n---\n\n## 9) TUI runtime flags (shared package, affects coding-agent UX)\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------------------------- |\n| `GJC_NOTIFICATIONS` | `off` / `0` / `false` suppress desktop notifications |\n| `GJC_TUI_WRITE_LOG` | If set, logs TUI writes to file |\n| `GJC_HARDWARE_CURSOR` | If `1`, enables hardware cursor mode |\n| `GJC_CLEAR_ON_SHRINK` | If `1`, clears empty rows when content shrinks |\n| `GJC_DEBUG_REDRAW` | If `1`, enables redraw debug logging |\n| `GJC_TUI_DEBUG` | If `1`, enables deep TUI debug dump path |\n| `GJC_FORCE_IMAGE_PROTOCOL` | Forces terminal image protocol detection (`kitty`, `iterm2`/`iterm`, `sixel`, `none`) |\n\n---\n\n## 10) Commit generation controls\n\n| Variable | Behavior |\n| ------------------------- | ------------------------------------------------------------------- |\n| `GJC_COMMIT_TEST_FALLBACK` | If `true` (case-insensitive), force commit fallback generation path |\n| `GJC_COMMIT_NO_FALLBACK` | If `true`, disables fallback when agent returns no proposal |\n| `GJC_COMMIT_MAP_REDUCE` | If `false`, disables map-reduce commit analysis path 
|\n| `DEBUG` | If set, commit agent error stack traces are printed |\n\n---\n\n## Security-sensitive variables\n\nTreat these as secrets; do not log or commit them:\n\n- Provider/API keys and OAuth/bearer credentials (all `*_API_KEY`, `*_TOKEN`, OAuth access/refresh tokens)\n- Cloud credentials (`AWS_*`, `GOOGLE_APPLICATION_CREDENTIALS` path may expose service-account material)\n- Search/provider auth vars (`EXA_API_KEY`, `BRAVE_API_KEY`, `PERPLEXITY_API_KEY`, Anthropic search keys)\n- Foundry mTLS material (`ANTHROPIC_MODEL_CODE_CLIENT_CERT`, `ANTHROPIC_MODEL_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS` when it points to private CA bundles)\n\nPython runtime also explicitly strips many common key vars before spawning kernel subprocesses (`packages/coding-agent/src/eval/py/runtime.ts`).\n",
16
16
  "fs-scan-cache-architecture.md": "# Filesystem Scan Cache Architecture Contract\n\nThis document defines the current contract for the shared filesystem scan cache implemented in Rust (`crates/pi-natives/src/fs_cache.rs`) and consumed by native discovery/search APIs exposed to `packages/coding-agent`.\n\n## What this cache is\n\nThe cache stores full directory-scan entry lists (`GlobMatch[]`) keyed by scan scope and traversal policy, then lets higher-level operations (glob filtering, fuzzy scoring, grep file selection) run against those cached entries.\n\nPrimary goals:\n\n- avoid repeated filesystem walks for repeated discovery/search calls\n- keep consistency across `glob`, `fuzzyFind`, and `grep` when they share the same scan policy\n- allow explicit staleness recovery for empty results and explicit invalidation after file mutations\n\n## Ownership and public surface\n\n- Cache implementation and policy: `crates/pi-natives/src/fs_cache.rs`\n- Native consumers:\n - `crates/pi-natives/src/glob.rs`\n - `crates/pi-natives/src/fd.rs` (`fuzzyFind`)\n - `crates/pi-natives/src/grep.rs`\n- JS binding/export:\n - `packages/natives/src/glob/index.ts` (`invalidateFsScanCache`)\n - `packages/natives/src/glob/types.ts`\n - `packages/natives/src/grep/types.ts`\n- Coding-agent mutation invalidation helpers:\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts`\n\n## Cache key partitioning (hard contract)\n\nEach entry is keyed by:\n\n- canonicalized `root` directory path\n- `include_hidden` boolean\n- `use_gitignore` boolean\n- `skip_node_modules` boolean\n\nImplications:\n\n- Hidden and non-hidden scans do **not** share entries.\n- Gitignore-respecting and ignore-disabled scans do **not** share entries.\n- Scans that prune `node_modules` do **not** share entries with scans that include it.\n- Consumers must pass stable semantics for hidden/gitignore/node_modules behavior; changing any flag creates a different cache partition.\n\n## Scan collection behavior\n\nCache 
population uses a deterministic walker (`ignore::WalkBuilder`) configured by `include_hidden`, `use_gitignore`, and `skip_node_modules`:\n\n- `follow_links(false)`\n- sorted by file path\n- `.git` is always skipped\n- `node_modules` is pruned at traversal time when `skip_node_modules=true`\n- entry file type + `mtime` are captured via `symlink_metadata`\n\nSearch roots are resolved by `resolve_search_path`:\n\n- relative paths are resolved against current cwd\n- target must be an existing directory\n- root is canonicalized when possible\n\n## Freshness and eviction policy\n\nGlobal policy (environment-overridable):\n\n- `FS_SCAN_CACHE_TTL_MS` (default `1000`)\n- `FS_SCAN_EMPTY_RECHECK_MS` (default `200`)\n- `FS_SCAN_CACHE_MAX_ENTRIES` (default `16`)\n\nBehavior:\n\n- `get_or_scan(...)`\n - if TTL is `0`: bypass cache entirely, always fresh scan (`cache_age_ms = 0`)\n - on cache hit within TTL: return cached entries + non-zero `cache_age_ms`\n - on expired hit: evict key, rescan, store fresh entry\n- max entry enforcement is oldest-first eviction by `created_at`\n\n## Empty-result fast recheck (separate from normal hits)\n\nNormal cache hit:\n\n- a cache hit inside TTL returns cached entries and does nothing else.\n\nEmpty-result fast recheck:\n\n- this is a **caller-side** policy using `ScanResult.cache_age_ms`\n- if filtered/query result is empty and cached scan age is at least `empty_recheck_ms()`, caller performs one `force_rescan(...)` and retries\n- intended to reduce stale-negative results when files were recently added but cache is still within TTL\n\nCurrent consumers:\n\n- `glob`: rechecks when filtered matches are empty and scan age exceeds threshold\n- `fuzzyFind` (`fd.rs`): rechecks only when query is non-empty and scored matches are empty\n- `grep`: rechecks when selected candidate file list is empty\n\n## Consumer defaults and cache usage\n\nCache is opt-in on all exposed APIs (`cache?: boolean`, default `false`).\n\nCurrent defaults in native 
APIs:\n\n- `glob`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` included only when the pattern mentions `node_modules`\n- `fuzzyFind`: `hidden=false`, `gitignore=true`, `cache=false`, and `node_modules` is skipped\n- `grep`: `hidden=true`, `gitignore=true`, `cache=false`, and `node_modules` included only when the glob mentions `node_modules`\n\nCoding-agent callers today:\n\n- High-volume mention candidate discovery enables cache:\n - `packages/coding-agent/src/utils/file-mentions.ts`\n - profile: `hidden=true`, `gitignore=true`, `includeNodeModules=true`, `cache=true`\n- Tool-level `grep` integration currently disables scan cache (`cache: false`):\n - `packages/coding-agent/src/tools/grep.ts`\n\n## Invalidation contract\n\nNative invalidation entrypoint:\n\n- `invalidateFsScanCache(path?: string)`\n - with `path`: remove cache entries whose root is a prefix of target path\n - without path: clear all scan cache entries\n\nPath handling details:\n\n- relative invalidation paths are resolved against cwd\n- invalidation attempts canonicalization\n- if target does not exist (e.g., delete), fallback canonicalizes parent and reattaches filename when possible\n- this preserves invalidation behavior for create/delete/rename where one side may not exist\n\n## Coding-agent mutation flow responsibilities\n\nCoding-agent code must invalidate after successful filesystem mutations.\n\nCentral helpers:\n\n- `invalidateFsScanAfterWrite(path)`\n- `invalidateFsScanAfterDelete(path)`\n- `invalidateFsScanAfterRename(oldPath, newPath)` (invalidates both sides when paths differ)\n\nCurrent mutation tool callsites:\n\n- `packages/coding-agent/src/tools/write.ts`\n- `packages/coding-agent/src/patch/index.ts` (hashline/patch/replace flows)\n\nRule: if a flow mutates filesystem content or location and bypasses these helpers, cache staleness bugs are expected.\n\n## Adding a new cache consumer safely\n\nWhen introducing cache use in a new scanner/search path:\n\n1. 
**Use stable scan policy inputs**\n - decide hidden/gitignore/node_modules semantics first\n - pass them consistently to `get_or_scan`/`force_rescan` so cache partitions are intentional\n\n2. **Treat cache data as pre-filtered only by traversal policy**\n - apply tool-specific filtering (glob patterns, type filters, scoring) after retrieval\n - never assume cached entries already reflect your higher-level filters\n\n3. **Implement empty-result fast recheck only for stale-negative risk**\n - use `scan.cache_age_ms >= empty_recheck_ms()`\n - retry once with `force_rescan(..., store=true, ...)`\n - keep this path separate from normal cache-hit logic\n\n4. **Respect no-cache mode explicitly**\n - when caller disables cache, call `force_rescan(..., store=false, ...)`\n - do not populate shared cache in a no-cache request path\n\n5. **Wire mutation invalidation for any new write path**\n - after successful write/edit/delete/rename, call the coding-agent invalidation helper\n - for rename/move, invalidate both old and new paths\n\n6. **Do not add per-call TTL knobs**\n - current contract is global policy only (env-configured), no per-request TTL override\n\n## Known boundaries\n\n- Cache scope is process-local in-memory (`DashMap`), not persisted across process restarts.\n- Cache stores scan entries, not final tool results.\n- `glob`/`fuzzyFind`/`grep` share scan entries only when key dimensions (`root`, `hidden`, `gitignore`, `skip_node_modules`) match.\n- `.git` is always excluded at scan collection time regardless of caller options.\n",
17
17
  "gjc-dogfood-skill-template.md": "# GJC dogfood local skill template\n\nIssue #93 requested a gaebal-gajae/operator dogfood skill. The live issue has no comment approving a fifth bundled default workflow skill, so this stays a local template instead of changing the default workflow surface. Operators can copy it into a user or project override when they want GJC-first session guidance:\n\n```sh\nmkdir -p ~/.gjc/skills/gjc-dogfood\ncp docs/gjc-dogfood-skill-template.md ~/.gjc/skills/gjc-dogfood/SKILL.md\n```\n\nFor a single project, copy it to `<project>/.gjc/skills/gjc-dogfood/SKILL.md` instead. Do not commit that project `.gjc` copy unless the project explicitly wants a local override.\n\n---\nname: gjc-dogfood\ndescription: Use when running or reviewing work through GJC sessions, dogfooding Gajae-Code, or migrating an operator workflow from OMX to GJC.\n---\n\n# GJC Dogfood Operator Workflow\n\nUse GJC first for coding, review, planning, and follow-up sessions. Treat OMX as a fallback only when GJC is unavailable, broken, or missing a required capability.\n\n## Locate and launch GJC\n\n- Installed CLI: run `command -v gjc` and then launch with `gjc --tmux`.\n- Repository checkout: from the gajae-code repo, prefer `bun packages/coding-agent/src/cli.ts --tmux` when testing source changes before install.\n- Worktree isolation: for branch-specific work, launch from or point at the branch worktree with `gjc --tmux --worktree <path>`.\n- Name sessions explicitly with the project and issue, for example `gajae-code-93-dogfood-skill`, so tmux panes, logs, and exports remain traceable.\n\n## Start the session\n\n- Put git operations inside the GJC session: fetch, branch/worktree setup, focused commits, pushes, and PR creation should be visible in-session.\n- Submit the initial prompt with the issue URL, target branch, acceptance criteria, verification limits, and any existing plan/spec link.\n- Verify the prompt was accepted: the TUI should show the user prompt, an 
active assistant turn, or a tool/action request. If the session silently idles, resend once with a shorter prompt and capture the failure.\n- Verify working state before leaving the session unattended: confirm the target cwd/worktree, branch, and issue scope are visible in the transcript or command output.\n\n## During work\n\n- Keep session names and branch names issue-scoped.\n- Prefer GJC workflow skills only when they fit: `deep-interview` for unclear requirements, `ralplan` for planning, `ultragoal` for durable ledgers, and `team` for coordinated tmux execution.\n- Keep evidence in the session: issue reads, focused tests/checks, screenshots only when visual behavior matters, and PR URLs.\n- When GJC is weaker than OMX, finish the urgent work with the smallest safe fallback and file a gajae-code follow-up issue with the missing capability, exact command/session context, expected behavior, and evidence.\n\n## Fallback policy\n\nUse OMX or another operator path only when:\n\n- `gjc` cannot be located or launched after checking installed and repo-local commands;\n- authentication, model routing, tmux, or prompt submission is broken;\n- GJC lacks a required capability that OMX already has;\n- an urgent production/review deadline would be missed by debugging GJC first.\n\nRecord the fallback reason and create or link the gajae-code issue that would make GJC sufficient next time.\n\n## Evidence checklist\n\nReport:\n\n- project, issue, branch/worktree, and session name;\n- whether GJC was installed or repo-local;\n- prompt acceptance and working-state evidence;\n- git operations performed in-session;\n- focused verification commands and results;\n- PR/issue URLs;\n- follow-up gajae-code issues for any GJC gap or fallback.\n",
18
18
  "handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff`: trigger path, oneshot generation, session switch, context reinjection, persistence, and UI behavior.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- `generateHandoff(...)` request shape\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`packages/agent/src/compaction/compaction.ts`](../packages/agent/src/compaction/compaction.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. 
`CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`).\n- Validates minimum message count (`>= 2`).\n- Creates `#handoffAbortController` and links any caller-provided abort signal to it.\n- Resolves the current model API key through `ModelRegistry`.\n- Calls `generateHandoff(...)` with:\n - live agent messages (`agent.state.messages`),\n - the current model and API key,\n - the base system prompt (`#baseSystemPrompt`),\n - the live tool array (`agent.state.tools`),\n - optional focus instructions,\n - coding-agent message conversion (`convertToLlm`),\n - provider metadata and `initiatorOverride: \"agent\"`.\n\n`generateHandoff(...)` lives in `packages/agent/src/compaction/compaction.ts` next to summarization. 
It renders `packages/agent/src/compaction/prompts/handoff-document.md` via `renderHandoffPrompt(...)` with optional `additionalFocus`.\n\n### 2) Generate and capture output\n\n`generateHandoff(...)` converts the existing `AgentMessage[]` history to real LLM `Message[]` history, then appends one trailing agent-attributed `user` message containing the rendered handoff prompt.\n\nThe request uses `completeSimple(...)` directly:\n\n```ts\nawait completeSimple(\n model,\n {\n systemPrompt,\n messages: requestMessages,\n tools,\n },\n {\n apiKey,\n signal,\n reasoning: Effort.High,\n toolChoice: \"none\",\n initiatorOverride,\n metadata,\n },\n);\n```\n\nImportant generation properties:\n\n- The request preserves the live provider cache prefix by reusing the same system prompt, tool definitions, and real message history shape as the active agent.\n- The handoff instruction is a trailing `user` message, not a developer message, so the cached prefix remains aligned with the prior turn.\n- `toolChoice: \"none\"` prevents intentional tool dispatch.\n- The returned assistant content is filtered to text blocks and joined with `\\n`; stray tool-call blocks are ignored if a provider does not honor `toolChoice: \"none\"`.\n- `stopReason === \"error\"` throws a generation error.\n\nNo agent-loop events are used for capture. The handoff path no longer waits for `agent_end` and no longer scans the latest assistant message.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no text returns `undefined`.\n\n- caller signal aborts `#handoffAbortController`\n- `completeSimple(...)` receives the abort signal\n- aborted handoff signal or provider `AbortError` is normalized to `Error(\"Handoff cancelled\")`\n- empty generated text returns `undefined`\n\n`AgentSession.handoff()` always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was generated and not aborted:\n\n1. 
Flush current session writer (`sessionManager.flush()`).\n2. Cancel session-owned async jobs.\n3. Start a brand-new session with `parentSession` pointing at the previous session file when one exists.\n4. Reset in-memory agent state (`agent.reset()`).\n5. Rebind `agent.sessionId` to the new session id.\n6. Rekey/reset hindsight state for the new session.\n7. Clear queued context arrays (`#steeringMessages`, `#followUpMessages`, `#pendingNextTurnMessages`) and any scheduled hidden next-turn generation.\n8. Reset todo reminder counter.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped by coding-agent session glue and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\"handoff\", handoffContent, true, undefined, \"agent\");\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- attribution: `\"agent\"`\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. `buildDisplaySessionContext()` resolves message list for current leaf.\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context.\n3. Todo phases are synchronized from the new branch.\n4. Method returns `{ document: handoffText, savedPath? }`.\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nHandoff generation is a oneshot request, not a visible agent turn. 
The generated handoff text is not appended to the old session as an assistant message.\n\nResult: the original session keeps its prior transcript unchanged except for data already persisted before handoff began.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\nAuto-triggered handoffs can additionally write a timestamped `handoff-*.md` artifact under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled. Manual `/handoff` does not write that artifact.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Shows a status loader: `Generating handoff… (esc to cancel)`.\n- Calls `await session.handoff(customInstructions)`.\n- If result is `undefined`: `showError(\"Handoff cancelled\")`.\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Stops the loader, restores the previous Escape handler, and requests render at end.\n\nManual `/handoff` no longer streams the generated document into chat. 
A cancellable loader remains visible while the oneshot request runs, and the chat is rebuilt after generation completes.\n\n## Cancellation semantics\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the abort signal is passed to `completeSimple(...)`; `handoff()` normalizes the cancellation to `Error(\"Handoff cancelled\")`, and the command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\nThe command controller installs a temporary Escape handler for `/handoff` while the loader is visible. Pressing Escape calls `session.abortHandoff()`, which aborts the `completeSimple(...)` request through `#handoffAbortController`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n- **Failed**\n - any other thrown error from `handoff()` / `generateHandoff()` / provider request path\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is returned, `handoff()` returns `undefined` and the controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws an error for the same condition\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted.\n2. Preflight message-count guard.\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`).\n4. 
`generateHandoff(...)` issues one `completeSimple(...)` request with live system prompt, tools, message history, and trailing handoff prompt.\n5. Assistant response text blocks are joined; tool-call blocks are discarded.\n6. If missing text → return `undefined`; if aborted → cancellation error path.\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session with previous session as parent\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success.\n9. `#handoffAbortController` cleared (`isGeneratingHandoff = false`).\n\n## Known assumptions and limitations\n\n- No structural validation checks that generated markdown follows the requested section format.\n- Missing generated text is reported as cancellation in controller UX.\n- Manual handoff has no streaming visibility; a cancellable loader is shown until the UI updates after generation completes.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
@@ -46,7 +46,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
46
46
  "session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.gjc/agent/sessions/--<cwd-encoded>--/*.jsonl`\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. `getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextPrefix(..., 4096)`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` with lazy `name` and `timeAgo` getters.\n - Sorts by file `mtime` descending.\n\n2. 
`SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads full session files.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps).\n - Drops sessions with zero `message` entries.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference: `header.title` -> first user prompt -> `header.id` -> filename\n- name is truncated to 40 chars for compact displays\n- control characters/newlines are stripped/sanitized from title-derived names\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or latest compaction `shortSummary`\n- `firstMessage` is first user message text or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.gjc/agent/terminal-sessions/<terminal-id>`)\n2. Validate breadcrumb:\n - current terminal can be identified\n - breadcrumb cwd matches current cwd (resolved path compare)\n - referenced file still exists\n3. If breadcrumb is invalid/missing, fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n4. If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`KITTY_WINDOW_ID`, `TMUX_PANE`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. 
ID prefix value\n - find match in `SessionManager.list(cwd, sessionDir)` by `id.startsWith(sessionArg)`\n - if no local match and `sessionDir` is not forced, try `SessionManager.listAll()`\n - first match is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if matched session cwd differs from current cwd, CLI prompts whether to fork into current project\n- yes -> `SessionManager.forkFrom(...)`\n- no -> throws error (`Session \"...\" is in another project (...)`)\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: print `No sessions found` and exit early\n3. open TUI picker (`selectSession`)\n4. if canceled: print `No session selected` and exit early\n5. if selected: `SessionManager.open(selectedPath)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n### CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions)` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected path\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n\n### Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`\n3. 
callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Esc to cancel\n- Ctrl+C to exit\n- fuzzy search across session id/title/cwd/first message/all messages/path\n\nEmpty-list render behavior:\n\n- renders a message instead of crashing\n- Enter on empty does nothing (no callback)\n- Esc/Ctrl+C still work\n\nCaveat: UI text says `Press Tab to view all`, but this component currently has no Tab handler and current wiring only lists current-scope sessions.\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. clear queued steering/follow-up/next-turn message buffers\n7. flush session writer (`sessionManager.flush()`) to persist pending writes\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. 
restore default model from `sessionContext.models.default` if available and present in model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. reconnect agent listeners and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session`\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#restoreModeFromSession()` to re-enter persisted mode state (currently plan/plan_paused).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- No dedicated post-switch mode restore call is made in selector flow; mode re-entry behavior is not symmetric with startup `#restoreModeFromSession()`.\n\n## Failure and edge-case behavior\n\n### 
Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): empty list prints `No sessions found` and exits.\n- Interactive selector: empty list renders message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- ID matching uses `startsWith` and takes first match in sorted list.\n- No ambiguity UI if multiple sessions share prefix.\n- `SessionManager.list(...)` excludes sessions with zero messages, so those sessions are not resumable via ID match/list picker.\n",
47
47
  "session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from 
`labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. 
Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role 
templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? 
})` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Plan approval session naming\n\nWhen a user approves a plan from plan mode (`InteractiveMode.#approvePlan`), the approval handler seeds the session name from the plan's title so the resulting (fresh or compacted) session does not stay unnamed.\n\nTrigger:\n\n- Plan approval reaches `#approvePlan(...)` with `options.title` populated from the plan-approval details.\n- This runs for every approval choice (`Approve and execute`, `Approve and compact context`, plain `Approve`); the synthetic `plan-approved` prompt is what otherwise bypasses the input-controller's 
title-generation path.\n\nNaming source:\n\n- The normalized plan title is humanized via `humanizePlanTitle(title)` (`packages/coding-agent/src/plan-mode/approved-plan.ts`):\n - replaces runs of `-`/`_` with a single space\n - trims whitespace\n - capitalizes the first character\n - returns `\"\"` for whitespace-only / separator-only input\n- The humanized name is applied with `sessionManager.setSessionName(name, \"auto\")`. Because `setSessionName` is a no-op when `titleSource === \"user\"`, the seeded name never overrides a name the user already chose (e.g. on the `preserveContext` path where the session continues with prior naming).\n- On successful apply, the terminal title (`setSessionTerminalTitle`) and the editor border color are refreshed to reflect the new name.\n\nExamples (from `humanizePlanTitle`):\n\n- `migrate-mcp-loader` → `Migrate mcp loader`\n- `fix_session_naming` → `Fix session naming`\n- `foo--bar__baz` → `Foo bar baz`\n- `RefactorRouter` → `RefactorRouter` (no separators to expand)\n- `\"\"` / `\"---\"` → `\"\"` (no name applied)\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
48
48
  "session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.gjc/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.gjc/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.gjc/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. 
`continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `model_change`\n- `thinking_level_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"anthropic-model-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", \"text\": 
\"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by 
`buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { 
\"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. 
Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. 
Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls 
and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextPrefix`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.gjc/agent/sessions`\n\nMetadata extraction reads only a prefix (`readTextPrefix(..., 4096)`) where possible.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.gjc/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts 
using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
49
- "theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@gajae-code/natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, 
`mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (14)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. built-in embedded themes (`dark.json`, `light.json`, and all `defaults/*.json` compiled into `defaultThemes`, including the red-claw crustacean theme)\n2. 
custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.gjc/agent/themes`\n- overridden by `GJC_CODING_AGENT_DIR` (`$GJC_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. 
dark slot fallback\n\nBuilt-in theme note: `red-claw` is the default dark GJC theme, with red/orange brand tokens, separate semantic error/warning/diff-removal tokens, and crab-oriented symbol overrides.\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"red-claw\"`\n- `theme.light = \"light\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nThe settings theme picker is confirm-only; arrow-key browsing does not call `previewTheme`, so the rendered theme and displayed/persisted theme name stay aligned until Enter confirms a new selection.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme 
settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.gjc/agent`\n- effective default file: `~/.gjc/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- `colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.gjc/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want. For the bundled crustacean look, choose `red-claw`.\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n 
\"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n \"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and confirm the custom theme in the dark/light theme picker; arrow-key browsing is intentionally non-mutating.\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. 
Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
49
+ "theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@gajae-code/natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, `mdCodeBlock`, 
`mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (13)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. built-in embedded themes (`red-claw.json` and `blue-crab.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.gjc/agent/themes`\n- overridden by `GJC_CODING_AGENT_DIR` (`$GJC_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. 
read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. dark slot fallback\n\nBuilt-in theme note: `red-claw` is the default dark GJC theme, and `blue-crab` is the default light-slot theme. 
Both are crustacean brand themes with separate semantic error/warning/diff-removal tokens and crab-oriented symbol overrides.\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"red-claw\"`\n- `theme.light = \"blue-crab\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nThe settings theme picker is confirm-only; arrow-key browsing does not call `previewTheme`, so the rendered theme and displayed/persisted theme name stay aligned until Enter confirms a new selection.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` 
to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.gjc/agent`\n- effective default file: `~/.gjc/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- `colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection; legacy built-in names `dark`/`light` map to `red-claw`/`blue-crab` unless matching custom theme files exist.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.gjc/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want. For bundled crustacean themes, choose `red-claw` or `blue-crab`.\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n 
\"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n \"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and confirm the custom theme in the dark/light theme picker; arrow-key browsing is intentionally non-mutating.\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. 
Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
50
50
  "tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more choices or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Explicit options. The UI always appends `Other (type your own)`; callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. 
`AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds, and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. 
On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `30` seconds; `0` disables timeout (`packages/coding-agent/src/config/settings-schema.ts`).\n- Prompt guidance says provide 2-5 options, but code does not enforce that (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is a UI hint that also picks the timeout default; invalid indexes are 
ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
51
51
  "tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/coding-agent/src/hashline/hash.ts` — stable hashline diff anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. 
|\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines: `-REF|before` and `+REF|after` in hashline mode, or `-LINE:COLUMN before` / `+LINE:COLUMN after` when hashlines are off.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. `AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. 
The wrapper reads `GJC_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and rejects the apply as an error if the live result no longer matches the preview (`stalePreview`).\n10. 
On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `GJC_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in `crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are 
rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the preview becomes stale. 
The resolve callback compares replacement totals and per-file counts and returns an error result rather than applying a mismatched preview silently.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated only by totals and per-file counts, not by a byte-for-byte diff of every replacement payload.",
52
52
  "tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-natives/src/language/mod.rs` — language aliases and extension inference\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — anchor-prefixed match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. 
|\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-natives/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered as `*LINE+HASH|text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. 
Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. 
Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - 
Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens 
per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
@@ -54,6 +54,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
54
54
  "tools/browser.md": "# browser\n\n> Open, reuse, close, and script Puppeteer tabs against headless Chromium or CDP-attached apps.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/browser.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/browser.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/browser/tab-supervisor.ts` — global tab registry; worker lifecycle; run/close coordination.\n - `packages/coding-agent/src/tools/browser/tab-worker.ts` — executes `run` code; implements the `tab` helper API.\n - `packages/coding-agent/src/tools/browser/tab-worker-entry.ts` — worker-thread transport bootstrap.\n - `packages/coding-agent/src/tools/browser/registry.ts` — browser-handle registry keyed by browser kind.\n - `packages/coding-agent/src/tools/browser/launch.ts` — Puppeteer loading, Chromium resolution/download, headless launch, stealth injection.\n - `packages/coding-agent/src/tools/browser/attach.ts` — CDP attach/reuse, target picking, spawned-app process handling.\n - `packages/coding-agent/src/tools/browser/tab-protocol.ts` — worker init/run/result message schema.\n - `packages/coding-agent/src/tools/browser/readable.ts` — `tab.extract()` readability extraction.\n - `packages/coding-agent/src/tools/browser/render.ts` — TUI rendering for `open`/`close` status lines and `run` JS cells.\n - `packages/coding-agent/src/tools/puppeteer/00_stealth_tampering.txt` — mask patched functions/descriptors as native.\n - `packages/coding-agent/src/tools/puppeteer/01_stealth_activity.txt` — synthesize visibility/focus/scroll activity.\n - `packages/coding-agent/src/tools/puppeteer/02_stealth_hairline.txt` — fix Modernizr hairline detection.\n - `packages/coding-agent/src/tools/puppeteer/03_stealth_botd.txt` — spoof `navigator.webdriver`, `window.chrome`, and Chrome fingerprint surfaces.\n - `packages/coding-agent/src/tools/puppeteer/04_stealth_iframe.txt` — patch iframe `contentWindow`/`srcdoc` behavior.\n - 
`packages/coding-agent/src/tools/puppeteer/05_stealth_webgl.txt` — spoof WebGL vendor/renderer/precision.\n - `packages/coding-agent/src/tools/puppeteer/06_stealth_screen.txt` — normalize screen/viewport/device-pixel-ratio values.\n - `packages/coding-agent/src/tools/puppeteer/07_stealth_fonts.txt` — spoof local fonts and perturb canvas text rendering.\n - `packages/coding-agent/src/tools/puppeteer/08_stealth_audio.txt` — spoof audio latency/sample-rate and perturb offline rendering.\n - `packages/coding-agent/src/tools/puppeteer/09_stealth_locale.txt` — force locale/languages/timezone/date strings.\n - `packages/coding-agent/src/tools/puppeteer/10_stealth_plugins.txt` — synthesize `navigator.plugins`/`navigator.mimeTypes`.\n - `packages/coding-agent/src/tools/puppeteer/11_stealth_hardware.txt` — spoof `navigator.hardwareConcurrency`.\n - `packages/coding-agent/src/tools/puppeteer/12_stealth_codecs.txt` — spoof media codec support.\n - `packages/coding-agent/src/tools/puppeteer/13_stealth_worker.txt` — carry UA/platform spoofing into `Worker`/`SharedWorker`.\n\n## Inputs\n\n### Shared fields\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"open\" \\| \"close\" \\| \"run\"` | Yes | Dispatches to the open/close/run path. |\n| `name` | `string` | No | Tab id. Defaults to `\"main\"`. Tabs live in a process-global map, so the same name is reused across later calls and in-process subagents until closed. |\n| `timeout` | `number` | No | Tool wall-clock timeout in seconds. Defaults to `30`; clamped to the browser tool range before execution. |\n\n### `action: \"open\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `url` | `string` | No | Navigate after the tab is ready. Existing reusable tabs also navigate when `url` is supplied. |\n| `viewport` | `{ width: number; height: number; scale?: number }` | No | Requested viewport. 
For headless launch this becomes the initial viewport; for a page it is applied with `page.setViewport()`. `scale` maps to Puppeteer `deviceScaleFactor`. |\n| `wait_until` | `\"load\" \\| \"domcontentloaded\" \\| \"networkidle0\" \\| \"networkidle2\"` | No | Navigation wait condition. Defaults to `\"networkidle2\"` where omitted. |\n| `dialogs` | `\"accept\" \\| \"dismiss\"` | No | Installs a page `dialog` handler that auto-accepts or auto-dismisses dialogs. Omitted means no handler. |\n| `app` | `{ path?: string; cdp_url?: string; args?: string[]; target?: string }` | No | Selects browser kind. No `app` uses the session `browser.headless` setting. `app.path` is resolved against the session cwd and used as the executable path for spawn/attach reuse. `app.cdp_url` connects to an existing CDP endpoint. `args` are appended only when spawning `app.path`. `target` is only used for attached/spawned-app page selection. |\n\n### `action: \"close\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `all` | `boolean` | No | Close every known tab. Omitted closes only `name`. |\n| `kill` | `boolean` | No | When a tab release drops a spawned-app browser handle to refcount 0, also terminate its process tree. Has no effect on headless shutdown and only disconnects connected CDP browsers. |\n\n### `action: \"run\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `code` | `string` | Yes | Async-function body executed in a VM context with `page`, `browser`, `tab`, `display`, `assert`, `wait`, `console`, timers, `URL`, `TextEncoder`, `TextDecoder`, and `Buffer` in scope. |\n\n## Outputs\nThe tool returns one result per call; no streaming partial output is emitted from the browser implementation itself.\n\n- `open`: text content with `Opened` or `Reused`, browser description, URL, and optional title. 
`details` includes `action`, `name`, `browser`, `url`, `viewport`, and the same text in `details.result`.\n- `close`: text content with either `Closed ...` or `No tab named ...`. `details` includes `action`, `name`, and `details.result`.\n- `run`: ordered `content` array built as:\n 1. every `display(value)` call in execution order,\n 2. final return value, JSON-stringified unless already a string,\n 3. or `Ran code on tab \"...\"` if nothing else was produced.\n- `display(value)` coercion in `packages/coding-agent/src/tools/browser/tab-worker.ts`:\n - `{ type: \"image\", data: string, mimeType: string }` becomes image content,\n - `string` becomes text content,\n - other values become pretty JSON text when serializable, else `String(value)`.\n- `tab.screenshot()` also appends text plus an image content item unless `silent: true`; `details.screenshots` records persisted screenshot metadata `{ dest, mimeType, bytes, width, height }`.\n- `run` `details` includes `action`, `name`, current `browser`/`url` when the tab exists, optional `screenshots`, and `details.result` containing only the concatenated text outputs.\n\n## Flow\n1. `BrowserTool.execute()` (`packages/coding-agent/src/tools/browser.ts`) abort-checks, clamps `timeout` via `clampTimeout(\"browser\", ...)`, defaults `name` to `\"main\"`, and dispatches on `action`.\n2. `open` resolves browser kind with `resolveBrowserKind()`:\n - `app.cdp_url` → `{ kind: \"connected\" }` after trimming trailing slashes.\n - `app.path` → `{ kind: \"spawned\" }` after resolving against session cwd.\n - otherwise → `{ kind: \"headless\", headless: session.settings.get(\"browser.headless\") }`.\n3. `open` rejects reusing the same tab name across different browser kinds (`sameBrowserKind()`); callers must close first.\n4. 
`open` acquires a browser handle through `acquireBrowser()` (`packages/coding-agent/src/tools/browser/registry.ts`):\n - existing connected handle is reused by browser-kind key;\n - stale disconnected handles are disposed and recreated;\n - headless launches via `launchHeadlessBrowser()`;\n - `connected` waits for `${cdpUrl}/json/version`, then `puppeteer.connect()`;\n - `spawned` first tries `findReusableCdp()`, else kills same-path processes, allocates a free loopback port, spawns the executable with `--remote-debugging-port=<port>`, waits for CDP, then connects.\n5. `open` acquires a tab through `acquireTab()` (`packages/coding-agent/src/tools/browser/tab-supervisor.ts`):\n - same-name + same-browser + alive tab is reused unless `dialogs` changed;\n - same-name but different browser handle, dead state, or changed dialog policy forces release and recreation;\n - reusing with a new `url` navigates by issuing `await tab.goto(...)` through the worker.\n6. New tabs build a `WorkerInitPayload` in `buildInitPayload()`:\n - headless mode sends `url`, `waitUntil`, `viewport`, `dialogs`, and timeout;\n - attach mode resolves a page with `pickElectronTarget()`, gets its target id, and sends `targetId` plus `dialogs`.\n7. `acquireTab()` spawns a dedicated Bun `Worker` from `tab-worker-entry.ts`; if that fails it falls back to inline execution in the main thread (`spawnInlineWorker()`), preserving behavior but losing protection against synchronous infinite loops.\n8. `WorkerCore.#init()` (`packages/coding-agent/src/tools/browser/tab-worker.ts`) connects back to the browser websocket endpoint. Headless mode opens a new page, applies stealth patches, applies viewport, installs dialog handling if requested, and optionally navigates. Attach mode resolves the requested target page and optionally installs dialog handling.\n9. 
On success the worker sends `ready` with `{ url, title, viewport, targetId }`; the supervisor stores a `TabSession`, increments browser-handle refcount with `holdBrowser()`, and keeps the tab in a process-global `Map<string, TabSession>`.\n10. `run` requires non-empty `code`, looks up the tab with `getTab()`, then delegates to `runInTab()`.\n11. `runInTabWithSnapshot()` rejects dead tabs and concurrent runs (`Tab ... is busy`), captures session cwd plus optional `browser.screenshotDir`, registers an abort hook, sends a `run` message to the worker, and races the result against `timeoutMs + 750` ms. Timeouts force-kill the tab worker and, for headless tabs, close the orphaned page target.\n12. `WorkerCore.#run()` creates a VM context, exposes the raw Puppeteer `page`/`browser` plus a synthetic `tab` API, and executes `(async () => { ...code... })()` via `vm.runInContext()`.\n13. The `tab` helper API implemented in `#createTabApi()` is:\n - `tab.name: string`\n - `tab.page: Page`\n - `tab.signal?: AbortSignal`\n - `tab.url(): string`\n - `tab.title(): Promise<string>`\n - `tab.goto(url, { waitUntil? })`\n - `tab.observe({ includeAll?, viewportOnly? })`\n - `tab.screenshot({ selector?, fullPage?, save?, silent? })`\n - `tab.extract(format = \"markdown\")`\n - `tab.click(selector)`\n - `tab.type(selector, text)`\n - `tab.fill(selector, value)`\n - `tab.press(key, { selector? })`\n - `tab.scroll(deltaX, deltaY)`\n - `tab.drag(from, to)`\n - `tab.waitFor(selector)`\n - `tab.evaluate(fn, ...args)`\n - `tab.scrollIntoView(selector)`\n - `tab.select(selector, ...values)`\n - `tab.uploadFile(selector, ...filePaths)`\n - `tab.waitForUrl(pattern, { timeout? })`\n - `tab.waitForResponse(pattern, { timeout? })`\n - `tab.id(n)`\n14. Selector handling in `normalizeSelector()` accepts plain CSS and Puppeteer query handlers, and rewrites legacy Playwright-style prefixes `p-text/`, `p-xpath/`, `p-pierce/`, `p-aria/`; other `p-*` prefixes throw a `ToolError`.\n15. 
`tab.observe()` clears the element cache, takes a Puppeteer accessibility snapshot, filters to interactive nodes unless `includeAll`, optionally filters to viewport-visible nodes, assigns numeric ids, caches `ElementHandle`s, and returns URL/title/viewport/scroll metadata plus `elements`.\n16. `tab.id(n)` resolves the cached `ElementHandle`, verifies `el.isConnected`, and throws a stale-id error if the element is no longer connected to the DOM or the cache has been cleared.\n17. `tab.goto()` clears the cached element ids before navigating. Any new `tab.observe()` also clears and rebuilds the cache.\n18. `tab.click()` uses a custom retry loop for `text/...` selectors to find an actionable visible match; other selectors use `page.locator(...).click()` with the run timeout.\n19. `tab.screenshot()` captures either the whole page or a selector PNG, downsizes a copy for model output, chooses a persistence path, writes the image to disk, records metadata, and optionally emits text + image display entries.\n20. `display()` calls accumulate in an array. After code finishes, the worker posts `{ displays, returnValue, screenshots }`; `BrowserTool.#run()` appends the return value as trailing text content when not `undefined`.\n21. `close` releases one tab or all tabs via `releaseTab()` / `releaseAllTabs()`. 
Each tab aborts pending runs, asks the worker to close, waits up to `750` ms for a `closed` ack, terminates the worker, decrements browser refcount, and disposes the browser handle when refcount reaches zero.\n\n## Modes / Variants\n- **Action dispatch**\n - `open` — acquire/reuse browser + tab.\n - `close` — release one tab or all tabs.\n - `run` — execute JS inside the tab worker.\n- **Browser kind**\n - **Headless**: launches local Chromium with Puppeteer, applies stealth patches, and creates a fresh page per tab.\n - **Spawned app (`app.path`)**: reuses an existing CDP-enabled process for that executable when possible; otherwise kills same-path processes, spawns the executable with remote debugging enabled, then attaches. No stealth patches are injected.\n - **Connected browser (`app.cdp_url`)**: attaches to an already-running CDP endpoint. No process ownership; close only disconnects.\n- **Target selection for attached/spawned browsers**\n - With `app.target`, `pickElectronTarget()` returns the first page whose URL or title contains the case-insensitive substring.\n - Without `app.target`, it skips titles/URLs matching `request handler|devtools|background page|background host|service worker` and otherwise falls back to the first page.\n- **Worker mode**\n - **Dedicated worker**: normal path; user code runs off the main thread and can be aborted even when it blocks synchronously.\n - **Inline fallback**: activated when Bun worker spawn fails; behavior matches, but synchronous infinite loops in user code cannot be interrupted.\n- **Dialog policy**\n - No `dialogs` field: no auto-handler.\n - `accept`/`dismiss`: page `dialog` events are handled automatically.\n - Changing dialog policy on an existing live tab forces tab recreation instead of mutating the worker in place.\n- **Screenshot persistence**\n - `save` provided: persist full-resolution PNG at the resolved cwd-relative or absolute path.\n - `browser.screenshotDir` session setting set: persist 
full-resolution PNG under that directory with a timestamped filename.\n - Neither set: persist the resized image to a temp-file path under the OS temp dir.\n\n## Side Effects\n- Filesystem\n - `loadPuppeteer()` writes `{}` to `<puppeteer-safe-dir>/package.json` before importing `puppeteer-core`.\n - First headless launch may download Chromium into the Puppeteer cache directory returned by `getPuppeteerDir()`.\n - `tab.screenshot()` creates parent directories and writes image files.\n - `tab.uploadFile()` resolves supplied paths against the session cwd.\n- Network\n - CDP attach paths poll `http://127.0.0.1:<port>/json/version` or the supplied `cdp_url` `/json/version`.\n - Headless/browser-attach sessions create CDP websocket connections.\n - Headless first-use Chromium download uses `@puppeteer/browsers`.\n - User `page` / `tab` operations perform normal browser network traffic.\n- Subprocesses / native bindings\n - Headless mode launches Chromium through Puppeteer.\n - `app.path` mode may spawn the target executable via `Bun.spawn()`.\n - `killExistingByPath()` / `gracefulKillTreeOnce()` use `@gajae-code/natives` process inspection/termination.\n - Worker mode uses Bun `Worker`; fallback mode does not.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Browser handles are cached in a process-global `Map` keyed by browser kind in `packages/coding-agent/src/tools/browser/registry.ts`.\n - Tabs are cached in a process-global `Map` keyed by `name` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`.\n - `run` captures session cwd and optional `browser.screenshotDir` for screenshot/save path resolution.\n - `restartForModeChange()` drops only headless tabs.\n- User-visible prompts / interactive UI\n - None beyond normal tool output. 
Dialog auto-handling is invisible unless it fails and emits debug logs.\n- Background work / cancellation\n - `open`, `run`, CDP waits, and browser actions thread through abort signals.\n - A timed-out `run` aborts the worker execution path and can tear down the tab.\n\n## Limits & Caps\n- Tool timeout clamp: default `30` s, min `1` s, max `30` s (`TOOL_TIMEOUTS.browser` in `packages/coding-agent/src/tools/tool-timeouts.ts`).\n- Supervisor grace period around init/run/close: `750` ms (`GRACE_MS` in `packages/coding-agent/src/tools/browser/tab-supervisor.ts`).\n- Puppeteer protocol timeout for launch/connect operations: `60_000` ms (`BROWSER_PROTOCOL_TIMEOUT_MS` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Connected-browser CDP readiness wait: `5_000` ms before `puppeteer.connect()` (`packages/coding-agent/src/tools/browser/registry.ts`).\n- Spawned-app CDP readiness wait after spawn: `30_000` ms (`packages/coding-agent/src/tools/browser/registry.ts`).\n- CDP polling cadence: 150 ms in `waitForCdp()` (`packages/coding-agent/src/tools/browser/attach.ts`).\n- Headless default viewport: `1365x768` at `deviceScaleFactor: 1.25` (`DEFAULT_VIEWPORT` in `packages/coding-agent/src/tools/browser/launch.ts`).\n- Screenshot model-attachment resize cap: `maxWidth 1024`, `maxHeight 1024`, `maxBytes 150 * 1024`, `jpegQuality 70` (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- `tab.waitForUrl()` polling interval: `200` ms (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n- Drag simulation uses `12` mouse-move steps (`packages/coding-agent/src/tools/browser/tab-worker.ts`).\n\n## Errors\n- `BrowserTool.execute()` converts DOM-style `AbortError` into `ToolAbortError`; other errors propagate.\n- `run` hard-fails on missing code: `Missing required parameter 'code' for action 'run'.`\n- `open` fails when reusing a name across browser kinds: `Tab \"...\" is bound to a different browser (...). 
Close it first.`\n- `runInTabWithSnapshot()` fails when the tab is absent/dead (`Tab \"...\" is not alive. Reopen it.`) or already running (`Tab \"...\" is busy`).\n- Worker init failures and run failures are serialized through `RunErrorPayload`; `ToolError` and abort state are reconstructed on the host side by `errorFromPayload()`.\n- Attached-target mismatches surface as:\n - `No page targets available on the attached browser`\n - `No page target matched \"...\". Available pages:\\n...`\n - `Target ... is no longer available on the attached browser`\n- Spawned-app path validation requires an absolute executable path, not an app bundle path.\n- Spawn/attach failures are wrapped into `ToolError`s such as `Timed out waiting for CDP endpoint ...`, `Failed to attach to ...`, or `Connected to ... but puppeteer.connect failed: ...`.\n- `tab` helper errors are user-visible `ToolError`s, including unsupported selector prefix, stale/unknown element id, invalid drag target, missing upload files, non-`<select>` for `tab.select()`, non-file-input for `tab.uploadFile()`, and screenshot selector misses.\n- On run timeout, the worker reports `Browser code execution timed out after <ms>ms`; the supervisor may escalate to `Browser code execution hung past grace; tab killed` if the worker does not respond after the grace window.\n\n## Notes\n- `loadPuppeteer()` and `loadPuppeteerInWorker()` temporarily redirect `cwd` to a safe Puppeteer directory before importing `puppeteer-core`, because Puppeteer probes the current working directory during module load.\n- Headless launch prefers a detected system Chrome/Chromium, then `PUPPETEER_EXECUTABLE_PATH`, and only then downloads Chromium.\n- Headless launch always passes `--no-sandbox`, `--disable-setuid-sandbox`, `--disable-blink-features=AutomationControlled`, and a `--window-size=...` matching the initial viewport. 
It also ignores Puppeteer default args `--disable-extensions`, `--disable-default-apps`, and `--disable-component-extensions-with-background-pages`.\n- Proxy-related env vars only affect headless launch: `PUPPETEER_PROXY`, `PUPPETEER_PROXY_BYPASS_LOOPBACK`, and `PUPPETEER_PROXY_IGNORE_CERT_ERRORS`.\n- Stealth patches are applied only in headless mode. Spawned or externally connected browsers are intentionally left untouched.\n- `applyStealthPatches()` also strips Puppeteer's `//# sourceURL=__puppeteer_evaluation_script__` suffix from CDP `Runtime.evaluate` / `Runtime.callFunctionOn` payloads.\n- `tab.extract()` reads `page.content()`, runs Readability first, then falls back to `main article`/`article`/`main`/`[role='main']`/`body`, and returns `null` if neither extraction path yields content.\n- `close(all: true, kill: false)` disconnects from spawned/connected browsers when the last tab closes but leaves spawned app processes running.\n- Headless orphan cleanup is best-effort: if a worker dies before closing its page, the supervisor searches browser targets by `targetId` and closes that page.\n- Console methods inside `run` do not appear in tool output; they are forwarded as debug/warn/error logs through the worker transport.",
55
55
  "tools/calc.md": "# calc\n\n> Evaluates one or more arithmetic expressions and returns formatted numeric results.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/calculator.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/calculator.md`\n- Key collaborators:\n - `packages/coding-agent/src/tui.ts` — status lines and tree-list rendering\n - `packages/coding-agent/src/tools/render-utils.ts` — preview limits and formatting helpers\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `calculations` | `Calculation[]` | Yes | Batch of expressions to evaluate in order. |\n\n### `Calculation`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `expression` | `string` | Yes | Arithmetic expression string. |\n| `prefix` | `string` | Yes | Prepended verbatim to the rendered numeric result. |\n| `suffix` | `string` | Yes | Appended verbatim to the rendered numeric result. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is the newline-joined `prefix + value + suffix` string for each calculation.\n- `details.results` is an array of `{ expression, value, output }`.\n- On renderer fallback, if `details` is missing but `content[0].text` exists, the TUI tries to pair each output line with the original expressions from call args.\n\n## Flow\n1. `execute()` wraps evaluation in `untilAborted(...)`.\n2. For each entry, `evaluateExpression(...)` tokenizes the expression, parses it with a recursive-descent parser, rejects non-finite outputs, and normalizes `-0` to `0`.\n3. `tokenizeExpression(...)` accepts whitespace, parentheses, operators, and number literals; any other character throws immediately.\n4. `ExpressionParser` applies precedence in this order: `+ -`, `* / %`, unary `+ -`, exponentiation `**`, parentheses/literals.\n5. Exponentiation is right-associative (`2 ** 3 ** 2` parses as `2 ** (3 ** 2)`).\n6. 
Each numeric result is formatted with `String(value)` and wrapped with the provided `prefix` and `suffix`.\n7. The tool returns text output plus structured `details`.\n\n## Side Effects\n- Background work / cancellation\n - Supports abort via `untilAborted(...)`.\n- Session state\n - None.\n- Filesystem / Network / Subprocesses\n - None.\n\n## Limits & Caps\n- Supported operators: `+`, `-`, `*`, `/`, `%`, `**` (`packages/coding-agent/src/tools/calculator.ts`).\n- Supported numeric literals:\n - decimal integers/floats, including leading-dot forms like `.5`\n - scientific notation like `1e10`, `2.5E-3`\n - hexadecimal `0x...`\n - binary `0b...`\n - octal `0o...`\n- Results must be finite; `Infinity` and `NaN` are rejected.\n- The renderer collapses long result lists using `PREVIEW_LIMITS.COLLAPSED_ITEMS` from `packages/coding-agent/src/tools/render-utils.ts`.\n\n## Errors\n- Invalid characters: e.g. `Invalid character \"x\" in expression`.\n- Malformed numbers: invalid prefixed literal, invalid exponent, invalid number.\n- Syntax errors: `Unexpected token in expression`, `Unexpected end of expression`, `Missing closing parenthesis`, `Expression is empty`.\n- Non-finite arithmetic: `Expression result is not a finite number`.\n- Any evaluation error aborts the whole batch; the tool does not return partial successes.\n\n## Notes\n- Despite the schema example showing `sqrt(16)`, the parser does not support functions, identifiers, units, or constants; only numeric literals, operators, and parentheses are accepted.\n- Precision is plain JavaScript `number` semantics throughout, including floating-point rounding behavior.\n- `/` and `%` use JavaScript numeric operators directly; there is no integer-only mode or unit handling.\n- Unary operators bind tighter than `*`/`/`/`%` but looser than exponentiation because unary parsing delegates to `#parsePower()`.\n",
56
56
  "tools/checkpoint.md": "# checkpoint\n\n> Mark the current top-level conversation state so later `rewind` can collapse exploratory context into a report.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/checkpoint.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/checkpoint.md`\n- Key collaborators:\n - `packages/coding-agent/src/session/agent-session.ts` — captures the active checkpoint after tool success.\n - `packages/coding-agent/src/session/session-manager.ts` — persists the normal session entry stream; not the active checkpoint marker.\n - `packages/coding-agent/src/tools/index.ts` — registers the tool and gates it behind `checkpoint.enabled`.\n - `packages/coding-agent/src/config/settings-schema.ts` — defines the disabled-by-default feature flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `goal` | `string` | Yes | Investigation goal. Required by the schema and echoed in the tool result. |\n\n## Outputs\nThe tool returns a single text result plus structured details:\n\n- text body:\n - `Checkpoint created.`\n - `Goal: <goal>`\n - `Run your investigation, then call rewind with a concise report.`\n- `details`:\n - `goal: string`\n - `startedAt: string` — ISO timestamp created inside `CheckpointTool.execute()`\n\nNo checkpoint ID, artifact URI, job handle, file path, or restore token is returned.\n\n## Flow\n1. `CheckpointTool.createIf()` in `packages/coding-agent/src/tools/checkpoint.ts` returns `null` for subagents by checking `session.taskDepth`; only top-level sessions can see the tool.\n2. `CheckpointTool.execute()` rejects subagent calls again with `ToolError(\"Checkpoint not available in subagents.\")`.\n3. It rejects nested checkpoints with `ToolError(\"Checkpoint already active.\")` when `session.getCheckpointState?.()` is already set.\n4. It creates `startedAt = new Date().toISOString()` and returns a normal `toolResult()` payload. The tool itself does not persist anything.\n5. 
On the later `tool_execution_end` event, `AgentSession` in `packages/coding-agent/src/session/agent-session.ts` detects successful `checkpoint` execution and captures three in-memory fields:\n - `checkpointMessageCount` — current `agent.state.messages.length`, after the checkpoint tool result has already been appended\n - `checkpointEntryId` — `sessionManager.getEntries().at(-1)?.id ?? null`, i.e. the last persisted session entry ID at checkpoint time\n - `startedAt` — copied from tool details or regenerated\n6. `AgentSession` stores that object in its private `#checkpointState` field and clears `#pendingRewindReport`.\n\n## Side Effects\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Sets `AgentSession.#checkpointState` in memory.\n - Records the checkpoint boundary as a message count plus a session entry ID.\n - Enables the later yield guard: if a checkpoint is active and no rewind report is pending, `#enforceRewindBeforeYield()` injects a developer-role warning and schedules another turn.\n- User-visible prompts / interactive UI\n - The tool result tells the model to call `rewind` after the investigation.\n - If the agent tries to `yield` first, `AgentSession` injects:\n\n```text\n<system-warning>\nYou are in an active checkpoint. You MUST call rewind with your investigation findings before yielding. Do NOT yield without completing the checkpoint.\n</system-warning>\n```\n\n## Limits & Caps\n- Availability is gated by `checkpoint.enabled`, default `false`, in `packages/coding-agent/src/config/settings-schema.ts`.\n- The tool is registered as discoverable in `packages/coding-agent/src/tools/index.ts`.\n- Only one active checkpoint is allowed per top-level session.\n- Checkpoint state is not persisted as a dedicated session entry. If the process exits, a resumed session can reload the conversation history, but not the live `#checkpointState` guard.\n- Session persistence still applies to the ordinary checkpoint tool call message. 
Global session persistence truncation is `MAX_PERSIST_CHARS = 500_000` in `packages/coding-agent/src/session/session-manager.ts`.\n\n## Errors\n- `ToolError(\"Checkpoint not available in subagents.\")` — thrown for subagent sessions.\n- `ToolError(\"Checkpoint already active.\")` — thrown when a prior checkpoint has not been rewound or cleared.\n- The tool body has no local `try/catch`; unexpected exceptions propagate.\n\n## Notes\n- Despite the summary string `Create a git-based checkpoint to save and restore session state`, the implementation does not call git and does not snapshot filesystem state.\n- Captured state is conversation/session metadata only:\n - in-memory message count\n - session entry ID in the session tree\n - timestamp\n- Not captured:\n - working tree contents\n - staged changes\n - artifacts\n - blob-store contents\n - SQLite history rows from `packages/coding-agent/src/session/history-storage.ts`\n - auth or agent records from `packages/coding-agent/src/session/agent-storage.ts`\n- If the turn ends with `stopReason === \"aborted\"` while a checkpoint is active, `AgentSession` clears `#checkpointState` and `#pendingRewindReport` instead of preserving a half-finished checkpoint.\n",
57
+ "tools/cron.md": "# cron (CronCreate, CronList, CronDelete)\n\n> Three sibling inline tools that mirror Claude Code's `CronCreate`, `CronList`, and `CronDelete` surface. Schedule recurring or one-shot prompts within the current session.\n\n## Source\n\n- Entry: `packages/coding-agent/src/tools/cron.ts`\n- Model-facing prompt (shared by all three tools): `packages/coding-agent/src/prompts/tools/cron.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — provides the `registerOwnerCleanup` / `runOwnerCleanups` primitives that clear schedules on session/agent teardown.\n - `packages/coding-agent/src/session/agent-session.ts` — invokes `runOwnerCleanups({ ownerId })` from `#cancelOwnAsyncJobs()` before cancelling owned jobs, so cron timers cannot race teardown.\n\n## Tools\n\n| Name | Purpose |\n| --- | --- |\n| `CronCreate` | Schedule a prompt on a 5-field cron expression. Returns an 8-character job id. |\n| `CronList` | List every scheduled task in this session (per calling agent). |\n| `CronDelete` | Cancel a scheduled task by id. |\n\nEach session can hold up to **50** scheduled tasks per owner. Recurring tasks\nauto-expire **7 days** after creation. One-shot tasks delete themselves after\nfiring.\n\n## Inputs — CronCreate\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cron_expression` | `string` | Yes | Standard 5-field cron expression in local time: `minute hour day-of-month month day-of-week`. |\n| `prompt` | `string` | Yes | Prompt to inject between turns when the cron fires. |\n| `recurring` | `boolean` | Yes | `true` to fire on every match (recurring, auto-expires after 7 days); `false` to fire once and self-delete. |\n\nSupported field syntax: `*`, single values (`5`), steps (`*/15`), ranges\n(`1-5`), comma lists (`1,15,30`). Day-of-week uses `0`/`7` for Sunday through\n`6` for Saturday. 
Extended syntax such as `L`, `W`, `?`, or weekday/month\nname aliases is **not** supported and the tool will reject expressions that use\nthem.\n\n## Inputs — CronList\n\nNo parameters.\n\n## Inputs — CronDelete\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | The 8-character job ID returned by `CronCreate`. |\n\n## Outputs\n\n- `CronCreate.content`: `Scheduled <id> (<human-schedule>)`. `details`: `{ id, cron_expression, recurring, nextFireAt }`.\n- `CronList.content`: lines of `<id> (<human-schedule>): <prompt preview>`, or `No scheduled jobs` when empty. `details.jobs`: array of `{ id, cron, recurring, prompt, humanSchedule }`.\n- `CronDelete.content`: `Cancelled <id>` on success, `Failed to remove scheduled task '<id>'` otherwise. `details`: `{ id, deleted }`.\n\n## Behavior / Lifecycle\n\n1. Each cron tool gates on `isBackgroundJobSupportEnabled(session.settings)`. When async is disabled, none of the cron tools are registered in `BUILTIN_TOOLS`.\n2. Schedules are stored in-memory per `ownerId` (resolved via `session.getAgentId()`). Subagents have their own isolated schedule lists.\n3. The first `CronCreate` call for a new owner registers an owner cleanup with `AsyncJobManager.registerOwnerCleanup(ownerId, fn)`. The cleanup clears every schedule for that owner and is run by:\n - `AgentSession.#cancelOwnAsyncJobs()` on dispose / new-session / session-switch / handoff / branch\n - `AsyncJobManager.dispose()` as a run-and-clear safety net\n4. Cron expression validation rejects malformed input synchronously with a `ToolError` whose message names the offending field.\n5. Each task is backed by a `setTimeout` for the next jitter-adjusted match. One-shot tasks self-delete after firing; recurring tasks reschedule until the 7-day expiry timer deletes them.\n6. 
The per-owner 50-task cap is enforced on `CronCreate`; the caller receives a `ToolError` rather than a silent drop.\n\n## Errors\n\n- `ToolError`: `Async execution is disabled; cron is unavailable in this session.`\n- `ToolError`: `Invalid cron expression: ...` (field count, range, step value, ordering)\n- `ToolError`: `Cron task limit reached (50). Cancel an existing task with CronDelete first.`\n- `ToolError`: `Cron is disabled by CLAUDE_CODE_DISABLE_CRON=1.`\n- zod validation errors for missing or wrong-typed inputs.\n\n## Examples\n\nSchedule a 5-minute deployment poll:\n\n```jsonc\n// CronCreate\n{\n \"cron_expression\": \"*/5 * * * *\",\n \"prompt\": \"Check whether the staging deployment finished and tell me what happened\",\n \"recurring\": true\n}\n```\n\nOne-shot reminder at 9am local:\n\n```jsonc\n// CronCreate\n{\n \"cron_expression\": \"0 9 * * *\",\n \"prompt\": \"Remind me to push the release branch\",\n \"recurring\": false\n}\n```\n\nCancel a scheduled task:\n\n```jsonc\n// CronDelete\n{ \"id\": \"ab12cd34\" }\n```\n\n## Parity oracle\n\nEach tool's schema is pinned by a frozen fixture under\n`packages/coding-agent/test/fixtures/claude-code-tools/`:\n\n- `cron-create.schema.json`\n- `cron-list.schema.json`\n- `cron-delete.schema.json`\n\nFixtures were captured from the upstream Claude Code CLI (`claude --version 2.1.152`).\nAny divergence between the fixture and the tool surface is a parity bug.\n",
57
58
  "tools/debug.md": "# debug\n\n> Drive one DAP debug session; adjacent debug UI code reuses the same subsystem for logs, raw SSE capture, reports, profiling, and system diagnostics.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/debug.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/debug.md`\n- Key collaborators:\n - `packages/coding-agent/src/dap/session.ts` — session lifecycle, breakpoint/state cache\n - `packages/coding-agent/src/dap/client.ts` — adapter process/socket transport, DAP message loop\n - `packages/coding-agent/src/dap/config.ts` — adapter resolution and auto-selection\n - `packages/coding-agent/src/dap/defaults.json` — built-in adapter definitions\n - `packages/coding-agent/src/dap/types.ts` — request/response/capability shapes\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — per-tool timeout clamp\n - `packages/coding-agent/src/debug/index.ts` — interactive debug selector menu\n - `packages/coding-agent/src/debug/log-viewer.ts` — recent-log TUI viewer\n - `packages/coding-agent/src/debug/raw-sse.ts` — raw SSE TUI viewer\n - `packages/coding-agent/src/debug/raw-sse-buffer.ts` — bounded SSE capture buffer\n - `packages/coding-agent/src/debug/profiler.ts` — CPU/heap profiling helpers\n - `packages/coding-agent/src/debug/report-bundle.ts` — `.tar.gz` report bundling, log source, cache cleanup\n - `packages/coding-agent/src/debug/system-info.ts` — system snapshot collection and env redaction\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | `\"launch\" \\| \"attach\" \\| \"set_breakpoint\" \\| \"remove_breakpoint\" \\| \"set_instruction_breakpoint\" \\| \"remove_instruction_breakpoint\" \\| \"data_breakpoint_info\" \\| \"set_data_breakpoint\" \\| \"remove_data_breakpoint\" \\| \"continue\" \\| \"step_over\" \\| \"step_in\" \\| \"step_out\" \\| \"pause\" \\| \"evaluate\" \\| \"stack_trace\" \\| \"threads\" \\| \"scopes\" \\| \"variables\" \\| \"disassemble\" \\| 
\"read_memory\" \\| \"write_memory\" \\| \"modules\" \\| \"loaded_sources\" \\| \"custom_request\" \\| \"output\" \\| \"terminate\" \\| \"sessions\"` | Yes | Dispatch key for the tool switch in `packages/coding-agent/src/tools/debug.ts`. |\n| `program` | `string` | No | Launch target path. Required for `launch`. Resolved relative to `cwd` if provided, otherwise session cwd. |\n| `args` | `string[]` | No | Program argv for `launch`. |\n| `adapter` | `string` | No | Explicit adapter name. Otherwise `selectLaunchAdapter()` / `selectAttachAdapter()` auto-pick from `packages/coding-agent/src/dap/config.ts`. |\n| `cwd` | `string` | No | Launch/attach working directory. Defaults to session cwd. |\n| `file` | `string` | No | Source file path for source breakpoints. |\n| `line` | `number` | No | Source line for source breakpoints. |\n| `function` | `string` | No | Function breakpoint name. Mutually exclusive with `file`+`line` in breakpoint actions. |\n| `name` | `string` | No | Data breakpoint info target name. Required for `data_breakpoint_info`. |\n| `condition` | `string` | No | Conditional expression for source/function/instruction/data breakpoints. |\n| `hit_condition` | `string` | No | Hit-count condition for instruction/data breakpoints. |\n| `expression` | `string` | No | Expression or raw debugger command. Required for `evaluate`. |\n| `context` | `string` | No | Evaluate context. Defaults to `\"repl\"`. Passed through as DAP evaluate context. |\n| `frame_id` | `number` | No | Frame selector for `evaluate`, `scopes`, `data_breakpoint_info`. `scopes` and `evaluate` default to the current stopped frame when omitted. |\n| `scope_id` | `number` | No | Variables reference from a scope. Accepted by `variables`; also used as a fallback variables reference for `data_breakpoint_info`. |\n| `variable_ref` | `number` | No | Variables reference for `variables`; preferred over `scope_id` when both are present. |\n| `pid` | `number` | No | Local process id for `attach`. 
`attach` requires `pid` or `port`. |\n| `port` | `number` | No | Remote attach port. If no adapter is forced, attach prefers `debugpy` when `port` is present. |\n| `host` | `string` | No | Remote attach host for `attach`. |\n| `levels` | `number` | No | Max stack frames for `stack_trace`. |\n| `memory_reference` | `string` | No | Memory reference/address for `disassemble`, `read_memory`, `write_memory`. For `disassemble`, `resolveDisassemblyReference()` falls back to `instruction_reference` when `memory_reference` is omitted. |\n| `instruction_reference` | `string` | No | Instruction breakpoint reference; required for instruction breakpoint actions. |\n| `instruction_count` | `number` | No | Required for `disassemble`. |\n| `instruction_offset` | `number` | No | Instruction offset for `disassemble`. |\n| `count` | `number` | No | Byte count for `read_memory`. Required there. |\n| `data` | `string` | No | Base64 payload for `write_memory`. Required there. |\n| `data_id` | `string` | No | Data breakpoint id. Required for `set_data_breakpoint` / `remove_data_breakpoint`. |\n| `access_type` | `\"read\" \\| \"write\" \\| \"readWrite\"` | No | Access filter for `set_data_breakpoint`. |\n| `command` | `string` | No | Custom DAP request command. Required for `custom_request`. |\n| `arguments` | `Record<string, unknown>` | No | Custom DAP request body for `custom_request`. |\n| `offset` | `number` | No | Offset for instruction breakpoints, disassembly, memory read, memory write. |\n| `resolve_symbols` | `boolean` | No | `disassemble` symbol-resolution flag. |\n| `allow_partial` | `boolean` | No | `write_memory` partial-write allowance. |\n| `start_module` | `number` | No | Modules pagination start index for `modules`. |\n| `module_count` | `number` | No | Modules pagination count for `modules`. |\n| `timeout` | `number` | No | Per-request timeout in seconds. Default `30`, clamped to `5..300`. 
|\n\n### Action-specific requirements\n- `launch`: `program`\n- `attach`: `pid` or `port`\n- `set_breakpoint` / `remove_breakpoint`: `function`, or `file` + `line`\n- `set_instruction_breakpoint` / `remove_instruction_breakpoint`: `instruction_reference`\n- `data_breakpoint_info`: `name`\n- `set_data_breakpoint` / `remove_data_breakpoint`: `data_id`\n- `evaluate`: `expression`\n- `variables`: `variable_ref` or `scope_id`\n- `disassemble`: capability `supportsDisassembleRequest`, plus `instruction_count`\n- `read_memory`: capability `supportsReadMemoryRequest`, plus `memory_reference` and `count`\n- `write_memory`: capability `supportsWriteMemoryRequest`, plus `memory_reference` and `data`\n- `modules`: capability `supportsModulesRequest`\n- `loaded_sources`: capability `supportsLoadedSourcesRequest`\n- `custom_request`: `command`\n\n### Interactive selector values\n`packages/coding-agent/src/debug/index.ts` also exposes a fixed UI-only selector with values `open-artifacts`, `performance`, `work`, `dump`, `memory`, `logs`, `system`, `raw-sse`, `transcript`, `clear-cache`. These are not model-callable through `debugSchema`; they are local TUI menu routes.\n\n## Outputs\nThe agent tool returns a standard `toolResult()` payload from `packages/coding-agent/src/tools/debug.ts`:\n- `content`: one text block. 
Every action renders human-readable text; there is no structured JSON block in `content`.\n- `details.action`: echoed action.\n- `details.success`: always initialized `true`; failures surface by throwing before a result is returned.\n- `details.snapshot`: present for actions that operate on or create a session, using `DapSessionSummary` from `packages/coding-agent/src/dap/types.ts`.\n- Action-specific `details` fields:\n - `launch` / `attach`: `adapter`\n - breakpoint actions: `breakpoints`, `functionBreakpoints`, `instructionBreakpoints`, `dataBreakpoints`\n - `data_breakpoint_info`: `dataBreakpointInfo`\n - `continue` / `step_*`: `state`, `timedOut`\n - `threads`: `threads`\n - `stack_trace`: `stackFrames`\n - `scopes`: `scopes`\n - `variables`: `variables`\n - `evaluate`: `evaluation`\n - `disassemble`: `disassembly`\n - `read_memory`: `memoryAddress`, `memoryData`, `unreadableBytes`\n - `write_memory`: `bytesWritten`\n - `modules`: `modules`\n - `loaded_sources`: `sources`\n - `custom_request`: `customBody`\n - `output`: `output`\n - `sessions`: `sessions`\n\nStreaming/UI behavior:\n- The tool renderer merges call and result (`mergeCallAndResult: true`) and renders inline.\n- `debug.ts` itself does not emit progress updates through `_onUpdate`; result delivery is single-shot.\n- The interactive selector is UI-driven instead of model-driven. It swaps TUI components, appends status lines to the chat pane, opens files in external viewers, or writes archives/temp files.\n\nSide-channel artifacts outside the model tool result:\n- `createReportBundle()` writes `gjc-report-<timestamp>.tar.gz` under the reports dir and returns the filesystem path to the UI handler.\n- `#handleWorkReport()` writes `/tmp/work-profile-<Date.now()>.svg` before opening it.\n- `RawSseViewerComponent` and `DebugLogViewerComponent` can copy captured text to the clipboard.\n\n## Flow\n1. 
Tool registration is conditional: `DebugTool.createIf()` in `packages/coding-agent/src/tools/debug.ts` returns `null` unless `session.settings.get(\"debug.enabled\")` is true. `packages/coding-agent/src/tools/index.ts` wires the factory and rechecks the same setting in tool filtering.\n2. `DebugTool.execute()` clamps `params.timeout` through `clampTimeout(\"debug\", params.timeout)` and composes the caller `AbortSignal` with `AbortSignal.timeout(...)`.\n3. `launch` and `attach` resolve cwd/program paths, select an adapter in `packages/coding-agent/src/dap/config.ts`, then delegate to `dapSessionManager.launch()` / `.attach()`.\n4. `DapSessionManager.launch()` / `.attach()` enforce the single-session rule with `#ensureLaunchSlot()`, spawn the adapter through `DapClient.spawn()`, register listeners, send `initialize`, cache capabilities, start listening for an initial stop event before sending `launch`/`attach`, then complete the `initialized` → `configurationDone` handshake in `#completeConfigurationHandshake()`.\n5. `DapClient.spawn()` starts the adapter detached with `NON_INTERACTIVE_ENV`. Most adapters use stdio; socket-mode adapters (`dlv`) use `#spawnSocketUnix()` on Linux or `#spawnSocketClientAddr()` on macOS/other.\n6. `#registerSession()` in `packages/coding-agent/src/dap/session.ts` installs reverse-request handlers:\n - `runInTerminal`: spawns the requested debuggee command detached via `ptree.spawn()` and returns `{ processId }`\n - `startDebugging`: logs the child-session request and returns `{}`; it does not create nested sessions\n - events: `output`, `initialized`, `stopped`, `continued`, `exited`, `terminated` update cached session state\n7. Operational actions (`set_breakpoint`, `evaluate`, `threads`, `read_memory`, `custom_request`, and similar) call `dapSessionManager` methods. Most flow through `#sendRequestWithConfig()`, which first sends `configurationDone` when required, then sends the DAP request, then updates `lastUsedAt`.\n8. 
Breakpoint actions maintain local cached breakpoint sets in `DapSessionManager` and remap adapter responses back onto those cached records.\n9. `continue` and the three step actions clear cached stop state, subscribe for `stopped`/`terminated`/`exited` before sending the DAP request, then `#awaitStopOutcome()` either returns the new stopped location or reports that the program is still running after timeout.\n10. `pause` sends DAP `pause`, waits for a stopped event if needed, and reuses cached stop state if the program was already stopped.\n11. `stack_trace`, `scopes`, `variables`, and `evaluate` default to the current stopped thread/frame when the caller omits ids and cached state is available.\n12. `output` reads the in-memory output ring from `DapSessionManager.getOutput()`. `terminate` sends `terminate` when supported, always attempts `disconnect`, marks the session terminated, and disposes the client.\n13. `sessions` reads the manager’s current map and formats all summaries. Although the manager stores a map, only one active session can exist because new launch/attach calls are blocked until the active one is terminated or cleaned up.\n14. 
The interactive selector in `packages/coding-agent/src/debug/index.ts` builds a `SelectList` of fixed values and dispatches each to a handler:\n - `performance`: `startCpuProfile()`, wait for Enter/Escape, stop profiling, read a 30-second work profile with `getWorkProfile(30)`, then bundle via `createReportBundle()`\n - `work`: read `getWorkProfile(30)`, write a temp SVG, open it externally\n - `dump`: create a report bundle immediately\n - `memory`: force GC, call `Bun.generateHeapSnapshot(\"v8\")`, then bundle\n - `logs`: build a `DebugLogSource` and mount `DebugLogViewerComponent`\n - `raw-sse`: resolve a `RawSseDebugBuffer` from the session and mount `RawSseViewerComponent`\n - `system`: call `collectSystemInfo()` and render `formatSystemInfo()` into the chat pane\n - `open-artifacts`: open the current session artifact directory if it exists\n - `transcript`: delegates to `ctx.handleDebugTranscriptCommand()`\n - `clear-cache`: show confirmation, then remove artifact directories older than 30 days with `clearArtifactCache()`\n\n## Modes / Variants\n- **Availability gate**\n - Tool hidden when `debug.enabled` is false.\n- **Adapter selection**\n - `launch`: explicit `adapter` wins; otherwise `selectLaunchAdapter()` ranks available adapters by extension match, root-marker match, then native-debugger preference (`gdb`, `lldb-dap`) for extensionless binaries.\n - `attach`: explicit `adapter` wins; otherwise remote `port` prefers `debugpy`, then native debuggers, then first available adapter.\n- **Transport**\n - stdio adapters: direct `stdin`/`stdout` framing.\n - socket adapters: Unix domain socket on Linux; TCP callback on macOS/other.\n- **DAP agent-tool actions**\n - `launch` — spawn adapter, initialize session, maybe stop on entry; returns formatted session snapshot and `details.adapter`.\n - `attach` — connect to a live process or remote port; same output shape as `launch`.\n - `set_breakpoint` — source or function breakpoint add/update; returns the current 
breakpoint list for that target.\n - `remove_breakpoint` — source or function breakpoint removal; returns the remaining breakpoint list.\n - `set_instruction_breakpoint` / `remove_instruction_breakpoint` — require `supportsInstructionBreakpoints`; return current instruction breakpoint list.\n - `data_breakpoint_info` — require `supportsDataBreakpoints`; asks the adapter for a `dataId`, access types, and description for `name`.\n - `set_data_breakpoint` / `remove_data_breakpoint` — require `supportsDataBreakpoints`; return the cached data-breakpoint list.\n - `continue` / `step_over` / `step_in` / `step_out` — return text describing whether execution stopped, terminated, or kept running, plus `details.state` and `details.timedOut`.\n - `pause` — interrupts a running target and returns a stopped snapshot.\n - `evaluate` — adapter expression evaluation; defaults context to `repl`.\n - `stack_trace` — fetches frames for the resolved thread.\n - `threads` — fetches current threads.\n - `scopes` — frame scopes for an explicit `frame_id` or the current stopped frame.\n - `variables` — variables for `variable_ref` or `scope_id`.\n - `disassemble` — require `supportsDisassembleRequest`; disassembles around a memory reference.\n - `read_memory` — require `supportsReadMemoryRequest`; returns address, base64 data, unreadable-byte count.\n - `write_memory` — require `supportsWriteMemoryRequest`; writes base64 data and reports bytes written.\n - `modules` — require `supportsModulesRequest`; optional pagination via `start_module` / `module_count`.\n - `loaded_sources` — require `supportsLoadedSourcesRequest`; returns loaded source descriptors.\n - `custom_request` — sends any DAP request name with arbitrary arguments.\n - `output` — dumps captured stdout/stderr/console text from the session cache.\n - `terminate` — disconnects and disposes the active session; returns `No debug session to terminate.` when none exists.\n - `sessions` — lists all cached session summaries.\n- 
**Interactive selector routes (UI-only)**\n - `logs` — loads today’s log tail and optional older daily log files into `DebugLogViewerComponent`; supports copy, range selection, pid filtering, load-older.\n - `raw-sse` — live view over the session’s `RawSseDebugBuffer`; supports tail-follow, scrolling, copy-all.\n - `performance` — CPU profile + 30-second work profile + report bundle.\n - `memory` — heap snapshot + report bundle.\n - `dump` — report bundle without profiler artifacts.\n - `work` — standalone work-profile flamegraph export/open.\n - `system` — formatted OS/arch/CPU/memory/version/cwd/shell/terminal dump.\n - `open-artifacts` / `transcript` / `clear-cache` — artifact directory open, transcript export, artifact-cache pruning.\n\n## Side Effects\n- Filesystem\n - Resolves program/file/cwd paths against the session cwd.\n - Report creation writes `.tar.gz` bundles and may read the session JSONL, artifact files, subagent session JSONLs, and log files.\n - Work-profile export writes `/tmp/work-profile-<timestamp>.svg`.\n - Log source reads daily log files from the logs dir.\n - Artifact-cache cleanup removes session artifact directories older than the cutoff.\n - `resolveRawSseDebugBuffer()` may attach a non-enumerable `rawSseDebugBuffer` property to the owner object.\n- Network\n - Socket-mode adapters bind/connect local sockets.\n - Remote attach may connect through the adapter to a remote debug port.\n- Subprocesses / native bindings\n - Spawns debugger adapters (`gdb`, `lldb-dap`, `python -m debugpy.adapter`, `dlv`, and others from `defaults.json`) detached.\n - Reverse DAP `runInTerminal` requests spawn the debuggee detached via `ptree.spawn()`.\n - `getWorkProfile(30)` comes from `@gajae-code/natives`.\n - CPU profiling uses `node:inspector/promises`; heap snapshots use `Bun.generateHeapSnapshot(\"v8\")`; raw/log viewers sanitize text via `@gajae-code/natives`.\n - `openPath()` launches the OS default file/browser handler for artifact dirs and SVGs.\n 
- Log/raw-SSE viewers can call `copyToClipboard()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - `DapSessionManager` keeps session summaries, breakpoints, threads, stack frames, stop location, output capture, capabilities, and last-used timestamps in memory.\n - Active-session id is global to the singleton `dapSessionManager`.\n - `RawSseDebugBuffer` stores recent SSE events per owner/session.\n - The tool is `exclusive`; concurrent debug tool calls are blocked by the scheduler.\n- User-visible prompts / interactive UI\n - Debug selector shows confirmation before cache deletion.\n - Performance profiling temporarily hijacks editor Enter/Escape handlers until profiling stops.\n - Log/raw-SSE viewers replace the editor pane with custom components.\n- Background work / cancellation\n - Every DAP request accepts an `AbortSignal`; timeouts and caller cancellation abort the active request, not the whole session lifetime.\n - `DapSessionManager` runs a background cleanup loop every 30 seconds.\n - Raw SSE viewers subscribe to buffer updates until closed.\n\n## Limits & Caps\n- Tool timeout clamp: `default=30`, `min=5`, `max=300` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Per-request DAP default timeout: `DEFAULT_REQUEST_TIMEOUT_MS = 30_000` in `packages/coding-agent/src/dap/client.ts`.\n- Single active session: enforced by `#ensureLaunchSlot()` in `packages/coding-agent/src/dap/session.ts`.\n- Idle session cleanup: `IDLE_TIMEOUT_MS = 10 * 60 * 1000`, checked every `CLEANUP_INTERVAL_MS = 30 * 1000`.\n- Adapter liveness heartbeat: `HEARTBEAT_INTERVAL_MS = 5 * 1000`.\n- Output capture cap: `MAX_OUTPUT_BYTES = 128 * 1024`; older text is trimmed in ~1 KiB slices and `outputTruncated` is recorded.\n- Initial stop capture timeout after launch/attach: `STOP_CAPTURE_TIMEOUT_MS = 5_000`.\n- Socket-mode adapter readiness timeout: `10_000` ms in `waitForCondition()` and TCP connect timeout logic in 
`packages/coding-agent/src/dap/client.ts`.\n- Raw SSE buffer caps in `packages/coding-agent/src/debug/raw-sse-buffer.ts`:\n - `MAX_RAW_SSE_EVENTS = 1_000`\n - `MAX_RAW_SSE_CHARS = 512_000`\n - `MAX_RAW_SSE_EVENT_CHARS = 64_000` per event, with `: gjc-debug-truncated ...` marker appended on trim\n- Log viewer window in `packages/coding-agent/src/debug/log-viewer.ts`:\n - `INITIAL_LOG_CHUNK = 50`\n - `LOAD_OLDER_CHUNK = 50`\n- Report/log ingestion caps in `packages/coding-agent/src/debug/report-bundle.ts`:\n - `MAX_LOG_LINES = 5000` for interactive log reading\n - `MAX_LOG_BYTES = 2 * 1024 * 1024` tail-read ceiling\n - report bundles include only the last `1000` log lines\n - subagent session inclusion is capped at the most recent `10` JSONL files\n- Interactive profiling windows in `packages/coding-agent/src/debug/index.ts`: both performance and work reports request `getWorkProfile(30)`.\n- Artifact cache pruning default: `30` days in `clearArtifactCache()` and the selector confirmation text.\n\n## Errors\n- Parameter validation in `packages/coding-agent/src/tools/debug.ts` throws `ToolError` with explicit messages such as:\n - `program is required for launch`\n - `attach requires pid or port`\n - `set_breakpoint requires file+line or function`\n - `variables requires variable_ref or scope_id`\n - `memory_reference is required for read_memory`\n - `count is required for read_memory`\n - `data is required for write_memory`\n - `command is required for custom_request`\n- Adapter selection failure throws `No debugger adapter available. Installed adapters: ...`.\n- Capability-gated actions throw from `requireCapability(...)`, e.g. `Active adapter does not support memory reads.`\n- No-session and state errors come from `DapSessionManager`, e.g. `No active debug session. Launch or attach first.`, `No active stack frame. Run stack_trace first or supply frame_id.`, `Debugger reported no threads.`\n- Launching a second live session throws `Debug session <id> is still active. 
Terminate it before launching another.`\n- DAP transport/request failures surface as thrown errors from `DapClient`:\n - `DAP request <command> timed out after <ms>ms`\n - `DAP event <event> timed out after <ms>ms`\n - `DAP adapter <name> is not running`\n - `DAP adapter exited (code N): <stderr>` or `DAP adapter exited unexpectedly (code N)`\n - adapter response `message` when a DAP request fails\n- `continue` / `step_*` are intentionally non-fatal when the target stays running past the timeout: they return `details.timedOut = true` and `state: \"running\"` instead of throwing.\n- `terminate` suppresses adapter errors while sending `terminate`/`disconnect`; it still disposes the client and returns the last summary when possible.\n- Interactive selector handlers report UI errors instead of throwing:\n - profiler start/stop, report bundling, log reading, system-info collection, cache clearing, and artifact opening use `ctx.showError(...)` / `ctx.showWarning(...)`\n - empty logs and empty artifact caches are warnings/status messages, not failures\n - copy failures in log/raw-SSE viewers become status/error text in the UI\n- Report-bundle helpers are intentionally best-effort for many file reads: missing session files, missing artifact dirs, unreadable artifact files, missing log dirs, inaccessible cache dirs, and missing subagent files are skipped silently.\n- `collectSystemInfo()` is best-effort for CPU probing; failure there falls back to `Unknown CPU`.\n\n## Notes\n- `packages/coding-agent/src/prompts/tools/debug.md` tells the model only one active session is supported; this is not merely advisory: the limit is enforced in code.\n- `configurationDone` is sent automatically both during the launch/attach handshake and lazily before later requests if the adapter required it and the initial handshake did not complete.\n- `startDebugging` reverse requests are acknowledged but not implemented; child debug sessions are not spawned.\n- `output` exposes the merged `output` event stream 
only; the tool does not distinguish stdout, stderr, and console categories.\n- Session summaries expose `needsConfigurationDone`; this is derived from adapter capabilities and whether `configurationDone` has been sent.\n- Source breakpoint file paths are normalized with `path.resolve()` before caching and sending to the adapter.\n- `evaluate` defaults to `repl`, so the tool can forward raw debugger commands when the adapter supports them.\n- `disassemble` resolves its target from `memory_reference` first, then `instruction_reference`; it throws if neither is present.\n- `RawSseDebugBuffer.recordEvent()` increments `totalEvents` before bounded retention. A snapshot can therefore show fewer retained records than total observed events.\n- Raw SSE buffer listener failures are swallowed so viewer bugs do not break capture.\n- `createDebugLogSource()` walks daily log files newest-first, but `loadOlderLogs()` reverses each requested slice before concatenation so older chunks prepend in chronological order.\n- `clearArtifactCache()` deletes directories by directory mtime, not per-file age.\n- `addDirectoryToArchive()` reads artifact files as text with `Bun.file(...).text()`. Binary artifact contents are not preserved byte-for-byte in the report bundle.\n- The tool renderer truncates displayed output for the TUI preview, but the underlying text result still contains the full returned string.\n",
58
59
  "tools/edit.md": "# edit\n\n> Applies source edits; default mode is the hashline patch language consumed from a single `input` string.\n\n## Source\n- Entry: `packages/coding-agent/src/edit/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/hashline.md`\n- Key collaborators:\n - `packages/coding-agent/src/utils/edit-mode.ts` — selects active edit mode\n - `packages/coding-agent/src/hashline/grammar.lark` — custom-tool grammar for hashline mode\n - `packages/coding-agent/src/hashline/input.ts` — splits `§PATH` sections\n - `packages/coding-agent/src/hashline/parser.ts` — parses op-prefixed edits and verbatim payload lines\n - `packages/coding-agent/src/hashline/apply.ts` — validates anchors and applies edits\n - `packages/coding-agent/src/hashline/anchors.ts` — stale-anchor mismatch formatting\n - `packages/coding-agent/src/hashline/recovery.ts` — cache-based stale-anchor recovery\n - `packages/coding-agent/src/hashline/hash.ts` — computes `LINEhh|` anchors shared with `read`/`search`\n - `packages/coding-agent/src/edit/file-read-cache.ts` — per-session read snapshot cache\n - `packages/coding-agent/src/tools/read.ts` — emits anchored lines and records read snapshots\n - `packages/coding-agent/src/tools/search.ts` — records sparse snapshots from matches/context\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidates FS scan caches after writes\n - `packages/coding-agent/src/edit/streaming.ts` — computes in-flight diff previews for the TUI\n\n## Inputs\n\n### Hashline mode (default)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `input` | `string` | Yes | One or more edit sections. First non-blank line must be `§PATH` unless the caller supplies the legacy fallback `path` outside the model schema and the body already looks like hashline ops (`packages/coding-agent/src/hashline/input.ts`). Optional `*** Begin Patch` / `*** End Patch` envelope is ignored if present. 
|\n\nPatch language inside `input`:\n\n- Section header: `§PATH`\n- Insert after: `»ANCHOR`\n- Insert before: `«ANCHOR`\n- Replace/delete range: `≔A..B`\n- Single-line replace/delete sugar: `≔A` means `≔A..A`\n- `≔A..B` with no payload deletes the range. To keep a blank line, include one explicit empty payload line.\n- Payload lines: verbatim file content after `»`, `«`, or `≔`\n- Special anchors: `BOF`, `EOF`\n- Anchor token: `<line><2-char-hash>`, for example `41th`\n\nAnchors come from `read`/`search` output. `read` formats lines as `LINEhh|TEXT` via `formatHashLine` / `formatHashLines` in `packages/coding-agent/src/hashline/hash.ts`; copy only the token left of `|` into op lines.\n\nOther edit modes exist (`replace`, `patch`, `vim`, `apply_patch`) and are selected outside the tool payload by `resolveEditMode()` in `packages/coding-agent/src/utils/edit-mode.ts`. Their schemas are different; this document covers the default hashline mode.\n\n## Outputs\n- Single-shot tool result; hashline mode does not use a `resolve` preview/apply handshake.\n- `content` contains one text block per call. 
For a successful single-file edit it is either:\n - `<path>:` plus a compact diff preview from `packages/coding-agent/src/hashline/diff-preview.ts`, or\n - `Updated <path>` / `Created <path>` when no compact preview text is emitted.\n- Parse or recovery warnings are appended as:\n\n```text\nWarnings:\n...\n```\n\n- `details` is `EditToolDetails` from `packages/coding-agent/src/edit/renderer.ts`:\n - `diff`: unified diff string\n - `firstChangedLine`: first changed post-edit line\n - `diagnostics`: LSP/format result if available\n - `op`: `\"create\"` or `\"update\"` for hashline mode\n - `meta`: output metadata\n - `perFileResults`: present for multi-section input\n- Multi-section input returns one aggregated result with combined text and per-file details.\n- While the model is still typing arguments, the TUI can compute a diff preview with `packages/coding-agent/src/edit/streaming.ts`; that preview is not a deferred action and does not block execution.\n\n## Flow\n1. `EditTool.execute()` in `packages/coding-agent/src/edit/index.ts` resolves the active mode. Default is `hashline`; `customFormat` exposes `packages/coding-agent/src/hashline/grammar.lark` with `$HFMT$` / `$HOP_INSERT_BEFORE$` / `$HOP_INSERT_AFTER$` / `$HOP_REPLACE$` / `$HOP_CHARS$` / `$HFILE$` placeholders filled from `packages/coding-agent/src/hashline/hash.ts`.\n2. `executeHashlineSingle()` in `packages/coding-agent/src/hashline/execute.ts` splits the raw `input` into `§PATH` sections with `splitHashlineInputs()`.\n3. If multiple sections target the same path, `mergeSamePathSections()` concatenates them before execution so every op still refers to the original file snapshot.\n4. Multi-section calls run a preflight pass (`preflightHashlineSection()`): parse ops, enforce plan-mode write rules, load the current file, reject anchor-scoped edits against missing files, reject auto-generated files, apply edits in memory, and fail if the result is a no-op. This prevents partial batches.\n5. 
`parseHashlineWithWarnings()` in `packages/coding-agent/src/hashline/parser.ts` tokenizes the diff body:\n - ignores blank lines and optional `*** Begin Patch`\n - stops at `*** End Patch`\n - stops at `*** Abort` and emits `ABORT_WARNING`\n - turns `»` / `«` payload runs into one `insert` edit per payload line\n - turns `≔A..B` with payload into inserts before `A`, then deletes for `A..B`\n - turns `≔A..B` with no payload into one `delete` edit per line in the range; a blank-in-place edit requires one explicit empty payload line\n6. `applyHashlineEdits()` in `packages/coding-agent/src/hashline/apply.ts` validates every referenced anchor before mutating anything. Each anchor hash is recomputed from current file content with `computeLineHash()`.\n7. If any anchor hash differs, `applyHashlineEdits()` throws `HashlineMismatchError`. `execute.ts` catches only that class and calls `tryRecoverHashlineWithCache()`.\n8. Recovery replays the edits against the most recent cached read/search snapshot for that path (`packages/coding-agent/src/edit/file-read-cache.ts`), then 3-way merges the result onto current disk content using `Diff.applyPatch(..., { fuzzFactor: 3 })` in `packages/coding-agent/src/hashline/recovery.ts`. On success the edit proceeds with a warning; on failure the original mismatch error is re-thrown.\n9. Before splicing lines, `absorbReplacementBoundaryDuplicates()` normalizes some malformed-but-recoverable ranges:\n - duplicate prefix/suffix lines adjacent to a replacement can be absorbed by widening the delete range\n - pure inserts can auto-drop duplicated leading/trailing payload lines when `edit.hashlineAutoDropPureInsertDuplicates` is enabled\n - all such fixes append warnings\n10. `after_anchor` inserts are normalized to `before_anchor` of the next line, or `EOF` if the anchor was the last line.\n11. Anchor-targeted edits are bucketed by target line and applied bottom-up so earlier splices do not invalidate later original line numbers. 
`BOF` and `EOF` inserts are applied after that.\n12. The edited text is restored to the original BOM and line ending style with helpers from `packages/coding-agent/src/edit/normalize.ts` and persisted via `serializeEditFileText()` in `packages/coding-agent/src/edit/read-file.ts`.\n13. The writethrough callback from `createLspWritethrough()` may format the file and fetch diagnostics. Late diagnostics are queued back into session state as a hidden deferred message by `EditTool.#injectLateDiagnostics()` in `packages/coding-agent/src/edit/index.ts`.\n14. `invalidateFsScanAfterWrite()` calls `invalidateFsScanCache(path)` so filesystem-backed tools do not serve stale scan results.\n15. The session file-read cache is refreshed with the post-edit file text via `recordContiguous()`, making the just-written content the new recovery base for subsequent stale-anchor merges.\n16. The final response is built from a unified diff (`generateDiffString()`), a compact preview, and any accumulated warnings.\n\n## Modes / Variants\n- `hashline` — default mode; line-anchored patch language described here (`packages/coding-agent/src/utils/edit-mode.ts`).\n- `replace` — exact/fuzzy old/new text replacement (`packages/coding-agent/src/edit/modes/replace.ts`).\n- `patch` — structured JSON diff-hunk mode (`packages/coding-agent/src/edit/modes/patch.ts`).\n- `apply_patch` — freeform `*** Begin Patch` patch envelope, internally expanded into patch-mode entries (`packages/coding-agent/src/edit/modes/apply-patch.ts`).\n- `vim` — persistent modal editing buffer (`packages/coding-agent/src/tools/vim.ts`).\n\nHashline op examples:\n\n```text\n§src/a.ts\n»4fb\nconst added = true;\n```\n\n```text\n§src/a.ts\n«4fb\nconst addedBefore = true;\n```\n\n```text\n§src/a.ts\n≔4fb..6qx\n```\n\n```text\n§src/a.ts\n≔4fb..5dm\nconst clean = (name || DEF).trim();\nreturn clean.length === 0 ? 
DEF : clean.toUpperCase();\n```\n\nBOF/EOF examples:\n\n```text\n§src/a.ts\n»BOF\nconst HEADER = true;\n```\n\n```text\n§src/a.ts\n»EOF\nexport const done = true;\n```\n\nDelete / blank examples:\n\n```text\n§src/a.ts\n≔4fb\n```\n\n```text\n§src/a.ts\n≔4fb\n\n»EOF\nexport const done = true;\n```\n\n## Side Effects\n- Filesystem\n - Reads target files with `readEditFileText()`.\n - Writes full updated file contents with `serializeEditFileText()`.\n - Preserves BOM and original line-ending style.\n- Subprocesses / native bindings\n - `createLspWritethrough()` may trigger formatter / diagnostics work through the LSP subsystem.\n - `invalidateFsScanAfterWrite()` calls native `invalidateFsScanCache()` from `@gajae-code/natives`.\n- Session state\n - Reads and updates the per-session `FileReadCache` used for stale-anchor recovery.\n - Stores pending deferred-diagnostics abort controllers per path inside `EditTool`.\n - Queues late diagnostics back into the session transcript as a hidden custom message.\n- Background work / cancellation\n - A new edit to the same path aborts the prior deferred diagnostics fetch for that path (`packages/coding-agent/src/edit/index.ts`).\n - The tool itself is marked `nonAbortable = true` and `concurrency = \"exclusive\"` in `packages/coding-agent/src/edit/index.ts`.\n\n## Limits & Caps\n- Default mode is `hashline` (`DEFAULT_EDIT_MODE`) in `packages/coding-agent/src/utils/edit-mode.ts`.\n- Anchor hashes are always 2 lowercase letters from a stable 647-entry bigram table (`HL_BIGRAMS_COUNT`) in `packages/coding-agent/src/hashline/hash.ts`.\n- The visible mismatch report shows 2 lines of context on each side (`MISMATCH_CONTEXT`) in `packages/coding-agent/src/hashline/constants.ts`.\n- Stale-anchor recovery uses `fuzzFactor: 3` (`HASHLINE_RECOVERY_FUZZ_FACTOR`) in `packages/coding-agent/src/hashline/recovery.ts`.\n- The per-session read cache keeps at most 30 paths (`MAX_PATHS_PER_SESSION`) in 
`packages/coding-agent/src/edit/file-read-cache.ts`.\n- Hashline streaming chunk defaults are 200 lines or 64 KiB per chunk (`packages/coding-agent/src/hashline/types.ts`, consumed by `packages/coding-agent/src/hashline/stream.ts`).\n- `HL_OP_INSERT_BEFORE` is `«`, `HL_OP_INSERT_AFTER` is `»`, `HL_OP_REPLACE` is `≔`, `HL_OP_CHARS` is `«»≔`, `HL_FILE_PREFIX` is `§`, and `HL_BODY_SEP` is `|` (`packages/coding-agent/src/hashline/hash.ts`).\n\n## Errors\n- Missing section header:\n - `input must begin with \"§PATH\" on the first non-blank line; got: ... Example: \"§src/foo.ts\" then edit ops.`\n- Empty header:\n - `Input header \"§\" is empty; provide a file path.`\n- Bad anchor token:\n - `line N: expected a full anchor such as \"119sr\"; got \"...\".`\n- Bad range syntax:\n - `line N: explicit ranges are required for replacement...`\n - `line N: range must include exactly two full anchors separated by \"..\".`\n - `line N: range A..B ends before it starts.`\n - `line N: range A..B uses two different hashes for the same line.`\n- Missing payload for `»` / `«`:\n - `line N: » and « operations require at least one verbatim payload line.`\n- Stray payload line:\n - `line N: payload line has no preceding », «, or ≔ operation.`\n- Unknown op:\n - `line N: unrecognized op. Use «ANCHOR..., »ANCHOR..., ≔A..B...`\n- Delete vs blank:\n - `≔A..B` with no payload deletes. To blank in place, include one explicit empty payload line before the next op/header/EOF.\n- Missing file for anchor-scoped edits:\n - `File not found: <path>`\n- Out-of-range anchor:\n - `Line N does not exist (file has M lines)`\n- Stale anchors throw `HashlineMismatchError`. The error message contains re-read guidance and reprints nearby current file lines as `LINEhh|TEXT`; mismatched lines are marked `*`. 
`displayMessage` renders the same information in a code-frame style.\n- No-op edit:\n - `Edits to <path> resulted in no changes being made.`\n- Recovery failure is silent internally: if cache-based merge cannot prove a valid result, the original mismatch error is surfaced unchanged.\n\n## Notes\n- `read` and `search` are the authoritative source of anchors. The edit parser does not want the trailing `|TEXT`; copy only the `LINEhh` token.\n- Multi-op patches are parsed against the original file snapshot. Do not renumber later anchors after earlier ops; `applyHashlineEdits()` buckets and applies them bottom-up.\n- `≔A..B` is not a primitive replace in the parser. With payload, it expands to inserts before `A` plus deletes for `A..B`; with no payload, it only deletes `A..B`. To blank in place, include one explicit empty payload line. Stale-anchor checking still happens on the original range lines.\n- Interior lines of a multi-line range use hash `**` (`RANGE_INTERIOR_HASH`) and are not individually verified; only the first and last anchor hashes are checked.\n- `computeLineHash()` trims trailing whitespace before hashing. Anchors survive line-ending changes and trailing-space-only changes, but not substantive line edits.\n- For punctuation-only lines, the hash mixes in the line number; identical `}` lines on different lines intentionally get different anchors.\n- `splitHashlineInputs()` normalizes absolute `§PATH` headers back to a cwd-relative path when the file is inside the current working tree. Headers with any run of leading `§` chars (e.g. 
`§foo.ts`, `§§foo.ts`, `§§§foo.ts`) are accepted; the canonical form is `§PATH`.\n- Optional `*** Begin Patch` / `*** End Patch` markers are accepted in hashline mode, but the file sections are still `§PATH`-based, not OpenAI-style `*** Update File:` hunks.\n- `*** Abort` terminates parsing early and returns `ABORT_WARNING`; ops parsed before the marker still apply.\n- File-read cache invalidation is conflict-based, not write-through invalidation. If `read` later records content for a line that disagrees with the cached snapshot, the entire snapshot for that path is replaced with the newly observed lines (`packages/coding-agent/src/edit/file-read-cache.ts`).\n- There is no resolve-style apply/discard phase for hashline edits. The only preview path is the transient TUI diff preview in `packages/coding-agent/src/edit/streaming.ts`.\n",
59
60
  "tools/eval.md": "# eval\n\n> Execute Python or JavaScript code in persistent cell-based runtimes.\n\n> **Notice:** Do not shell out to `python -c`/`python -e`, `bun -e`, or `node -e` via the `bash` tool for ad-hoc code execution. Use this tool instead — it gives you persistent state across cells, structured `display()` output, image/JSON capture, and proper cancellation/timeout handling that one-shot `-e`/`-c` invocations cannot provide.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/eval.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/eval.md`\n- Key collaborators:\n - `packages/coding-agent/src/eval/backend.ts` — backend execution contract\n - `packages/coding-agent/src/eval/js/index.ts` — JS backend adapter\n - `packages/coding-agent/src/eval/js/executor.ts` — JS execution + output sink\n - `packages/coding-agent/src/eval/js/context-manager.ts` — persistent VM contexts, prelude, tool bridge\n - `packages/coding-agent/src/eval/js/prelude.txt` — JS global helpers\n - `packages/coding-agent/src/eval/py/index.ts` — Python backend adapter\n - `packages/coding-agent/src/eval/py/executor.ts` — kernel session retention, reset, cleanup\n - `packages/coding-agent/src/eval/py/kernel.ts` — Jupyter gateway/kernel protocol, display capture\n - `packages/coding-agent/src/eval/py/prelude.py` — Python helper functions and status events\n - `packages/coding-agent/src/session/streaming-output.ts` — truncation, artifacts, streamed chunks\n - `docs/python-repl.md` — Python kernel/gateway internals\n\n## Inputs\n\nTool parameters are a JSON object with a single `cells` field — an ordered array of cell objects. Each cell is a structured record; there is no `*** Cell` header parsing, no language sniffing, and no implicit single-cell fallback. 
Cells run in array order; state persists within each language across cells and across tool calls.\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `cells` | `EvalCellInput[]` | Yes | Cells executed in order. At least one cell is required (`.min(1)`). |\n\nEach `EvalCellInput` (from `evalCellSchema` in `packages/coding-agent/src/tools/eval.ts`):\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `language` | `\"py\" \\| \"js\"` | Yes | Backend selector. `\"py\"` maps to the IPython/Jupyter kernel (`python` backend); `\"js\"` maps to the persistent JavaScript VM. |\n| `code` | `string` | Yes | Cell body, verbatim. JSON-encoded — embed newlines, quotes, and indentation directly; no fences, no headers. |\n| `title` | `string` | No | Short label rendered in the transcript (e.g. `\"imports\"`, `\"load config\"`). |\n| `timeout` | `integer` | No | Per-cell timeout in seconds, clamped to `1..600`. Defaults to 30 when omitted. |\n| `reset` | `boolean` | No | Wipe this cell's language kernel before running. Reset is per-language: a `py` cell's reset does not touch the JS VM and vice versa. Defaults to `false`. 
|\n\nMinimal example matching the live schema:\n\n```json\n{\n \"cells\": [\n { \"language\": \"py\", \"title\": \"imports\", \"timeout\": 10, \"code\": \"import json\\nfrom pathlib import Path\" },\n { \"language\": \"py\", \"title\": \"load config\", \"code\": \"data = json.loads(read('package.json'))\\ndisplay(data)\" },\n { \"language\": \"js\", \"title\": \"summary\", \"reset\": true, \"code\": \"const data = JSON.parse(await read('package.json'));\\ndisplay(data);\\nreturn data.name;\" }\n ]\n}\n```\n\n## Outputs\n\nFinal result from `EvalTool.execute()` is single-shot, but `onUpdate` streams partial text and `details` while cells run.\n\nReturned shape:\n\n- `content`: one text block containing combined cell output, or `(no text output)` / `(no output)` when only rich outputs exist.\n- `details` (`EvalToolDetails` from `packages/coding-agent/src/eval/types.ts`):\n - `cells`: per-cell code, status (`pending`/`running`/`complete`/`error`), output, duration, exit code, status events, markdown flag\n - `language`: first backend used\n - `languages`: distinct backends used, in first-use order\n - `jsonOutputs`: structured values emitted via `display(...)`\n - `images`: image payloads emitted by Python rich display or JS `display({ type: \"image\", ... 
})`\n - `statusEvents`: aggregated helper/tool status events\n - `notice`: backend fallback notice (currently unused; reserved for future per-cell notices)\n - `meta`: truncation metadata\n - `isError`: set on cell failure or cancellation\n\nRenderer behavior in `packages/coding-agent/src/tools/eval.ts`:\n\n- call preview renders each cell's `code` with syntax highlighting based on its declared `language`\n- result view renders each cell separately, including status, duration, and output\n- markdown outputs are rendered with the Markdown component instead of plain text\n- `jsonOutputs` render as a tree, collapsed or expanded depending on UI state\n- timeout / truncation notices render as dim metadata lines\n- images are carried in `details.images`; generic tool UI image handling renders them outside the text block\n\nSide-channel artifacts:\n\n- `session.allocateOutputArtifact?.(\"eval\")` may allocate an `artifact://...` backing store for spilled output.\n- Truncated output metadata points at that artifact when available.\n\n## Flow\n\n1. `EvalTool.execute()` in `packages/coding-agent/src/tools/eval.ts` receives `params.cells` already validated by the Zod schema — no string parsing step.\n2. For each cell, `execute()` maps `cell.language` to an `EvalLanguage` (`\"py\"` → `\"python\"`, `\"js\"` → `\"js\"`) and calls `resolveBackend(session, language)`:\n - `python` is gated on `eval.py !== false` and `pythonBackend.isAvailable(session)`.\n - `js` is gated on `eval.js !== false`.\n - A disabled or unavailable requested backend throws `ToolError`; there is no auto-fallback or sniffing.\n3. The tool allocates an `OutputSink`, a `TailBuffer`, per-cell result objects, and a `sessionAbortController`. `session.trackEvalExecution?.(...)` can wrap the whole run for external cancellation tracking.\n4. Cells execute sequentially. For each cell, `execute()`:\n - clamps `(cell.timeout ?? 
30) * 1000` ms through `clampTimeout(\"eval\", ...)`\n - builds a combined abort signal from the tool signal, the timeout, and the session abort controller\n - marks the cell `running` and emits an update\n - calls the backend’s `execute()` with `cwd`, `sessionId`, `sessionFile`, `kernelOwnerId`, `deadlineMs`, `reset` (defaults to `false`), artifact info, and chunk callback\n5. JS cells dispatch through `packages/coding-agent/src/eval/js/index.ts` into `executeJs()`; Python cells dispatch through `packages/coding-agent/src/eval/py/index.ts` into `executePython()`.\n6. Backend text chunks stream into the shared `OutputSink`; rich outputs are accumulated separately as JSON, images, markdown markers, and status events.\n7. After each cell:\n - text output is trimmed and stored on that cell result\n - multi-cell runs prefix text with `[i/n]` and the optional title\n - cancellations return early with `isError: true` and a cell-specific abort message\n - non-zero exit codes return early with `isError: true` and a message naming the failed cell\n - later cells are skipped after the first error, but earlier cell state persists in the underlying runtime\n8. On success, the tool joins all cell outputs, synthesizes `(no text output)` or `(no output)` when needed, and attaches truncation metadata from `summarizeFinal()`.\n9. The renderer uses `details.cells`, `details.jsonOutputs`, and `details.statusEvents` to build notebook-style output. `mergeCallAndResult = true` and `inline = true`, so call and result render together in the transcript.\n\n## Modes / Variants\n\n### Backend selection\n\nBackend choice is **explicit per cell** — there is no auto-detection.\n\n- `language: \"py\"` → Python (IPython/Jupyter) backend\n- `language: \"js\"` → JavaScript VM backend\n\nIf the requested backend is disabled or unavailable, the tool throws `ToolError` for that cell. 
The caller chooses; the tool does not silently substitute.\n\n### JavaScript runtime\n\nImplemented in `packages/coding-agent/src/eval/js/context-manager.ts` and `packages/coding-agent/src/eval/js/prelude.txt`.\n\n- Persistent `vm.Context` instances keyed by `js:${sessionId}` in `vmContexts`\n- `reset: true` calls `resetVmContext(sessionKey)` before the cell executes\n- Top-level `await` and bare `return` are supported by wrapping code in an async IIFE when `wrapCode()` sees `await` or `return`\n- Top-level static `import ... from ...` and dynamic `import(...)` calls are routed through `rewriteImports()`, which sends them via `__gjc_import__` so the specifier resolves against the session cwd\n- Module cache is busted for **local** imports between cells so edits to source files are picked up without restarting the runtime. `__gjc_import__` deletes `require.cache[absPath]` before re-importing whenever the original specifier is a filesystem path: relative (`./x`, `../x`, `.`, `..`), POSIX-absolute (`/...`), home-prefixed (`~/...`), or Windows drive-letter (`C:\\...` / `C:/...`). Bare specifiers (`react`, `lodash/x`) and URL/scheme specifiers (`node:fs`, `file://...`, `https://...`) are left in cache so package identity stays stable across cells. 
The cache-bust only fires when the resolved target is an absolute path — unresolved bare-package fallbacks (`resolveImportSpecifier()` returning the original specifier) skip it.\n- The prelude installs globals:\n - `display`, `print`\n - `read`, `write`, `append`, `sort`, `uniq`, `counter`, `diff`, `tree`, `env`, `output`\n - `tool.<name>(args)` proxy for arbitrary session tool calls\n- JS helpers are async because they cross the VM/tool boundary\n- `display(value)` behavior:\n - plain objects/arrays become JSON outputs\n - `{ type: \"image\", data, mimeType }` becomes an image output\n - scalars become text\n- The VM exposes a restricted `process` subset plus `Buffer`, `fetch`, `Blob`, `File`, `Headers`, `Request`, `Response`, `fs`, `require`, and browser-style globals\n- Per-session VM runs are serialized with `runQueued()`\n\n### Python runtime\n\nImplemented in `packages/coding-agent/src/eval/py/executor.ts`, `packages/coding-agent/src/eval/py/kernel.ts`, and `packages/coding-agent/src/eval/py/prelude.py`. 
See `docs/python-repl.md` for gateway and kernel details.\n\n- Default mode is retained `session` kernels keyed by `python:${sessionId}`\n- Optional `python.kernelMode = \"per-call\"` creates a fresh kernel for each cell and shuts it down afterward\n- `reset: true` disposes the retained kernel for that session before the cell runs; later Python cells in the same tool call reuse the fresh kernel\n- Startup path:\n - availability check\n - create/connect kernel\n - initialize cwd / env / `sys.path`\n - execute `PYTHON_PRELUDE`\n- Python cells run inside IPython/Jupyter, so top-level `await` works; the prompt warns not to use `asyncio.run(...)`\n- The Python prelude defines synchronous helpers with the same surface as JS (except `tool.<name>` exists only in JS)\n- `display(value)` wraps dict/list/tuple values in `IPython.display.JSON`; rich display MIME bundles are preserved\n- Kernel `display_data` / `execute_result` messages map to:\n - `application/x-gjc-status` → status event\n - `image/png` → image output\n - `application/json` → JSON output\n - `text/markdown` → markdown output\n - `text/plain` → text output\n - `text/html` → HTML converted to markdown with `htmlToBasicMarkdown()`\n- Interactive stdin is rejected: `input_request` sends an empty reply, marks `stdinRequested`, and the executor returns exit code `1`\n\n### Multi-language call behavior\n\nA single tool call can mix Python and JS cells. 
Persistence is per language runtime:\n\n- `reset: true` on a Python cell does not touch JS state\n- `reset: true` on a JS cell does not touch Python state\n- each backend keeps its own retained session keyed from the same session-derived ID\n\n## Side Effects\n\n- Filesystem\n - JS/Python prelude helpers can read, write, append, diff, and traverse files under the session cwd or absolute paths.\n - Output may spill to an artifact file via `OutputSink`.\n- Network\n - Python backend speaks NDJSON to a local `python3` subprocess over stdin/stdout (no network).\n - JS runtime exposes `fetch` and `tool.<name>()`; those tools may perform additional network I/O.\n- Subprocesses / native bindings\n - Python availability check runs `<python> -c ...`.\n - Python backend spawns one `python -u runner.py` subprocess per kernel; cancellation sends `SIGINT`. Details in `docs/python-repl.md`.\n- Session state\n - `session.assertEvalExecutionAllowed?.()` can block execution.\n - `session.trackEvalExecution?.(...)` can register cancellable eval work.\n - `session.getSessionFile?.()` and `session.getEvalKernelOwnerId?.()` influence kernel reuse and artifact lookup.\n - JS VM contexts persist in `vmContexts` across eval calls until reset/disposal.\n - Python retained kernels persist in `kernelSessions` until reset, eviction, idle cleanup, or owner cleanup.\n- User-visible prompts / interactive UI\n - none; stdin requests are rejected programmatically\n- Background work / cancellation\n - Python retained kernels have heartbeat and idle cleanup timers.\n - Cancellation interrupts a running Python kernel and aborts JS promise waits.\n\n## Limits & Caps\n\n- Per-cell timeout default: 30s (applied when `timeout` is omitted in `EvalTool.execute()`; clamped through `TOOL_TIMEOUTS.eval.default` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Schema-level `timeout` range: integer `1..600` seconds (enforced by Zod on the cell schema)\n- Timeout clamp at runtime: 1s minimum, 600s 
maximum (`TOOL_TIMEOUTS.eval` in `packages/coding-agent/src/tools/tool-timeouts.ts`)\n- Transcript code/output preview: 10 lines by default (`EVAL_DEFAULT_PREVIEW_LINES` in `packages/coding-agent/src/tools/eval.ts`)\n- Output truncation window: 50KB default (`DEFAULT_MAX_BYTES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Output line cap inside truncation helpers: 3000 lines (`DEFAULT_MAX_LINES` in `packages/coding-agent/src/session/streaming-output.ts`)\n- Streaming tail buffer for live updates: `DEFAULT_MAX_BYTES * 2` = 100KB (`packages/coding-agent/src/tools/eval.ts`)\n- Python retained kernel idle timeout: 5 minutes (`IDLE_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cap: 4 sessions (`MAX_KERNEL_SESSIONS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python retained kernel cleanup sweep: every 30s (`CLEANUP_INTERVAL_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python owner-cleanup shutdown wait: 2000ms (`OWNER_CLEANUP_KERNEL_SHUTDOWN_TIMEOUT_MS` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python heartbeat interval: 5s (`ensureKernelHeartbeat()` in `packages/coding-agent/src/eval/py/executor.ts`)\n- Python external gateway availability check timeout: 5s (`AbortSignal.timeout(5000)` in `packages/coding-agent/src/eval/py/kernel.ts`)\n- Python auto-restart budget: one restart per retained session before hard failure (`restartCount > 1` in `packages/coding-agent/src/eval/py/executor.ts`)\n\n## Errors\n\n- Zod validation rejects malformed `cells` arrays before `execute()` runs (missing `language`/`code`, out-of-range `timeout`, empty `cells`).\n- Missing session without proxy executor throws `ToolError(\"Eval tool requires a session when not using proxy executor\")`.\n- Disabled/unavailable backends throw `ToolError` from `resolveBackend()`:\n - `eval.py = false` and a `py` cell is requested\n - `eval.js = false` and a `js` cell is requested\n - Python kernel unavailable 
and a `py` cell is requested\n- JS runtime exceptions are converted into text output plus `exitCode: 1`; cancellations return `cancelled: true` and may append `Command timed out`.\n- Python execution errors from the kernel become text output and `exitCode: 1`; later cells are skipped.\n- Python stdin requests are treated as errors with the message `Kernel requested stdin; interactive input is not supported.`\n- Cancellation is returned, not thrown, once backend execution has started. The tool formats it as a cell failure and sets `details.isError = true`.\n- If output truncates, the tool still succeeds; truncation is surfaced through `details.meta` and artifact-backed full output when available.\n\n## Notes\n\n- Backend selection is now strictly explicit per cell: `language` must be `\"py\"` or `\"js\"`. The previous `*** Cell` header parser, the `eval.lark` constrained grammar, and the sniffer-based fallback have all been removed.\n- `EvalTool.customFormat` no longer exists. Tool calls flow through the standard JSON schema; there is no Lark-constrained sampling path.\n- `tool.<name>()` exists only in JS. Python prelude helpers do not call back into the full tool registry.\n- JS helper paths reject protocol URIs (`://`) in `resolvePath()`; the JS prelude is filesystem-only unless the code calls `tool.read(...)` or another tool explicitly.\n- Python helper `output(...)` depends on `GJC_SESSION_FILE`; it fails outside a session-backed run.\n- `display()` can produce text and structured outputs from the same value; the renderer prefers markdown over `text/plain` when both exist.\n- JS static imports are rewritten only at top level. 
Nested imports stay invalid and surface normal JS syntax/runtime errors.\n- `EvalTool` is `concurrency = \"exclusive\"`, so eval calls do not overlap within a session.\n- The tool description shown to the model is templated by backend availability (`getEvalToolDescription()`); if Python is unavailable, the prompt omits Python-specific instructions.\n",
@@ -63,6 +64,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
63
64
  "tools/irc.md": "# irc\n\n> Send short prose messages to other live agents in the current process.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/irc.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/irc.md`\n- Key collaborators:\n - `packages/coding-agent/src/registry/agent-registry.ts` — process-global live agent directory.\n - `packages/coding-agent/src/session/agent-session.ts` — side-channel reply generation and history injection.\n - `packages/coding-agent/src/prompts/system/irc-incoming.md` — no-tools auto-reply prompt.\n - `packages/coding-agent/src/tools/index.ts` — tool availability gating.\n - `packages/coding-agent/src/config/settings-schema.ts` — `irc.enabled` default.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — renders IRC events into chat UI.\n - `packages/coding-agent/src/modes/utils/ui-helpers.ts` — formats `[IRC]` transcript lines.\n - `packages/coding-agent/src/task/executor.ts` — carries `irc.enabled` into subagents.\n\n## Inputs\n\n### `op: \"list\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"list\"` | Yes | Lists peers visible to the caller. |\n\n### `op: \"send\"`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"send\"` | Yes | Sends one message to one peer or to `\"all\"`. |\n| `to` | `string` | Yes | Peer id such as `0-Main`, or `\"all\"` for broadcast. Whitespace is trimmed. |\n| `message` | `string` | Yes | Message body. Whitespace is trimmed; empty-after-trim is rejected. |\n| `awaitReply` | `boolean` | No | Wait for prose replies. Defaults to `true` for direct messages and `false` for `to: \"all\"`. 
|\n\n## Outputs\n- Single-shot `AgentToolResult`; no streaming updates.\n- `content` is one text block.\n - `list` returns either `No other live agents.` or a bullet list headed by `<n> peer(s):`.\n - `send` returns delivery summary text, then optional `## Replies`, `## Failed`, and `Unknown / unavailable peers:` sections.\n- `details` is structured metadata:\n - `list`: `{ op, from, peers, channels }`\n - `send`: `{ op, from, to, delivered, replies?, failed?, notFound? }`\n- The tool does not return raw IRC frames, message ids, or a transcript object.\n\n## Flow\n1. `IrcTool.createIf` only constructs the tool when `irc.enabled` is on and the session has both an `AgentRegistry` and `getAgentId` (`packages/coding-agent/src/tools/irc.ts`).\n2. Tool discovery adds another gate in `packages/coding-agent/src/tools/index.ts`: if the caller is `0-Main` and `async.enabled` is off, `irc` is hidden because the main agent cannot talk to concurrent peers in sync mode.\n3. `execute` resolves the process-global registry and sender id. Missing either returns a text error result instead of throwing.\n4. `op: \"list\"` calls `registry.listVisibleTo(senderId)`, which exposes every other agent in flat namespace whose status is `running` or `idle` (`packages/coding-agent/src/registry/agent-registry.ts`).\n5. `list` formats human-readable lines and returns `channels` as `['all', ...peerIds]`. These are logical targets only; there is no channel join state.\n6. `op: \"send\"` trims `to` and `message`; missing values produce text errors.\n7. `send` resolves targets:\n - `to === \"all\"`: all visible peers.\n - otherwise: one exact registry id, excluding self and excluding peers not in `running`/`idle`.\n8. `send` chooses `awaitReply = params.awaitReply ?? !isBroadcast`.\n9. Each target is dispatched in parallel via `target.session.respondAsBackground(...)`. One slow or failing peer does not block dispatch to the others.\n10. 
`respondAsBackground` emits an `irc_message` session event, forwards a display-only relay to the main session UI, and either:\n - queues just the incoming message for later history injection when `awaitReply === false`, or\n - renders `packages/coding-agent/src/prompts/system/irc-incoming.md`, runs `runEphemeralTurn` with `toolChoice: \"none\"`, emits an auto-reply event, then queues both incoming and reply messages for history injection.\n11. Deferred injection waits until the recipient is no longer streaming; `#flushPendingBackgroundExchanges` appends the custom messages through normal `message_start`/`message_end` external events so persistence and listeners see them.\n12. `send` aggregates `delivered`, `replies`, `failed`, and `notFound`, then returns one text summary plus matching `details`.\n\n## Modes / Variants\n- `list`: enumerate visible peers and logical channels.\n- `send` direct message: one exact peer id, default synchronous auto-reply.\n- `send` broadcast: `to: \"all\"`, default fire-and-forget (`awaitReply: false`) to every visible peer.\n- `send` with `awaitReply: false`: recipient records the incoming message but does not generate a reply.\n- `send` with `awaitReply: true`: recipient performs a no-tools ephemeral LLM turn and returns prose.\n\n## Side Effects\n- Session state\n - Reads from the process-global `AgentRegistry`.\n - Emits `irc_message` session events on recipient sessions.\n - Queues IRC custom messages into recipient persisted history after the current stream finishes.\n - For non-main recipients, forwards display-only relay observations into the main session UI; these relays are not persisted to the main agent history.\n - Subagents inherit `irc.enabled` from task executor settings.\n- User-visible prompts / interactive UI\n - IRC events render as `[IRC]` transcript lines in the TUI.\n - Auto-replies are generated from `packages/coding-agent/src/prompts/system/irc-incoming.md` and explicitly forbid tool use.\n- Background work / 
cancellation\n - `send` starts one background `respondAsBackground` call per target.\n - The caller's `AbortSignal` is forwarded into each background reply turn.\n- Network\n - No IRC server connection.\n - When `awaitReply: true`, the recipient may make model-provider API calls through `runEphemeralTurn`.\n- Filesystem\n - No direct filesystem writes in the tool itself.\n\n## Limits & Caps\n- Availability gates:\n - `irc.enabled` defaults to `true` in `packages/coding-agent/src/config/settings-schema.ts`.\n - Main agent tool discovery suppresses `irc` when `async.enabled` is off (`packages/coding-agent/src/tools/index.ts`).\n- Visibility scope: only peers in status `running` or `idle` are addressable via `listVisibleTo`.\n- Reply execution:\n - No tools are available in auto-reply turns (`toolChoice: \"none\"` in `runEphemeralTurn`).\n - No internal timeout, retry, backoff, rate limit, or reply length cap is defined in `irc.ts`; behavior relies on the underlying model stream and any upstream API limits.\n- Flush scheduling: deferred history injection polls every `50` ms while the recipient is still streaming (`#scheduleBackgroundExchangeFlush` in `packages/coding-agent/src/session/agent-session.ts`).\n\n## Errors\n- The tool returns text errors, not thrown exceptions, for:\n - missing registry: `IRC is unavailable in this session.`\n - missing sender id: `IRC is unavailable: caller has no agent id.`\n - missing `to`: `` `to` is required for op=\"send\". ``\n - missing `message`: `` `message` is required for op=\"send\". 
``\n - unknown op: `Unknown irc op.`\n- Direct targets that are unknown, self-addressed, or neither `running` nor `idle` are reported under `details.notFound` and in the text footer `Unknown / unavailable peers:`.\n- If a target has no attached session, it is treated as not found.\n- Exceptions thrown by `respondAsBackground` or `runEphemeralTurn` are caught per-target and surfaced under `details.failed` as `{ id, error }`; other recipients still complete.\n- If no target succeeds, `send` still returns normally with `No recipients received the message.` and optional `failed`/`notFound` metadata.\n\n## Notes\n- This is IRC-like naming only. There are no servers, sockets, nick registration, auth handshakes, channels beyond `all`, or commands such as join/part/topic.\n- Addressing is by exact agent id from the registry; there is no fuzzy lookup or aliasing.\n- `channels` in `list` is synthetic output: `all` plus visible peer ids. Nothing is persisted across calls as channel membership.\n- Persistence is per recipient history, not per sender history. The sender gets the tool result; the recipient later sees injected custom messages on its next turn.\n- The main UI may show IRC relays for conversations it was not part of, but those relay records are explicitly display-only.\n- Because reply generation snapshots in-flight assistant text, a recipient can answer based on partially streamed context.\n- Direct self-messaging is rejected by resolving the target as unavailable.",
64
65
  "tools/job.md": "# job\n\n> Wait for or cancel background jobs managed by the session async runtime.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/job.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/job.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — job registry, cancellation, delivery suppression.\n - `packages/coding-agent/src/async/support.ts` — feature gating for background jobs.\n - `packages/coding-agent/src/tools/bash.ts` — explicit async bash and auto-backgrounded bash jobs.\n - `packages/coding-agent/src/task/index.ts` — async task-job scheduling.\n - `packages/coding-agent/src/sdk.ts` — automatic follow-up delivery for unsuppressed completions.\n - `packages/coding-agent/src/config/settings-schema.ts` — `async.pollWaitDuration` options.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `poll` | `string[]` | No | Job ids to watch. Cannot be combined with `list`. If omitted (and `cancel` is also omitted), the tool watches all running jobs. If provided, missing ids are silently filtered out before waiting. |\n| `cancel` | `string[]` | No | Job ids to cancel before any polling. Missing ids are reported as `not_found`; non-running ids as `already_completed`. |\n| `list` | `boolean` | No | Return an immediate snapshot of every job spawned by the calling agent (running + completed within retention) without waiting. Read-only — cannot be combined with `poll` or `cancel`. 
|\n\n## Outputs\nThe tool returns one text block plus `details`.\n\n- `content[0].text`: markdown-like plain text sections assembled by `#buildResult(...)`:\n - `## Cancelled (N)` for cancel outcomes.\n - `## Completed (N)` for non-running jobs, including stored `resultText` and `errorText`.\n - `## Still Running (N)` for jobs still in `running`.\n- `details.jobs`: array of snapshots:\n - `id: string`\n - `type: \"bash\" | \"task\"`\n - `status: \"running\" | \"completed\" | \"failed\" | \"cancelled\"`\n - `label: string`\n - `durationMs: number`\n - optional `resultText`, `errorText`\n- `details.cancelled` appears only when `cancel` was passed; each item is `{ id, status }` where status is `\"cancelled\" | \"not_found\" | \"already_completed\"`.\n\nStreaming behavior:\n- During a polling wait, `execute(...)` emits `onUpdate(...)` every 500 ms with an empty text block and fresh `details.jobs` snapshots.\n- Final return is single-shot after a completion, timeout, abort, or immediate fast path.\n\nRead-only snapshot path:\n- Calling `job` with `list: true` returns a markdown summary of every job spawned by the calling agent (running + completed within retention) without waiting.\n\n## Flow\n1. `JobTool.createIf(...)` in `packages/coding-agent/src/tools/job.ts` only exposes the tool when `isBackgroundJobSupportEnabled(...)` returns true for either `async.enabled` or `bash.autoBackground.enabled`.\n2. `execute(...)` fetches `session.asyncJobManager`. If absent, it returns `Async execution is disabled; no background jobs are available.`\n3. `cancel` ids are processed first:\n - `manager.getJob(id)` missing → `not_found`.\n - existing job with `status !== \"running\"` → `already_completed`.\n - running job → `manager.cancel(id)`, which sets `job.status = \"cancelled\"`, aborts the controller, and schedules eviction.\n4. 
Polling mode is chosen with `const shouldPoll = requestedPollIds !== undefined || cancelIds.length === 0`:\n - only `cancel` present → return immediately, no wait.\n - explicit `poll`, or no args at all → proceed to watch jobs.\n5. Watch set resolution:\n - explicit `poll` → map ids through `manager.getJob(...)` and drop missing ones.\n - no `poll` and no `cancel` → `manager.getRunningJobs()`.\n6. Empty watch set returns immediately:\n - if cancellations happened, return snapshots for the cancelled ids that still exist.\n - else return either `No matching jobs found for IDs: ...` or `No running background jobs to wait for.`\n7. If every watched job is already non-running, `#buildResult(...)` returns immediately without waiting.\n8. Otherwise the tool waits on `Promise.race(...)` across:\n - every watched running job's `job.promise`,\n - a timeout promise for `async.pollWaitDuration`,\n - the tool-call abort signal when present.\n9. Before waiting, it calls `manager.watchJobs(watchedJobIds)`. This suppresses automatic completion delivery for those ids while they are being watched.\n10. If `onUpdate` exists, a 500 ms interval sends progress snapshots from `#snapshotJobs(...)`; one snapshot is emitted immediately before entering the race.\n11. In `finally`, the tool always calls `manager.unwatchJobs(...)`, clears the timeout, and stops the progress interval.\n12. `#buildResult(...)` deduplicates jobs, snapshots current manager state, then calls `manager.acknowledgeDeliveries(...)` for every non-running job in the result. That suppresses later automatic follow-up delivery for the same completions and removes queued deliveries for those ids.\n13. The final text groups jobs by non-running vs still-running state. 
A timeout is not an error path; it simply returns the current snapshot.\n\n## Modes / Variants\n- Poll all running jobs: call with neither `poll` nor `cancel`.\n- Poll explicit ids: call with `poll` only.\n- Cancel only: call with `cancel` only; cancellations happen and the tool returns immediately.\n- Cancel then poll: call with both. Cancellations are applied first, then the tool watches the remaining resolved `poll` ids.\n- Read-only inspection: call with `list: true` for the same snapshot data without waiting on completion.\n\nSpawn paths that produce jobs:\n- `packages/coding-agent/src/tools/bash.ts`\n - `async: true` always registers a `type: \"bash\"` job with `AsyncJobManager.register(...)` and returns a start message.\n - auto-background mode (`bash.autoBackground.enabled`) starts the same managed job path for non-PTY commands, waits up to `min(bash.autoBackground.thresholdMs, timeoutMs - 1000)`, and if the command is still running returns a background-job start result instead of inline command output.\n- `packages/coding-agent/src/task/index.ts`\n - when `async.enabled` is on, the chosen agent is not blocking, and `tasks.length > 0`, each task item is registered as a `type: \"task\"` job.\n\nLifecycle and exact state names:\n- Conceptual scheduling path: `pending` (only task-progress bookkeeping before work starts) → `running` → `completed` / `failed`; cancellation changes a running async job to `cancelled`.\n- Exact `AsyncJob.status` values in `packages/coding-agent/src/async/job-manager.ts`: `\"running\" | \"completed\" | \"failed\" | \"cancelled\"`.\n- Exact per-task progress values in `packages/coding-agent/src/task/types.ts`: `\"pending\" | \"running\" | \"completed\" | \"failed\" | \"aborted\"`.\n\n## Side Effects\n- Filesystem\n - None in `job.ts` itself.\n - Jobs being observed may already have written artifacts/results through their own tool runtimes.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Reads and mutates 
`session.asyncJobManager` state.\n - `watchJobs(...)` / `unwatchJobs(...)` toggle delivery suppression for the watched ids.\n - `acknowledgeDeliveries(...)` marks completed ids as suppressed and removes queued deliveries for them.\n - `cancel(...)` aborts running jobs through each job's `AbortController`.\n- User-visible prompts / interactive UI\n - Polling emits periodic `onUpdate` snapshots every 500 ms.\n - Automatic job completion follow-ups are generated by `packages/coding-agent/src/sdk.ts` only for unsuppressed deliveries.\n- Background work / cancellation\n - Waiting uses a timeout plus optional tool-call abort signal.\n - Cancelling a job does not synchronously await teardown; it flips state, aborts, and returns control to the manager/job promise.\n\n## Limits & Caps\n- Poll wait duration comes from `async.pollWaitDuration` in `packages/coding-agent/src/config/settings-schema.ts`:\n - allowed values: `5s`, `10s`, `30s`, `1m`, `5m`\n - default: `30s`\n- Progress update cadence while polling: `PROGRESS_INTERVAL_MS = 500` in `packages/coding-agent/src/tools/job.ts`.\n- Async job retention default: `DEFAULT_RETENTION_MS = 5 * 60 * 1000` in `packages/coding-agent/src/async/job-manager.ts`.\n- Manager fallback max-running limit: `DEFAULT_MAX_RUNNING_JOBS = 15` in `packages/coding-agent/src/async/job-manager.ts`.\n- Session wiring clamps `async.maxJobs` to `1..100` before constructing the manager in `packages/coding-agent/src/sdk.ts`; settings default is `100` in `packages/coding-agent/src/config/settings-schema.ts`.\n- Async completion delivery retry backoff in `packages/coding-agent/src/async/job-manager.ts`:\n - base `500` ms\n - max `30_000` ms\n - jitter `< 200` ms\n - exponent capped at 8 doublings\n\n## Errors\n- Tool-disabled path is returned as normal text, not thrown: `Async execution is disabled; no background jobs are available.`\n- Polling a nonexistent id is not an exception:\n - with `poll` only, missing ids are dropped; if none remain the tool 
returns `No matching jobs found for IDs: ...`.\n - with `cancel`, each missing id is reported as `not_found` in `details.cancelled` and text.\n- Cancelling a non-running job is not an exception; it reports `already_completed` even if the actual status is `completed`, `failed`, or `cancelled`.\n- Tool-call abort during polling stops waiting and returns a final snapshot through `#buildResult(...)`; it does not cancel watched jobs.\n- Failures inside the underlying async work are stored on the job (`status: \"failed\"`, `errorText`) and reported in normal tool output, not rethrown by `job`.\n- Calling `list: true` against an empty manager returns a normal empty-list result rather than throwing; missing ids passed to `poll` are silently filtered.\n\n## Notes\n- `job` waits for the first watched running job to settle, not for all watched jobs. If others remain `running`, they are reported under `## Still Running`; the caller must invoke `job` again to continue waiting.\n- Delivery suppression is the key difference between snapshot and automatic delivery:\n - snapshots (`job` calls with `poll` or `list: true`) read current manager state;\n - follow-up delivery comes from `AsyncJobManager.#enqueueDelivery(...)` and `sdk.ts` `onJobComplete`;\n - watched or acknowledged ids are suppressed via `isDeliverySuppressed(...)`.\n- `manager.cancel(id)` sets `status = \"cancelled\"` before the underlying promise settles. The job function may later populate `resultText` or `errorText`; `job-manager.ts` preserves that text but does not transition the status away from `cancelled`.\n- Retention eviction removes the job record, suppression flags, and watch flag together. After eviction, both `job` calls and `list: true` snapshots behave as if the id never existed.\n",
65
66
  "tools/lsp.md": "# lsp\n\n> Query language servers for diagnostics, navigation, symbols, renames, code actions, capabilities, and raw requests.\n\n## Source\n- Entry: `packages/coding-agent/src/lsp/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/lsp.md`\n- Key collaborators:\n - `packages/coding-agent/src/lsp/client.ts` — client process lifecycle and JSON-RPC\n - `packages/coding-agent/src/lsp/config.ts` — config loading, auto-detect, server selection\n - `packages/coding-agent/src/lsp/lspmux.ts` — optional `lspmux` command wrapping\n - `packages/coding-agent/src/lsp/edits.ts` — apply `WorkspaceEdit` and text edits\n - `packages/coding-agent/src/lsp/utils.ts` — URI conversion, symbol resolution, formatting, glob expansion\n - `packages/coding-agent/src/lsp/types.ts` — tool schema and protocol types\n - `packages/coding-agent/src/lsp/clients/index.ts` — custom linter client cache/factory\n - `packages/coding-agent/src/lsp/clients/lsp-linter-client.ts` — LSP-backed linter adapter\n - `packages/coding-agent/src/lsp/clients/biome-client.ts` — Biome CLI diagnostics/formatting adapter\n - `packages/coding-agent/src/lsp/clients/swiftlint-client.ts` — SwiftLint CLI diagnostics adapter\n - `packages/coding-agent/src/tools/index.ts` — tool registration and `lsp.enabled` gating\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout defaults and clamping\n - `packages/coding-agent/src/lsp/defaults.json` — built-in server definitions for auto-detect\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `action` | string enum | Yes | One of `diagnostics`, `definition`, `references`, `hover`, `symbols`, `rename`, `rename_file`, `code_actions`, `type_definition`, `implementation`, `status`, `reload`, `capabilities`, `request`. |\n| `file` | string | No | File path; for `diagnostics` also a glob; for workspace forms use `\"*\"`; for `rename_file` this is the source path. 
|\n| `line` | number | No | 1-indexed line number for position-based actions. Defaults to `1` on the single-file action path. |\n| `symbol` | string | No | Substring used to resolve the column on `line`. Supports `name#N` occurrence selectors; `N` is 1-indexed and defaults to `1`. |\n| `query` | string | No | Workspace symbol query, code-action selector/filter, or LSP method name for `action=request`. |\n| `new_name` | string | No | Required for `rename` and `rename_file`. |\n| `apply` | boolean | No | For `rename`/`rename_file`, apply unless explicitly `false`. For `code_actions`, list unless explicitly `true`. |\n| `timeout` | number | No | Seconds, clamped by `clampTimeout(\"lsp\", ...)` to `5..60`, default `20`. |\n| `payload` | string | No | JSON string for `action=request`; overrides auto-built params. |\n\n## Outputs\n- Single-shot `AgentToolResult`.\n- `content` is always one text block: `[{ type: \"text\", text: string }]`.\n- `details` is `LspToolDetails`: `action`, `success`, optional `serverName`, optional original `request`.\n- No streaming updates.\n- No artifact URIs or background jobs.\n- Many validation failures are returned as ordinary text results with `details.success: false`; aborts throw `ToolAbortError` instead.\n\n## Flow\n1. `packages/coding-agent/src/tools/index.ts` registers `lsp: LspTool.createIf`; session creation also gates it behind `session.enableLsp !== false` and `settings.get(\"lsp.enabled\")`.\n2. `LspTool.execute()` in `packages/coding-agent/src/lsp/index.ts` clamps `timeout` with `clampTimeout(\"lsp\", ...)`, builds an `AbortSignal.timeout(...)`, and combines it with the caller signal.\n3. `getConfig()` loads and caches `LspConfig` per cwd, applies idle-timeout config via `setIdleTimeout()`, and reuses the cached config on later calls.\n4. 
Config loading in `packages/coding-agent/src/lsp/config.ts` merges `defaults.json` with JSON/YAML overrides from project, project config dirs, user config dirs, plugin roots, and home; if there are no overrides it auto-detects servers from root markers plus executable discovery.\n5. Server routing uses `getServersForFile()` / `getServerForFile()` from `config.ts`: extension or basename match, then sort primary servers before linters. `index.ts` further filters custom linter clients out of navigation/refactor paths with `getLspServersForFile()` / `getLspServerForFile()`.\n6. `getOrCreateClient()` in `client.ts` creates one process per `command:cwd`, optionally wraps supported commands with `lspmux`, spawns the server, starts the background message reader, sends `initialize`, stores server capabilities, then sends `initialized`.\n7. The message reader in `client.ts` parses LSP frames, resolves pending requests, caches `publishDiagnostics`, tracks `$/progress` tokens for project-load completion, answers `workspace/configuration`, and applies `workspace/applyEdit` requests through `applyWorkspaceEdit()`.\n8. File-scoped actions call `ensureFileOpen()` before requests. Column resolution uses `resolveSymbolColumn()` from `utils.ts`: read the target file, pick first non-whitespace when `symbol` is omitted, otherwise find the exact or case-insensitive match on the target line and honor `#N` occurrence selectors.\n9. Actions dispatch in `LspTool.execute()` through dedicated branches: workspace-only branches (`status`, some `diagnostics`, workspace `symbols`, workspace `reload`, `capabilities`, `request`) run before the single-file switch; all other single-file actions share one client lookup and `switch(action)`.\n10. Requests go through `sendRequest()` in `client.ts`, which allocates an incrementing JSON-RPC id, installs abort and timeout handling, sends `$/cancelRequest` on abort, and rejects on timeout or process exit.\n11. 
Actions that return edits either preview with `formatWorkspaceEdit()` or apply with `applyWorkspaceEdit()` from `edits.ts`; `rename_file` also performs the filesystem rename and then sends `workspace/didRenameFiles`.\n12. Non-abort failures inside the single-file action block are converted to `LSP error: ...`; many precondition failures return explicit text without throwing.\n\n## Modes / Variants\n### Routing and workspace scope\n- `file: \"*\"` is only special for `diagnostics`, `symbols`, and `reload`.\n- `status` ignores `file`.\n- `capabilities` with omitted `file` or `\"*\"` inspects all non-custom LSP servers; with a concrete file it scopes to matching non-custom servers.\n- `request` with omitted `file` or `\"*\"` chooses the first available non-custom LSP server; with a concrete file it chooses that file's primary non-linter server.\n- `rename_file` sends `workspace/willRenameFiles` and `workspace/didRenameFiles` to every non-custom LSP server from `getLspServers(config)`, not just one file-scoped server.\n- Diagnostics are the only tool action that queries both normal LSP servers and custom linter clients (`BiomeClient`, `SwiftLintClient`, or `LspLinterClient`).\n\n### `diagnostics`\n**Inputs**\n- Required: `file`, unless using workspace mode with `file: \"*\"`.\n- Optional: `timeout`.\n\n**Execution**\n- `file: \"*\"`: `runWorkspaceDiagnostics()` detects project type from root markers and runs one subprocess command: Rust `cargo check --message-format=short`, TypeScript `npx tsc --noEmit`, Go `go build ./...`, Python `pyright`.\n- Concrete file or glob: `resolveDiagnosticTargets()` treats non-globs as one target, otherwise expands a `Bun.Glob` up to `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Per file, every matching server runs: custom clients call `lint(file)`; real LSP servers optionally wait for project load, capture `diagnosticsVersion`, `refreshFile()`, then `waitForDiagnostics()` for fresh `publishDiagnostics`.\n- Results are deduplicated by range+message 
and severity-sorted.\n\n**Output text**\n- Single target with no issues: `OK`.\n- Single target with issues: `<summary>:\\n<grouped diagnostics>`.\n- Batch/glob target: one section per file, plus an initial truncation warning when the glob exceeds the file cap.\n- Workspace mode: `Workspace diagnostics (<detected description>):\\n<command output>`.\n\n### `definition`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/definition` with `{ textDocument, position }`.\n- Accepts `Location`, `Location[]`, `LocationLink`, or `LocationLink[]`; `normalizeLocationResult()` converts `LocationLink` to `targetSelectionRange ?? targetRange`.\n- Waits for project load before the request.\n\n**Output text**\n- `No definition found` or `Found N definition(s):` followed by `file:line:col` and one context line above/below each location.\n\n### `type_definition`\nSame as `definition`, but sends `textDocument/typeDefinition` and reports `type definition(s)`.\n\n### `implementation`\nSame as `definition`, but sends `textDocument/implementation` and reports `implementation(s)`.\n\n### `references`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/references` with `includeDeclaration: true`.\n- For project-aware servers, retries up to `REFERENCES_RETRY_COUNT` times when the only hit is the queried declaration; between retries it waits for project load and sleeps `REFERENCES_RETRY_DELAY_MS`.\n- First `REFERENCE_CONTEXT_LIMIT` references include surrounding context; the rest are location-only.\n\n**Output text**\n- `No references found` or `Found N reference(s):` with contextual entries first, then `... 
M additional reference(s) shown without context` when truncated.\n\n### `hover`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `timeout`.\n\n**Execution**\n- Sends `textDocument/hover`.\n- `extractHoverText()` flattens strings, markup content, marked-string objects, or arrays into plain text.\n\n**Output text**\n- `No hover information` or the extracted hover text.\n\n### `symbols`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted file on the early workspace branch, plus required `query`.\n- Document mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode sends `workspace/symbol` to every non-custom LSP server, post-filters matches with `filterWorkspaceSymbols()`, deduplicates with `dedupeWorkspaceSymbols()`, then truncates to `WORKSPACE_SYMBOL_LIMIT`.\n- Document mode sends `textDocument/documentSymbol` to the primary server. If the first item has `selectionRange`, it formats hierarchical `DocumentSymbol`s; otherwise it formats flat `SymbolInformation`s.\n\n**Output text**\n- Workspace mode: `Found N symbol(s) matching \"query\":` plus formatted `name @ file:line:col`, with an omission line when over the limit.\n- Document mode: `Symbols in <file>:` plus hierarchical or flat symbol lines.\n\n### `rename`\n**Inputs**\n- Required: `file`, `new_name`.\n- Optional: `line`, `symbol`, `apply`, `timeout`.\n\n**Execution**\n- Waits for project load, sends `textDocument/rename`, receives a `WorkspaceEdit`.\n- `apply !== false` applies edits immediately with `applyWorkspaceEdit()`.\n- `apply === false` renders a preview with `formatWorkspaceEdit()`.\n\n**Output text**\n- `Rename returned no edits`, `Applied rename:` plus applied change lines, or `Rename preview:` plus summarized edits.\n\n### `rename_file`\n**Inputs**\n- Required: `file` source path, `new_name` destination path.\n- Optional: `apply`, `timeout`.\n\n**Execution**\n- Resolves absolute source and destination, rejects identical paths, missing source, existing 
destination, empty rename set, or directories with more than `MAX_RENAME_PAIRS` files.\n- `enumerateRenamePairs()` returns one `{oldUri,newUri}` pair for a file or walks every regular file in a directory tree.\n- Sends `workspace/willRenameFiles` with `{ files: pairs }` to every non-custom LSP server; collects returned `WorkspaceEdit`s and server notes.\n- Preview mode (`apply === false`) only formats those edits.\n- Apply mode runs each returned `WorkspaceEdit`, renames the source path on disk, sends `textDocument/didClose` for every renamed open file, deletes those `openFiles` entries, then sends `workspace/didRenameFiles`.\n\n**Output text**\n- Preview: `Rename preview: <file-count label> → <dest>` plus per-server edit summaries and optional server notes.\n- Apply: `Renamed <file-count label> → <dest>` plus applied edit summaries, filesystem rename line, and optional server notes.\n\n### `code_actions`\n**Inputs**\n- Required: `file`.\n- Optional: `line`, `symbol`, `query`, `apply`, `timeout`.\n\n**Execution**\n- Reads cached diagnostics for the open URI from `client.diagnostics` and sends `textDocument/codeAction` for a zero-width range at the resolved position.\n- When `apply !== true`, `query` is passed as `context.only: [query]`; this is a server-side kind filter.\n- When `apply === true`, `query` becomes a required client-side selector: either a zero-based numeric index or a case-insensitive substring of the action title.\n- Applying a `CodeAction` uses `applyCodeAction()`: optionally `codeAction/resolve`, then `applyWorkspaceEdit(edit)`, then optional `workspace/executeCommand`.\n- Applying a bare `Command` only runs `workspace/executeCommand`.\n\n**Output text**\n- List mode: `N code action(s):` plus `index: [kind] title` lines.\n- Apply mode success: `Applied \"title\":` plus `Workspace edit:` and/or `Executed command(s):` sections.\n- Apply mode miss: `No code action matches \"query\". 
Available actions:`.\n- Apply mode with no edit/command: `Action \"title\" has no workspace edit or command to apply`.\n\n### `status`\n**Inputs**\n- None.\n\n**Execution**\n- Reads configured servers from cached `LspConfig`, not `getActiveClients()`.\n- Calls `detectLspmux()` and appends status text when `lspmux` is installed.\n\n**Output text**\n- `Active language servers: ...` or `No language servers configured for this project`, optionally followed by `lspmux: active (multiplexing enabled)` or `lspmux: installed but server not running`.\n\n### `reload`\n**Inputs**\n- Workspace mode: `file: \"*\"` or omitted `file`.\n- Single-file mode: required `file`.\n- Optional: `timeout`.\n\n**Execution**\n- Workspace mode reloads every non-custom LSP server.\n- Single-file mode reloads the primary server for that file.\n- `reloadServer()` tries `rust-analyzer/reloadWorkspace`, then `workspace/didChangeConfiguration` with `{ settings: {} }`; if neither works it kills the process so the next request cold-starts a new client.\n\n**Output text**\n- One line per server: `Reloaded <server>`, `Restarted <server>`, or `Failed to reload <server>: ...`.\n\n### `capabilities`\n**Inputs**\n- Optional: `file`, `timeout`.\n\n**Execution**\n- With a concrete `file`, inspects matching non-custom servers for that file.\n- With omitted `file` or `\"*\"`, inspects every non-custom configured server.\n- Starts servers as needed and dumps `client.serverCapabilities ?? {}` as pretty JSON.\n\n**Output text**\n- Per server: `<server>:` followed by indented `capabilities: { ... }`, or `<server>: failed to start (...)`.\n\n### `request`\n**Inputs**\n- Required: `query` method name.\n- Optional: `file`, `line`, `symbol`, `payload`, `timeout`.\n\n**Execution**\n- Chooses one non-custom server: file-scoped primary server, otherwise the first configured non-custom server.\n- Param building precedence:\n 1. If `payload` is present, parse JSON and use it verbatim.\n 2. 
Else if `file` is concrete and `line` is present, build `{ textDocument: { uri }, position: { line: line - 1, character } }` using `resolveSymbolColumn()`.\n 3. Else if `file` is concrete, build `{ textDocument: { uri } }`.\n 4. Else use `{}`.\n- Opens the file first when `file` is concrete.\n\n**Output text**\n- Success: `<server> ← <method>:\\n<formatted result>`, where non-string results are `JSON.stringify(..., null, 2)` and nullish values become `null`.\n- Failure: `LSP error from <server> on <method>: ...`.\n\n## Side Effects\n- Filesystem\n - Reads config files, target files, and root markers.\n - `rename` and `code_actions` may edit/create/delete/rename files via `applyWorkspaceEdit()`.\n - `rename_file` always renames the source path on disk in apply mode.\n - Server-initiated `workspace/applyEdit` requests also mutate files through `applyWorkspaceEdit()`.\n- Network\n - None directly; communication is local stdio JSON-RPC to subprocesses.\n- Subprocesses / native bindings\n - Spawns language servers with `ptree.spawn()`.\n - Workspace diagnostics spawns `cargo`, `npx`, `go`, or `pyright`.\n - `BiomeClient` and `SwiftLintClient` spawn CLI tools.\n - Optional `lspmux` detection spawns `lspmux status`; supported servers may be wrapped through `lspmux client`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Caches config per cwd in `configCache`.\n - Caches LSP clients per `command:cwd`, with `pendingRequests`, `diagnostics`, `openFiles`, `serverCapabilities`, and project-load state.\n - Caches custom linter clients by `serverName:cwd`.\n - Updates client `lastActivity`; optional idle-timeout cleanup is driven by `setIdleTimeout()`.\n- Background work / cancellation\n - Every request has an abortable timeout signal.\n - Aborting an in-flight LSP request sends `$/cancelRequest`.\n - Background message readers persist for each live client until process exit/shutdown.\n\n## Limits & Caps\n- Tool timeout clamp: default `20`, min `5`, max 
`60` seconds — `TOOL_TIMEOUTS.lsp` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- LSP request default timeout inside `sendRequest()`: `30_000ms` — `DEFAULT_REQUEST_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Warmup initialize timeout default: `5_000ms` — `WARMUP_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Project-load wait fallback: `15_000ms` — `PROJECT_LOAD_TIMEOUT_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Idle-client sweep interval when enabled: `60_000ms` — `IDLE_CHECK_INTERVAL_MS` in `packages/coding-agent/src/lsp/client.ts`.\n- Diagnostic message output cap: first `50` messages — `DIAGNOSTIC_MESSAGE_LIMIT` in `packages/coding-agent/src/lsp/index.ts`.\n- Single-file diagnostics wait: `3_000ms` — `SINGLE_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Batch/glob diagnostics wait per file: `400ms` — `BATCH_DIAGNOSTICS_WAIT_TIMEOUT_MS`.\n- Glob diagnostic target cap: first `20` matches — `MAX_GLOB_DIAGNOSTIC_TARGETS`.\n- Workspace symbol cap: first `200` entries — `WORKSPACE_SYMBOL_LIMIT`.\n- Reference context cap: first `50` references include source context — `REFERENCE_CONTEXT_LIMIT`.\n- References retry count: `2` retries, `250ms` backoff — `REFERENCES_RETRY_COUNT`, `REFERENCES_RETRY_DELAY_MS`.\n- Directory rename cap: `1_000` file pairs — `MAX_RENAME_PAIRS`.\n- `detectLspmux()` state cache TTL: `5 * 60 * 1000ms`; liveness check timeout: `1_000ms` — `STATE_CACHE_TTL_MS`, `LIVENESS_TIMEOUT_MS` in `packages/coding-agent/src/lsp/lspmux.ts`.\n- Workspace diagnostics output cap: first `50` lines from the subprocess.\n\n## Errors\n- Missing or invalid inputs are usually returned as text with `details.success: false`, not thrown:\n - missing `file`/`query`/`new_name`\n - invalid JSON in `payload`\n - no matching server\n - invalid `rename_file` source/destination conditions\n- `resolveSymbolColumn()` throws explicit errors for missing files, missing symbols, and out-of-bounds `#N` selectors; these surface as `LSP error: ...` 
or request-specific error text.\n- `sendRequest()` rejects on timeout with `LSP request <method> timed out after <ms>ms`.\n- Client process exit rejects all pending requests with an exit-code/stderr error assembled in `getOrCreateClient()`.\n- Single-file action failures inside the main `try` become `LSP error: <message>`.\n- `request` has its own error envelope: `LSP error from <server> on <method>: <message>`.\n- Some server failures are intentionally softened:\n - diagnostics continue when one server fails\n - `rename_file` suppresses `workspace/willRenameFiles` “method not found” errors and records other server errors as notes\n - `code_actions` ignores `codeAction/resolve` failures and applies unresolved actions when possible\n- Aborts are not converted to text: `ToolAbortError` is rethrown.\n\n## Notes\n- `status` reports configured/available servers from `LspConfig`, not currently active client processes from `getActiveClients()`.\n- `getLspServerForFile()` excludes `createClient` adapters and linter-only servers; navigation/refactor actions never target Biome/SwiftLint custom clients.\n- `getServersForFile()` matches both file extensions and exact basenames from `fileTypes`; config can target names like `Dockerfile` if present.\n- `symbol` matching is exact first, then case-insensitive, and falls back to the Nth occurrence on the specified line only; it never scans other lines.\n- `code_actions` uses `query` in two different ways: server-side `context.only` filter in list mode, client-side title/index selector in apply mode.\n- `rename` and `rename_file` default to apply. 
Preview requires `apply: false`.\n- `request` with `file: \"*\"` is treated the same as omitted `file`: it does not build workspace-specific params.\n- `reload` does not recreate a client immediately after killing it; the next request triggers reinitialization.\n- `workspace/applyEdit` can apply edits initiated by the server outside the direct tool action result path.\n- `detectLspmux()` can be disabled with `GJC_DISABLE_LSPMUX=1`; only `rust-analyzer` is in `DEFAULT_SUPPORTED_SERVERS`.\n- Startup LSP warmup (`discoverStartupLspServers(cwd)` in `sdk.ts`) is gated on `enableLsp && options.hasUI && settings.get(\"lsp.diagnosticsOnWrite\")` — print/RPC/ACP/script sessions skip it and let `getOrCreateClient()` cold-start servers on demand. See `docs/sdk.md` § Startup performance.\n- `configCache` is per-process and never auto-invalidated; config changes require a fresh process to be observed by `getConfig()` callers.",
67
+ "tools/monitor.md": "# monitor\n\n> Start a background monitor that streams stdout lines as task notifications. Mirrors Claude Code's `Monitor` tool surface.\n\n## Source\n\n- Entry: `packages/coding-agent/src/tools/monitor.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/monitor.md`\n- Key collaborators:\n - `packages/coding-agent/src/async/job-manager.ts` — stores the captured stream via `appendOutput`/`readOutputSince` and delivers the final background-job result.\n - `packages/coding-agent/src/exec/bash-executor.ts` and `packages/coding-agent/src/session/streaming-output.ts` — provide the unthrottled `onRawChunk` capture hook that feeds the manager.\n - `packages/coding-agent/src/tools/bash.ts` — exposes `BashTool.startMonitorJob(...)`, a Bash-aligned helper that preserves interception, cwd/env expansion, artifacts, timeouts, and raw stream capture.\n - `packages/coding-agent/src/tools/job.ts` — polls/cancels the monitor task by id; there is no sibling `MonitorKill` tool.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `command` | `string` | Yes | Shell command to run as a background monitor. Each sanitized stdout line is delivered as a task-notification. |\n| `kind` | `\"log\" \\| \"poll\" \\| \"watch\" \\| \"other\"` | Yes | Category of monitor. Surfaces in listings. |\n| `description` | `string` | Yes | Short human-readable summary of what is being monitored. |\n| `timeout` | `number` | No | Maximum wall-clock seconds the monitor may run before automatic shutdown. Omit for session lifetime. |\n| `persistent` | `boolean` | No | Keep the monitor running after its first emitted event. Persistent monitors survive until session end, timeout, or explicit cancel via `job`. Defaults to `false`. 
|\n\n## Outputs\n\nThe tool returns one text block plus `details`:\n\n- `content[0].text`: `Monitor started · task <task_id> · persistent: true|false`.\n- `details`: `{ taskId, kind, description, command, persistent }`.\n\nEach newline-terminated stdout line is appended to the manager-owned cursor and sent to the agent as a `<task-notification>` custom message between turns. Use `job` with the returned `taskId` to inspect completion state or terminate the monitor.\n\n## Behavior / Lifecycle\n\n1. `MonitorTool.createIf(session)` gates the tool on `isBackgroundJobSupportEnabled(session.settings)` — identical to `JobTool`'s gate.\n2. `execute(...)` delegates to `BashTool.startMonitorJob(...)`, so Monitor inherits Bash's interception rules, cwd normalization, internal URL expansion, environment construction, artifact allocation, timeout clamping, and unthrottled raw capture.\n3. The helper mirrors every sanitized raw chunk to `manager.appendOutput(jobId, chunk)` and line-buffers the stream so each stdout line dispatches one `<task-notification>` event.\n4. Non-persistent monitors auto-cancel after delivering their first stdout-line notification. 
Persistent monitors terminate when the underlying command exits, `timeout` elapses, the calling agent is torn down, or the user cancels the returned background task via `job`.\n\n## Errors\n\n- `ToolError`: `Async execution is disabled; the monitor tool is unavailable in this session.` — emitted when `AsyncJobManager.instance()` returns `undefined`.\n- Invalid parameter shapes are rejected by zod with the project's standard validation error path.\n\n## Examples\n\nTail an error log and react when lines appear:\n\n```jsonc\n{\n \"command\": \"tail -F /var/log/app.log | grep -i error\",\n \"kind\": \"log\",\n \"description\": \"Tail app.log for errors\"\n}\n```\n\nPoll CI status until the build completes (15-minute timeout):\n\n```jsonc\n{\n \"command\": \"while true; do gh run view --json status,conclusion --jq .status; sleep 30; done\",\n \"kind\": \"poll\",\n \"description\": \"Watch CI build status\",\n \"timeout\": 900,\n \"persistent\": true\n}\n```\n\n## Parity oracle\n\nThe schema and behavior captured in this doc are pinned by the fixture at\n`packages/coding-agent/test/fixtures/claude-code-tools/monitor.schema.json`,\ncaptured from the upstream Claude Code CLI (`claude --version 2.1.152`). Any\ndeviation from that fixture is a parity bug.\n",
66
68
  "tools/read.md": "# read\n\n> Read files, directories, archives, SQLite databases, internal resources, images, documents, and URLs through one `path` string.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/read.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/read.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/path-utils.ts` — split `path` from trailing selectors; normalize local paths.\n - `packages/coding-agent/src/tools/archive-reader.ts` — detect `archive.ext:inner/path`, index archives, list/read entries.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite targets, parse selectors, render tables.\n - `packages/coding-agent/src/tools/fetch.ts` — URL parsing, fetch/render pipeline, URL cache/artifacts.\n - `packages/coding-agent/src/internal-urls/router.ts` — resolve `agent://`, `artifact://`, `local://`, `memory://`, `gjc://`, and `rule://`.\n - `packages/coding-agent/src/edit/notebook.ts` — convert `.ipynb` to editable `# %% [...] cell:N` text.\n - `packages/coding-agent/src/utils/file-display-mode.ts` — decide hashline vs line-number vs raw display.\n - `packages/coding-agent/src/workspace-tree.ts` — render directory trees.\n - `packages/coding-agent/src/edit/file-read-cache.ts` — cache read lines for later hashline edit recovery.\n - `packages/coding-agent/src/tools/index.ts` — registers `read: s => new ReadTool(s)`.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Filesystem path, internal URL, or web URL. May end with a trailing selector such as `:50-100` or `:raw`. |\n\n### Selector grammar\n\nFor normal file-like reads, `splitPathAndSel()` in `packages/coding-agent/src/tools/path-utils.ts` recognizes the final suffix only when it matches one of these forms:\n\n| Suffix | Meaning |\n| --- | --- |\n| `:raw` | Raw/verbatim mode. Disables structural summaries and line prefixes. 
|\n| `:N` / `:LN` | Start at 1-indexed line `N`, open-ended. |\n| `:A-B` / `:LA-LB` | Inclusive 1-indexed line range. |\n| `:A+C` / `:LA+LC` | `C` lines starting at `A`; tool converts this to end line `A + C - 1`. |\n| `:range:raw` or `:raw:range` | Same line selection, but raw output. |\n\nValidation in `parseLineRangeChunk()`:\n- line numbers are 1-indexed; `:0` throws.\n- `+` counts must be `>= 1`.\n- `-` end must be `>= start`.\n\nSelector parsing intentionally falls through for unrecognized trailing `:...`; archive and SQLite paths consume their own colon syntax.\n\nURL selectors are parsed separately in `packages/coding-agent/src/tools/fetch.ts` and support only `:raw`, `:N`, `:A-B`, and `:A+C` — no optional `L` prefix there.\n\n## Outputs\n- Single-shot `AgentToolResult` built through `toolResult()` in `packages/coding-agent/src/tools/tool-result.ts`.\n- `content` is usually one text block. Image reads may return `[text, image]`.\n- `details` is path-dependent. `ReadToolDetails` may include:\n - `kind: \"file\" | \"url\"` (URL path uses `kind: \"url\"`; file reads usually omit `kind`)\n - `isDirectory`\n - `resolvedPath`\n - `suffixResolution`\n - URL fields: `url`, `finalUrl`, `contentType`, `method`, `notes`\n - `truncation`\n - `displayContent` (unprefixed text + starting line for TUI rendering)\n - `summary` (`lines`, `elidedSpans`, `elidedLines`) for structural summaries\n - `meta` from `packages/coding-agent/src/tools/output-meta.ts`\n- `details.meta.source` is set to the backing path, URL, or internal URL.\n- `details.meta.truncation` carries shown range, total lines/bytes, next offset, and optional `artifactId` for cached URL output.\n- Directory/archive listings and SQLite table lists also set `details.meta.limits` when list limits trigger.\n\n## Flow\n1. `ReadTool.execute()` accepts `{ path }`. `file://...` inputs are expanded first with `expandPath()`.\n2. 
It tries URL handling first via `parseReadUrlTarget()` from `packages/coding-agent/src/tools/fetch.ts`.\n - Plain URL reads call `executeReadUrl()`.\n - URL reads with line selectors load or refresh the URL cache with `loadReadUrlCacheEntry()` and paginate the cached text locally with `#buildInMemoryTextResult()`.\n3. If not a web URL, it checks `session.internalRouter.canHandle(...)`.\n - Internal URLs are resolved with `internalRouter.resolve()`.\n - `agent://` query extraction (`/path` or `?q=`) bypasses pagination and returns the extracted content directly.\n - Other internal resources are paginated in-memory by `#buildInMemoryTextResult()`.\n4. It tries archive resolution next with `#resolveArchiveReadPath()`.\n - `parseArchivePathCandidates()` scans for `.tar`, `.tar.gz`, `.tgz`, or `.zip` anywhere before `:sub/path`.\n - On success, `#readArchive()` either lists a directory or decodes an entry as UTF-8 text.\n5. It tries SQLite resolution with `#resolveSqliteReadPath()`.\n - `parseSqlitePathCandidates()` scans for `.sqlite`, `.sqlite3`, `.db`, `.db3` before any `:table`, `:key`, or `?query` suffix.\n - `#readSqlite()` dispatches on `parseSqliteSelector()`.\n6. Otherwise it treats the input as a local filesystem path.\n - `resolveReadPath()` expands `~`, resolves relative to session cwd, treats bare `/` as session cwd, and retries macOS screenshot/NFD/curly-quote variants.\n - If the path does not exist, `findUniqueSuffixMatch()` does a workspace glob-based unique suffix lookup (skipped for remote mounts).\n7. Directories go through `#readDirectory()`.\n8. Non-directories branch by content type:\n - image metadata / inline image\n - editable notebook text\n - markit-converted document\n - structural summary for parseable code/prose\n - streamed text/line-range read\n9. Local text reads are streamed by `streamLinesFromFile()` rather than loading the whole file. The tool adds 1 leading and 3 trailing context lines around explicit bounded ranges (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`), on the constrained sides only.\n10. 
Non-empty contiguous local reads are recorded into `getFileReadCache(session)` for later hashline edit recovery.\n11. If suffix resolution happened, the first text block is prefixed with `[Path '...' not found; resolved to '...' via suffix match]`.\n\n## Modes / Variants\n\n### Local text files\n- No selector: if summarization is enabled and the file is small enough, `#trySummarize()` calls `summarizeCode()`.\n - Guards: file size `<= 2 MiB` (`MAX_SUMMARY_BYTES`), line count `<= 20_000` (`MAX_SUMMARY_LINES`).\n - Summary output keeps selected declarations and replaces elided spans with `...`. When at least one span is elided, the text content ends with a footer like `[NN lines across MM elided regions; read <path>:raw or a line range like <path>:1-9999 for verbatim content]` so the agent has a concrete recovery selector instead of a bare marker.\n - When an elided block sits between matching brace lines, `#renderSummary()` may merge them into one anchored line rather than emitting separate opener/closer lines.\n- Explicit selector or summarization miss: streamed text read.\n - Default open-ended limit is `min(session setting read.defaultLimit, DEFAULT_MAX_LINES)`.\n - Explicit ranges expand by `RANGE_LEADING_CONTEXT_LINES = 1` / `RANGE_TRAILING_CONTEXT_LINES = 3` on the constrained sides only.\n - Non-raw output uses `resolveFileDisplayMode()`:\n - hashline anchors when edit mode is hashline, read is not raw, source is mutable, edit tool exists, and `readHashLines !== false`\n - otherwise optional line numbers when `readLineNumbers === true`\n - raw mode suppresses both\n- Prefix format in hashline mode is `lineNumber + 2-char line hash + \"|\"`, e.g. 
`41th|def alpha():`, from `formatHashLine()` in `packages/coding-agent/src/hashline/hash.ts`.\n- Those anchors are what the `edit`/hashline path consumes later; immutable sources and `:raw` intentionally suppress them.\n\n### Directory listings\n- `#readDirectory()` calls `buildDirectoryTree()` with:\n - `maxDepth = 2`\n - `perDirLimit = 12`\n - `rootLimit = null`\n - `lineCap = limit` when a line selector was present, else unlimited at this layer\n- `buildDirectoryTree()` sorts siblings by recency, shows file sizes and relative ages, and may mark `limits.resultLimit` when the tree truncates.\n- Empty directories render as `(empty directory)`.\n\n### Archives\n- Supported archive containers: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- Syntax: `archive.ext`, `archive.ext:path/inside`, `archive.ext:path/inside:50-60`.\n- `openArchive()` reads the whole archive into memory, then:\n - tar/tgz uses `new Bun.Archive(bytes)`\n - zip uses `fflate.unzipSync()`\n- Archive paths normalize `/`, drop `.` segments, and reject `..`.\n- Directory reads list immediate children; files show `name` plus ` (size)` when size > 0.\n- Directory listing default limit is `500` entries in `#readArchiveDirectory()`.\n- File entries are UTF-8 decoded. Non-UTF-8 entries return `[Cannot read binary archive entry '...' 
(...)]` instead of bytes.\n- Text archive entries reuse the normal in-memory pagination/anchoring path.\n\n### SQLite databases\n- Database detection requires both a matching extension and a valid SQLite file header (`isSqliteFile()`).\n- Selector forms from `parseSqliteSelector()`:\n\n#### `db.sqlite`\n- `kind: \"list\"`\n- Lists non-`sqlite_%` tables with row counts.\n- `#readSqlite()` caps the rendered list to `500` tables via `applyListLimit()`.\n\n#### `db.sqlite:table`\n- `kind: \"schema\"`\n- Returns `sqlite_master.sql` plus sample rows.\n- Sample size is `DEFAULT_SCHEMA_SAMPLE_LIMIT = 5`.\n\n#### `db.sqlite:table:key`\n- `kind: \"row\"`\n- Resolves by primary key when the table has exactly one PK column; otherwise falls back to `rowid` lookup.\n- No query parameters allowed on row lookups.\n\n#### `db.sqlite:table?limit=...&offset=...&order=...&where=...`\n- `kind: \"query\"`\n- Defaults: `limit = 20`, `offset = 0`.\n- `limit` is capped at `500`.\n- `order` accepts `column` or `column:asc|desc` and must name an existing column.\n- `where` is accepted only after `validateWhereClause()` rejects comments, semicolons, and control keywords like `LIMIT`, `OFFSET`, `UNION`, `ATTACH`, `PRAGMA`.\n- Unknown query parameters throw.\n\n#### `db.sqlite?q=SELECT ...`\n- `kind: \"raw\"`\n- Cannot be combined with table selectors or any other query param.\n- Empty `q` throws.\n- `executeReadQuery()` runs `db.prepare(sql).all()` and rejects bound parameters; it does not verify that the SQL starts with `SELECT`.\n\n- Rendering caps in `packages/coding-agent/src/tools/sqlite-reader.ts`:\n - ASCII table width `120` (`MAX_RENDER_WIDTH`)\n - per-column width `40` (`MAX_COLUMN_WIDTH`)\n- `#readSqlite()` opens Bun SQLite in `{ readonly: true, strict: true }` and sets `PRAGMA busy_timeout = 3000`.\n\n### Documents\n- `CONVERTIBLE_EXTENSIONS` in `packages/coding-agent/src/tools/read.ts` covers `.pdf`, `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`, `.rtf`, `.epub`.\n- 
`convertFileWithMarkit()` converts the file to text/markdown.\n- Converted output is then head-truncated with normal shared limits; there is no line selector support inside the source document before conversion.\n- Conversion failures return a text block like `[Cannot read .pdf file: ...]`.\n\n### Jupyter notebooks\n- `.ipynb` goes through `readEditableNotebookText()` unless `:raw` was requested.\n- Output is editable plain text with markers like:\n\n```text\n# %% [code] cell:0\n...\n```\n\n- Raw mode bypasses that conversion and falls back to file-text reading.\n\n### Images\n- Image detection is metadata-based (`readImageMetadata()`).\n- Max accepted image size is `20 MiB` (`MAX_IMAGE_INPUT_BYTES`, re-exported as `MAX_IMAGE_SIZE`). Larger files throw.\n- If `inspect_image.enabled` is true, `read` returns metadata only (MIME, bytes, dimensions, channels, alpha) plus a suggestion to call `inspect_image`.\n- Otherwise it calls `loadImageInput()` and returns:\n - a text note from the image loader\n - an inline image block\n- Unsupported/undecodable image formats throw a `ToolError`.\n\n### Internal URLs\n- `read` does not resolve these itself; it delegates to `session.internalRouter.resolve()`.\n- Registered protocols are outside this file, but the router in `packages/coding-agent/src/internal-urls/router.ts` is built for `agent://`, `artifact://`, `issue://`, `local://`, `memory://`, `gjc://`, `pr://`, and `rule://`.\n- `#handleInternalUrl()` behavior:\n - parses the URL with `parseInternalUrl()` so colons inside the host segment are legal\n - for `agent://`, treats non-root path extraction or `?q=` extraction as a special no-pagination mode\n - otherwise paginates the resolved text in memory\n - passes `immutable` through to `resolveFileDisplayMode()` so anchors are suppressed for immutable resources such as artifacts, memory, and agent outputs\n- `issue://<N>` / `pr://<N>` (and the long form `issue://<owner>/<repo>/<N>` / `pr://<owner>/<repo>/<N>`) route through 
the same SQLite cache the `github` tool writes to; `?comments=0` selects the no-comments rendering. Bare `issue://` / `pr://` (and `issue://<owner>/<repo>` / `pr://<owner>/<repo>`) issue a live `gh issue list` / `gh pr list` for browsing, accepting `?state=`, `?limit=`, `?author=`, `?label=`. PR diffs share the same cache through `pr://<N>/diff` (numbered file listing with per-file hints), `pr://<N>/diff/<i>` (single file slice; 1-indexed), and `pr://<N>/diff/all` (verbatim unified diff); the listing and per-file slices are reconstructed from the cached unified-diff payload, so all three variants share one `gh pr diff` invocation per PR. Diff content is served as `text/plain`. Soft TTL `github.cache.softTtlSec` (default 5 minutes), hard TTL `github.cache.hardTtlSec` (default 7 days). Stale-hit returns the cached row and schedules a background refresh.\n\n### Web URLs\n- `parseReadUrlTarget()` accepts `http://`, `https://`, or `www.` targets.\n- Plain URL reads call `executeReadUrl()` in `packages/coding-agent/src/tools/fetch.ts`.\n- `:raw` means raw HTML/body fallback path; plain URL reads prefer rendered/reader-friendly output.\n- `:N`, `:A-B`, `:A+C` do not refetch. They page over cached output from the prior or current URL render.\n- URL render pipeline in `renderUrl()`:\n 1. normalize scheme (`https://` added for bare `www.`)\n 2. try special handlers for known sites unless raw\n 3. fetch with `loadPage()`\n 4. if content is image/PDF/DOCX/etc., try binary fetch + markit/image handling\n 5. handle JSON directly, feeds via feed parser, plain text directly\n 6. for HTML and non-raw mode, try markdown alternates, `URL.md`, content negotiation, feed alternates, HTML-to-text renderers, extracted linked documents, then `llms.txt`\n 7. 
fall back to raw body text/html\n- URL output is wrapped with a small header:\n\n```text\nURL: ...\nContent-Type: ...\nMethod: ...\nNotes: ...\n\n---\n```\n\n- `method` records the winning path (`json`, `feed`, `text`, `alternate-markdown`, `md-suffix`, `content-negotiation`, `image`, `markit`, `llms.txt`, `raw`, `raw-html`, etc.).\n- URL reads may return an inline image block when the fetched resource is a supported image and survives resizing.\n\n## Side Effects\n- Filesystem\n - Opens and streams local files.\n - Reads entire archives into memory before indexing.\n - May read URL-cache artifact files from the session artifacts directory.\n - Writes URL output artifacts when URL output is truncated or when line-range pagination needs a persisted cache body.\n- Network\n - URL mode performs HTTP fetches, binary refetches, and alternate-endpoint probes.\n- Subprocesses / native bindings\n - Uses Bun SQLite for `.db`/`.sqlite*`.\n - Uses `Bun.Archive` for tar/tgz and `fflate` for zip.\n - URL HTML rendering can delegate into site handlers and HTML-to-text backends from `packages/coding-agent/src/tools/fetch.ts`.\n- Session state\n - Records local text lines into `session.fileReadCache` for later stale-anchor recovery.\n - Uses `session.internalRouter` for internal URLs.\n - Uses `session.allocateOutputArtifact()` for cached/truncated URL output.\n- Background work / cancellation\n - Most branches honor `AbortSignal`; the tool itself is marked `nonAbortable = true`, but helper paths still call `throwIfAborted(signal)`.\n\n## Limits & Caps\n- Shared text truncation defaults from `packages/coding-agent/src/session/streaming-output.ts`:\n - `DEFAULT_MAX_LINES = 3000`\n - `DEFAULT_MAX_BYTES = 50 * 1024`\n- Local text open-ended default line limit: `read.defaultLimit`, clamped to `[1, DEFAULT_MAX_LINES]`.\n- Explicit line ranges add `1` leading and `3` trailing context lines on the constrained sides (`RANGE_LEADING_CONTEXT_LINES` / `RANGE_TRAILING_CONTEXT_LINES`).\n- File 
streaming chunk size: `8 * 1024` bytes (`READ_CHUNK_SIZE`).\n- Local streamed byte budget for line reads: `max(DEFAULT_MAX_BYTES, maxLinesToCollect * 512)`.\n- Structural summaries only run when file size `<= 2 MiB` and line count `<= 20_000`.\n- Image input max: `20 MiB`.\n- Directory tree caps for local directories: depth `2`, per-directory children `12`.\n- Archive directory default list cap: `500` entries.\n- SQLite:\n - default row query limit `20`\n - schema sample limit `5`\n - max query limit `500`\n - table list cap `500`\n - render width `120`, column width `40`\n - busy timeout `3000` ms\n- URL read result shown to the model is truncated to `300` lines and `50 KiB` in `executeReadUrl()`; full cached output can be attached as an artifact.\n- Inline fetched URL images:\n - source bytes cap `20 MiB`\n - post-resize inline output cap `300 KiB`\n- Unique suffix auto-resolution glob timeout: `5000` ms.\n- File-read cache holds `30` paths per session.\n\n## Errors\n- Validation and operational failures surface as `ToolError`.\n- Selector errors include:\n - `Line selector 0 is invalid; lines are 1-indexed. Use :1.`\n - invalid `A+B` / `A-B` shapes\n - `Cannot combine query extraction with offset/limit` for `agent://.../path:50`\n- Missing local/archive/sqlite paths first attempt unique suffix resolution; if no unique match exists they error.\n- Out-of-bounds line reads do not throw. 
They return explanatory text with a suggestion such as `Use :1 ...` or `Use :<last line> ...`.\n- Binary archive entries do not throw; they return a text notice.\n- Document conversion failure returns a text notice.\n- Image oversize/unsupported/invalid cases throw.\n- SQLite parser rejects unsupported parameter combinations early; DB/runtime errors are caught and rethrown as `ToolError(message)`.\n- URL fetch failure does not throw when HTTP fetch succeeds but `response.ok === false`; it returns a failed URL read with `method: \"failed\"` and explanatory notes.\n\n## Notes\n- `readSchema` examples include `https://example.com:L1-L40`, but URL selector parsing in `packages/coding-agent/src/tools/fetch.ts` does not accept `L` prefixes.\n- Hashline anchors are suppressed for raw reads and immutable internal resources because there is no editable backing target for later `edit` consumption.\n- `splitPathAndSel()` intentionally treats unknown trailing `:...` as part of the path so `archive.zip:inner/file` and `db.sqlite:table:key` still work.\n- `resolveReadPath()` contains macOS-specific filename fallbacks for screenshot timestamps, NFD Unicode normalization, and curly apostrophes.\n- A bare `/` resolves to the session cwd, not the filesystem root.\n- URL cache keys are session-scoped and normalized by requested URL + raw/rendered mode; both requested URL and final redirected URL are cached.\n- URL line-range reads request `ensureArtifact: true, preferCached: true` so a later paginated read can reopen the same rendered body from artifact storage.\n- Raw SQLite `q=` execution is not keyword-restricted beyond “no bound parameters”; the read tool relies on the surrounding contract to keep it read-only.\n- The file-read cache is not a read acceleration cache. It exists to recover hashline edits when the file changed after the read.",
67
69
  "tools/recall.md": "# recall\n\n> Search the active Hindsight bank and return raw matching memories.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/hindsight-recall.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recall.md`\n- Key collaborators:\n - `packages/coding-agent/src/hindsight/state.ts` — session state, recall query defaults, prompt-side auto-recall.\n - `packages/coding-agent/src/hindsight/content.ts` — result formatting and UTC timestamp formatting.\n - `packages/coding-agent/src/hindsight/client.ts` — HTTP `recall` call and error mapping.\n - `packages/coding-agent/src/hindsight/bank.ts` — bank id and tag-filter scoping.\n - `docs/tools/retain.md` — shared backend, storage, seeding, and mental-model bootstrap.\n\n## Inputs\n\n| Field | Type | Required | Description |\n|---|---|---:|---|\n| `query` | `string` | Yes | Natural-language search query. The tool passes it through unchanged. |\n\n## Outputs\nReturns a single-shot tool result.\n\nWhen matches exist:\n- `content[0].type = \"text\"`\n- `content[0].text = \"Found <n> relevant memories (as of YYYY-MM-DD HH:MM UTC):\\n\\n<bullet list>\"`\n- each bullet is `- <text> [<type>] (<mentioned_at>)`; the type and timestamp suffixes appear only when those fields are present\n- `details = {}`\n\nWhen no matches exist:\n- `content[0].text = \"No relevant memories found.\"`\n- `details = {}`\n\n## Flow\n1. `HindsightRecallTool.createIf(...)` only exposes the tool when `memory.backend == \"hindsight\"`.\n2. `execute(...)` wraps the whole operation in `untilAborted(...)` from `@gajae-code/utils`.\n3. It reads the active `HindsightSessionState`; missing state throws `Hindsight backend is not initialised for this session.`\n4. It calls `state.client.recall(...)` with:\n - `bankId` from session bootstrap,\n - the model-supplied `query`,\n - `budget`, `maxTokens`, and `types` from `HindsightConfig`,\n - tag filters from the bank scope (`recallTags`, `recallTagsMatch`).\n5. 
`HindsightApi.recall(...)` POSTs `/v1/default/banks/{bank_id}/memories/recall`.\n6. Results are formatted into a plain-text list with `formatMemories(...)`; empty results map to the fixed no-match string.\n7. Failures are logged with `logger.warn(\"recall failed\", ...)` and rethrown.\n\n## Modes / Variants\n- Tool path: explicit query-only recall. The tool does not compose context from recent turns; that richer path is reserved for backend auto-recall in `HindsightSessionState.beforeAgentStartPrompt(...)` / `maybeRecallOnAgentStart(...)`.\n- Bank scoping is inherited from the active `HindsightSessionState`:\n - `global` — no tag filter.\n - `per-project` — separate bank id per cwd basename.\n - `per-project-tagged` — shared bank id plus `project:<cwd basename>` filter with `tagsMatch = \"any\"`, so project-tagged and untagged global memories can both surface.\n- Session scope: reads cross-session server-side memories, but uses per-session cached config and scope.\n\n## Side Effects\n- Network\n - `POST /v1/default/banks/{bank_id}/memories/recall` via `packages/coding-agent/src/hindsight/client.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None on success. 
Unlike backend auto-recall, this tool does not update `lastRecallSnippet` or refresh the system prompt.\n- Background work / cancellation\n - Aborts through `untilAborted(...)` if the tool call signal is cancelled.\n\n## Limits & Caps\n- Client default budget for raw `HindsightApi.recall(...)` is `\"mid\"`; this tool overrides it with the configured value read in `packages/coding-agent/src/hindsight/state.ts`.\n- Default recall settings from `packages/coding-agent/src/config/settings-schema.ts`:\n - `hindsight.recallBudget = \"mid\"`\n - `hindsight.recallMaxTokens = 1024`\n - `hindsight.recallTypes = [\"world\", \"experience\"]`\n- The explicit tool path does not apply `hindsight.recallContextTurns` or `hindsight.recallMaxQueryChars`; those caps only affect backend auto-recall query composition.\n\n## Errors\n- Throws `Hindsight backend is not initialised for this session.` when no state exists.\n- HTTP and fetch failures become `HindsightError` from `packages/coding-agent/src/hindsight/client.ts` with `statusCode` and parsed `details` when available.\n- Non-`Error` failures are normalized to `new Error(String(err))` before rethrow.\n\n## Notes\n- Shared backend details are in `docs/tools/retain.md`: server-side storage, subagent aliasing, bank scoping, mission setup, and mental-model bootstrap.\n- Mental models are not fetched by this tool. They may nevertheless already be present in the agent's developer instructions because the backend caches a `<mental_models>` block separately from recall results.\n- The tool returns raw memory hits; it does not synthesize across them. Use `reflect` for that path.\n",
68
70
  "tools/recipe.md": "# recipe\n\n> Run a task exposed by a detected project task runner.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/recipe/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/recipe.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/recipe/runner.ts` — op parsing, task resolution, prompt model.\n - `packages/coding-agent/src/tools/recipe/render.ts` — shell-style call/result rendering.\n - `packages/coding-agent/src/tools/recipe/runners/index.ts` — runner registration order.\n - `packages/coding-agent/src/tools/recipe/runners/just.ts` — detect `just` recipes from justfiles.\n - `packages/coding-agent/src/tools/recipe/runners/pkg.ts` — detect `package.json` scripts and workspaces.\n - `packages/coding-agent/src/tools/recipe/runners/cargo.ts` — detect Cargo run/test targets.\n - `packages/coding-agent/src/tools/recipe/runners/make.ts` — parse make targets from makefiles.\n - `packages/coding-agent/src/tools/recipe/runners/task.ts` — detect Taskfile tasks via `task --list-all`.\n - `packages/coding-agent/src/tools/bash.ts` — actual command execution, truncation, cwd/env handling.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `string` | Yes | Single string containing the task selector plus trailing arguments. The first whitespace-delimited token selects the task; the remainder is appended verbatim to the resolved runner command. Examples from schema/prompt: `test`, `build --release`, `pkg-a/test`, `crate/bin/server`, `pkg:test --watch`. 
|\n\n### `op` grammar\n\n```text\nop := S* head (S+ tail)?\nhead := explicit-runner / implicit-task\nexplicit-runner := runner-id \":\" task-token\nimplicit-task := task-token\nrunner-id := detected runner id (`just` | `pkg` | `cargo` | `make` | `task`)\ntask-token := first non-whitespace token; may contain `/`\ntail := remaining characters after the first whitespace run\n```\n\nResolution rules from `resolveRunnerAndTask()`:\n- Leading whitespace is ignored; an empty `op` throws `ToolError` with the available task list.\n- Only the first token is parsed structurally. Everything after the first whitespace run becomes `tail` and is appended to the command unchanged.\n- If `head` contains `:` and the prefix matches a detected runner id, the suffix must exactly match a task in that runner.\n- Otherwise `head` is treated as a task name and matched across all detected runners.\n- If exactly one runner has that task, it is used.\n- If multiple runners have that task, the call is rejected and the error tells the model to use `<runner-id>:<task>`.\n- Namespaced task names generated by runners use `/`, not `:`. `/` is part of the task name, not a parser separator.\n\n## Outputs\n- Delegates directly to `BashTool.execute()` and returns the same `AgentToolResult<BashToolDetails>` shape.\n- Success path: one text content block containing merged command output (`result.output` from bash execution, or `(no output)`), plus any timeout clamp notice appended after a blank line.\n- Recipe does not return separate `stdout`, `stderr`, or `exitCode` fields. 
`stdout`/`stderr` are already merged into the text block by bash execution; `exitCode` is only observed indirectly (success requires `0`, non-zero becomes an error).\n- Error path: throws `ToolError`; for non-zero exits the message is the merged output followed by `Command exited with code <n>`.\n- `details` may include:\n - `timeoutSeconds`: effective timeout used by bash.\n - `requestedTimeoutSeconds`: only when bash clamped a requested timeout; recipe never sets one itself.\n - `meta`: output truncation metadata from bash execution.\n - `async`: defined by bash background execution paths, but recipe does not expose an `async` input.\n- When bash output is truncated, the full text is stored in an artifact and referenced via bash truncation metadata.\n- Call/result rendering in the TUI uses bash shell rendering with a resolved title, command preview, and optional task cwd.\n\n## Flow\n1. `RecipeTool.createIf()` in `packages/coding-agent/src/tools/recipe/index.ts` checks `session.settings.get(\"recipe.enabled\")`; disabled returns `null`.\n2. It probes every runner in `RUNNERS` from `packages/coding-agent/src/tools/recipe/runners/index.ts` with `Promise.all(...)` in this order: `just`, `pkg`, `cargo`, `make`, `task`.\n3. Each runner returns either `null` or a `DetectedRunner { id, label, commandPrefix, tasks }`; runners with zero tasks are discarded.\n4. If no runners remain, the tool is not registered.\n5. Constructor stores detected runners, instantiates `BashTool`, renders the model-facing description by passing `buildPromptModel(runners)` into `packages/coding-agent/src/prompts/tools/recipe.md`, and builds shell renderers from `createRecipeToolRenderer()`.\n6. On execution, `RecipeTool.execute()` calls `resolveCommand(op, this.#runners)`.\n7. `resolveCommand()` in `packages/coding-agent/src/tools/recipe/runner.ts`:\n 1. `parseOp()` trims only leading whitespace, extracts the first non-whitespace token as `head`, and keeps the remainder as `tail`.\n 2. 
`resolveRunnerAndTask()` resolves `head` either as `runnerId:taskName` or as an unqualified task name.\n 3. It throws `ToolError` for empty ops, missing explicit tasks, ambiguous task names, or unknown tasks; all error variants include the available task list.\n 4. It builds the final shell command with `buildCommand(commandPrefix, commandName, tail)`, joining non-empty parts with spaces.\n 5. If the task defines `cwd`, that relative path is returned alongside the command.\n8. `RecipeTool.execute()` forwards `{ command, cwd }` into `BashTool.execute()`; recipe does not pass timeout, env, async, or pty options.\n9. `BashTool.execute()` resolves internal URLs, validates/normalizes cwd against `session.cwd`, clamps timeout, applies bash interception rules, runs the command, and formats the final result.\n\n## Modes / Variants\n- Tool enablement:\n - Disabled by `recipe.enabled` setting: tool is absent.\n - Enabled but no detected tasks: tool is absent.\n- Task selection:\n - Unqualified task name: succeeds only when exactly one detected runner owns that task.\n - Explicit runner-qualified task: `<runner-id>:<task>`.\n- Runner detection paths:\n - `just`: requires `just` on `PATH`, a justfile, and successful `just --dump --dump-format=json`.\n - `pkg`: requires a readable root `package.json`; picks a package manager command from lockfiles or `bun` availability; discovers root scripts and workspace package scripts.\n - `cargo`: requires `cargo` on `PATH`, `Cargo.toml`, and successful `cargo metadata --no-deps --format-version=1`.\n - `make`: requires `make` on `PATH` and a makefile; parses targets statically.\n - `task`: requires `task` on `PATH`, a Taskfile, and successful `task --list-all --json`.\n- Execution path:\n - Always the synchronous `bash` call surface from recipe inputs.\n - Bash may still auto-background long-running work if `bash.autoBackground.enabled` and session async job support are enabled.\n\n## Side Effects\n- Filesystem\n - Reads manifests from 
the session cwd during detection: justfiles, `package.json`, workspace `package.json` files, `Cargo.toml`, makefiles, `Taskfile.yml` / `Taskfile.yaml`.\n - Command execution runs in `session.cwd` or a task-specific relative cwd resolved under it.\n - Bash may allocate output artifacts for truncated command output.\n- Subprocesses / native bindings\n - Detection may spawn `just --dump --dump-format=json`, `cargo metadata --no-deps --format-version=1`, and `task --list-all --json`.\n - Execution spawns the resolved shell command through `BashTool` / `executeBash()`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Tool availability depends on session settings.\n - Constructor prompt text is specialized to detected runners/tasks.\n - Bash execution may create async job records and output artifacts if bash auto-background triggers.\n- User-visible prompts / interactive UI\n - The model-facing tool description lists detected runners and up to 20 tasks per runner.\n - TUI rendering shows a shell-style preview using the resolved title/command/cwd.\n- Background work / cancellation\n - Detection is parallelized across runners.\n - Runtime command execution honors the passed abort signal through `BashTool`.\n\n## Limits & Caps\n- Prompt task listing is capped at `PROMPT_TASK_LIMIT = 20` per runner in `packages/coding-agent/src/tools/recipe/runner.ts`; this affects the rendered tool description, not execution.\n- Recipe itself defines no timeout input; delegated bash execution therefore uses bash's default `timeout = 300` seconds from `packages/coding-agent/src/tools/bash.ts`.\n- Bash clamps timeouts to the configured bash range (`clampTimeout(\"bash\", ...)` in `packages/coding-agent/src/tools/bash.ts`), but recipe cannot request a custom value.\n- `pkg` workspace discovery normalizes workspace globs to `.../package.json` and sorts matched package files lexicographically before task generation.\n- `cargo` deduplicates generated task names with a 
`Set`, so duplicate targets collapse to one recipe task.\n\n## Errors\n- Detection failures in runner modules are mostly soft-failed:\n - Missing binaries, missing manifests, parse failures, or non-zero probe exits usually return `null` and log with `logger.debug(...)`.\n - Result: the affected runner disappears instead of surfacing an error to the model.\n- Invocation failures are hard errors from `resolveRunnerAndTask()`:\n - Empty `op`.\n - Explicit runner prefix with missing/empty task.\n - Ambiguous unqualified task name across runners.\n - Unknown task name.\n- Execution failures come from `BashTool.execute()`:\n - Invalid cwd.\n - Bash interceptor blocks.\n - Aborts/timeouts.\n - Non-zero exit codes.\n - Missing exit status.\n- All `resolveRunnerAndTask()` errors include the current available task list to help the model retry.\n\n## Notes\n- `RecipeTool` sets `concurrency = \"exclusive\"`; calls do not run concurrently with other exclusive tools.\n- Tool registration is all-or-nothing per runner: a detected runner with zero tasks is dropped.\n- Runner ids are fixed string literals from the runner modules: `just`, `pkg`, `cargo`, `make`, `task`.\n- `buildPromptModel()` includes each task's rendered command (`commandPrefix` + `commandName`) and relative cwd when present; the prompt therefore exposes the exact shell form recipe will run.\n- `pkg` task names:\n - Root `package.json` scripts keep bare names like `test`.\n - Workspace scripts are always namespaced as `<package-name-or-dir>/<script>` and set `cwd` to that package directory.\n - Script names are shell-quoted into `commandName`, so a task like `build` becomes `bun run 'build'` / `npm run 'build'` / similar.\n- `pkg` command prefix selection prefers lockfiles in this order: `bun.lock`/`bun.lockb`, `pnpm-lock.yaml`, `yarn.lock`, `package-lock.json`/`npm-shrinkwrap.json`; otherwise it falls back to `bun run` if `bun` exists, else `npm run`.\n- `cargo` task names are generated from metadata targets:\n - 
Single-package manifests: `bin/<name>`, `example/<name>`, `test/<name>`.\n - Multi-package workspaces: `<package>/bin/<name>`, `<package>/example/<name>`, `<package>/test/<name>`.\n - Each task overrides `commandPrefix` to the full `cargo run ... --bin|--example` or `cargo test ... --test` prefix, and `commandName` to the quoted target name.\n- `make` target parsing is static text parsing, not `make -qp` output:\n - Recognizes makefiles named `Makefile`, `makefile`, `GNUmakefile`.\n - Uses `.PHONY` lines to decide whether to include undocumented file targets; without any `.PHONY`, all parsed targets are exposed.\n - If `.PHONY` exists, documented non-phony targets are kept with ` (file target)` appended to `doc`.\n- `just` detection ignores private recipes and preserves declared parameter names only for prompt display; execution still accepts arbitrary `tail` text.\n- `task` detection uses `desc` first, then `summary`, for task documentation.\n- Recipe has no env input of its own. Commands inherit whatever environment `BashTool` supplies for normal bash execution in the session.",
@@ -76,7 +78,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
76
78
  "tools/ssh.md": "# ssh\n\n> Execute one remote command on a discovered SSH host.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ssh.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ssh.md`\n- Key collaborators:\n - `packages/coding-agent/src/ssh/ssh-executor.ts` — runs `ssh`, captures output\n - `packages/coding-agent/src/ssh/connection-manager.ts` — master-connection reuse, host probing\n - `packages/coding-agent/src/ssh/sshfs-mount.ts` — optional `sshfs` mount side effect\n - `packages/coding-agent/src/discovery/ssh.ts` — discovers host configs\n - `packages/coding-agent/src/capability/ssh.ts` — canonical host shape\n - `packages/coding-agent/src/session/streaming-output.ts` — tail streaming, truncation, artifacts\n - `packages/coding-agent/src/tools/tool-timeouts.ts` — timeout clamp rules\n - `packages/utils/src/dirs.ts` — user/project ssh config paths\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `host` | `string` | Yes | Host name key from discovered SSH config entries, not an arbitrary hostname/IP. |\n| `command` | `string` | Yes | Remote command string passed to `ssh` as the remote command. |\n| `cwd` | `string` | No | Remote working directory. The tool prepends a shell-specific `cd`/`Set-Location` wrapper. |\n| `timeout` | `number` | No | Timeout in seconds. Default `60`; clamped to `1..3600`. 
|\n\n## Outputs\nThe tool returns a standard text tool result built in `packages/coding-agent/src/tools/ssh.ts`:\n\n- `content`: one text block containing combined remote stdout+stderr, or `\"(no output)\"` when empty.\n- `details.meta.truncation`: present when output exceeded the in-memory tail window; derived from the executor summary.\n\nStreaming behavior:\n\n- While the command runs, `onUpdate` receives tail-only text snapshots built from `TailBuffer` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Final output is single-shot after process exit.\n\nSide-channel artifacts:\n\n- When session artifact allocation is available and output exceeds the spill threshold, full output is written to a session artifact file and the returned summary carries its `artifactId` internally.\n- The ssh tool itself does not print the `artifact://...` URI into the result text.\n\nFailure behavior:\n\n- Unknown host, missing host config, timeout, cancellation, SSH startup failure, key validation failure, or non-zero remote exit all surface as thrown `ToolError`s.\n- Non-zero remote exit includes captured output plus `Command exited with code N`.\n\n## Flow\n1. `loadSshTool()` in `packages/coding-agent/src/tools/ssh.ts` calls `loadCapability(sshCapability.id, { cwd: session.cwd })` to discover hosts.\n2. `packages/coding-agent/src/discovery/ssh.ts` loads host entries from, in this order: project managed ssh config, user managed ssh config, `ssh.json` in the repo root, `.ssh.json` in the repo root.\n3. `getSSHConfigPath(\"project\")` and `getSSHConfigPath(\"user\")` in `packages/utils/src/dirs.ts` resolve those managed files to `.gjc/ssh.json` in the project and `~/.gjc/agent/ssh.json` in the user config dir. This tool does not read `~/.ssh/config`.\n4. Capability loading deduplicates by host name with first item winning; provider order is priority-sorted and the SSH JSON provider registers at priority `5`.\n5. 
`loadHosts()` in `packages/coding-agent/src/tools/ssh.ts` builds `hostsByName` and drops later duplicates again with `if (!hostsByName.has(host.name))`.\n6. Tool description text is built from `packages/coding-agent/src/prompts/tools/ssh.md` plus an `Available hosts:` list. Each host entry calls `getHostInfoForHost()` to show detected shell/OS when cached; otherwise it renders `detecting...`.\n7. On execute, `SshTool.execute()` rejects any `host` not in the discovered host-name set.\n8. `ensureHostInfo()` in `packages/coding-agent/src/ssh/connection-manager.ts` ensures an SSH master connection exists, loads cached host info from disk if present, and probes remote OS/shell when cache is missing or stale.\n9. `buildRemoteCommand()` in `packages/coding-agent/src/tools/ssh.ts` prepends a cwd change when `cwd` is provided:\n - Unix-like or Windows compat shells: `cd -- '<cwd>' && <command>`\n - Windows PowerShell: `Set-Location -Path '<cwd>'; <command>`\n - Windows cmd: `cd /d \"<cwd>\" && <command>`\n10. `clampTimeout(\"ssh\", rawTimeout)` applies the `1..3600` second clamp from `packages/coding-agent/src/tools/tool-timeouts.ts`.\n11. `executeSSH()` in `packages/coding-agent/src/ssh/ssh-executor.ts` calls `ensureConnection(host)` again, opportunistically mounts the remote host root with `sshfs` if available, optionally wraps the command in `bash -c` or `sh -c` for Windows compat mode, then spawns `ssh` with `ptree.spawn`.\n12. Output from both stdout and stderr is piped into one `OutputSink`; chunks are sanitized and forwarded to streaming updates through `streamTailUpdates()`.\n13. On normal exit, the sink returns combined output plus truncation counters. On timeout or abort, `executeSSH()` returns `cancelled: true` and prefixes the output with a notice line such as `[SSH: ...]` or `[Command aborted: ...]`.\n14. 
`SshTool.execute()` converts `cancelled: true` into `ToolError`, converts non-zero exit codes into `ToolError`, otherwise returns the text result with truncation metadata.\n\n## Modes / Variants\n- **Tool unavailable**: `loadSshTool()` returns `null` when discovery finds no hosts, so the tool is not registered for that session.\n- **Unix-like target**: remote command is passed through directly, with optional `cd -- ... &&` prefix.\n- **Windows native shell**: cwd wrapper uses PowerShell `Set-Location` or cmd `cd /d`; command otherwise runs in the remote default Windows shell.\n- **Windows compat shell**: if host probing finds `bash` or `sh` on Windows, `executeSSH()` wraps the remote command as `bash -c '...'` or `sh -c '...'`. Host config can force compat on/off with `compat`.\n- **Cached vs probed host info**: shell/OS detection comes from in-memory cache, persisted JSON under the remote-host dir, or a fresh probe over SSH.\n- **Truncated vs untruncated output**: small output stays in memory; large output keeps only the last 50 KiB in memory and may spill full output to an artifact file.\n\n## Side Effects\n- Filesystem\n - Reads managed SSH config JSON plus legacy `ssh.json` / `.ssh.json`.\n - Validates private-key path existence and permissions before connecting.\n - Persists probed host info as JSON under the remote-host cache dir via `persistHostInfo()`.\n - May create the SSH control socket dir and, when `sshfs` exists, remote mount dirs.\n - May write full command output to a session artifact file.\n- Network\n - Opens SSH connections to the selected host.\n - May issue extra probe commands to detect OS/shell and compat shells.\n- Subprocesses / native bindings\n - Requires `ssh` on `PATH`; spawns it for connection checks, master startup, probing, and command execution.\n - May call `sshfs`, `mountpoint`, `fusermount`/`fusermount3`, or `umount`.\n - Sanitizes streamed text with `@gajae-code/natives` text sanitization.\n- Session state (transcript, memory, 
jobs, checkpoints, registries)\n - Uses session artifact allocation when available.\n - Registers postmortem cleanup hooks for SSH master connections and sshfs mounts.\n - Tool concurrency is `exclusive`, so the agent scheduler should not run multiple ssh tool calls concurrently.\n- Background work / cancellation\n - Process spawn receives the tool `AbortSignal`.\n - Cancellation/timeout ends the running ssh process and returns a cancelled result that the tool turns into an error.\n\n## Limits & Caps\n- Timeout defaults/clamps: `default=60`, `min=1`, `max=3600` in `packages/coding-agent/src/tools/tool-timeouts.ts`.\n- Output tail window: `DEFAULT_MAX_BYTES = 50 * 1024` in `packages/coding-agent/src/session/streaming-output.ts`.\n- Output sink spill threshold defaults to the same `50 KiB`; once exceeded, only the tail remains in memory.\n- SSH master reuse persistence: `ControlPersist=3600` in `packages/coding-agent/src/ssh/connection-manager.ts` and `packages/coding-agent/src/ssh/sshfs-mount.ts`.\n- SSH host info schema version: `HOST_INFO_VERSION = 2` in `packages/coding-agent/src/ssh/connection-manager.ts`; stale cache entries are reprobed.\n- Streaming tail buffer compacts after more than `10` pending chunks (`MAX_PENDING`) before trimming.\n\n## Errors\n- `Unknown SSH host: ... 
Available hosts: ...` when the model passes a host name not present in discovery.\n- `SSH host not loaded: ...` if the discovered-name set and `hostsByName` map diverge.\n- `ssh binary not found on PATH` when `ssh` is unavailable.\n- `SSH key not found: ...`, `SSH key is not a file: ...`, or `SSH key permissions must be 600 or stricter: ...` from key validation.\n- `Failed to start SSH master for <target>: <stderr>` when control-master startup fails.\n- Non-zero remote command exit becomes `ToolError` with captured output and `Command exited with code N`.\n- Timeout becomes a cancelled result with output notice `[SSH: <timeout message>]`, then `ToolError`.\n- Abort becomes a cancelled result with output notice `[Command aborted: <message>]`, then `ToolError`.\n- `sshfs` mount failures are logged and ignored in `executeSSH()`; they do not fail the tool call.\n- Discovery parse problems do not fail tool loading; they become capability warnings. If all sources are empty/invalid, the tool simply does not load.\n\n## Notes\n- Host discovery is JSON-based only. The tool does not parse OpenSSH config files.\n- Discovery expands environment variables recursively in the parsed JSON and expands `~` in `key`/`keyPath`.\n- Host names are capability keys; the model must pass the config key, not the raw hostname.\n- Commands run without a PTY. 
`executeSSH()` uses `ptree.spawn(..., { stdin: \"pipe\", stderr: \"full\" })` and does not request an interactive terminal.\n- The tool exposes `cwd` but no `env`, `pty`, upload, download, or explicit file-transfer fields.\n- Lower layers support an `artifactId` for full output and a `remotePath` mount target, but `SshTool.execute()` does not expose those knobs.\n- Both stdout and stderr are merged into one output stream; ordering is whatever arrives through the two streams.\n- `StrictHostKeyChecking=accept-new` and `BatchMode=yes` are always set for connection checks, master startup, and command runs.\n- Connection reuse is keyed by discovered host name, not by raw target tuple alone.\n- `closeAllConnections()` and sshfs unmount cleanup run through postmortem hooks, not per-call teardown.\n",
77
79
  "tools/task.md": "# task\n\n> Launch subagents for parallel, optionally isolated work.\n\n## Source\n- Entry: `packages/coding-agent/src/task/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/task.md`\n- Key collaborators:\n - `packages/coding-agent/src/task/types.ts` — dynamic schema, progress/result types, output caps.\n - `packages/coding-agent/src/task/discovery.ts` — resolve bundled GJC agents.\n - `packages/coding-agent/src/task/agents.ts` — bundled agent definitions and frontmatter parsing.\n - `packages/coding-agent/src/task/executor.ts` — create child sessions, run subagents, collect output.\n - `packages/coding-agent/src/task/parallel.ts` — concurrency-limited scheduling and async semaphore.\n - `packages/coding-agent/src/task/isolation-backend.ts` — isolation backend resolution and platform fallback.\n - `packages/coding-agent/src/task/worktree.ts` — worktree / FUSE / ProjFS setup, patch capture, branch merge.\n - `packages/coding-agent/src/task/output-manager.ts` — session-scoped `agent://` id allocation.\n - `packages/coding-agent/src/task/simple-mode.ts` — `default` / `schema-free` / `independent` field gating.\n - `packages/coding-agent/src/internal-urls/agent-protocol.ts` — resolve `agent://<id>` to saved subagent output.\n - `packages/coding-agent/src/tools/index.ts` — tool registration and recursion-depth gating.\n - `packages/coding-agent/src/sdk.ts` — child-session router/tool wiring and per-subagent `AgentOutputManager`.\n - `docs/task-agent-discovery.md` — deeper discovery and precedence notes.\n - `docs/handoff-generation-pipeline.md` — session artifact/handoff persistence patterns used by the wider session layer.\n\n## Inputs\n\n### Default mode (`task.simple = \"default\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name for every task item. Resolved at execution time through `discoverAgents(...)`. 
|\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Batch of small, self-contained task items. `id` max length 48 in schema; duplicate ids are rejected case-insensitively at runtime. |\n| `context` | `string` | No | Shared background prepended to every subagent system prompt. Trimmed before use. |\n| `schema` | `string` | No | JSON-encoded JTD schema. Overrides agent/session output schema when this mode allows task-level schemas. |\n| `isolated` | `boolean` | No | Only present when the tool is created with isolation enabled. Requests isolated execution for the whole batch. |\n\n`tasks[].description` is UI-only. `tasks[].assignment` is the actual per-task instruction.\n\n### Schema-free mode (`task.simple = \"schema-free\"`)\n\nSame as default, except `schema` is rejected by `validateTaskModeParams(...)` in `packages/coding-agent/src/task/index.ts`.\n\n### Independent mode (`task.simple = \"independent\"`)\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `agent` | `string` | Yes | Exact agent name. |\n| `tasks` | `Array<{ id: string; description: string; assignment: string }>` | Yes | Same item shape, but each `assignment` must carry all required background because shared `context` is disabled. |\n| `isolated` | `boolean` | No | Same conditional field as above. 
|\n\nIn this mode both `context` and `schema` are rejected.\n\n## Outputs\nThe tool returns one text block plus `details: TaskToolDetails`.\n\n`details` fields:\n- `projectAgentsDir: string | null` — nearest discovered project `agents/` dir.\n- `results: SingleResult[]` — one entry per task in input order for synchronous execution; empty for async-launch responses.\n- `totalDurationMs: number`\n- `usage?: Usage` — sum of per-subagent assistant-message usage.\n- `outputPaths?: string[]` — written `.md` artifact paths for completed subagent outputs.\n- `progress?: AgentProgress[]` — live or final per-task progress snapshots.\n- `async?: { state: \"running\" | \"completed\" | \"failed\"; jobId: string; type: \"task\" }` — present for background execution updates/results.\n\n`SingleResult` includes:\n- identity: `index`, `id`, `agent`, `agentSource`, `description`, optional `assignment`\n- status: `exitCode`, optional `error`, optional `aborted`, optional `abortReason`\n- output: `output`, `stderr`, `truncated`, `durationMs`, `tokens`\n- artifact metadata: `outputPath?`, `patchPath?`, `branchName?`, `nestedPatches?`, `outputMeta?`\n- extracted tool data: `extractedToolData?` from registered subprocess tool handlers such as `yield` and `report_finding`\n\nArtifacts and side channels:\n- Every subagent with an artifacts dir writes `<id>.md`; `agent://<id>` resolves to that file.\n- If the output file is JSON, `agent://<id>/<path>` and `agent://<id>?q=<query>` perform JSON extraction in `packages/coding-agent/src/internal-urls/agent-protocol.ts`.\n- When the parent session persists artifacts, each subagent also gets `<id>.jsonl` session history.\n- Isolated patch mode writes `<id>.patch` per successful task before merge.\n- Async mode returns immediately after job registration, then emits `onUpdate(...)` progress snapshots and later hands completion to the session async-job pipeline.\n\n## Flow\n1. 
`TaskTool.create(...)` in `packages/coding-agent/src/task/index.ts` calls `discoverAgents(session.cwd)` once to build the dynamic prompt description from current agents and `task.simple` capabilities.\n2. `execute(...)` validates mode-gated fields with `validateTaskModeParams(...)`.\n3. It decides async vs sync:\n - sync when `async.enabled` is false\n - sync when the selected cached agent has `blocking === true`\n - sync when `tasks.length === 0`\n - otherwise async job scheduling\n4. Async path:\n - allocate unique output ids with `AgentOutputManager.allocateBatch(...)`\n - create one async job per task through `session.asyncJobManager.register(...)`\n - limit concurrent job bodies with `Semaphore(task.maxConcurrency)` from `packages/coding-agent/src/task/parallel.ts`\n - each job body calls `#executeSync(...)` with a one-task batch and the preallocated id\n - `onUpdate(...)` emits aggregate `progress` snapshots and `details.async`\n5. Sync path (`#executeSync(...)`) rediscovers agents from disk via `discoverAgents(...)`, so runtime resolution can differ from the earlier prompt description.\n6. It resolves the requested agent with `getAgent(...)`, rejects unknown or disabled agents, and enforces parent spawn policy plus `GJC_BLOCKED_AGENT` self-recursion prevention.\n7. It derives the effective output schema in priority order: task call `schema` (if allowed) → agent frontmatter `output` → inherited parent session schema.\n8. It validates task ids: missing ids and case-insensitive duplicates are immediate errors.\n9. If `isolated` was requested, it requires a git repo (`getRepoRoot(...)` / `captureBaseline(...)`) and resolves the actual backend through `resolveIsolationBackendForTaskExecution(...)`.\n10. It chooses an artifacts dir from the parent session when available, otherwise a temp dir, and writes `context.md` there when `session.getCompactContext?.()` returns content.\n11. 
It allocates unique ids again if the caller did not preallocate them, then builds `tasksWithUniqueIds`.\n12. For each task, it seeds an `AgentProgress` entry and runs `runTask(...)` through `mapWithConcurrencyLimit(...)` using `task.maxConcurrency`.\n13. Non-isolated `runTask(...)` calls `runSubprocess(...)` directly with parent cwd.\n14. Isolated `runTask(...)`:\n - creates an isolation workspace (`ensureWorktree(...)`, `ensureFuseOverlay(...)`, or `ensureProjfsOverlay(...)`)\n - applies the captured baseline for worktrees\n - runs `runSubprocess(...)` inside that workspace\n - on success, either commits to a per-task branch (`mergeMode === \"branch\"`) or captures a patch with `captureDeltaPatch(...)`\n - always cleans up the isolation workspace/backend\n15. `runSubprocess(...)` in `packages/coding-agent/src/task/executor.ts` creates a child agent session with:\n - isolated settings snapshot via `Settings.isolated(...)`, forcing `async.enabled = false` and `bash.autoBackground.enabled = false`\n - child `agentId` / `parentTaskPrefix` equal to the allocated task id\n - child internal URL router and `AgentOutputManager` from `packages/coding-agent/src/sdk.ts`\n - the shared `context`, optional `context.md` reference, optional isolation worktree path, output schema, and IRC peer roster in the system prompt template\n16. Child tool availability is derived from the agent definition plus runtime guards:\n - explicit `agent.tools` if provided\n - auto-add `task` when the agent has `spawns` and recursion depth allows it\n - remove `task` at or past `task.maxRecursionDepth`\n - expand `exec` to `eval` and `bash`\n - strip parent-owned `todo_write` after session creation\n17. `runSubprocess(...)` subscribes to child agent events, coalesces progress updates every 150 ms, forwards lifecycle/progress events on the parent event bus, and extracts tool data through `subprocessToolRegistry`.\n18. The child must finish through the hidden `yield` tool. 
If it does not, `runSubprocess(...)` sends up to 3 reminder prompts; the last reminder forces `toolChoice = yield` when supported.\n19. Finalization uses `finalizeSubprocessOutput(...)` to reconcile raw assistant text, `yield` payloads, structured schemas, `report_finding` data, and abort states. Output is truncated with `MAX_OUTPUT_BYTES` / `MAX_OUTPUT_LINES` before returning to the parent, but the full raw output is still written to `<id>.md`.\n20. After all sync tasks finish, `#executeSync(...)` aggregates usage, collects artifact paths, and if isolation was used merges results back:\n - branch mode: cherry-pick per-task branches with `mergeTaskBranches(...)`, then delete merged branches with `cleanupTaskBranches(...)`\n - patch mode: combine non-empty patch artifacts, dry-check with `git.patch.canApplyText(...)`, then apply or leave manual artifacts\n - nested repo patches are applied separately with `applyNestedPatches(...)`\n21. The final text summary is rendered from `packages/coding-agent/src/prompts/tools/task-summary.md` and includes `agent://<id>` handles for outputs that exist.\n\n## Modes / Variants\n- Execution mode\n - Sync inline execution — default path.\n - Async background execution — one async job per task item when `async.enabled` is on and the chosen agent is not marked `blocking`.\n- Simple mode\n - `default` — accepts shared `context` and per-call `schema`.\n - `schema-free` — accepts `context`, rejects `schema`.\n - `independent` — rejects `context` and `schema`; each assignment stands alone.\n- Isolation backend\n - `none` — no isolation.\n - `worktree` — detached git worktree plus baseline replay.\n - `fuse-overlay` — Unix FUSE overlay mount.\n - `fuse-projfs` — Windows ProjFS overlay.\n- Isolation merge strategy\n - Patch mode — capture/apply root patches, keep patch artifacts when application fails.\n - Branch mode — commit each task onto `gjc/task/<id>` branch, cherry-pick into parent, preserve failed branches for manual resolution.\n- 
Agent source\n - Bundled GJC agents — retained four-agent surface.\n - Bundled agents — appended last from `packages/coding-agent/src/task/agents.ts`.\n- Bundled agent types\n - `explore` — read-only scout with structured handoff output.\n - `plan` — architecture/planning agent; may spawn `explore`.\n - `designer` — UI/UX specialist.\n - `reviewer` — review agent with `report_finding` extraction.\n - `task` — general-purpose worker with full capabilities.\n - `quick_task` — low-reasoning mechanical worker using the same task prompt body.\n - `librarian` — source-grounded external API/library researcher.\n\n## Side Effects\n- Filesystem\n - Writes `context.md`, `<id>.jsonl`, and `<id>.md` under the session artifacts dir or a temp task dir.\n - In isolated patch mode writes `<id>.patch` artifacts.\n - Creates/removes worktrees or overlay mount directories.\n - In branch mode creates temporary worktrees and task branches.\n- Network\n - Child sessions may use whichever networked tools/models their active tool set permits.\n- Subprocesses / native bindings\n - `fuse-overlayfs` and `fusermount`/`fusermount3` for FUSE isolation.\n - ProjFS native bindings via `@gajae-code/natives` on Windows.\n - Git operations for baseline capture, patch apply, worktrees, branches, stash, cherry-pick, commits.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Creates child `AgentSession` instances with isolated settings snapshots.\n - Registers async jobs in `session.asyncJobManager` for background task mode.\n - Emits `task:subagent:event`, `task:subagent:progress`, and `task:subagent:lifecycle` on the parent event bus.\n - Allocates session-scoped output ids through `AgentOutputManager` so `agent://` remains unique across invocations and resumes.\n - Shares the parent `local://` root with subagents by passing `localProtocolOptions` through `createAgentSession(...)`.\n- User-visible prompts / interactive UI\n - Async mode streams aggregate progress updates.\n - 
Missing-`yield` recovery sends up to three internal reminder prompts to the child session.\n - Final summaries include `<system-notification>` blocks for isolation fallbacks or merge failures.\n- Background work / cancellation\n - Parent abort stops scheduling new work, aborts active child sessions, and marks unscheduled tasks as skipped.\n - Async jobs keep their own cancellation via `AsyncJobManager`.\n - Await timeouts are observation windows only: they do not stop, fail, or make a subagent stale. Inspect/list and continue work; cancel only when the subagent has actually failed, gone off-track, or become unrecoverably wrong.\n\n## Limits & Caps\n- Per-subagent output truncation: `MAX_OUTPUT_BYTES = 500_000` and `MAX_OUTPUT_LINES = 5000` in `packages/coding-agent/src/task/types.ts`. Full raw output is still written to `<id>.md` before truncation is returned to the caller.\n- Progress coalescing in child execution: `PROGRESS_COALESCE_MS = 150` in `packages/coding-agent/src/task/executor.ts`.\n- Recent output tail for progress: `RECENT_OUTPUT_TAIL_BYTES = 8 * 1024` and `recentOutput` keeps the last 8 non-empty lines in `packages/coding-agent/src/task/executor.ts`.\n- Missing-`yield` reminder retries: `MAX_YIELD_RETRIES = 3` in `packages/coding-agent/src/task/executor.ts`.\n- Task id schema cap: `tasks[].id` `maxLength: 48` in `packages/coding-agent/src/task/types.ts`.\n- Prompt text says ids should be `≤32` chars, but the runtime schema allows 48; this mismatch is real.\n- Async/full sync parallelism both use `task.maxConcurrency` from settings:\n - sync path: `mapWithConcurrencyLimit(...)`\n - async path: `Semaphore(...)` around job bodies\n- Recursion depth gate: `task.maxRecursionDepth` from settings; `packages/coding-agent/src/tools/index.ts` hides the `task` tool at or beyond the limit, and `runSubprocess(...)` also strips child `task` access at max depth.\n- Final inline summary preview per task uses `fullOutputThreshold = 5000` chars in 
`packages/coding-agent/src/task/index.ts`; longer outputs are summarized while `agent://<id>` points to the full artifact.\n\n## Errors\n- Most validation failures are returned as normal tool text with empty `results`, not thrown:\n - invalid simple-mode fields\n - unknown/disabled agent\n - missing tasks\n - missing/duplicate task ids\n - spawn-policy denial\n - requesting `isolated` while isolation mode is `none`\n- Isolated execution without a git repo returns `Isolated task execution requires a git repository. ...`.\n- Backend resolution can return a hard error (`ProjFS isolation initialization failed...`) or a non-fatal warning with fallback to `worktree`.\n- `mapWithConcurrencyLimit(...)` fails fast on non-abort worker exceptions; already completed results are preserved only in the thrown path’s local state, not surfaced unless the caller catches and converts them.\n- Child-session failures surface as `SingleResult.exitCode = 1` with `stderr`/`error` populated.\n- If the child omits `yield`, `finalizeSubprocessOutput(...)` injects warnings such as `SYSTEM WARNING: Subagent exited without calling yield tool after 3 reminders.`\n- Async scheduling failures are accumulated per task; if no jobs start, the tool returns `Failed to start background task jobs: ...`.\n- `agent://<id>` resolution errors are model-visible when another tool reads them: no session, no artifacts dir, missing id, conflicting extraction syntax, or invalid JSON for extraction.\n\n## Notes\n- Bundled agent definitions are embedded in `packages/coding-agent/src/task/agents.ts`.\n- `TaskTool.create(...)` caches discovered agents only for description rendering and the async blocking-agent decision. `#executeSync(...)` rediscovers agents each call.\n- Custom agent frontmatter can override bundled agents by name. Bundled definitions are embedded at build time in `packages/coding-agent/src/task/agents.ts`.\n- Child sessions do not inherit conversation history automatically. 
The only built-in carry-over is shared `context`, optional `context.md`, workspace tree/context files, and shared `local://` root.\n- `Settings.isolated(...)` gives each child a session-isolated settings snapshot; tool enablement is recomputed inside the child session rather than sharing mutable parent tool state.\n- Plan mode mutates an `effectiveAgent` with a read-only tool subset and plan-mode prompt text, but `runSubprocess(...)` is still invoked with `agent` rather than `effectiveAgent`. Model/thinking/schema overrides use the effective agent; prompt/tool/spawn restrictions do not fully flow through this call path.\n- Branch-mode merge temporarily stashes the parent repo before cherry-picking task branches. A stash-pop conflict is treated as merge failure and leaves recovery state behind.\n- Patch-mode only applies combined root patches if every successful task produced a patch and `git.patch.canApplyText(...)` succeeds.\n- Nested git repos are handled separately from the root repo. They are copied into isolated worktrees, diffed independently, and merged later with `applyNestedPatches(...)` because parent git cannot track their file-level changes.\n- `agent://` ids are numeric-prefixed (`0-Task`, `1-Task`, nested like `0-Parent.0-Child`) by `AgentOutputManager`; this is what prevents artifact collisions across repeated or nested task invocations.\n",
78
80
  "tools/todo_write.md": "# todo_write\n\n> Applies ordered mutations to the session todo list and returns a text summary plus the full phase/task state.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/todo-write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/todo-write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/index.ts` — registers tool, exposes session hooks, gates availability.\n - `packages/coding-agent/src/modes/controllers/event-controller.ts` — updates the visible todo UI on tool completion.\n - `packages/coding-agent/src/session/agent-session.ts` — stores cached phases, auto-clears done/dropped tasks, emits failure reminders.\n - `packages/coding-agent/src/modes/controllers/todo-command-controller.ts` — `/todo` command path, custom-entry persistence, transcript reminder injection.\n - `packages/coding-agent/src/tools/render-utils.ts` — collapsed-preview cap for renderer trees.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `TodoOpEntry[]` | Yes | Ordered operations to apply. `minItems: 1`.\n\n### `TodoOpEntry`\n\n| Op | Required fields | Optional fields | Effect |\n| --- | --- | --- | --- |\n| `init` | `list` | None of the other fields are used | Replaces the entire list with `list`; every new task starts `pending` before normalization. |\n| `start` | `task` | None | Marks one task `in_progress`; any other `in_progress` task is demoted to `pending`. |\n| `done` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `completed`. |\n| `drop` | `task` or `phase` or neither | None | Marks the target task, phase, or all tasks `abandoned`. |\n| `rm` | `task` or `phase` or neither | None | Removes the target task, clears the phase's task list, or clears all task lists. |\n| `append` | `phase`, `items` | None | Appends new `pending` tasks to a phase; creates the phase if missing. 
|\n| `note` | `task`, `text` | None | Appends one trimmed note string to the task's `notes` array. |\n\n### Fields used inside ops\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `op` | `\"init\" \\| \"start\" \\| \"done\" \\| \"rm\" \\| \"drop\" \\| \"append\" \\| \"note\"` | Yes | Operation discriminator. |\n| `list` | `{ phase: string; items: string[] }[]` | For `init` | Full replacement payload. Each `items` array has `minItems: 1`. |\n| `task` | `string` | For `start`; for task-targeted `done`/`drop`/`rm`/`note` | Exact task content match. |\n| `phase` | `string` | For `append`; for phase-targeted `done`/`drop`/`rm` | Exact phase name match, except `append` lazily creates a missing phase. |\n| `items` | `string[]` | For `append` | Tasks to append. `minItems: 1`. |\n| `text` | `string` | For `note` | Note text; trailing whitespace is stripped before storing. Empty-after-trim is rejected. |\n\n## Outputs\nThe tool returns a single-shot `AgentToolResult`:\n\n- `content`: one text part containing the summary from `formatSummary(...)`.\n - Empty final state with no errors: `Todo list cleared.`\n - Non-empty final state: remaining-item list, current phase progress, then a per-phase tree.\n - If the active `in_progress` task has notes, the summary includes the note bodies inline.\n - If any op produced validation/runtime errors, the summary starts with `Errors: ...` but still returns the mutated state.\n- `details`:\n - `phases: TodoPhase[]`\n - `storage: \"session\" | \"memory\"`\n\n`TodoPhase` / `TodoItem` state model:\n\n- `TodoPhase`: `{ name: string, tasks: TodoItem[] }`\n- `TodoItem`: `{ content: string, status: \"pending\" | \"in_progress\" | \"completed\" | \"abandoned\", notes?: string[] }`\n\nThe TUI renderer (`todoWriteToolRenderer`) merges call and result into one transcript block, renders phases as a tree, shows note counts as superscripts, and renders the note bodies only for the current `in_progress` task. 
Collapsed transcript previews cap tree items at `PREVIEW_LIMITS.COLLAPSED_ITEMS` (`8`).\n\n## Flow\n1. `TodoWriteTool.execute(...)` clones the current cached phases from `session.getTodoPhases?.() ?? []` (`packages/coding-agent/src/tools/todo-write.ts`).\n2. `applyParams(...)` walks `params.ops` in order and applies each entry with `applyEntry(...)`.\n3. Each op mutates the working phase array:\n - `initPhases(...)` rebuilds the list from scratch.\n - `start` resolves a task by exact `content`, demotes every other `in_progress` task to `pending`, then marks the target `in_progress`.\n - `done` / `drop` use `getTaskTargets(...)` to target one task, one phase, or every task.\n - `rm` removes one task, clears one phase's `tasks`, or clears all phases' task arrays.\n - `appendItems(...)` resolves or creates the target phase and pushes new `pending` tasks unless the same task content already exists anywhere.\n - `note` trims trailing whitespace, rejects empty text, and appends the note to `task.notes`.\n4. Missing task/phase references are recorded in an `errors` array by `resolveTaskOrError(...)` / `resolvePhaseOrError(...)`; execution continues through the rest of the batch.\n5. After the full batch, `normalizeInProgressTask(...)` enforces the single-active-task invariant:\n - if multiple tasks are `in_progress`, only the first stays active and the rest become `pending`;\n - if none are `in_progress`, the first `pending` task in phase/task order is auto-promoted to `in_progress`.\n6. `execute(...)` stores the normalized phases with `session.setTodoPhases?.(...)` and reports `storage` as `\"session\"` when `session.getSessionFile()` exists, else `\"memory\"`.\n7. The agent runtime also watches `todo_write` tool results in `packages/coding-agent/src/session/agent-session.ts`; successful results refresh cached todos, failed results inject a hidden next-turn reminder telling the model that todo progress is not visible until it retries.\n8. 
The event controller updates the visible todo UI from `result.details.phases` on success, or shows a warning on error (`packages/coding-agent/src/modes/controllers/event-controller.ts`).\n\n## Modes / Variants\n### State transitions\n\n| Current status | `start` | `done` | `drop` | `rm` | `append` | `note` |\n| --- | --- | --- | --- | --- | --- | --- |\n| `pending` | `in_progress` on target | `completed` | `abandoned` | Removed | New tasks enter as `pending` | No status change |\n| `in_progress` | Target stays `in_progress`; non-target active tasks become `pending` | `completed` | `abandoned` | Removed | No status change | No status change |\n| `completed` | Can be set back to `in_progress` if targeted | Stays `completed` | Becomes `abandoned` if targeted | Removed | No status change | No status change |\n| `abandoned` | Can be set back to `in_progress` if targeted | Becomes `completed` if targeted | Stays `abandoned` | Removed | No status change | No status change |\n\nNormalization then re-applies the single-active-task rule after the full op batch.\n\n### Op targeting rules\n- `done`, `drop`, `rm`:\n - `task` set: affect one exact-content task.\n - else `phase` set: affect every task in that exact-name phase.\n - else: affect every task in every phase.\n- `append` is the only op that creates a missing phase.\n- `note` only targets a single task.\n- `init` discards previous phases entirely.\n\n### Markdown round-trip helpers\nThe same file also exposes non-tool helpers used by `/todo`:\n- `phasesToMarkdown(...)` serializes phases as headings plus checklist items (`[ ]`, `[/]`, `[x]`, `[-]`) with blockquote note bodies.\n- `markdownToPhases(...)` parses that format, defaults orphan tasks into a `Todos` phase, accepts `>` as an `in_progress` marker and `~` as `abandoned`, and runs the same normalization step.\n\n## Side Effects\n- Filesystem\n - None in the tool itself.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Mutates the session 
todo cache through `setTodoPhases`.\n - `storage` reports whether the session has a backing session file, but the tool does not append a custom session entry itself.\n - Successful tool-result messages carry `details.phases`; `getLatestTodoPhasesFromEntries(...)` can reconstruct state later from those transcript entries.\n - Failed `todo_write` results cause `agent-session` to enqueue a hidden next-turn reminder (`customType: \"todo-write-error-reminder\"`).\n- User-visible prompts / interactive UI\n - Transcript block is rendered by `todoWriteToolRenderer` and merged with the call line.\n - `event-controller` updates the visible todo panel from successful results.\n - On error, `event-controller` shows `Todo update failed...`; the visible panel may stay stale until a later successful call.\n- Background work / cancellation\n - `AgentSession.setTodoPhases(...)` schedules auto-clear timers for `completed` / `abandoned` tasks via `tasks.todoClearDelay`.\n\n## Limits & Caps\n- `ops` array: `minItems: 1` (`todoWriteSchema`).\n- `init.list[*].items`: `minItems: 1`.\n- `append.items`: `minItems: 1`.\n- Renderer collapsed preview: `PREVIEW_LIMITS.COLLAPSED_ITEMS = 8` (`packages/coding-agent/src/tools/render-utils.ts`).\n- Auto-clear delay: `tasks.todoClearDelay` default `60` seconds; `< 0` disables auto-clear, `0` clears on the next microtask (`packages/coding-agent/src/session/agent-session.ts`).\n- Tool execution mode: `concurrency = \"exclusive\"`, `strict = true`, `loadMode = \"discoverable\"`.\n\n## Errors\n- The tool does not throw for ordinary bad op payloads; it accumulates human-readable strings in `errors` and still returns success with the mutated state.\n- Error strings come from the helpers in `packages/coding-agent/src/tools/todo-write.ts`, including:\n - `Missing list for init operation`\n - `Missing task content`\n - `Task \"...\" not found` with an extra empty-list hint when applicable\n - `Missing phase name`\n - `Phase \"...\" not found`\n - `Missing 
phase name for append operation`\n - `Missing items for append operation`\n - `Task \"...\" already exists`\n - `Missing text for note operation`\n- Because ops are processed in order, earlier errors do not roll back later ops.\n- Runtime-level tool failure is handled outside the tool body: `agent-session` injects a hidden reminder and the event controller warns the user that visible progress may be stale.\n- Idempotency is op-specific:\n - `init` is a full replacement; replaying the same payload yields the same state.\n - `start`, `done`, and `drop` are effectively idempotent on an existing target state, but `start` also demotes any other active task.\n - `rm` is not idempotent for targeted removals: the second call errors because the task or phase is gone.\n - `append` is not idempotent: duplicate task content is rejected with `Task \"...\" already exists`.\n - `note` is append-only and never idempotent; replaying it adds another note entry.\n\n## Notes\n- Task lookup is exact string equality inside the tool. The model-facing prompt says task content and phase names are identifiers and should stay unique; `append` enforces task uniqueness globally, but `init` does not validate duplicate task or phase names.\n- `findTaskByContent(...)` returns the first matching task across phases. Duplicate task contents make later targeted ops ambiguous.\n- `normalizeInProgressTask(...)` runs after the whole batch, not after each op. 
A single call can intentionally build an intermediate invalid state and rely on final normalization.\n- `storage: \"session\"` means the session has a session-file backing; it does not mean this tool wrote a durable custom entry.\n- Reload persistence differs by path:\n - plain `todo_write` calls survive in transcript tool-result details;\n - `/todo` command edits additionally append `customType: \"user_todo_edit\"` entries and inject a visible-to-model `<system-reminder>` developer message describing the manual edit.\n- On session resume, `AgentSession.#syncTodoPhasesFromBranch()` strips `completed` and `abandoned` tasks before restoring the cached list. The `/todo` command works around that by reading the latest transcript/custom-entry state so historical done/dropped tasks still appear to the user.\n- Tool availability is gated by `todo.enabled`, and the registry excludes it when `includeYield` is enabled (`packages/coding-agent/src/tools/index.ts`).\n- Subagents do not inherit `todo_write`; `packages/coding-agent/src/task/executor.ts` filters it out as a parent-owned tool.\n",
79
- "tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Anthropic model web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/openai-code.ts` — OpenAI code provider SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - 
`packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote search adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query. `executeSearch()` rewrites any `2020`-`2029` substring to the current year before dispatch. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it. Prompt text says Brave and Perplexity; code also maps it for Tavily and SearXNG. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. 
|\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, a `## Sources` section follows with a source count, then one entry per source:\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when at least one provider was attempted. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` does not forward its `_signal` argument into `executeSearch()`, so provider cancellation is only available to internal callers that place `signal` inside `SearchQueryParams`.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. `executeSearch()` chooses a provider list:\n - if `params.provider` is set and not `\"auto\"`, it loads that provider with `getSearchProvider()`; if `isAvailable()` returns true, the list is `[that provider]`, otherwise it falls back to `resolveProviderChain(\"auto\")`.\n - otherwise it calls `resolveProviderChain()` with the module-global preferred provider from `packages/coding-agent/src/web/search/provider.ts`.\n3. 
`resolveProviderChain()` lazily loads each provider module on demand, checks `isAvailable()`, and returns only available providers. If a preferred provider is set, it is tried first, then the static `SEARCH_PROVIDER_ORDER` excluding that provider.\n4. If no providers are available, `executeSearch()` returns `Error: No web search provider configured.` with `details.response.provider = \"none\"`.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query` after year-rewrite,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/tools/web-search.md`.\n6. On the first successful `SearchResponse`, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. After all candidates fail, `formatProviderError()` normalizes the last error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed (<labels>). Last error: <message>`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; unavailable forced providers fall back to the auto chain instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default used by `resolveProviderChain()`. 
`packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Auto chain order**: `tavily`, `perplexity`, `brave`, `jina`, `kimi`, `anthropic`, `gemini`, `openai-code`, `zai`, `exa`, `parallel`, `kagi`, `synthetic`, `searxng` (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/provider.ts`).\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY`.\n - OAuth/cookie mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"`.\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. 
`limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? 
params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `findAnthropicAuth()` from `@gajae-code/ai`.\n - Querying: Anthropic model Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **OpenAI code** — `packages/coding-agent/src/web/search/providers/openai-code.ts`\n - Availability: non-expired OAuth credential for `openai-code` in `agent.db`.\n - Querying: SSE POST to `https://chatgpt.com/backend-api/openai-code/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. 
If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against the Z.AI `web_search_prime` search endpoint.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: requires `EXA_API_KEY` and settings must not disable `exa.enabled` or `exa.enableSearch`.\n - Querying: POST `https://api.exa.ai/search` with `EXA_API_KEY`. No no-key fallback is used.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — `packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - 
Querying: GET `https://kagi.com/api/v0/search?q=<query>&limit=<n>` with `Authorization: Bot <key>`.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `relatedQuestions`, `requestId`.\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). 
Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, OpenAI code), and JSON-RPC over HTTP (Z.AI).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`, but `WebSearchTool.execute()` does not pass its `_signal` into `executeSearch()`. 
Internal callers can still use cancellation by calling `runSearchQuery()` / `executeSearch()` with `signal` embedded in params.\n\n## Limits & Caps\n- Provider auto-order length: 14 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/provider.ts`).\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 10` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `anthropic-model-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- Tool-level no-provider case returns a normal tool result with `Error: No web search provider 
configured.`; it does not throw.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; failures are summarized from the last attempted provider.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - OpenAI code and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. 
Perplexity is the only implementation that preserves both concepts.\n- The prompt says `recency` is for Brave and Perplexity, but code also implements it for Tavily and SearXNG.\n- The year rewrite in `executeSearch()` is blunt: any `2020`-`2029` substring is replaced with the current year.\n- `packages/coding-agent/src/config/settings-schema.ts` exposes provider preferences for `auto`, `exa`, `brave`, `jina`, `kimi`, `perplexity`, `anthropic`, `zai`, `tavily`, `kagi`, `synthetic`, `parallel`, and `searxng`. Gemini and OpenAI code are in the registry and auto chain but not in that settings enum.\n- Exa availability fails closed unless `EXA_API_KEY` is present and Exa settings remain enabled.\n",
81
+ "tools/web_search.md": "# web_search\n\n> Run one web query through the first available search provider and return LLM-formatted answer, source URLs, and optional citations.\n\n## Source\n- Entry: `packages/coding-agent/src/web/search/index.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/web-search.md`\n- Key collaborators:\n - `packages/coding-agent/src/web/search/provider.ts` — lazy provider registry; availability chain.\n - `packages/coding-agent/src/web/search/types.ts` — unified `SearchResponse` / `SearchProviderError` types.\n - `packages/coding-agent/src/web/search/render.ts` — TUI renderer details type.\n - `packages/coding-agent/src/web/search/providers/base.ts` — provider interface and shared params contract.\n - `packages/coding-agent/src/web/search/providers/utils.ts` — credential lookup; source normalization.\n - `packages/coding-agent/src/web/search/providers/anthropic.ts` — Anthropic model web-search provider.\n - `packages/coding-agent/src/web/search/providers/brave.ts` — Brave Search API adapter.\n - `packages/coding-agent/src/web/search/providers/duckduckgo.ts` — keyless DuckDuckGo html/lite scrape adapter (permissionless default/fallback).\n - `packages/coding-agent/src/web/search/providers/openai-code.ts` — OpenAI code provider SSE adapter.\n - `packages/coding-agent/src/web/search/providers/exa.ts` — Exa API adapter.\n - `packages/coding-agent/src/web/search/providers/gemini.ts` — Gemini grounding SSE adapter.\n - `packages/coding-agent/src/web/search/providers/jina.ts` — Jina Reader search adapter.\n - `packages/coding-agent/src/web/search/providers/kagi.ts` — Kagi provider wrapper.\n - `packages/coding-agent/src/web/search/providers/kimi.ts` — Kimi search adapter.\n - `packages/coding-agent/src/web/search/providers/parallel.ts` — Parallel provider wrapper.\n - `packages/coding-agent/src/web/search/providers/perplexity.ts` — Perplexity API / OAuth adapter.\n - `packages/coding-agent/src/web/search/providers/searxng.ts` — 
self-hosted SearXNG adapter.\n - `packages/coding-agent/src/web/search/providers/synthetic.ts` — Synthetic search adapter.\n - `packages/coding-agent/src/web/search/providers/tavily.ts` — Tavily search adapter.\n - `packages/coding-agent/src/web/search/providers/zai.ts` — Z.AI remote search adapter.\n - `packages/coding-agent/src/web/parallel.ts` — Parallel search/extract HTTP client.\n - `packages/coding-agent/src/web/kagi.ts` — Kagi HTTP client.\n - `packages/coding-agent/src/tools/index.ts` — built-in tool registration and enable flag.\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `query` | `string` | Yes | Search query. `executeSearch()` rewrites any `2020`-`2029` substring to the current year before dispatch. |\n| `recency` | `\"day\" \\| \"week\" \\| \"month\" \\| \"year\"` | No | Time filter. Only providers that implement it use it. Prompt text says Brave and Perplexity; code also maps it for Tavily and SearXNG. |\n| `limit` | `number` | No | Max results to return. Usually becomes the provider request's result-count parameter when `num_search_results` is absent. |\n| `max_tokens` | `number` | No | Passed through as `maxOutputTokens` / `max_tokens` only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `temperature` | `number` | No | Passed through only by Anthropic, Gemini, and Perplexity API-key mode. Ignored by the other providers. |\n| `num_search_results` | `number` | No | Requested upstream search breadth. For most providers this is the same count used for returned sources. Perplexity is the only adapter that keeps it distinct from `limit`. 
|\n\n## Outputs\nThe tool returns a single text content block plus structured `details`.\n\n- `content`: `[{ type: \"text\", text: string }]`\n- `details`: `SearchRenderDetails` from `packages/coding-agent/src/web/search/render.ts`\n - `response: SearchResponse`\n - `error?: string`\n\n`text` is produced by `formatForLLM()` in `packages/coding-agent/src/web/search/index.ts`:\n\n- If `response.answer` exists, it is emitted first.\n- If sources exist, a `## Sources` section follows with a source count, then one entry per source:\n - `[n] <title> (<formatted age or published date>)`\n - ` <url>`\n - optional snippet line truncated to 240 chars.\n- If citations exist, a `## Citations` section follows with URL/title plus optional cited text truncated to 240 chars.\n- If related questions exist, a `## Related` bullet list follows.\n- If search queries exist, a `Search queries: <n>` section follows, capped to the first 3 queries and 120 chars each.\n\nFailure output is not thrown at the tool boundary when at least one provider was attempted. Instead the tool returns:\n\n- `content[0].text = \"Error: ...\"`\n- `details.response.provider = <last attempted provider> | \"none\"`\n- `details.error = ...`\n\nStreaming: none. `WebSearchTool.execute()` does not forward its `_signal` argument into `executeSearch()`, so provider cancellation is only available to internal callers that place `signal` inside `SearchQueryParams`.\n\n## Flow\n1. `WebSearchTool.execute()` in `packages/coding-agent/src/web/search/index.ts` delegates directly to `executeSearch()`.\n2. `executeSearch()` resolves the provider list via a single `resolveProviderChain(authStorage, params.provider ?? \"auto\", activeModelProvider)` call. The active model's provider is threaded in from `WebSearchTool` (`this.#session.model?.provider`, falling back to parsing `getActiveModelString()`) and from the CustomTool path (`ctx.model?.provider`).\n3. 
`resolveProviderChain()` is active-model-gated, not credential-scanning:\n - an explicitly preferred/selected provider that is `isAvailable()` becomes the primary;\n - otherwise the active model's own native search (`MODEL_PROVIDER_TO_SEARCH`) becomes the primary, but only when that provider's own credentials exist (`isAvailable()`);\n - keyed standalone providers are never auto-selected — explicit selection only.\n4. DuckDuckGo (keyless, `isAvailable()` always true) is always appended as the terminal fallback, so a missing primary — or a primary runtime failure — still returns results with zero configuration. There is no longer a \"No web search provider configured\" path.\n5. For each provider in order, `executeSearch()` calls `provider.search()` with:\n - `query` after year-rewrite,\n - `limit`, `recency`, `temperature`, `maxOutputTokens`, `numSearchResults`,\n - `systemPrompt` from `packages/coding-agent/src/prompts/tools/web-search.md`.\n6. On the first successful `SearchResponse`, `formatForLLM()` renders answer/sources/citations/related/search-queries into one text block and returns it with `details.response`.\n7. If a provider throws, `executeSearch()` records the error and tries the next provider. There is no provider-level parallel fan-out; fallback is sequential.\n8. After all candidates fail, `formatProviderError()` normalizes the last error:\n - Anthropic `404` becomes `Anthropic web search returned 404 (model or endpoint not found).`\n - `401`/`403` become `<Provider> authorization failed ...` except Z.AI, which preserves its raw message.\n - other `SearchProviderError`s surface `error.message`.\n9. If more than one provider was attempted, the final message is `All web search providers failed (<labels>). 
Last error: <message>`; otherwise it is just the normalized last error.\n\n## Modes / Variants\n- **Provider selection**\n - **Forced provider**: internal callers may pass `provider`; an unavailable forced provider falls back to the chain (which always ends in DuckDuckGo) instead of hard-failing (`packages/coding-agent/src/web/search/index.ts`). This field is not in the model-facing schema.\n - **Preferred provider**: `setPreferredSearchProvider()` sets a module-global default consumed by `resolveProviderChain()`. `packages/coding-agent/src/sdk.ts` and `packages/coding-agent/src/modes/controllers/selector-controller.ts` wire this from settings.\n - **Active-model-gated auto**: in `auto` mode, resolution maps the active model's provider to its own native search via `MODEL_PROVIDER_TO_SEARCH` (`openai|openai-codex→codex`, `anthropic→anthropic`, `google|google-gemini-cli|google-antigravity|gemini→gemini`, `moonshot|kimi-code|kimi→kimi`, `zai`, `perplexity`, `synthetic`), used only if that provider's creds exist; everything else falls to DuckDuckGo. `SEARCH_PROVIDER_ORDER` no longer drives auto credential scanning — it is retained for explicit selection, labels, and CLI option lists.\n- **Provider adapters**\n - **Tavily** — `packages/coding-agent/src/web/search/providers/tavily.ts`\n - Availability: API key from env or `agent.db` via `findCredential()`.\n - Querying: POST `https://api.tavily.com/search`.\n - `recency` maps to Tavily `time_range`; code explicitly keeps `topic` at default general scope instead of narrowing to news.\n - `limit` / `num_search_results`: adapter uses `params.numSearchResults ?? 
params.limit`, clamped to `5..20` with default `5`.\n - Output: `answer`, `sources`, `requestId`, `authMode: \"api_key\"`.\n - **Perplexity** — `packages/coding-agent/src/web/search/providers/perplexity.ts`\n - Availability: auth precedence is `PERPLEXITY_COOKIES` -> OAuth token in `agent.db` -> `PERPLEXITY_API_KEY` / `PPLX_API_KEY`.\n - OAuth/cookie mode: POSTs to `https://www.perplexity.ai/rest/sse/perplexity_ask`, consumes SSE, merges partial events, extracts answer and source URLs, sets `authMode: \"oauth\"`.\n - API-key mode: POSTs to `https://api.perplexity.ai/chat/completions` with `model: \"sonar-pro\"`, `search_mode: \"web\"`, `num_search_results`, optional `search_recency_filter`, `max_tokens`, `temperature`.\n - `num_search_results` controls upstream API breadth only in API-key mode. `limit` is preserved separately as `num_results` and slices returned `sources` after parsing in both auth modes.\n - Output may include `answer`, `sources`, `citations`, `usage`, `model`, `requestId`, `authMode`.\n - **Brave** — `packages/coding-agent/src/web/search/providers/brave.ts`\n - Availability: `BRAVE_API_KEY` only.\n - Querying: GET `https://api.search.brave.com/res/v1/web/search` with `count`, `extra_snippets=true`, and `freshness=pd|pw|pm|py` for `recency`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Jina** — `packages/coding-agent/src/web/search/providers/jina.ts`\n - Availability: `JINA_API_KEY` only.\n - Querying: GET-like fetch to `https://s.jina.ai/<encoded query>` with bearer auth.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` / `num_search_results`: adapter slices sources to `params.numSearchResults ?? 
params.limit` when provided; otherwise returns all payload items.\n - Output: `sources` only.\n - **Kimi** — `packages/coding-agent/src/web/search/providers/kimi.ts`\n - Availability: `MOONSHOT_SEARCH_API_KEY`, `KIMI_SEARCH_API_KEY`, `MOONSHOT_API_KEY`, or `agent.db` credentials for `moonshot` / `kimi-code`.\n - Querying: POST to `MOONSHOT_SEARCH_BASE_URL` / `KIMI_SEARCH_BASE_URL` / default `https://api.kimi.com/coding/v1/search` with `text_query`, `limit`, `enable_page_crawling`, `timeout_seconds: 30`.\n - `limit` / `num_search_results`: `params.numSearchResults ?? params.limit`, clamped to `1..20`, default `10`.\n - Output: `sources`, `requestId`.\n - **Anthropic** — `packages/coding-agent/src/web/search/providers/anthropic.ts`\n - Availability: `findAnthropicAuth()` from `@gajae-code/ai`.\n - Querying: Anthropic model Messages API with web-search tool enabled.\n - `max_tokens` and `temperature` pass through.\n - `limit` and `num_search_results` are collapsed together before dispatch: `num_results = params.numSearchResults ?? params.limit`.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage.searchRequests`, `model`, `requestId`.\n - **Gemini** — `packages/coding-agent/src/web/search/providers/gemini.ts`\n - Availability: OAuth credentials in `agent.db` for `google-gemini-cli` or `google-antigravity`.\n - Querying: SSE `streamGenerateContent` call with Google Search grounding enabled. 
Antigravity auth tries two fallback endpoints and retries `401/403/400 invalid auth` once after token refresh; `429/5xx` retry with exponential backoff and server-provided retry delay, capped by a `5 * 60 * 1000` ms rate-limit budget.\n - `max_tokens` and `temperature` pass through as `generationConfig.maxOutputTokens` / `generationConfig.temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `citations`, `searchQueries`, `usage`, `model`.\n - **OpenAI code** — `packages/coding-agent/src/web/search/providers/openai-code.ts`\n - Availability: non-expired OAuth credential for `openai-code` in `agent.db`.\n - Querying: SSE POST to `https://chatgpt.com/backend-api/openai-code/responses` with `tool_choice: { type: \"web_search\" }` and `search_context_size: \"high\"` by default.\n - Ignores `recency`, `max_tokens`, and `temperature` in this tool path.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include `answer`, `sources`, `usage`, `model`, `requestId`. 
If the streamed response has no `url_citation` annotations, the adapter falls back to scraping markdown links and bare URLs from the answer text.\n - **Z.AI** — `packages/coding-agent/src/web/search/providers/zai.ts`\n - Availability: env or `agent.db` credential for `zai`.\n - Querying: JSON-RPC `tools/call` against the Z.AI `web_search_prime` search endpoint.\n - Fallback chain inside the provider: tries `{query,count}`, then `{search_query,count}`, then `{search_query, search_engine:\"search-prime\", count}` when earlier attempts fail with argument-shape errors.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output may include parsed free-text `answer`, `sources`, `requestId`.\n - **Exa** — `packages/coding-agent/src/web/search/providers/exa.ts`\n - Availability: requires `EXA_API_KEY` and settings must not disable `exa.enabled` or `exa.enableSearch`.\n - Querying: POST `https://api.exa.ai/search` with `EXA_API_KEY`. No no-key fallback is used.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: synthesized `answer` from up to 3 result summaries, `sources`, `requestId`.\n - **Parallel** — `packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`\n - Availability: env or `agent.db` credential for `parallel`.\n - Querying: POST `https://api.parallel.ai/v1beta/search` with `objective=query`, `search_queries=[query]`, `mode:\"fast\"`, `max_chars_per_result: 10000`, beta header `search-extract-2025-10-10`.\n - There is no provider fan-out here despite the name; the current adapter always sends a one-element `search_queries` array.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `requestId`.\n - **Kagi** — `packages/coding-agent/src/web/search/providers/kagi.ts`, `packages/coding-agent/src/web/kagi.ts`\n - Availability: env or `agent.db` credential for `kagi`.\n - 
Querying: GET `https://kagi.com/api/v0/search?q=<query>&limit=<n>` with `Authorization: Bot <key>`.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..40`, default `10`.\n - Output: `sources`, `relatedQuestions`, `requestId`.\n - **Synthetic** — `packages/coding-agent/src/web/search/providers/synthetic.ts`\n - Availability: env or `agent.db` credential for `synthetic`.\n - Querying: POST `https://api.synthetic.new/v2/search` with `{ query }`.\n - Ignores `recency`, `max_tokens`, and `temperature`.\n - `limit` and `num_search_results` are collapsed together before dispatch.\n - Output: `sources` only.\n - **SearXNG** — `packages/coding-agent/src/web/search/providers/searxng.ts`\n - Availability: endpoint from `searxng.endpoint` setting or `SEARXNG_ENDPOINT` env.\n - Querying: GET `<endpoint>/search?format=json&q=...`; optional settings add `categories` and `language`.\n - Auth precedence: Basic auth (`searxng.basicUsername` / `searxng.basicPassword` or env equivalents) over bearer token (`searxng.token` / `SEARXNG_TOKEN`). 
Basic credentials are validated for RFC 7617 restrictions.\n - `recency` maps to `time_range`; `week` is downgraded to `month` because SearXNG does not support week.\n - `limit` and `num_search_results` are collapsed together before dispatch, clamped to `1..20`, default `10`.\n - Output: `sources`, `relatedQuestions` from `suggestions`.\n\n## Side Effects\n- Network\n - Calls one or more external search providers over HTTPS until one succeeds or all fail.\n - Provider-specific transports include JSON POST, JSON GET, SSE streaming (Perplexity OAuth/API, Gemini, OpenAI code), and JSON-RPC over HTTP (Z.AI).\n- Subprocesses / native bindings\n - None.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Uses a module-global provider-instance cache in `packages/coding-agent/src/web/search/provider.ts`.\n - Uses a module-global preferred-provider setting in the same file.\n - `packages/coding-agent/src/tools/index.ts` gates tool availability behind `session.settings.get(\"web_search.enabled\")`.\n- Background work / cancellation\n - Many provider adapters accept `AbortSignal`, but `WebSearchTool.execute()` does not pass its `_signal` into `executeSearch()`. Internal callers can still use cancellation by calling `runSearchQuery()` / `executeSearch()` with `signal` embedded in params.\n\n## Limits & Caps\n- Provider registry size: 15 providers (`SEARCH_PROVIDER_ORDER` in `packages/coding-agent/src/web/search/provider.ts`), including the keyless `duckduckgo` default/fallback. 
`SEARCH_PROVIDER_ORDER` no longer drives auto selection — see \"Active-model-gated auto\" above.\n- `formatForLLM()` truncates source snippets and citation text to 240 chars (`packages/coding-agent/src/web/search/index.ts`).\n- `formatForLLM()` emits at most 3 search queries, each truncated to 120 chars (`packages/coding-agent/src/web/search/index.ts`).\n- Brave result count: default `10`, max `20` (`DEFAULT_NUM_RESULTS`, `MAX_NUM_RESULTS` in `packages/coding-agent/src/web/search/providers/brave.ts`).\n- Tavily result count: default `5`, max `20` (`packages/coding-agent/src/web/search/providers/tavily.ts`).\n- Kimi result count: default `10`, max `20`; request timeout field fixed to `30` seconds (`packages/coding-agent/src/web/search/providers/kimi.ts`).\n- Parallel result count: default `10`, max `40`; per-result excerpt cap `10_000` chars (`packages/coding-agent/src/web/search/providers/parallel.ts`, `packages/coding-agent/src/web/parallel.ts`).\n- Kagi result count: default `10`, max `40` (`packages/coding-agent/src/web/search/providers/kagi.ts`).\n- SearXNG result count: default `10`, max `20` (`packages/coding-agent/src/web/search/providers/searxng.ts`).\n- Perplexity API-key mode defaults: `max_tokens = 8192`, `temperature = 0.2`, `num_search_results = 10` (`packages/coding-agent/src/web/search/providers/perplexity.ts`).\n- Anthropic defaults: model `anthropic-model-haiku-4-5`, `DEFAULT_MAX_TOKENS = 4096` when the provider omits `max_tokens` (`packages/coding-agent/src/web/search/providers/anthropic.ts`).\n- Gemini retries: up to `3` retries per endpoint, base delay `1000` ms, rate-limit delay budget `5 * 60 * 1000` ms (`packages/coding-agent/src/web/search/providers/gemini.ts`).\n\n## Errors\n- There is no \"no provider configured\" case: DuckDuckGo (keyless) is always appended as the terminal fallback, so the chain is never empty.\n- Tool-level all-failed case also returns a normal tool result with `Error: ...`; failures are summarized from the last 
attempted provider.\n- Provider adapters usually throw `SearchProviderError(provider, message, status)` for HTTP or protocol failures.\n- Availability probes intentionally swallow lookup errors and report `false` in many providers via `isApiKeyAvailable()`.\n- Per-provider notable failures:\n - Anthropic: missing credentials throw a plain `Error`; a `404` is remapped to a special final message by `formatProviderError()`.\n - Perplexity: missing auth throws a plain `Error`; OAuth stream `error_code` events become `SearchProviderError(\"perplexity\", ...)`.\n - Gemini: auth refresh, endpoint fallback, and retry logic are internal; final exhausted failures surface as `SearchProviderError(\"gemini\", ...)`.\n - OpenAI code and Gemini both fail if the HTTP response has no body after a `200`.\n - Z.AI treats malformed SSE/JSON-RPC payloads as provider errors and retries only argument-shape failures across request variants.\n - SearXNG `findAuth()` can throw configuration errors before any HTTP call if Basic auth fields are incomplete or invalid.\n\n## Notes\n- The model-facing schema does not expose `provider`, but internal callers can force one through `SearchQueryParams`.\n- `resolveProviderChain()` lazily imports provider modules and caches singleton instances. Just asking for labels via `getSearchProviderLabel()` does not trigger those imports.\n- Most providers treat `limit` and `num_search_results` as the same number because adapters pass `params.numSearchResults ?? params.limit`. 
Perplexity is the only implementation that preserves both concepts.\n- The prompt says `recency` is for Brave and Perplexity, but code also implements it for Tavily and SearXNG.\n- The year rewrite in `executeSearch()` is blunt: any `2020`-`2029` substring is replaced with the current year.\n- `packages/coding-agent/src/config/settings-schema.ts` exposes provider preferences for `auto`, `exa`, `brave`, `jina`, `kimi`, `perplexity`, `anthropic`, `zai`, `tavily`, `kagi`, `synthetic`, `parallel`, and `searxng`. Gemini and OpenAI code are in the registry and auto chain but not in that settings enum.\n- Exa availability fails closed unless `EXA_API_KEY` is present and Exa settings remain enabled.\n",
80
82
  "tools/write.md": "# write\n\n> Create or overwrite a file, archive entry, or SQLite row.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/write.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/write.md`\n- Key collaborators:\n - `packages/coding-agent/src/tools/archive-reader.ts` — parse `archive.ext:entry` selectors.\n - `packages/coding-agent/src/tools/sqlite-reader.ts` — detect SQLite paths and perform row insert/update/delete.\n - `packages/coding-agent/src/lsp/index.ts` — format-on-write and diagnostics writethrough.\n - `packages/coding-agent/src/tools/auto-generated-guard.ts` — block overwriting generated files.\n - `packages/coding-agent/src/tools/fs-cache-invalidation.ts` — invalidate shared FS scan caches after writes.\n - `packages/coding-agent/src/tools/plan-mode-guard.ts` — resolve paths and enforce plan-mode write policy.\n\n## Inputs\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `path` | `string` | Yes | Target path. Plain file path writes a filesystem file. `archive.ext:inner/path` writes an archive entry for `.tar`, `.tar.gz`, `.tgz`, or `.zip`. `db.sqlite:table` inserts a row. `db.sqlite:table:key` updates or deletes a row. |\n| `content` | `string` | Yes | Full replacement file content, archive entry content, or SQLite row payload. SQLite non-delete writes must parse as a JSON5 object. Empty or whitespace-only content deletes a SQLite row when `path` includes a row key. 
|\n\nWorked examples:\n\n```text\npath: \"src/generated/config.json\"\ncontent: \"{\\n \\\"enabled\\\": true\\n}\\n\"\n```\n\n```text\npath: \"fixtures/archive.zip:templates/email.txt\"\ncontent: \"hello\\n\"\n```\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n## Outputs\nSingle-shot result.\n\n- Success always returns a text block.\n - Plain file write: `Successfully wrote <bytes> bytes to <relative-path>`.\n - Archive write: `Successfully wrote <bytes> bytes to <relative-archive-path>:<entry-path>`.\n - SQLite write: one of `Inserted row into <table>`, `Updated row '<key>' in <table>`, `No row updated ...`, `Deleted row ...`, `No row deleted ...`.\n- If hashline prefixes were copied from `read` output and stripped first, the first text block gets an extra note.\n- Plain file writes may also return `details.diagnostics` plus `details.meta.diagnostics` when LSP diagnostics-on-write is enabled.\n- SQLite writes use `toolResult(...).sourcePath(...)`, so `details.meta.sourcePath` points at the database file.\n- Archive writes return empty `details`.\n\n## Flow\n1. `WriteTool.execute()` in `packages/coding-agent/src/tools/write.ts` strips `LINE+ID|` hashline prefixes from `content` when the session is in hashline display mode.\n2. It calls `#resolveArchiveWritePath()` first. That uses `parseArchivePathCandidates()` from `packages/coding-agent/src/tools/archive-reader.ts`, checks candidate archive files on disk, and falls back to the longest matching archive suffix even when the archive file does not exist yet.\n3. Archive writes call `enforcePlanModeWrite(..., { op: exists ? 
\"update\" : \"create\" })`, then `#writeArchiveEntry()`.\n - The parent directory of the archive file is created with `fs.mkdir(..., { recursive: true })`.\n - `.zip` archives are read with `fflate.unzipSync()`, the target entry is replaced in an in-memory map, and the archive is rewritten with `fflate.zipSync()` + `Bun.write()`.\n - `.tar`, `.tar.gz`, and `.tgz` archives are read with `Bun.Archive`, existing entries are copied into an object map, the target entry is replaced, and `Bun.Archive.write()` rewrites the archive.\n - `invalidateFsScanAfterWrite()` runs on the archive file path.\n4. If the path is not treated as an archive, `execute()` calls `#resolveSqliteWritePath()`. That uses `parseSqlitePathCandidates()` and `isSqliteFile()` from `packages/coding-agent/src/tools/sqlite-reader.ts`. Existing non-SQLite files suppress the SQLite path interpretation.\n5. SQLite writes call `enforcePlanModeWrite(..., { op: \"update\" })`, then `#writeSqliteRow()`.\n - The database must already exist; missing DBs throw `SQLite database '<path>' not found`.\n - The tool opens `new Database(..., { create: false, strict: true })` and sets `PRAGMA busy_timeout = 3000`.\n - Whitespace-only `content` with a row key deletes a row.\n - Non-empty `content` is parsed with `Bun.JSON5.parse()`, must be a JSON object, and is routed to insert/update helpers from `packages/coding-agent/src/tools/sqlite-reader.ts`.\n - `invalidateFsScanAfterWrite()` runs on the DB path and the connection is closed in `finally`.\n6. Otherwise the tool treats `path` as a plain filesystem file.\n - `enforcePlanModeWrite(..., { op: \"create\" })` runs before path resolution.\n - Existing files are checked by `assertEditableFile()` to block overwriting detected generated files.\n - The session’s writethrough callback writes content. 
With LSP enabled and `lsp.formatOnWrite` / `lsp.diagnosticsOnWrite` settings on, `createLspWritethrough()` may format content, sync it through LSP servers, save it, and collect diagnostics. Otherwise `writethroughNoop()` writes directly with `Bun.write()` or `file.write()`.\n - `invalidateFsScanAfterWrite()` runs on the file path.\n7. The tool returns a text result and optional diagnostics metadata.\n\n## Modes / Variants\n### Plain file path\n- Target is any path that does not resolve as an archive selector and does not resolve as an existing-or-new SQLite selector.\n- Existing files are overwritten.\n- `write.ts` does not call `fs.mkdir()` on this path; parent-directory creation is only implemented in the archive branch.\n\nExample:\n\n```text\npath: \"tmp/output.txt\"\ncontent: \"hello\\n\"\n```\n\n### Archive entry write\n- Selector syntax: `archive.ext:inner/path`.\n- Supported archive suffixes come from `parseArchivePathCandidates()`: `.tar`, `.tar.gz`, `.tgz`, `.zip`.\n- The inner path is normalized to `/`, strips empty and `.` segments, rejects `..`, and rejects directory targets ending in `/`.\n- Rewrites the whole archive file after replacing one entry.\n- Creates the parent directory for the archive file if needed.\n\nExample:\n\n```text\npath: \"build/assets.tar.gz:css/app.css\"\ncontent: \"body { color: black; }\\n\"\n```\n\n### SQLite table insert\n- Selector syntax: `db.sqlite:table`.\n- `content` must parse as a JSON5 object.\n- Empty object is allowed and becomes `INSERT INTO <table> DEFAULT VALUES`.\n- Query parameters are rejected for SQLite writes.\n\nExample:\n\n```text\npath: \"data/app.db:users\"\ncontent: \"{name: 'Ada', active: true}\"\n```\n\n### SQLite row update / delete\n- Selector syntax: `db.sqlite:table:key`.\n- Non-empty `content` updates the row.\n- Empty or whitespace-only `content` deletes the row.\n- Row lookup uses the single-column primary key if present; otherwise it falls back to `rowid`. 
Composite primary keys and `WITHOUT ROWID` tables are rejected for key-based writes.\n\nExample update:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"{email: 'ada@example.com'}\"\n```\n\nExample delete:\n\n```text\npath: \"data/app.sqlite:users:42\"\ncontent: \"\"\n```\n\n## Side Effects\n- Filesystem\n - Creates or overwrites plain files.\n - Rewrites entire archive files when writing an archive entry.\n - Creates parent directories for archive files only.\n - Mutates existing SQLite databases; never creates a new SQLite DB.\n- Subprocesses / native bindings\n - Uses Bun SQLite bindings via `bun:sqlite`.\n - Uses Bun archive APIs and lazily imports `fflate` for ZIP reads/writes.\n - May talk to configured LSP servers through `packages/coding-agent/src/lsp/index.ts`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Invalidates shared filesystem scan cache entries through `invalidateFsScanAfterWrite()`.\n - Enforces plan-mode write restrictions before mutating the target.\n- Background work / cancellation\n - Marks the tool `nonAbortable = true` and `concurrency = \"exclusive\"` in `WriteTool`.\n - LSP writethrough can schedule deferred diagnostics fetches after a timeout, but plain `write.ts` only consumes the immediate return value.\n\n## Limits & Caps\n- `WriteTool` itself exposes no byte cap beyond storing `content` in memory and, for archives, rebuilding the archive in memory.\n- Generated-file detection reads at most `CHECK_BYTE_COUNT = 1024` bytes and `HEADER_LINE_LIMIT = 40` header lines from an existing file in `packages/coding-agent/src/tools/auto-generated-guard.ts`.\n- SQLite writes set `PRAGMA busy_timeout = 3000`.\n- LSP writethrough uses a `5_000` ms operation timeout in `runLspWritethrough()` and may schedule a deferred diagnostics fetch with `AbortSignal.timeout(25_000)` in `scheduleDeferredDiagnosticsFetch()`.\n\n## Errors\n- Invalid archive subpaths throw `ToolError` with messages such as:\n - `Archive write 
path must target a file inside the archive`\n - `Archive write path must target a file, not a directory`\n - `Archive path cannot contain '..'`\n- SQLite path parsing throws on unsupported forms:\n - `SQLite write paths do not support query parameters`\n - `SQLite write path must target a table`\n - `SQLite row writes require a non-empty row key`\n- Missing SQLite DBs surface as `SQLite database '<path>' not found`.\n- SQLite content errors are model-visible `ToolError`s, including invalid JSON5, non-object payloads, unknown columns, non-scalar values, empty update objects, composite primary keys, and `WITHOUT ROWID` tables.\n- Existing plain files may be rejected by `assertEditableFile()` when they look generated.\n- Archive read/write failures and unexpected SQLite exceptions are wrapped in `ToolError(error.message)`.\n- If no LSP server matches or LSP formatting/diagnostics times out, file writes still fall back to writing content; diagnostics may be omitted.\n\n## Notes\n- Archive path detection runs before SQLite detection. A path that matches an archive selector is never treated as SQLite.\n- SQLite detection declines when an existing file with a `.sqlite` / `.db` suffix is present but does not have SQLite magic bytes; then the path falls back to a plain file write.\n- ZIP entry content is encoded with `new TextEncoder().encode(content)` in `#writeArchiveEntry()`. Non-ZIP archive writes pass the string directly to `Bun.Archive.write()`.\n- The prompt forbids two common anti-patterns: using `write` for routine edits that should use `edit`, and creating `*.md` / `README` files unless explicitly requested. It also forbids emojis unless requested.\n- Plain file writes report byte count using `cleanContent.length`, which is UTF-16 code units in JS, not an on-disk byte measurement.\n- `stripWriteContent()` only removes hashline prefixes when the session’s file display mode has `hashLines` enabled; otherwise content is written unchanged.\n",
81
83
  "tree.md": "# `/tree` Command Reference\n\n`/tree` opens the interactive **Session Tree** navigator. It lets you jump to any entry in the current session file and continue from that point.\n\nThis is an in-file leaf move, not a new session export.\n\n## What `/tree` does\n\n- Builds a tree from current session entries (`SessionManager.getTree()`)\n- Opens `TreeSelectorComponent` with keyboard navigation, filters, and search\n- On selection, calls `AgentSession.navigateTree(targetId, { summarize, customInstructions })`\n- Rebuilds visible chat from the new leaf path\n- Optionally prefills editor text when selecting a user/custom message\n\nPrimary implementation:\n\n- `src/modes/controllers/input-controller.ts` (`/tree`, keybinding wiring, double-escape behavior)\n- `src/modes/controllers/selector-controller.ts` (tree UI launch + summary prompt flow)\n- `src/modes/components/tree-selector.ts` (navigation, filters, search, labels, rendering)\n- `src/session/agent-session.ts` (`navigateTree` leaf switching + optional summary)\n- `src/session/session-manager.ts` (`getTree`, `branch`, `branchWithSummary`, `resetLeaf`, label persistence)\n\n## How to open it\n\nAny of the following opens the same selector:\n\n- `/tree`\n- configured keybinding action `tree`\n- double-escape on empty editor when `doubleEscapeAction = \"tree\"` (default)\n- `/branch` when `doubleEscapeAction = \"tree\"` (routes to tree selector instead of user-only branch picker)\n\n## Tree UI model\n\nThe tree is rendered from session entry parent pointers (`id` / `parentId`).\n\n- Children are sorted by timestamp ascending (older first, newer lower)\n- Active branch (path from root to current leaf) is marked with a bullet\n- Labels (if present) render as `[label]` before node text\n- If multiple roots exist (orphaned/broken parent chains), they are shown under a virtual branching root\n\n```text\nExample tree view (active path marked with •):\n\n├─ user: \"Start task\"\n│ └─ assistant: \"Plan\"\n│ ├─ • 
user: \"Try approach A\"\n│ │ └─ • assistant: \"A result\"\n│ │ └─ • [milestone] user: \"Continue A\"\n│ └─ user: \"Try approach B\"\n│ └─ assistant: \"B result\"\n```\n\nThe selector recenters around current selection and shows up to:\n\n- `max(5, floor(terminalHeight / 2))` rows\n\n## Keybindings inside tree selector\n\n- `Up` / `Down`: move selection (wraps)\n- `Left` / `Right`: page up / page down\n- `Enter`: select node\n- `Esc`: clear search if active; otherwise close selector\n- `Ctrl+C`: close selector\n- `Type`: append to search query\n- `Backspace`: delete search character\n- `Shift+L`: edit/clear label on selected entry\n- `Ctrl+O`: cycle filter forward\n- `Shift+Ctrl+O`: cycle filter backward\n- `Alt+D/T/U/L/A`: jump directly to specific filter mode\n\n## Filters and search semantics\n\nFilter modes (`TreeList`):\n\n1. `default`\n2. `no-tools`\n3. `user-only`\n4. `labeled-only`\n5. `all`\n\n### `default`\n\nShows most conversational nodes, but hides bookkeeping entry types:\n\n- `label`\n- `custom`\n- `model_change`\n- `thinking_level_change`\n\n### `no-tools`\n\nSame as `default`, plus hides `toolResult` messages.\n\n### `user-only`\n\nOnly `message` entries where role is `user`.\n\n### `labeled-only`\n\nOnly entries that currently resolve to a label.\n\n### `all`\n\nEverything in the session tree, including bookkeeping/custom entries.\n\n### Tool-only assistant node behavior\n\nAssistant messages that contain **only tool calls** (no text) are hidden by default in all filtered views unless:\n\n- message is error/aborted (`stopReason` not `stop`/`toolUse`), or\n- it is the current leaf (always kept visible)\n\n### Search behavior\n\n- Query is tokenized by spaces\n- Matching is case-insensitive\n- All tokens must match (AND semantics)\n- Searchable text includes label, role, and type-specific content (message text, branch summary text, custom type, tool command snippets, etc.)\n\n## Selection outcomes (important)\n\n`navigateTree` computes new leaf 
behavior from selected entry type:\n\n### Selecting `user` message\n\n- New leaf becomes selected entry’s `parentId`\n- If parent is `null` (root user message), leaf resets to root (`resetLeaf()`)\n- Selected message text is copied to editor for editing/resubmit\n\n### Selecting `custom_message`\n\n- Same leaf rule as user messages (`parentId`)\n- Text content is extracted and copied to editor\n\n### Selecting non-user node (assistant/tool/summary/compaction/custom bookkeeping/etc.)\n\n- New leaf becomes selected node id\n- Editor is not prefilled\n\n### Selecting current leaf\n\n- No-op; selector closes with “Already at this point”\n\n```text\nSelection decision (simplified):\n\nselected node\n │\n ├─ is current leaf? ── yes ──> close selector (no-op)\n │\n ├─ is user/custom_message? ── yes ──> leaf := parentId (or resetLeaf for root)\n │ + prefill editor text\n │\n └─ otherwise ──> leaf := selected node id\n + no editor prefill\n```\n\n## Summary-on-switch flow\n\nSummary prompt is controlled by `branchSummary.enabled` (default: `false`).\n\nWhen enabled, after picking a node the UI asks:\n\n- `No summary`\n- `Summarize`\n- `Summarize with custom prompt`\n\nFlow details:\n\n- Escape in summary prompt reopens tree selector\n- Custom prompt cancellation returns to summary choice loop\n- During summarization, UI shows loader and binds `Esc` to `abortBranchSummary()`\n- If summarization aborts, tree selector reopens and no move is applied\n\n`navigateTree` internals:\n\n- Collects abandoned-branch entries from old leaf to common ancestor\n- Emits `session_before_tree` (extensions can cancel or inject summary)\n- Uses default summarizer only if requested and needed\n- Applies move with:\n - `branchWithSummary(...)` when summary exists\n - `branch(newLeafId)` for non-root move without summary\n - `resetLeaf()` for root move without summary\n- Replaces agent conversation with rebuilt session context\n- Emits `session_tree`\n\nNote: if user requests summary but there is 
nothing to summarize, navigation proceeds without creating a summary entry.\n\n## Labels\n\nLabel edits in tree UI call `appendLabelChange(targetId, label)`.\n\n- non-empty label sets/updates resolved label\n- empty label clears it\n- labels are stored as append-only `label` entries\n- tree nodes display resolved label state, not raw label-entry history\n\n## `/tree` vs adjacent operations\n\n| Operation | Scope | Result |\n| --------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `/tree` | Current session file | Moves leaf to selected point (same file) |\n| `/branch` | Usually current session file -> new session file | By default branches from selected **user** message into a new session file; if `doubleEscapeAction = \"tree\"`, `/branch` opens tree navigation UI instead |\n| `/fork` | Whole current session | Duplicates session into a new persisted session file |\n| `/resume` | Session list | Switches to another session file |\n\nKey distinction: `/tree` is a navigation/repositioning tool inside one session file. `/branch`, `/fork`, and `/resume` all change session-file context.\n\n## Operator workflows\n\n### Re-run from an earlier user prompt without losing current branch\n\n1. `/tree`\n2. search/select earlier user message\n3. choose `No summary` (or summarize if needed)\n4. edit prefilled text in editor\n5. submit\n\nEffect: new branch grows from selected point within same session file.\n\n### Leave current branch with context breadcrumb\n\n1. enable `branchSummary.enabled`\n2. `/tree` and select target node\n3. choose `Summarize` (or custom prompt)\n\nEffect: a `branch_summary` entry is appended at the target position before continuing.\n\n### Investigate hidden bookkeeping entries\n\n1. `/tree`\n2. press `Alt+A` (all)\n3. 
search for `model`, `thinking`, `custom`, or label text\n\nEffect: you can inspect the full internal timeline, not just conversational nodes.\n\n### Bookmark pivot points for later jumps\n\n1. `/tree`\n2. move the selection to the entry you want to bookmark\n3. press `Shift+L` and set a label\n4. later, press `Alt+L` (`labeled-only`) to show only labeled entries and jump quickly\n\nEffect: fast navigation among durable branch landmarks.\n",
82
84
  "ttsr-injection-lifecycle.md": "# TTSR Injection Lifecycle\n\nThis document covers the current Time Traveling Stream Rules (TTSR) runtime path from rule discovery to stream interruption, retry injection, extension notifications, and session-state handling.\n\n## Implementation files\n\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/export/ttsr.ts`](../packages/coding-agent/src/export/ttsr.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/prompts/system/ttsr-interrupt.md`](../packages/coding-agent/src/prompts/system/ttsr-interrupt.md)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/types.ts`](../packages/coding-agent/src/extensibility/extensions/types.ts)\n- [`../src/extensibility/hooks/types.ts`](../packages/coding-agent/src/extensibility/hooks/types.ts)\n- [`../src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n\n## 1. Discovery feed and rule registration\n\nAt session creation, `createAgentSession()` loads discovered rules and constructs a `TtsrManager`:\n\n```ts\nconst ttsrSettings = settings.getGroup(\"ttsr\");\nconst ttsrManager = new TtsrManager(ttsrSettings);\nconst rulesResult = await loadCapability<Rule>(ruleCapability.id, { cwd });\nfor (const rule of rulesResult.items) {\n if (rule.condition?.length && ttsrManager.addRule(rule)) continue;\n // non-TTSR rules continue through normal rule handling\n}\n```\n\n### Pre-registration dedupe behavior\n\n`loadCapability(\"rules\")` deduplicates by `rule.name` with first-wins semantics (higher provider priority first). 
Shadowed duplicates are removed before TTSR registration.\n\n### `TtsrManager.addRule()` behavior\n\nRegistration is skipped when:\n\n- `rule.condition` is absent or all condition regexes fail to compile\n- a rule with the same `rule.name` was already registered in this manager\n- the rule scope excludes all monitored streams\n\nInvalid regex conditions and unreachable scopes are logged as warnings and ignored; session startup continues.\n\n### Setting caveat\n\n`TtsrSettings.enabled` is loaded into the manager but is not currently checked in runtime gating. If TTSR rules exist, matching still runs.\n\n## 2. Streaming monitor lifecycle\n\nTTSR detection runs inside `AgentSession.#handleAgentEvent`.\n\n### Turn start\n\nOn `turn_start`, the stream buffer is reset:\n\n- `ttsrManager.resetBuffer()`\n\n### During stream (`message_update`)\n\nWhen assistant updates arrive and rules exist:\n\n- monitor `text_delta`, `thinking_delta`, and `toolcall_delta`\n- append delta into a source/tool scoped manager buffer\n- call `checkDelta(delta, matchContext)`\n\n`checkDelta()` iterates registered rules and returns all matching rules that pass scope, global-path, condition, and repeat policy checks.\n\n## 3. Trigger decision and immediate abort path\n\nWhen one or more rules match and at least one matched rule allows interruption:\n\n1. Matched rules are deduplicated into `#pendingTtsrInjections`.\n2. `#ttsrAbortPending = true` and a TTSR resume gate is created.\n3. `agent.abort()` is called immediately.\n4. `ttsr_triggered` event is emitted asynchronously (fire-and-forget).\n5. retry work is scheduled via the post-prompt task scheduler with a 50ms delay.\n\nAbort is not blocked on extension callbacks.\n\n## 4. Retry scheduling, context mode, and reminder injection\n\nAfter the 50ms timeout:\n\n1. `#ttsrAbortPending = false`\n2. read `ttsrManager.getSettings().contextMode`\n3. 
if `contextMode === \"discard\"`, drop the targeted partial assistant output with `agent.replaceMessages(...slice(0, targetAssistantIndex))`\n4. build injection content from pending rules using `ttsr-interrupt.md` template\n5. append and persist a hidden `custom_message`/runtime custom message with `customType: \"ttsr-injection\"` and `details.rules`\n6. mark those rule names injected, persist a `ttsr_injection` entry, and call `agent.continue()` to retry generation\n\nTemplate payload is:\n\n```xml\n<system-interrupt reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n...\n{{content}}\n</system-interrupt>\n```\n\nPending injections are cleared after content generation.\n\n### `contextMode` behavior on partial output\n\n- `discard`: partial/aborted assistant message is removed before retry.\n- `keep`: partial assistant output remains in conversation state; reminder is appended after it.\n\n### Non-interrupting matches\n\nNon-interrupting matches split by `matchContext.source`:\n\n- **`source === \"tool\"` (tool-source match).** The rule is bucketed into `#perToolTtsrInjections`, keyed by the matched tool call's `id`. There is **no** deferred follow-up turn and the stream is not aborted. When the tool actually produces a result, the `afterToolCall` hook prepends a rendered `ttsr-tool-reminder.md` block to `ctx.result.content` (a single `text` block inserted ahead of the tool's own content), and persists a `ttsr_injection` entry with the consumed rule names. 
The template payload is:\n\n ```xml\n <system-reminder reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n ...\n {{content}}\n </system-reminder>\n ```\n\n- **`source === \"text\"` / `\"thinking\"` (prose-source match).** Behavior is unchanged: the rule is queued in `#pendingTtsrInjections` and, after a successful non-error, non-aborted assistant message, `AgentSession` injects the hidden `ttsr-injection` custom message as a follow-up and schedules continuation.\n\nWithin a single matching batch, each rule is attached to exactly one sibling tool call — if multiple sibling tool calls would satisfy the same rule, deduplication picks one and the others are left untouched. Multiple distinct rules can still fold onto the same tool call.\n\n#### Implications for tool authors and transcript readers\n\n- The tool's own `toolResult` content is preserved verbatim; the reminder is **prepended** as an additional leading text block. Renderers that assume `content[0]` is the tool's primary output must scan past any block whose text begins with `<system-reminder reason=\"rule_violation\"` (or filter on the wrapper tag) to find the real payload.\n- The reminder is in-band on the tool result, not a separate `custom_message`/`ttsr-injection` entry. Transcript readers looking for non-interrupting TTSR activity on tool-source rules MUST inspect tool results (and the persisted `ttsr_injection` entry list), not just synthetic injection entries.\n- A single tool result may carry reminders for several rules concatenated with a blank line between rendered templates.\n- If the assistant message ends with `stopReason === \"aborted\"` or `\"error\"` before the matched tools run, the pending per-tool buckets are cleared — those rules are **not** persisted as injected and remain eligible to re-trigger on a future turn (subject to repeat policy).\n\n## 5. 
Repeat policy and gap logic\n\n`TtsrManager` tracks `#messageCount` and per-rule `lastInjectedAt`.\n\n### `repeatMode: \"once\"`\n\nA rule can trigger only once: after it has an injection record, it does not trigger again.\n\n### `repeatMode: \"after-gap\"`\n\nA rule can re-trigger only when:\n\n- `messageCount - lastInjectedAt >= repeatGap`\n\n`messageCount` increments on `turn_end`, so the gap is measured in completed turns, not stream chunks.\n\n## 6. Event emission and extension/hook surfaces\n\n### Session event\n\n`AgentSessionEvent` includes:\n\n```ts\n{ type: \"ttsr_triggered\"; rules: Rule[] }\n```\n\n### Extension runner\n\n`#emitSessionEvent()` routes the event to:\n\n- extension listeners (`ExtensionRunner.emit({ type: \"ttsr_triggered\", rules })`)\n- local session subscribers\n\n### Hook and custom-tool typing\n\n- extension API exposes `on(\"ttsr_triggered\", ...)`\n- hook API exposes `on(\"ttsr_triggered\", ...)`\n- custom tools receive `onSession({ reason: \"ttsr_triggered\", rules })`\n\n### Interactive-mode rendering difference\n\nInteractive mode uses `session.isTtsrAbortPending` to suppress showing the aborted assistant stop reason as a visible failure during TTSR interruption, and renders a `TtsrNotificationComponent` when the event arrives.\n\n## 7. 
Persistence and resume state (current implementation)\n\n`SessionManager` persists injected-rule state:\n\n- entry type: `ttsr_injection`\n- append API: `appendTtsrInjection(ruleNames)`\n- query API: `getInjectedTtsrRules()`\n- context reconstruction includes `SessionContext.injectedTtsrRules`\n\n`TtsrManager` supports restoration via `restoreInjected(ruleNames)`.\n\n### Current wiring status\n\nIn the current runtime path:\n\n- interrupted injections append a hidden `custom_message` with `customType: \"ttsr-injection\"` and append a `ttsr_injection` entry via `appendTtsrInjection(...)`\n- deferred non-interrupting prose-source injections are marked/persisted when their queued custom message reaches `message_end`\n- non-interrupting tool-source injections are marked at match time and persisted via `appendTtsrInjection(...)` from the `afterToolCall` hook when the matched tool's result is produced\n- `createAgentSession()` restores `existingSession.injectedTtsrRules` into `ttsrManager`\n\nNet effect: injected-rule suppression is persisted/restored across session reload/resume for the current branch path.\n\n## 8. Race boundaries and ordering guarantees\n\n### Abort vs retry callback\n\n- abort is synchronous from TTSR handler perspective (`agent.abort()` called immediately)\n- retry is deferred by timer (`50ms`)\n- extension notification is asynchronous and intentionally not awaited before abort/retry scheduling\n\n### Multiple matches in same stream window\n\n`checkDelta()` returns all currently matching eligible rules for that scoped buffer. Pending injections are deduplicated by rule name before injection.\n\n### Between abort and continue\n\nDuring the timer window, state can change (user interruption, mode actions, additional events). The retry call is best-effort: `agent.continue().catch(() => {})` swallows follow-up errors.\n\n## 9. 
Edge cases summary\n\n- Invalid `condition` regex: skipped with warning; other conditions/rules continue.\n- Duplicate rule names at capability layer: lower-priority duplicates are shadowed before registration.\n- Duplicate names at manager layer: second registration is ignored.\n- `contextMode: \"keep\"`: partial violating output can remain in context before reminder retry.\n- `interruptMode: \"never\"`: prose-source matches queue a deferred hidden injection after a successful assistant message; tool-source matches fold an in-band `<system-reminder>` into the matched tool call's `toolResult` content via the `afterToolCall` hook (no mid-stream abort, no separate follow-up turn).\n- Tool-source non-interrupting buckets are cleared when the parent assistant message ends with `stopReason === \"aborted\"` or `\"error\"`, so rules whose target tool never produced a result remain eligible to re-trigger.\n- Repeat-after-gap depends on turn count increments at `turn_end`; mid-turn chunks do not advance gap counters.\n",