octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,1828 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pty"
4
+ require "securerandom"
5
+ require "fileutils"
6
+ require_relative "base"
7
+ require_relative "security"
8
+ require_relative "../utils/trash_directory"
9
+ require_relative "terminal/session_manager"
10
+ require_relative "terminal/output_cleaner"
11
+ require_relative "terminal/persistent_session"
12
+ require_relative "../background_task_registry"
13
+
14
+ module Octo
15
+ module Tools
16
+ # Unified terminal tool — the SINGLE entry point for running shell
17
+ # commands. Replaces the former `shell` + `safe_shell` tools.
18
+ #
19
+ # === AI-facing contract
20
+ #
21
+ # Five call shapes, all on one tool:
22
+ #
23
+ # 1) Run a command, wait for it:
24
+ # terminal(command: "ls -la")
25
+ # → { exit_code: 0, output: "..." }
26
+ #
27
+ # 2) Run a command that is expected to keep running (dev servers,
28
+ # watchers, REPLs meant to stay open):
29
+ # terminal(command: "rails s", async: true)
30
+ # – collects ~2s of startup output, then:
31
+ # – if it crashed in those 2s → { exit_code: N, output: "..." }
32
+ # – if still alive → { accepted: true, handle_id: "7", state: "running",
33
+ # output_file: "...", startup_output: "Puma starting..." }
34
+ #
35
+ # 3) A previous call returned a handle_id because the command
36
+ # blocked on input (sudo password, REPL, etc.). Answer it:
37
+ # terminal(handle_id: "3", input: "mypass\n")
38
+ #
39
+ # 4) Poll a running handle for new output without sending anything:
40
+ # terminal(handle_id: "7", input: "")
41
+ #
42
+ # 5) Kill a stuck / no-longer-wanted handle:
43
+ # terminal(handle_id: "7", kill: true)
44
+ #
45
+ # === Response handshake
46
+ #
47
+ # - Response has `exit_code` → command finished.
48
+ # - Response has `handle_id` but no `exit_code` → command is still running;
49
+ # look at `state`: "waiting" means blocked on input,
50
+ # "running" means an async task that is still in progress.
51
+ #
52
+ # === Safety
53
+ #
54
+ # Every new `command` is routed through Octo::Tools::Security before
55
+ # being handed to the shell. This:
56
+ # - Blocks sudo / pkill octo / eval / curl|bash / etc.
57
+ # - Rewrites `curl ... | bash` into "download & review".
58
+ # - Protects Gemfile / .env / .ssh / etc. from writes.
59
+ # `rm` is additionally intercepted at runtime by a shell function
60
+ # installed in each PTY session (see SAFE_RM_PATH): it moves files
61
+ # into the per-project trash at $OCTO_TRASH_DIR instead of
62
+ # deleting them. See trash_manager for list/restore.
63
+ # `input` is NOT subject to these rules (it is a reply to an already-
64
+ # running program, not a fresh command).
65
+ class Terminal < Base
66
+ self.tool_name = "terminal"
67
+ self.tool_description = <<~DESC.strip
68
+ Run shell commands via PTY. Safety: rm→trash, sudo blocked, secrets protected.
69
+
70
+ Two modes:
71
+ {command} DEFAULT — sync. Quick commands return {exit_code, output} immediately. Slow build/test/install commands are auto-routed to async by the harness (you get a handle, see below). Hits idle on an interactive prompt → also returns a handle.
72
+ {command, async:true} Async — never blocks. Use for a long task you intend to leave running (build, deploy, side quest). Returns a handle.
73
+
74
+ Five operations on an existing handle:
75
+ {handle_id} Query status. Returns {state, command, elapsed_seconds, output_file, exit_code (if exited)}.
76
+ {handle_id, input:"y\n"} Send input to the PTY + poll for new output. Use to answer prompts, drive REPLs, etc.
77
+ {handle_id, kill:true} Terminate the underlying process and free the handle.
78
+ Read(output_file) Read the raw PTY log file directly — fastest way to peek at progress mid-flight. (output_file path is in every handle response.)
79
+ (wait for notification) A <task-notification> with the same handle_id is pushed to your context when the task exits — you don't need to poll.
80
+
81
+ Response shape:
82
+ - sync completes → {exit_code, output, state:"exited"}
83
+ - sync hits idle (prompt waiting) → {handle_id, state:"waiting", output, output_file}
84
+ - async accepted → {accepted:true, handle_id, state:"running", output_file, startup_output}
85
+ - handle query/input → {handle_id, state, ...} plus exit_code if state=="exited"
86
+
87
+ If output exceeds the limit, `output` is truncated and `full_output_file`
88
+ points at a file on disk — use terminal(command: "grep ... <path>") to search it.
89
+ input supports byte escapes: \x03 Ctrl-C, \x04 Ctrl-D, \t Tab, \x1b Esc.
90
+
91
+ GUIDANCE:
92
+ - Default to sync. The harness recognises build/test/install patterns and
93
+ auto-switches them to async — you don't need to think about it.
94
+ - Use async:true when you want to fire off a long task and continue with
95
+ unrelated work (you'll get a notification on completion). Also use it
96
+ for dev servers, watchers, REPLs you need to control later — same flag,
97
+ same handle.
98
+ - Never poll a handle in a tight loop — wait for the notification, or
99
+ Read(output_file) once if you really need to peek.
100
+ DESC
101
+ self.tool_category = "system"
102
+
103
+ # agent_session_id is injected by the Agent that owns this tool instance.
104
+ # It is NOT exposed in tool_parameters — AI agents cannot set it.
105
+ attr_reader :agent_session_id
106
+
107
+ def initialize(agent_session_id: nil)
108
+ super()
109
+ @agent_session_id = agent_session_id
110
+ end
111
+
112
+ self.tool_parameters = {
113
+ type: "object",
114
+ properties: {
115
+ command: { type: "string", description: "Shell command to start. Mutually exclusive with handle_id." },
116
+ async: { type: "boolean", description: "Run async — return a handle immediately instead of blocking. Use for long tasks you intend to leave running, dev servers, REPLs, or any side-quest you'll pivot away from. Default false (sync, harness still auto-async's slow build/test/install patterns)." },
117
+ handle_id: { type: "string", description: "Reference an existing async task or interactive session. Combine with input: to send input, with kill: to terminate, or use alone to query status. The handle_id is returned in every async / waiting-for-input response." },
118
+ input: { type: "string", description: "Bytes to write to the PTY of the handle_id (usually ends with \\n). Also re-polls for new output. \"\" = poll only." },
119
+ kill: { type: "boolean", description: "Terminate the process referenced by handle_id and release the handle." },
120
+ cwd: { type: "string", description: "Working dir for new command." },
121
+ env: { type: "object", description: "Extra env vars for new command.", additionalProperties: { type: "string" } },
122
+ max_duration: { type: "integer", description: "Hard ceiling (seconds) for an async task before the watcher kills it. Defaults to 7200 (2h). Raise for very long jobs (large docker build, full integration suite)." }
123
+ }
124
+ }
125
+
126
+ # Hard ceiling on the raw `output:` string we send back to the LLM.
127
+ # 4000 chars ≈ 1000 tokens — matches the value the legacy safe_shell
128
+ # tool used, which was empirically tuned to keep tool-call turns cheap.
129
+ # When real output exceeds this we SPILL the full cleaned text to a
130
+ # dedicated overflow file and only return the first portion — see
131
+ # OVERFLOW_PREVIEW_CHARS / spill_overflow_file below.
132
+ MAX_LLM_OUTPUT_CHARS = 4_000
133
+ # When output overflows, the preview we keep in-context is slightly
134
+ # shorter than the hard ceiling so the "full output at: /tmp/..."
135
+ # notice + path still fits under MAX_LLM_OUTPUT_CHARS.
136
+ OVERFLOW_PREVIEW_CHARS = 3_800
137
+ # Per-line cap applied at write-time (inside the cleaning pipeline).
138
+ # Prevents a single minified JSON / CSS / JS blob from eating the
139
+ # entire 4 KB budget in one go. 500 chars is long enough to preserve
140
+ # real error messages (including stack frames) but short enough to
141
+ # survive dozens of lines inside 4 KB.
142
+ MAX_LINE_CHARS = 500
143
+ # Max seconds we keep a single tool call blocked inside the shell.
144
+ # Raised from 15s → 60s so long-running installs/builds (bundle install,
145
+ # gem install, npm install, docker build, rails new, ...) produce far
146
+ # fewer LLM round-trips: each poll replays the full context, so every
147
+ # avoided poll saves ~all the tokens of one turn.
148
+ DEFAULT_TIMEOUT = 60
149
+ # How long output must be quiet before we assume the foreground command
150
+ # is waiting for user input and return control to the LLM.
151
+ # Raised from 500ms → 3000ms → 10_000ms: real shell prompts (sudo,
152
+ # REPL, [Y/n] confirmations) stay quiet forever, so 10s still feels
153
+ # instant for them; long builds / test runs frequently have multi-
154
+ # second gaps between phases (compilation ↔ linking, spec file
155
+ # transitions), and anything below 10s split those into multiple
156
+ # polls — each poll replays the whole LLM context, which is expensive.
157
+ DEFAULT_IDLE_MS = 10_000
158
+ # Background commands collect this many seconds of startup output so
159
+ # the agent can see crashes / readiness before getting the handle_id.
160
+ BACKGROUND_COLLECT_SECONDS = 2
161
+ # Default ceiling for a fire-and-forget background task (fire_and_forget).
162
+ # Tasks running longer than this are treated as stuck and the watcher
163
+ # returns a timeout result. Callers can override via metadata[:max_duration].
164
+ # 2 hours covers large CI suites (full rspec, big docker build, slow
165
+ # `npm install` on a cold cache) but still bounds resource usage.
166
+ BACKGROUND_TASK_MAX_DURATION = 7_200
167
+ IDLE_MAX_DURATION = 120 # 2 min — abandoned pagers/REPLs
168
+ # Sentinel: when passed as idle_ms, disables idle early-return.
169
+ DISABLED_IDLE_MS = 10_000_000
170
+
171
+ # Commands that we know take a long time and produce bursty output
172
+ # (quiet gaps between test files, compile phases, download batches,
173
+ # etc.). When the command line STARTS WITH or CONTAINS any of these
174
+ # tokens, we auto-extend the timeout to SLOW_COMMAND_TIMEOUT and
175
+ # disable idle-return entirely — otherwise the LLM ends up polling
176
+ # the same long-running job 5-10x, replaying full context each time.
177
+ # Taken verbatim from the legacy shell.rb list.
178
+ SLOW_COMMAND_PATTERNS = [
179
+ # Ruby
180
+ "bundle install", "bundle update", "bundle exec rspec",
181
+ "rspec", "rake test", "rails test",
182
+
183
+ # Node ecosystem — covers npm / yarn / pnpm test/dev/build/install variants
184
+ "npm install", "npm ci", "npm test", "npm run build", "npm run test", "npm run dev",
185
+ "yarn install", "yarn build", "yarn test", "yarn dev",
186
+ "pnpm install", "pnpm build", "pnpm test", "pnpm dev",
187
+
188
+ # Python
189
+ "pytest", "pip install", "pip3 install", "python -m pip install",
190
+ "python -m pytest", "python setup.py",
191
+
192
+ # Go / Rust
193
+ "cargo build", "cargo test", "cargo install", "cargo bench",
194
+ "go build", "go test", "go install", "go mod tidy",
195
+
196
+ # JVM (Maven / Gradle)
197
+ "mvn test", "mvn package", "mvn install",
198
+ "gradle build", "gradle test", "gradle assemble", "gradle bootRun",
199
+
200
+ # .NET / Elixir / PHP / Swift
201
+ "dotnet build", "dotnet test", "dotnet restore",
202
+ "mix test", "mix deps.get",
203
+ "composer install", "composer update",
204
+ "xcodebuild", "swift test",
205
+
206
+ # C / C++ / Make-family
207
+ "make", "make test", "make install", "make build", "make all",
208
+ "cmake --build", "cmake -B",
209
+
210
+ # Containers / Infra
211
+ "docker build", "docker-compose build",
212
+ "terraform plan", "terraform apply",
213
+ "helm install", "helm upgrade",
214
+ "kubectl apply", "ansible-playbook", "vagrant up"
215
+ ].freeze
216
+ # Timeout granted to commands matched by SLOW_COMMAND_PATTERNS.
217
+ # 180s matches the legacy safe_shell "hard_timeout" for slow commands.
218
+ SLOW_COMMAND_TIMEOUT = 180
219
+
220
+ # Patterns that are obviously quick — using fire_and_forget on these
221
+ # is almost certainly a mistake and wastes tokens. The harness rejects
222
+ # such calls at runtime with a clear error so the LLM falls back to
223
+ # foreground mode.
224
+ QUICK_COMMAND_PATTERNS = [
225
+ /\A\s*ls\b/,
226
+ /\A\s*cd\s/,
227
+ /\A\s*pwd\b/,
228
+ /\A\s*cat\s/,
229
+ /\A\s*echo\b/,
230
+ /\A\s*head\b/,
231
+ /\A\s*tail\b/,
232
+ /\A\s*wc\b/,
233
+ /\A\s*which\b/,
234
+ /\A\s*whoami\b/,
235
+ /\A\s*date\b/,
236
+ /\A\s*uname\b/,
237
+ /\A\s*env\b/,
238
+ /\A\s*clear\b/,
239
+ /\A\s*history\b/,
240
+ /\A\s*ps\b/,
241
+ /\A\s*mkdir\b/,
242
+ /\A\s*touch\b/,
243
+ /\A\s*rm\b/,
244
+ /\A\s*mv\b/,
245
+ /\A\s*cp\b/
246
+ ].freeze
247
+
248
+ # Absolute path to the safe-rm shell snippet shipped with the gem.
249
+ # Sourced by every interactive PTY session to install a `rm` shell
250
+ # function that moves files to $OCTO_TRASH_DIR instead of
251
+ # deleting them.
252
+ #
253
+ # Why source-from-file instead of writing the function body into
254
+ # the PTY directly?
255
+ # Writing a multi-line function definition into `zsh -l -i` is
256
+ # unreliable — ZLE (Zsh Line Editor) treats multi-line input as
257
+ # interactive editing and garbles the body. Loading from a file
258
+ # via a single `source` line avoids ZLE entirely.
259
+ #
260
+ # Why a shell function (instead of a Ruby-side text rewrite)?
261
+ # A function defers parsing to the shell itself, so heredocs,
262
+ # multi-line commands, globs, and variable expansion are all
263
+ # handled correctly. The previous Ruby rewriter mis-parsed any
264
+ # command containing a heredoc body with "rm" in it.
265
+ #
266
+ # Coverage:
267
+ # Intercepts — direct `rm …` in the interactive shell (incl.
268
+ # multi-line, heredoc, glob, env-var expansion).
269
+ # Bypassed by — `command rm`, `/bin/rm`, `xargs rm`, `find -exec rm`,
270
+ # child scripts. Same coverage as the old rewriter.
271
+ SAFE_RM_PATH = File.expand_path("terminal/safe_rm.sh", __dir__).freeze
272
+ # ---------------------------------------------------------------------
273
+ # Public entrypoint — dispatches on parameter shape
274
+ # ---------------------------------------------------------------------
275
def execute(command: nil, handle_id: nil, input: nil, async: false,
            cwd: nil, env: nil, kill: nil, idle_ms: nil,
            working_dir: nil, max_duration: nil, timeout: nil, **_ignored)
  # Single entrypoint of the terminal tool. Dispatches on parameter shape:
  #
  #   handle_id + kill   -> terminate the handle
  #   handle_id alone    -> query the handle
  #   handle_id + input  -> write input, then poll for new output
  #   command            -> start a new command
  #
  # Always returns a Hash. Failures are reported as { error: ... } instead
  # of being raised (see the rescue clauses at the bottom).
  #
  # `timeout:` is an explicit keyword so that Ruby callers such as
  # run_sync, which pass a per-poll timeout, actually get it honoured.
  # Before, it fell into **_ignored and the default was always used.
  # Non-positive or missing values fall back to the defaults below.
  timeout = timeout.to_i.positive? ? timeout.to_i : nil

  # Auto-tune: a new, non-async command matching slow_command? gets the
  # longer SLOW_COMMAND_TIMEOUT budget and idle-return disabled, unless the
  # caller supplied explicit values. Async runs and handle operations are
  # never auto-tuned.
  if command && !async && !handle_id && slow_command?(command)
    timeout ||= SLOW_COMMAND_TIMEOUT
    idle_ms ||= DISABLED_IDLE_MS
  end

  timeout = (timeout || DEFAULT_TIMEOUT).to_i
  idle_ms = (idle_ms || DEFAULT_IDLE_MS).to_i
  cwd ||= working_dir # `working_dir:` is accepted as an alias for `cwd:`

  # Operations on an existing handle (query / send input / kill).
  if handle_id
    handle_id = handle_id.to_s
    return do_kill_handle(handle_id) if kill
    return do_query_handle(handle_id) if input.nil?

    return do_continue_handle(handle_id, input.to_s, timeout: timeout, idle_ms: idle_ms)
  end

  # Start a new command.
  if command && !command.to_s.strip.empty?
    # Runtime guard: reject async for obviously quick commands so the
    # LLM doesn't waste tokens on an "I started it" turn for `ls`.
    if async && quick_command?(command.to_s)
      return {
        error: "async:true is for long-running tasks (builds, tests, installs, dev servers). " \
               "This command looks quick — drop async:true and use plain sync mode.",
        hint: "Commands like ls, cat, pwd, echo should not use async:true.",
        command: command.to_s
      }
    end

    return do_start(command.to_s, cwd: cwd, env: env, timeout: timeout,
                    idle_ms: idle_ms, async: async ? true : false,
                    max_duration: max_duration ? max_duration.to_i : nil)
  end

  { error: "terminal: must provide either `command`, or `handle_id` (alone to query, with input: to write, with kill:true to terminate)." }
rescue SecurityError => e
  { error: "[Security] #{e.message}", security_blocked: true }
rescue StandardError => e
  # Array() guards against a nil backtrace.
  { error: "terminal failed: #{e.class}: #{e.message}", backtrace: Array(e.backtrace).first(5) }
end
329
+
330
+ # Alias used by ToolExecutor to decide whether :confirm_safes mode
331
+ # should auto-execute without asking the user.
332
def self.command_safe_for_auto_execution?(command)
  # Pure delegation: the decision is made by Octo::Tools::Security and its
  # return value is passed through unchanged.
  Octo::Tools::Security.command_safe_for_auto_execution?(command)
end
335
+
336
+ # ---------------------------------------------------------------------
337
+ # Internal Ruby API — synchronous capture
338
+ # ---------------------------------------------------------------------
339
+ #
340
+ # Run a shell command and BLOCK until it terminates, returning
341
+ # [output, exit_code]. Drop-in replacement for Open3.capture2e that
342
+ # goes through the same PTY + login-shell + Security pipeline used by
343
+ # the AI-facing tool (so rbenv/mise shims and gem mirrors work).
344
+ #
345
+ # Why this exists separately from #execute:
346
+ #
347
+ # `execute` may return early with a :session_id the moment output
348
+ # goes idle for DEFAULT_IDLE_MS (3s) — this is intentional for AI
349
+ # agents (they can inspect progress, inject input, decide to kill).
350
+ # Ruby callers like the HTTP server's upgrade flow only care about
351
+ # "did it finish, with what output, what exit code" — they need
352
+ # synchronous semantics. Previously each caller re-implemented the
353
+ # poll loop (and 0.9.36's run_shell forgot to, causing the upgrade
354
+ # failure bug).
355
+ #
356
+ # NOT exposed in tool_parameters — AI agents cannot invoke this.
357
+ #
358
+ # @param command [String] the shell command to run
359
+ # @param timeout [Integer] per-poll timeout AND the basis for the
360
+ # overall deadline (deadline = timeout + 60s)
361
+ # @param cwd [String] optional working directory
362
+ # @param env [Hash] optional env overrides
363
+ # @return [Array(String, Integer|nil)] [output, exit_code].
364
+ # exit_code is nil only if the overall deadline was hit and
365
+ # the session had to be force-killed.
366
def self.run_sync(command, timeout: 120, cwd: nil, env: nil)
  # Runs `command` through a fresh terminal instance and keeps polling the
  # returned handle until an exit code shows up or the overall deadline
  # (timeout + 60s) passes. Returns [output, exit_code]; exit_code is nil
  # when the command never reported one.
  terminal = new
  result = terminal.execute(command: command, timeout: timeout, cwd: cwd, env: env)

  # The first result may carry its text under different keys:
  #   :output          - foreground run
  #   :startup_output  - command was accepted as an async handle
  #   :error           - nothing could be started (only used when there is
  #                      no handle to poll, so the caller can see why the
  #                      exit code is nil)
  first_text = result[:output] || result[:startup_output]
  first_text ||= result[:error] unless result[:handle_id]
  output = first_text.to_s.dup

  # Monotonic clock: the deadline must not move when the wall clock is
  # adjusted. Each individual poll still carries `timeout`.
  now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  deadline = now.call + timeout.to_i + 60

  while result[:exit_code].nil? && result[:handle_id] && now.call < deadline
    result = terminal.execute(handle_id: result[:handle_id], input: "", timeout: timeout)
    output << result[:output].to_s
  end

  # Deadline exceeded — best-effort cleanup so the session doesn't leak.
  if result[:exit_code].nil? && result[:handle_id]
    begin
      terminal.execute(handle_id: result[:handle_id], kill: true)
    rescue StandardError
      # swallow — cleanup is best-effort
    end
  end

  [output, result[:exit_code]]
end
400
+
401
+ # ---------------------------------------------------------------------
402
+ # 1) Start a new command
403
+ # ---------------------------------------------------------------------
404
private def do_start(command, cwd:, env:, timeout:, async:, max_duration: nil, idle_ms: DEFAULT_IDLE_MS)
  # Starts a new command. Returns either a finished result Hash, a handle
  # Hash (the command is still alive), or { error: ... }.
  if cwd && !Dir.exist?(cwd.to_s)
    return { error: "cwd does not exist: #{cwd}" }
  end

  # Security pre-flight: reject / rewrite dangerous commands before
  # they ever reach the shell. Raises SecurityError on block.
  safe_command = Octo::Tools::Security.make_safe(
    command,
    project_root: cwd || Dir.pwd
  )

  # Transparent async: a known slow pattern (build/test/install) is
  # treated AS IF async:true had been specified.
  async ||= slow_command?(command)

  if async
    # Async path — dedicated session, Registry task, exit watcher.
    return start_async_command(command, safe_command, cwd: cwd, env: env, max_duration: max_duration)
  end

  # Foreground sync — try the persistent shell first, fall back to
  # a one-shot dedicated session if the persistent slot is busy.
  session, _reused = acquire_persistent_session(cwd: cwd, env: env)
  persistent = !session.nil?
  session ||= spawn_dedicated_session(cwd: cwd, env: env)
  return session if session.is_a?(Hash) && session[:error]

  # Run precmd/chpwd hooks before the user command so directory-aware
  # version managers pick up the current cwd. See write_user_command
  # for the full rationale.
  write_user_command(session, safe_command, with_hooks: true)

  result = wait_and_package(
    session,
    timeout: timeout,
    idle_ms: idle_ms,
    persistent: persistent,
    original_command: command,
    rewritten_command: safe_command
  )

  # The command is still alive (waiting on a prompt, slow, or
  # backgrounded): promote it to a handle the LLM can address with
  # terminal(handle_id:, ...).
  if result[:session_id] && %w[waiting background timeout].include?(result[:state].to_s)
    # wait_and_package does not echo the command back in its result, so
    # attach it here. Otherwise the promoted handle is registered with
    # command: nil and later status queries show "unknown command".
    return promote_to_handle(
      session,
      result.merge(original_command: command, rewritten_command: safe_command)
    )
  end

  result
end
462
+
463
+ # Spawn a session, write the command, collect ~2s of startup output
464
+ # to surface crashes early, then register a Registry task + watcher.
465
+ # Used by every async path (explicit async:true OR slow_command?
466
+ # auto-routing).
467
private def start_async_command(command, safe_command, cwd:, env:, max_duration:)
  # Async launch: dedicated session, short startup capture, then a
  # Registry task with a watcher. Returns the plain result if the command
  # ended during the capture window, a duplicate-task error if the
  # registry reports one, or an "accepted" handle Hash.
  pty = spawn_dedicated_session(cwd: cwd, env: env)
  return pty if pty.is_a?(Hash) && pty[:error]

  write_user_command(pty, safe_command, with_hooks: true)

  # Short capture window so an immediate crash is visible right away.
  startup = wait_and_package(
    pty,
    timeout: BACKGROUND_COLLECT_SECONDS,
    idle_ms: DISABLED_IDLE_MS,
    background: true,
    persistent: false,
    original_command: command,
    rewritten_command: safe_command
  )

  # Finished (or crashed) inside the window: hand back the sync result.
  still_alive = startup[:session_id] && %w[background waiting].include?(startup[:state].to_s)
  return startup unless still_alive

  registration = register_task_for_session(
    pty, command: command, cwd: cwd,
    max_duration: max_duration, watch: true, dedup: true
  )

  if registration.is_a?(Hash) && registration[:duplicate]
    # Same command already running: drop the session we just spawned.
    SessionManager.kill(pty.id)
    existing = registration[:handle_id]
    return {
      error: "duplicate_task",
      handle_id: existing,
      state: "running",
      message: "A background task with the same command is already running " \
               "(handle: #{existing}). " \
               "Please wait for it to complete, or kill it first with " \
               "terminal(handle_id: \"#{existing}\", kill: true)."
    }
  end

  {
    accepted: true,
    handle_id: registration,
    state: "running",
    output_file: pty.log_file,
    startup_output: startup[:output],
    message: "Async task started. You'll receive a <task-notification> when it exits. " \
             "To peek at live progress: Read(output_file). To kill: " \
             "terminal(handle_id: \"#{registration}\", kill: true)."
  }
end
522
+
523
+ # Promote a sync-hits-idle-or-timeout session to a handle. Two paths:
524
+ #
525
+ # :waiting → watch:false — LLM must come back with input/kill.
526
+ # No callback registered, no push notification.
527
+ # :timeout → watch:true — command is still running, just slow.
528
+ # Watcher monitors until exit, callback pushes a
529
+ # <task-notification> to the agent when done.
530
+ #
531
+ # For :timeout, we add accepted:true so the agent's act() path
532
+ # registers a completion callback (same as explicit async:true).
533
private def promote_to_handle(session, result)
  # Registers a still-running foreground session as a handle and builds
  # the Hash returned to the LLM (handle id, state, output so far, log
  # file, a how-to message, and accepted: true for watched handles).
  state_str = result[:state].to_s

  # NOTE(review): wait_and_package reports "waiting" / "timeout" /
  # "background" and never "idle", so both "idle" comparisons below are
  # currently never true. In practice only "timeout" is watched and
  # IDLE_MAX_DURATION is never applied. Confirm whether "waiting" was
  # meant before changing this.
  watch = (state_str == "timeout" || state_str == "idle")
  max_duration = state_str == "idle" ? IDLE_MAX_DURATION : nil

  # Prefer the command supplied by the caller. Fall back to the original
  # command recorded in the security-rewrite note so that rewritten
  # commands are still labelled.
  command = result[:original_command] ||
            result.dig(:security_rewrite, :original) ||
            result[:rewritten_command]

  handle_id = register_task_for_session(
    session,
    command: command,
    cwd: nil,
    max_duration: max_duration,
    watch: watch
  )

  # `\\n` keeps a literal backslash-n in the example call. A bare "\n"
  # here would put a real line break in the middle of the hint.
  idle_msg = "Command is waiting for input (idle). To answer: " \
             "terminal(handle_id: \"#{handle_id}\", input: \"y\\n\"). " \
             "To kill: terminal(handle_id: \"#{handle_id}\", kill: true)."
  timeout_msg = "Command exceeded sync timeout but is still running. " \
                "You'll be notified when it finishes. " \
                "To peek: Read(output_file). " \
                "To kill: terminal(handle_id: \"#{handle_id}\", kill: true)."

  result_hash = {
    handle_id: handle_id,
    state: result[:state],
    output: result[:output],
    output_file: session.log_file,
    message: watch ? timeout_msg : idle_msg,
    bytes_read: result[:bytes_read]
  }
  # accepted: true marks handles for which a completion notification
  # will be sent (watched handles only).
  result_hash[:accepted] = true if watch
  result_hash
end
566
+
567
+ # Register a Registry task for a running session. Stores the
568
+ # internal SessionManager id in metadata so handle ops can look up
569
+ # the PTY later. Watcher is optional — async paths want it (for
570
+ # push notifications); sync-promoted handles don't (LLM is driving
571
+ # synchronously). Returns the handle_id.
572
private def register_task_for_session(session, command:, cwd:, max_duration:, watch:, dedup: false)
  # Creates the Registry task for a live session and, when `watch` is
  # set, starts the background watcher. Returns the handle id, or the
  # registry's duplicate Hash untouched.
  effective_max = max_duration || BACKGROUND_TASK_MAX_DURATION

  task_metadata = {
    command: command,
    cwd: cwd,
    max_duration: effective_max,
    agent_session_id: @agent_session_id,
    internal_session_id: session.id, # lets handle operations find the PTY later
    watched: watch
  }

  outcome = BackgroundTaskRegistry.create_task(
    type: "terminal",
    metadata: task_metadata,
    on_cancel: build_session_cancel_hook(session),
    dedup_key: (dedup ? "#{@agent_session_id}:#{command}" : nil)
  )

  duplicate = outcome.is_a?(Hash) && outcome[:duplicate]
  return outcome if duplicate

  start_background_watcher(session, outcome, command: command, max_duration: effective_max) if watch
  outcome
end
600
+
601
+ # Cancel hook used by Registry to kill the underlying process +
602
+ # close fds when a task is cancelled. Same logic for both watched
603
+ # (async) and unwatched (sync-promoted) handles.
604
private def build_session_cancel_hook(session)
  # Returns a one-argument lambda that tears the session down: TERM, a
  # short grace period, KILL, close the three IO objects, then drop the
  # session from SessionManager.
  #
  # Every step is isolated so a failure in one (for example `writer.close`
  # raising) can no longer skip the remaining steps and leak file
  # descriptors. This matches how cleanup_session guards each call.
  attempt = lambda do |&step|
    begin
      step.call
    rescue StandardError
      nil # best-effort cleanup: ignore and carry on with the next step
    end
  end

  lambda do |_task|
    attempt.call { SessionManager.kill(session.id, signal: "TERM") }
    sleep 0.1 # grace period between TERM and KILL
    attempt.call { Process.kill("KILL", session.pid) }
    attempt.call { session.writer.close }
    attempt.call { session.reader.close }
    attempt.call { session.log_io.close }
    SessionManager.forget(session.id)
  end
end
623
+
624
+ # Look up the PTY session backing a handle_id (UUID). Returns nil
625
+ # if the handle is unknown, already completed, or the session was
626
+ # forgotten by SessionManager.
627
private def session_for_handle(handle_id)
  # Resolves a handle to whatever SessionManager.refresh returns for its
  # internal session id. Yields nil when the task is missing, is not in
  # the "running" state, or carries no internal session id.
  entry = BackgroundTaskRegistry.get(handle_id)
  running = entry && entry[:status] == "running"
  pty_id = running ? entry[:metadata]&.[](:internal_session_id) : nil

  pty_id ? SessionManager.refresh(pty_id) : nil
end
637
+
638
+ # ---------------------------------------------------------------------
639
+ # 2) Continue a handle: send input + poll for new output
640
+ # ---------------------------------------------------------------------
641
private def do_continue_handle(handle_id, input, timeout:, idle_ms: DEFAULT_IDLE_MS)
  # Writes `input` (if any) to the handle's PTY and polls for new output.
  # Returns the packaged result with :session_id removed. :handle_id is
  # re-attached only while no exit code has been reported.
  pty = session_for_handle(handle_id)
  unless pty
    return { error: "Handle #{handle_id} not found (already finished or killed)." }
  end

  # Record activity so the sweep thread leaves a handle alone while the
  # LLM is actively driving it.
  BackgroundTaskRegistry.record_activity(handle_id)

  if %w[exited killed].include?(pty.status)
    # Process is already gone: complete the Registry task and clean up.
    final = { exit_code: pty.exit_code, output: "", state: "exited" }
    BackgroundTaskRegistry.complete(handle_id, final)
    cleanup_session(pty)
    return { handle_id: handle_id, state: "exited", exit_code: pty.exit_code,
             error: "Handle #{handle_id} has already #{pty.status}." }
  end

  keystrokes = input.to_s
  unless keystrokes.empty?
    pty.mutex.synchronize { pty.writer.write(normalize_input_for_pty(keystrokes)) }
  end

  packaged = wait_and_package(pty, timeout: timeout, idle_ms: idle_ms)

  # The command ended during this poll: tell the Registry. For watched
  # handles the watcher may report the same exit; the original notes
  # that complete() is idempotent, so the double report is harmless.
  if packaged[:exit_code]
    BackgroundTaskRegistry.complete(
      handle_id,
      { exit_code: packaged[:exit_code], output: packaged[:output], state: "exited" }
    )
  end

  # The internal session id is never shown to the caller; the public
  # handle id is kept only while the command is still alive.
  packaged.delete(:session_id)
  packaged[:handle_id] = handle_id unless packaged[:exit_code]
  packaged
end
683
+
684
+ # `\n` is a Unix newline, not the "Enter key". Inside cooked-mode PTYs
685
+ # the kernel's ICRNL setting converts `\r` → `\n` on input, so `\r`
686
+ # behaves identically to `\n` for ordinary shell/`read`/`input()` use.
687
+ # BUT raw-mode TUI apps (curses-style installers, menus) read raw bytes
688
+ # and only recognize `\r` as Enter; `\n` gets inserted as a literal
689
+ # character into search fields, text inputs, etc.
690
+ #
691
+ # `\r` is therefore the only byte that means "Enter" in BOTH modes, so
692
+ # we transparently translate `\n` → `\r` before writing to the PTY.
693
+ # AI callers never need to know the difference.
694
private def normalize_input_for_pty(str)
  # Every line feed becomes a carriage return; all other bytes pass
  # through untouched.
  str.gsub("\n") { "\r" }
end
697
+
698
+ # ---------------------------------------------------------------------
699
+ # 3) Kill a handle — cancel the Registry task (which fires the
700
+ # on_cancel hook to TERM/KILL the underlying process and close fds).
701
+ # ---------------------------------------------------------------------
702
private def do_kill_handle(handle_id)
  # Asks the Registry to cancel the task and reports whether it did.
  cancelled = BackgroundTaskRegistry.cancel(handle_id, reason: "Killed by user via terminal tool.")
  return { error: "Handle #{handle_id} not found or already completed." } unless cancelled

  { killed: true, handle_id: handle_id, message: "Handle #{handle_id} cancelled." }
end
710
+
711
+ # ---------------------------------------------------------------------
712
+ # 4) Query a handle — current state without sending input or waiting.
713
+ # ---------------------------------------------------------------------
714
private def do_query_handle(handle_id)
  # Snapshot of a handle's Registry entry. No input is sent and nothing
  # is waited on. Keys whose value is nil are dropped from the reply.
  entry = BackgroundTaskRegistry.get(handle_id)
  return { error: "Handle #{handle_id} not found." } unless entry

  # Record activity so the sweep thread sees the handle is still in use.
  BackgroundTaskRegistry.record_activity(handle_id)

  born = entry[:created_at]
  age = born ? (Time.now - born).round : nil
  pty = session_for_handle(handle_id)

  report = {
    handle_id: handle_id,
    state: entry[:status],
    command: entry[:metadata]&.[](:command),
    started_at: born&.iso8601,
    elapsed_seconds: age,
    output_file: pty&.log_file,
    exit_code: entry.dig(:result, :exit_code),
    message: status_message_for_handle(entry, age)
  }
  report.compact
end
735
+
736
private def status_message_for_handle(task, elapsed)
  # Builds the human/LLM-readable status line for a Registry task.
  #
  # task    - Hash with :status, optional :metadata (:command, :watched)
  #           and optional :result (:exit_code).
  # elapsed - seconds since creation, or nil when unknown.
  status = task[:status]
  metadata = task[:metadata] || {}
  cmd = metadata[:command] || "unknown command"
  # The leading space lives inside time_str so that a nil `elapsed` no
  # longer leaves a stray space in front of the colon.
  time_str = elapsed ? " (running for #{elapsed}s)" : ""

  case status
  when "running"
    if metadata[:watched] == false
      # Handles registered with watched: false have no watcher, so
      # promising a notification would leave the caller waiting forever.
      "Handle is still running#{time_str}: #{cmd}. This handle is not watched, so no notification " \
        "will be sent — send input with terminal(handle_id:, input:) or stop it with " \
        "terminal(handle_id:, kill: true)."
    else
      "Handle is still running#{time_str}: #{cmd}. You will be notified when it completes. DO NOT query this handle again — just wait for the notification."
    end
  when "completed"
    exit_code = (task[:result] || {})[:exit_code]
    if exit_code.nil?
      "Handle completed with unknown status: #{cmd}."
    elsif exit_code.zero?
      "Handle completed successfully: #{cmd}."
    else
      "Handle failed with exit code #{exit_code}: #{cmd}."
    end
  when "cancelled"
    "Handle was cancelled: #{cmd}."
  else
    "Handle status: #{status}#{time_str}: #{cmd}."
  end
end
760
+
761
+ # =====================================================================
762
+ # Plumbing
763
+ # =====================================================================
764
+
765
+ # Wait for the current command to either (a) finish with a marker,
766
+ # (b) go idle on a prompt, or (c) hit the timeout. Package accordingly.
767
+ #
768
+ # Behaviour matrix:
769
+ #
770
+ # state | background: false | background: true
771
+ # ---------+------------------------------+-----------------------------
772
+ # :matched | exit_code (finished) | exit_code (crashed fast)
773
+ # :eof | exit_code (child gone) | exit_code (crashed fast)
774
+ # :idle | session_id, state=waiting | — (idle disabled)
775
+ # :timeout | session_id, state=timeout | session_id, state=background
776
private def wait_and_package(session, timeout:, idle_ms: DEFAULT_IDLE_MS,
                             background: false, persistent: false,
                             original_command: nil, rewritten_command: nil)
  # Waits on the session via read_until_marker, then turns everything
  # logged since session.read_offset into a result Hash.
  #
  #   :matched / :eof   -> { output:, exit_code:, bytes_read:, ... }
  #   :idle / :timeout  -> { output:, session_id:, state:, bytes_read:, ... }
  #
  # nil-valued keys are removed by `.compact`. The `case` below has no
  # `else`, so any other state makes this method return nil.
  # NOTE(review): callers index the result directly — confirm that
  # read_until_marker can only yield the four states handled here.
  start_offset = session.read_offset

  _before, code, state = read_until_marker(session, timeout: timeout, idle_ms: idle_ms)

  new_offset = log_size(session)
  raw = read_log_slice(session.log_file, start_offset, new_offset)
  cleaned = OutputCleaner.clean(raw)
  # Only the first marker occurrence is removed (`sub`, not `gsub`).
  cleaned = cleaned.sub(session.marker_regex, "").rstrip if session.marker_regex
  cleaned = strip_command_echo(cleaned, marker_token: session.marker_token)
  # Per-line cap first: one minified JSON blob shouldn't blow the
  # whole 4 KB budget. MUST run before overflow spill so the file
  # on disk also has the long lines shortened (otherwise grep-ing
  # the spill file returns thousand-char lines the LLM chokes on).
  cleaned = truncate_long_lines(cleaned)
  truncated = false
  overflow_file = nil
  # Despite the name this is a BYTE count (bytesize); the limit check and
  # the preview slice below are byte-based as well.
  total_chars = cleaned.bytesize
  if cleaned.bytesize > MAX_LLM_OUTPUT_CHARS
    # Spill the FULL cleaned output to a sidecar file before we chop,
    # so the LLM can cat/grep/tail it in a follow-up tool call.
    overflow_file = spill_overflow_file(cleaned, session_id: session.id)

    # byteslice may cut through the middle of a multi-byte char, which
    # leaves the result as invalid UTF-8. Re-scrub after truncation so
    # everything downstream (JSON.generate, format_result, UI) gets a
    # guaranteed-valid UTF-8 string.
    preview = cleaned.byteslice(0, OVERFLOW_PREVIEW_CHARS)
    preview.force_encoding(Encoding::UTF_8)
    preview = preview.scrub("?") unless preview.valid_encoding?

    # Two notices: one pointing at the spill file, one for the case where
    # spill_overflow_file returned nothing.
    notice = if overflow_file
      "\n\n...[Output truncated for LLM: showing first #{OVERFLOW_PREVIEW_CHARS} " \
      "of #{total_chars} chars. Full output saved to: #{overflow_file} — " \
      "use `grep`, `head`, or `tail` on this path to search the rest.]"
    else
      "\n\n...[output truncated at #{OVERFLOW_PREVIEW_CHARS} chars " \
      "(overflow file unavailable; total was #{total_chars} chars)]"
    end

    cleaned = preview + notice
    truncated = true
  end
  # Advance the read offset so the next call starts after what was just
  # packaged.
  SessionManager.advance_offset(session.id, new_offset)

  # Note rewrites so the agent notices if Security changed the command.
  # (The local shadows the method name; the right-hand side is still the
  # method call because it has an argument list.)
  rewrite_note = rewrite_note(original_command, rewritten_command)

  case state
  when :matched, :eof
    # Prefer the code parsed from the marker; fall back to the session's.
    exit_code = code || session.exit_code
    if persistent && state == :matched && session_healthy?(session)
      # Command finished cleanly — return the shell to the pool so
      # the next call reuses it (no cold-start cost).
      stored = PersistentSessionPool.instance.release(session)
      cleanup_session(session) unless stored
    else
      cleanup_session(session)
    end
    if xcode_tools_missing?(cleaned)
      # Replace the raw output with an actionable hint and force a
      # non-zero exit code.
      cleaned = "Xcode Command Line Tools are not installed.\n" \
                "Run: bash ~/.octo/scripts/install_system_deps.sh\n" \
                "Then retry the original command."
      exit_code = 1
    end
    {
      output: cleaned,
      exit_code: exit_code,
      bytes_read: new_offset - start_offset,
      output_truncated: truncated,
      full_output_file: overflow_file,
      security_rewrite: rewrite_note
    }.compact
  when :idle, :timeout
    # Command is still running interactively. If this was the persistent
    # session, we must release it from pool ownership — the caller now
    # owns it for follow-up input/kill, and the pool will spawn a fresh
    # one on the next acquire.
    PersistentSessionPool.instance.discard if persistent
    {
      output: cleaned,
      # NB: session_id here is the INTERNAL SessionManager int id, not
      # exposed to the LLM. Caller paths (do_start / do_continue_handle /
      # start_async_command) translate it into a handle_id (UUID) via
      # the Registry before returning to the LLM.
      session_id: session.id,
      # `background: true` wins over the idle/timeout distinction.
      state: background ? "background" : (state == :idle ? "waiting" : "timeout"),
      bytes_read: new_offset - start_offset,
      output_truncated: truncated,
      full_output_file: overflow_file,
      security_rewrite: rewrite_note
    }.compact
  end
end
872
+
873
private def xcode_tools_missing?(output)
  # True only when the output contains BOTH marker phrases.
  return false if output.nil? || output.empty?

  ["xcode-select", "No developer tools were found"].all? { |needle| output.include?(needle) }
end
877
+
878
private def session_healthy?(session)
  # A session counts as healthy when it exists, is not marked
  # exited/killed, and a signal-0 probe of its pid does not raise ESRCH.
  # Any other probe error is treated as "alive".
  return false if !session || %w[exited killed].include?(session.status.to_s)

  alive = begin
    Process.kill(0, session.pid) # signal 0: existence check only
    true
  rescue Errno::ESRCH
    false
  rescue StandardError
    true
  end
  alive
end
890
+
891
+ # The shell may echo the wrapper line we injected (`{ USER_CMD; }; ...;
892
+ # printf "__OCTO_DONE_..."`) before running it. When stty -echo is
893
+ # honoured (bash/fresh pty) this is a no-op; when it isn't (zsh ZLE
894
+ # sometimes re-enables echo on reuse, or the user sent input to a
895
+ # running session) we strip the wrapper echo wherever it appears.
896
+ #
897
+ # Observed variants of the echoed wrapper:
898
+ #
899
+ # 1) Multi-line, starting the buffer (PTY in cooked mode, expanded
900
+ # \n escapes inside printf's double-quoted format string):
901
+ # { USER_CMD
902
+ # }; __octo_ec=$?; printf "
903
+ # __OCTO_DONE_<token>_%s__
904
+ # " "$__octo_ec"
905
+ #
906
+ # 2) Single-line / partially-truncated (PTY width wrap or partial
907
+ # char drop ate the leading `{` or first chars of the command):
908
+ # ails runner foo.rb ... }; __octo_ec=$?; printf " __OCTO_DONE_<token>_%s__ " "$__octo_ec"
909
+ #
910
+ # 3) Embedded mid-stream when re-echoed (e.g. after session re-use
911
+ # or after a user input: call landed in a shell that re-enabled
912
+ # echo). Same shape as (1) or (2) but not anchored to the start.
913
+ #
914
+ # We handle all three by running two passes:
915
+ # * an anchored multi-line strip (keeps the legacy behaviour and is
916
+ # cheapest when stty -echo silently failed);
917
+ # * a token-aware global strip that removes any remaining echoed
918
+ # wrapper fragment anywhere in the buffer. The token makes this
919
+ # safe: the real completion marker was already removed via
920
+ # session.marker_regex above, so any surviving occurrence of
921
+ # __OCTO_DONE_<token>_ is by definition an echoed wrapper.
922
private def strip_command_echo(text, marker_token: nil)
  # Removes echoed copies of the injected wrapper line from `text`.
  # nil / empty input is returned untouched.
  return text if text.nil? || text.empty?

  # Ordered pass table: [String method, pattern]. Every match is replaced
  # with the empty string, and each pass sees the output of the one
  # before it.
  passes = []

  # Pass 0 — echoed hooks prefix (`{ for __octo_f ...; } >/dev/null 2>&1`
  # or the `__octo_pc` variant), anywhere in the buffer.
  passes << [
    :gsub,
    /\{\s*(?:for\s+__octo_f[^}]*?unset\s+__octo_f[^}]*?|if\s+\[[^}]*?__octo_pc[^}]*?unset\s+__octo_pc[^}]*?)\}\s*>\s*\/dev\/null\s+2>&1;?\n?/m
  ]

  # Pass 1 — full wrapper echoed at the very start of the buffer, possibly
  # across several real newlines. First occurrence only.
  passes << [:sub, /\A\{.*?"\$__octo_ec"\s*\n?/m]

  # Pass 2 — token-aware strips. Only run when a non-empty session token
  # is available, so unrelated output is never touched.
  unless !marker_token || marker_token.empty?
    token_re = Regexp.escape(marker_token)

    # 2a. Multi-line shape: from a line-leading `{` through the marker
    #     line to the closing `"$__octo_ec"`.
    passes << [
      :gsub,
      /(?:^|(?<=\n))\{[^\n]*\n(?:[^\n]*\n)*?[^\n]*__OCTO_DONE_#{token_re}_[^\n]*\n[^\n]*"\$__octo_ec"[^\n]*\n?/
    ]

    # 2b. Single-line shape: the whole line holding the
    #     `}; __octo_ec=$?; printf ... "$__octo_ec"` sequence.
    passes << [
      :gsub,
      /[^\n]*\}; *__octo_ec=\$\?; *printf[^\n]*__OCTO_DONE_#{token_re}_[^\n]*"\$__octo_ec"[^\n]*\n?/
    ]

    # 2c. Any line still carrying the literal `%s` format marker.
    passes << [:gsub, /^.*__OCTO_DONE_#{token_re}_%s__.*\n?/]
  end

  # Pass 3 — token-independent: everything from the `}; __octo_ec=$?`
  # pivot to the next `"$__octo_ec"`, newlines included.
  passes << [:gsub, /[^\n]*\}; *__octo_ec=\$\?.*?"\$__octo_ec"[^\n]*\n?/m]

  # Pass 4 — bare pivot line with no printf tail left at all.
  passes << [:gsub, /[^\n]*\}; *__octo_ec=\$\?;?[^\n]*\n?/]

  passes.reduce(text) { |buffer, (op, pattern)| buffer.public_send(op, pattern, "") }
end
997
+
998
+ # NOTE: background_hint helper removed — the unified handle-based API
999
+ # composes per-context messages in do_start / promote_to_handle /
1000
+ # start_async_command directly, all using the handle_id (UUID) the
1001
+ # LLM should reference.
1002
+
1003
# Build the note attached to a result when the safety layer altered the
# command. Returns nil when either side is missing or when the two
# commands are equal after trimming surrounding whitespace.
private def rewrite_note(original, rewritten)
  unchanged = original.nil? || rewritten.nil? || original.strip == rewritten.strip
  return if unchanged

  note = { original: original, rewritten: rewritten }
  note[:message] = "Command was rewritten by the safety layer."
  note
end
1012
+
1013
# Tear a session down: TERM via the SessionManager, a short grace
# period, KILL on the pid, close the three IO handles, then drop the
# session from the manager. Every step except the final `forget` is
# best-effort — a failure in one must not prevent the next.
private def cleanup_session(session)
  begin
    SessionManager.kill(session.id, signal: "TERM")
  rescue StandardError
    nil
  end

  sleep 0.05

  begin
    Process.kill("KILL", session.pid)
  rescue StandardError
    nil
  end

  %i[writer reader log_io].each do |io_name|
    begin
      session.public_send(io_name).close
    rescue StandardError
      nil
    end
  end

  SessionManager.forget(session.id)
end
1022
+
1023
+ # -----------------------------------------------------------------
1024
+ # Background task watcher (fire_and_forget mode)
1025
+ # -----------------------------------------------------------------
1026
+
1027
+ # Spawn a watcher thread that waits for the background session to
1028
+ # finish, then packages the result and notifies the registry.
1029
+ # The session is cleaned up after completion (success or crash).
1030
# Start a named watcher thread for a background session. The thread
# blocks in read_until_marker (idle-return disabled) for up to
# `max_duration`, packages the outcome, and reports it to
# BackgroundTaskRegistry under `handle_id`. Any StandardError is
# reported as an error result instead. The session is always cleaned
# up afterwards. Returns the Thread.
private def start_background_watcher(session, handle_id, command: nil, max_duration: BACKGROUND_TASK_MAX_DURATION)
  Thread.new do
    Thread.current.name = "bg-terminal-#{handle_id[0, 8]}"
    begin
      offset_at_start = session.read_offset
      _before, code, state = read_until_marker(session, timeout: max_duration, idle_ms: DISABLED_IDLE_MS)

      payload = package_background_result(session, offset_at_start, code, state)
      payload.merge!(handle_id: handle_id, command: command, output_file: session.log_file)
      BackgroundTaskRegistry.complete(handle_id, payload)
    rescue => e
      failure = {
        error: "Background watcher failed: #{e.class}: #{e.message}",
        exit_code: nil,
        handle_id: handle_id,
        command: command,
        output_file: session.log_file
      }
      BackgroundTaskRegistry.complete(handle_id, failure)
    ensure
      cleanup_session(session)
    end
  end
end
1060
+
1061
+ # Package the final result of a background session for the registry.
1062
+ # Mirrors wait_and_package but without session pooling logic.
1063
# Turn the log bytes written since `start_offset` into the result hash
# handed to the registry: cleaned output, exit code, state and byte
# count. Output larger than MAX_LLM_OUTPUT_CHARS bytes is spilled to a
# sidecar file and replaced by a preview plus a truncation notice.
private def package_background_result(session, start_offset, code, state)
  end_offset = log_size(session)
  text = OutputCleaner.clean(read_log_slice(session.log_file, start_offset, end_offset))
  text = text.sub(session.marker_regex, "").rstrip if session.marker_regex
  text = strip_command_echo(text, marker_token: session.marker_token)
  text = truncate_long_lines(text)

  # An exit code is only meaningful once the command finished.
  exit_code = %i[matched eof].include?(state) ? (code || session.exit_code) : nil

  spill_path = nil
  if text.bytesize > MAX_LLM_OUTPUT_CHARS
    spill_path = spill_overflow_file(text, session_id: session.id)
    head = text.byteslice(0, OVERFLOW_PREVIEW_CHARS)
    # byteslice may cut a multi-byte character in half; repair it.
    head.force_encoding(Encoding::UTF_8)
    head = head.scrub("?") unless head.valid_encoding?
    suffix =
      if spill_path
        "\n\n...[Output truncated for LLM: showing first #{OVERFLOW_PREVIEW_CHARS} " \
          "of #{text.bytesize} chars. Full output saved to: #{spill_path}]"
      else
        "\n\n...[output truncated at #{OVERFLOW_PREVIEW_CHARS} chars]"
      end
    text = head + suffix
  end

  packaged = {
    output: text,
    exit_code: exit_code,
    state: state.to_s,
    bytes_read: end_offset - start_offset
  }
  packaged[:full_output_file] = spill_path if spill_path
  packaged[:error] = "Process exited without exit code" if state == :eof && exit_code.nil?
  packaged
end
1102
+
1103
# Spawn options for the working directory: `{ chdir: cwd }` when `cwd`
# is an existing directory, otherwise an empty hash.
private def chdir_args(cwd)
  return {} unless cwd && Dir.exist?(cwd)

  { chdir: cwd }
end
1106
+
1107
+ # ---------------------------------------------------------------------
1108
+ # Spawn a PTY-backed shell session and install our marker.
1109
+ #
1110
+ # Two flavours:
1111
+ # * persistent — uses the user's real shell with full rc loading
1112
+ # (`zsh -l -i` / `bash -l -i`) so shell functions, aliases, PATH
1113
+ # tweaks etc. are all available. Cold-starts in ~1s which is why
1114
+ # we aggressively reuse these via PersistentSessionPool.
1115
+ # * dedicated — minimal shell with no rc (`bash --noprofile --norc
1116
+ # -i`). Used for background commands (rails s, etc.) that will
1117
+ # occupy the PTY for a long time, and as a fallback when a
1118
+ # persistent spawn fails. Starts in ~50ms.
1119
+ # ---------------------------------------------------------------------
1120
+
1121
+ # Try to acquire a persistent session. Returns [session, reused] or
1122
+ # [nil, false] on any failure (caller falls back to dedicated).
1123
# Ask the pool for a persistent session. Any failure (SpawnFailed or
# any other StandardError) is reported as `[nil, false]`.
private def acquire_persistent_session(cwd:, env:)
  begin
    PersistentSessionPool.instance.acquire(runner: self, cwd: cwd, env: env)
  rescue SpawnFailed, StandardError
    [nil, false]
  end
end
1130
+
1131
+ # Public-ish: called by PersistentSessionPool to build a new long-lived
1132
+ # shell. Uses the user's SHELL with login+interactive flags so that all
1133
+ # rc hooks (nvm, rbenv, brew shellenv, mise, conda, etc.) are loaded.
1134
# Spawn a new long-lived shell using the arguments chosen by
# persistent_shell_args for the user's shell. Returns the session;
# raises SpawnFailed when spawn_shell hands back an error hash.
def spawn_persistent_session
  shell_path, shell_kind = user_shell
  outcome = spawn_shell(
    args: persistent_shell_args(shell_path, shell_kind),
    shell_name: shell_kind,
    command: "<persistent>",
    cwd: nil,
    env: {}
  )
  return outcome unless outcome.is_a?(Hash)

  raise SpawnFailed, outcome[:error]
end
1142
+
1143
+ # Dedicated one-shot shell — no rc, fast startup. Used for background
1144
+ # commands and as a fallback.
1145
# Spawn a bare bash (no profile, no rc) in `cwd` with `env` (nil is
# treated as an empty hash). Returns whatever spawn_shell returns.
private def spawn_dedicated_session(cwd:, env:)
  spawn_shell(
    args: %w[/bin/bash --noprofile --norc -i],
    shell_name: "bash",
    command: "<dedicated>",
    cwd: cwd,
    env: env || {}
  )
end
1150
+
1151
+ # Returns [shell_path, shell_name]. Falls back to /bin/bash if SHELL
1152
+ # isn't set or the binary isn't executable.
1153
# Resolve the shell to use as `[path, name]`. `path` is $SHELL when it
# is set and executable, else "/bin/bash". `name` is the basename when
# that is "zsh" or "bash", and "bash" for anything else.
private def user_shell
  candidate = ENV["SHELL"].to_s
  usable = !candidate.empty? && File.executable?(candidate)
  path = usable ? candidate : "/bin/bash"

  base = File.basename(path)
  [path, %w[zsh bash].include?(base) ? base : "bash"]
end
1162
+
1163
# Choose the argv for a persistent shell.
#
# `shell` is the binary path and `shell_name` the normalised flavour
# from user_shell. user_shell reports "bash" for every shell it does
# not recognise (fish, tcsh, ...) while keeping the original path, so
# `shell_name` alone cannot tell whether `shell` really is a bash/zsh
# binary. We therefore also check the binary's basename (allowing
# versioned names such as `bash-5.2` or `zsh5`); only a genuine
# zsh/bash is started as a login+interactive shell. Everything else
# gets the bare /bin/bash fallback instead of being launched with
# flags and a wrapper syntax it may not understand.
private def persistent_shell_args(shell, shell_name)
  real_flavour = File.basename(shell.to_s)[/\A(?:zsh|bash)(?=[-.\d]|\z)/]

  if %w[zsh bash].include?(shell_name) && real_flavour
    [shell, "-l", "-i"]
  else
    ["/bin/bash", "--noprofile", "--norc", "-i"]
  end
end
1171
+
1172
+ # Core spawn: PTY + reader thread + marker install.
1173
# Core spawn: PTY + reader thread + marker install.
#
# Starts `args` under a PTY with a controlled environment, registers
# the session with SessionManager, installs the completion marker and
# waits for the first marker to confirm the shell is ready.
#
# Returns the registered session, or `{ error: String }` when the
# first marker is not seen. Exceptions raised by PTY.spawn itself
# propagate to the caller; before they do, the log file handle is
# closed and the inherited fd's close_on_exec flag is restored.
private def spawn_shell(args:, shell_name:, command:, cwd:, env:)
  # Per-project trash dir — exported as OCTO_TRASH_DIR below so the
  # safe-rm shell function knows where to move deleted files.
  trash_dir =
    begin
      Octo::TrashDirectory.new(cwd || Dir.pwd).trash_dir
    rescue StandardError
      nil
    end

  spawn_env = {
    "TERM" => "xterm-256color",
    "PS1" => "",
    # Interactive pagers would block the PTY waiting for input.
    "PAGER" => "cat",
    "GIT_PAGER" => "cat",
    # First line of defence against polluting the user's shell history;
    # rc files may override these, so install_marker resets them again.
    "HISTFILE" => "/dev/null",
    "HISTSIZE" => "0",
    "SAVEHIST" => "0"
  }
  spawn_env["OCTO_TRASH_DIR"] = trash_dir if trash_dir
  (env || {}).each { |k, v| spawn_env[k.to_s] = v.to_s }

  log_file = SessionManager.allocate_log_file
  log_io = File.open(log_file, "wb")

  # Keep the child from inheriting the server's listening socket:
  # temporarily mark the inherited fd close-on-exec around the spawn.
  inherited_io = nil
  was_cloexec = nil
  inherited_fd = ENV["OCTO_INHERIT_FD"].to_i
  if inherited_fd > 0
    begin
      inherited_io = IO.for_fd(inherited_fd)
      inherited_io.autoclose = false
      was_cloexec = inherited_io.close_on_exec?
      inherited_io.close_on_exec = true
    rescue StandardError
      inherited_fd = 0
    end
  end

  begin
    reader, writer, pid = PTY.spawn(
      spawn_env, *args, chdir_args(cwd)
    )
  rescue StandardError => spawn_error
    # Nothing will ever read this log; don't leak the handle.
    begin
      log_io.close
    rescue StandardError
      nil
    end
    raise spawn_error
  ensure
    # Restore the parent's original flag whether or not the spawn
    # succeeded, so a failed spawn cannot leave the server socket
    # marked close-on-exec.
    if inherited_fd > 0
      begin
        inherited_io.close_on_exec = was_cloexec
      rescue StandardError
        # best-effort
      end
    end
  end
  reader.sync = true
  writer.sync = true

  begin
    writer.winsize = [40, 120]
  rescue StandardError
    # unsupported on some platforms
  end

  marker_token = SecureRandom.hex(8)
  reader_thread = start_reader_thread(reader, log_io)

  session = SessionManager.register(
    pid: pid, command: command, cwd: cwd || Dir.pwd,
    log_file: log_file, log_io: log_io,
    reader: reader, writer: writer,
    reader_thread: reader_thread,
    mode: "shell", marker_token: marker_token,
    shell_name: shell_name
  )

  # Let the shell print its startup noise, drain it, then install the
  # marker so the two don't collide.
  sleep 0.2
  drain_any(session, timeout: 2.5)
  install_marker(session)
  _before, _code, state = read_until_marker(session, timeout: 10, idle_ms: DISABLED_IDLE_MS)
  unless state == :matched
    cleanup_session(session)
    return { error: "Failed to initialize terminal session (marker state=#{state}, shell=#{shell_name})" }
  end
  session.read_offset = log_size(session)
  SessionManager.advance_offset(session.id, session.read_offset)

  SessionManager.mark_running(session.id)
  session
end
1280
+
1281
+ # Background thread: drain PTY → log file.
1282
# Start a thread that copies everything readable from `reader` into
# `log_io`, flushing after each chunk. The loop ends when either IO is
# closed, on EOF, or on any read error; `log_io` is closed on the way
# out. Write/flush failures are ignored. Returns the Thread.
private def start_reader_thread(reader, log_io)
  Thread.new do
    begin
      until reader.closed? || log_io.closed?
        begin
          next unless IO.select([reader], nil, nil, 0.5)

          data = reader.read_nonblock(4096)
          begin
            log_io.write(data)
          rescue StandardError
            nil
          end
          begin
            log_io.flush
          rescue StandardError
            nil
          end
        rescue IO::WaitReadable
          next
        rescue StandardError
          # EOFError, Errno::EIO, IOError and anything else all end the loop.
          break
        end
      end
    ensure
      begin
        log_io.close
      rescue StandardError
        nil
      end
    end
  end
end
1304
+
1305
+ # Install minimal shell setup (runs AFTER rc has loaded):
1306
+ # - disable history (HISTFILE=/dev/null + unset HISTFILE)
1307
+ # - disable input echo (stty -echo)
1308
+ # - empty PS1/PS2 so prompt lines don't add noise
1309
+ #
1310
+ # NOTE: we deliberately do NOT use PROMPT_COMMAND (bash) / precmd (zsh)
1311
+ # to emit the completion marker. Those hooks fight zsh's ZLE, iTerm2
1312
+ # shell integration, etc. Instead, every user command is wrapped with
1313
+ # an inline printf marker — see `write_user_command`. Same bytes work
1314
+ # in bash, zsh, and anything POSIX-ish.
1315
# Write the post-rc setup to the session, in this order:
#   1. one line that disables history, turns off history expansion and
#      PTY echo, empties PS1/PS2 and forces PAGER/GIT_PAGER to cat;
#   2. a `source` of SAFE_RM_PATH, only if that file exists;
#   3. a no-op (`:`) through write_user_command, whose marker tells the
#      caller the shell is ready.
# Each write happens under the session mutex.
private def install_marker(session)
  send_line = ->(text) { session.mutex.synchronize { session.writer.write(text) } }

  setup_steps = [
    "HISTFILE=/dev/null",
    "HISTSIZE=0",
    "SAVEHIST=0",
    "unset HISTFILE 2>/dev/null",
    "set +o histexpand 2>/dev/null",
    "stty -echo 2>/dev/null",
    'PS1=""',
    'PS2=""',
    "export PAGER=cat",
    "export GIT_PAGER=cat"
  ]
  send_line.call(setup_steps.join("; ") + "\n")

  # Single-line `source` rather than an inline multi-line function body.
  send_line.call(%Q{source #{SAFE_RM_PATH} 2>/dev/null || true\n}) if File.exist?(SAFE_RM_PATH)

  write_user_command(session, ":")
end
1344
+
1345
+ # Wrap a user command so we can reliably detect its completion + exit
1346
+ # code regardless of shell flavour (bash/zsh/sh).
1347
+ #
1348
+ # The command runs in a group (`{ ...; }`) so trailing pipelines still
1349
+ # complete before the marker fires. `$?` inside the group captures the
1350
+ # user command's exit code; we stash it in `__octo_ec` immediately so
1351
+ # intervening shell activity doesn't clobber it before printf runs.
1352
+ #
1353
+ # Leading `\n` in the printf format ensures the marker starts on its
1354
+ # own line even when the user command ended without a trailing newline.
1355
+ #
1356
+ # `with_hooks:` — when true and the session is a real rc-loaded zsh/
1357
+ # bash, we run the shell's `chpwd_functions` + `precmd_functions`
1358
+ # before the user command. This mimics what the shell would do at
1359
+ # every prompt in an interactive session, and is what makes mise /
1360
+ # direnv / conda-auto-activate / pyenv-virtualenv / autoenv etc.
1361
+ # actually push their tools onto PATH.
1362
+ #
1363
+ # Why this is necessary:
1364
+ # Most of these tools register themselves via precmd/chpwd hooks
1365
+ # when you `eval "$(tool activate zsh)"` in ~/.zshrc. In a real
1366
+ # terminal, those hooks fire every time the shell draws a new
1367
+ # prompt. Our persistent session never draws a prompt (we drive
1368
+ # it by writing one line at a time and reading back our marker),
1369
+ # so the hooks never run — which is why commands like `node -v`
1370
+ # come back as "command not found" even though ~/.zshrc was
1371
+ # loaded at spawn time.
1372
+ #
1373
+ # We don't run hooks for internal bookkeeping commands (source rc,
1374
+ # env reset, cd, marker install) — those use with_hooks: false.
1375
# Send `command` to the session wrapped as
#   { command
#   }; __octo_ec=$?; printf "\n__OCTO_DONE_<token>_%s__\n" "$__octo_ec"
# so that its exit code is printed inside a marker line once it
# finishes. With `with_hooks: true` the text from hooks_prefix_for is
# placed in front. The write happens under the session mutex.
private def write_user_command(session, command, with_hooks: false)
  prefix = with_hooks ? hooks_prefix_for(session) : ""
  marker_format = "\n__OCTO_DONE_#{session.marker_token}_%s__\n"
  wrapped = "#{prefix}{ #{command}\n}; __octo_ec=$?; printf \"#{marker_format}\" \"$__octo_ec\"\n"

  session.mutex.synchronize { session.writer.write(wrapped) }
end
1386
+
1387
+ # Build the "run hooks" prefix line. Empty string for shells where
1388
+ # we don't know how to introspect hook registries.
1389
# Wrap the shell snippet from hook_invocation_for in a `{ ...; }`
# group whose stdout and stderr go to /dev/null, terminated by `;` and
# a newline. A brace group (not a subshell) is used so the hooks run
# in the current shell. Returns "" when there is no snippet.
private def hooks_prefix_for(session)
  snippet = hook_invocation_for(session).strip
  snippet.empty? ? "" : "{ #{snippet}\n} >/dev/null 2>&1;\n"
end
1400
+
1401
+ # Build the shell-specific snippet that runs every registered
1402
+ # chpwd / precmd function. Returns an empty string for shells we
1403
+ # don't know how to introspect (sh, dedicated --norc bash, etc.)
1404
+ # so those sessions behave exactly as before.
1405
+ #
1406
+ # Each hook is wrapped in `2>/dev/null || true` so a single broken
1407
+ # hook can't abort the user command or leak stderr noise into
1408
+ # captured output.
1409
# Build the shell snippet that runs the session shell's prompt hooks.
#
# Returns a String: for "zsh", loops over $chpwd_functions and
# $precmd_functions; for "bash", evaluates PROMPT_COMMAND; for any
# other shell name, "". Every hook invocation is followed by
# `2>/dev/null || true` so one failing hook neither stops the rest nor
# prints to stderr.
private def hook_invocation_for(session)
  case session.shell_name.to_s
  when "zsh"
    # chpwd_functions / precmd_functions are arrays of function names;
    # plain `$array` expansion yields one word per element in zsh.
    <<~ZSH
      for __octo_f in $chpwd_functions; do "$__octo_f" 2>/dev/null || true; done
      for __octo_f in $precmd_functions; do "$__octo_f" 2>/dev/null || true; done
      unset __octo_f 2>/dev/null
    ZSH
  when "bash"
    # bash has no chpwd equivalent. PROMPT_COMMAND may be a string or,
    # from bash 5.1 on, an array. The version test must be
    # "major > 5, or major == 5 and minor >= 1" — comparing major and
    # minor independently would reject every X.0 release after 5.x.
    <<~BASH
      if { [ "${BASH_VERSINFO[0]:-0}" -gt 5 ] || { [ "${BASH_VERSINFO[0]:-0}" -eq 5 ] && [ "${BASH_VERSINFO[1]:-0}" -ge 1 ]; }; } && declare -p PROMPT_COMMAND 2>/dev/null | grep -q 'declare -a'; then
        for __octo_pc in "${PROMPT_COMMAND[@]}"; do eval "$__octo_pc" 2>/dev/null || true; done
      elif [ -n "${PROMPT_COMMAND:-}" ]; then
        eval "$PROMPT_COMMAND" 2>/dev/null || true
      fi
      unset __octo_pc 2>/dev/null
    BASH
  else
    ""
  end
end
1437
+
1438
+ # ---------------------------------------------------------------------
1439
+ # In-session helpers used by PersistentSessionPool to reset state
1440
+ # between commands without having to respawn the shell.
1441
+ # ---------------------------------------------------------------------
1442
+
1443
+ # Issue an in-shell command and wait for its marker. Returns true on
1444
+ # success (marker hit), false otherwise. Swallows output.
1445
# Run `line` in the session (without hooks), wait up to `timeout`
# seconds for its marker with idle-return disabled, then advance the
# session's offset past whatever was logged. Returns true only when
# the marker was matched.
private def run_inline(session, line, timeout: 5)
  write_user_command(session, line)
  outcome = read_until_marker(session, timeout: timeout, idle_ms: DISABLED_IDLE_MS).last
  SessionManager.advance_offset(session.id, log_size(session))
  outcome == :matched
end
1452
+
1453
+ # Called by the pool when rc files (e.g. ~/.zshrc) have changed since
1454
+ # this session was spawned. Sources them in shell-startup order so
1455
+ # later files can see env set by earlier ones.
1456
+ #
1457
+ # Notes:
1458
+ # - Errors inside each `source` are NOT silenced (dropping stderr
1459
+ # previously masked failures like a broken `mise activate` that
1460
+ # would leave PATH without node/ruby/etc.). They land in the PTY
1461
+ # log where a developer can inspect them if a command mysteriously
1462
+ # fails to find a tool.
1463
+ # - `|| true` keeps the compound line's exit code at 0 so our
1464
+ # marker reader treats the re-source as "succeeded" regardless
1465
+ # of per-file hiccups — we don't want a flaky rc to disable the
1466
+ # whole persistent shell.
1467
# Re-source `rc_files` inside the live session, in the given order, as
# one `;`-joined line. Returns nil when the list is empty, otherwise
# the result of run_inline (15 s timeout).
#
# - stderr of each `source` is not redirected.
# - `|| true` after each `source` keeps one failing file from making
#   the whole line fail.
# - Each path is single-quoted, with embedded single quotes written as
#   `'\''`. Double quotes would still let the shell expand `$`,
#   backticks and backslashes that appear in a path. The block form of
#   gsub is required because `\'` in a replacement *string* means
#   "text after the match".
def source_rc_in_session(session, rc_files)
  return if rc_files.empty?

  cmd = rc_files.map { |path|
    quoted = "'" + path.to_s.gsub("'") { "'\\''" } + "'"
    "source #{quoted} || true"
  }.join("; ")
  run_inline(session, cmd, timeout: 15)
end
1475
+
1476
+ # Called by the pool to reset env between calls. First unsets any keys
1477
+ # we exported last time, then exports the new ones.
1478
# Reset exported variables in the live session: emit `unset NAME` for
# every entry of `unset_keys`, then `export NAME=value` for every pair
# of `set_env`, all joined with `; ` and run as one inline command.
# Returns nil when there is nothing to run, otherwise the result of
# run_inline.
#
# Names are passed through shell_escape_var, which strips disallowed
# characters. A name that ends up empty is skipped: emitting it would
# produce a malformed `unset ` or `export =value` fragment.
def reset_env_in_session(session, unset_keys:, set_env:)
  parts = []
  unset_keys.each do |key|
    name = shell_escape_var(key)
    parts << "unset #{name}" unless name.empty?
  end
  set_env.each do |key, value|
    name = shell_escape_var(key)
    parts << "export #{name}=#{shell_escape_value(value)}" unless name.empty?
  end
  return if parts.empty?

  run_inline(session, parts.join("; "))
end
1485
+
1486
+ # Called by the pool to move the live shell to `cwd`.
1487
# Change the live shell's working directory to `cwd` (shell-quoted).
# Returns the result of run_inline.
def cd_in_session(session, cwd)
  target = shell_escape_value(cwd)
  run_inline(session, "cd #{target}")
end
1490
+
1491
# Reduce `name` to the characters allowed in an environment variable
# name (ASCII letters, digits, underscore); everything else is dropped.
private def shell_escape_var(name)
  name.to_s.scan(/[A-Za-z0-9_]+/).join
end
1496
+
1497
# Quote `val` for use as a single shell word: wrap it in single quotes
# and write every embedded single quote as `'\''` (close quote,
# escaped quote, reopen quote).
#
# The block form of gsub is deliberate. In a replacement *string*, the
# two characters `\'` are a back-reference meaning "the text after the
# match", so passing "'\\''" as a string inserts the remainder of the
# value instead of a literal backslash-quote and breaks the quoting.
private def shell_escape_value(val)
  "'" + val.to_s.gsub("'") { "'\\''" } + "'"
end
1501
+
1502
+ # ---------------------------------------------------------------------
1503
+ # PTY/log read helpers
1504
+ # ---------------------------------------------------------------------
1505
# Read and discard whatever is available on the session's reader.
# Stops as soon as nothing becomes readable within 0.1 s, when
# `timeout` seconds have elapsed, or on EOF / EIO. Returns nil.
private def drain_any(session, timeout: 1.0)
  stop_at = Time.now + timeout
  while (left = stop_at - Time.now) > 0
    break unless IO.select([session.reader], nil, nil, [left, 0.1].min)

    begin
      session.reader.read_nonblock(4096)
    rescue IO::WaitReadable
      next
    rescue EOFError, Errno::EIO
      break
    end
  end
end
1521
+
1522
+ # Poll the log file until a marker matches, idle-return fires, or timeout.
1523
+ # Returns [raw_before_marker, exit_code_or_nil, state].
1524
+ # state ∈ :matched, :idle, :timeout, :eof
1525
# Poll the session log (every 50 ms) until one of:
#   :matched — the marker regex matched; returns the text before it and
#              the captured exit code as an Integer;
#   :eof     — the session has no marker regex, or its status became
#              "exited"/"killed" without a marker (returns the text read);
#   :idle    — output has appeared since the call started but none for
#              `idle_ms` milliseconds;
#   :timeout — `timeout` seconds elapsed.
# Returns `[text, exit_code_or_nil, state]`; text is "" for :idle and
# :timeout.
private def read_until_marker(session, timeout:, idle_ms: DEFAULT_IDLE_MS)
  return ["", nil, :eof] unless session.marker_regex

  # Read everything from the session's offset up to `upto` and look
  # for the marker in it.
  scan = lambda do |upto|
    chunk = read_log_slice(session.log_file, session.read_offset, upto)
    [chunk, chunk.match(session.marker_regex)]
  end
  matched = ->(chunk, found) { [chunk[0...found.begin(0)], found[1].to_i, :matched] }

  give_up_at = Time.now + timeout
  idle_limit = idle_ms / 1000.0
  baseline = session.read_offset
  seen_size = baseline
  last_growth = Time.now

  loop do
    size_now = log_size(session)
    if size_now > seen_size
      chunk, found = scan.call(size_now)
      return matched.call(chunk, found) if found

      seen_size = size_now
      last_growth = Time.now
    end

    SessionManager.refresh(session.id)
    if %w[exited killed].include?(session.status)
      # One last look: the marker may have landed just before exit.
      chunk, found = scan.call(log_size(session))
      return found ? matched.call(chunk, found) : [chunk, nil, :eof]
    end

    went_idle = seen_size > baseline && (Time.now - last_growth) >= idle_limit
    return ["", nil, :idle] if went_idle
    return ["", nil, :timeout] if Time.now >= give_up_at

    sleep 0.05
  end
end
1562
+
1563
# Current size of the session log in bytes: taken from the open log
# handle, falling back to the file on disk, and finally to 0 when
# neither can be queried.
private def log_size(session)
  session.log_io.size
rescue StandardError
  begin
    File.size(session.log_file)
  rescue StandardError
    0
  end
end
1566
+
1567
# Read bytes `from...to` of the file at `path` in binary mode.
# Returns "" for an empty or inverted range and when the file does
# not exist.
private def read_log_slice(path, from, to)
  span = to - from
  return "" unless span.positive?

  File.open(path, "rb") do |io|
    io.seek(from)
    io.read(span).to_s
  end
rescue Errno::ENOENT
  ""
end
1576
+
1577
+ # Detect commands that are known to take a long time and produce
1578
+ # bursty output with multi-second quiet gaps. Used by `execute` to
1579
+ # auto-widen the timeout / disable idle-return so the LLM doesn't
1580
+ # poll a rspec/bundle-install 10 times over.
1581
+ #
1582
+ # Matching is substring-based, so the real slow command is still found
1583
+ # when it is wrapped in another construct (`sudo `, `env VAR=val `,
1584
+ # ...). A leading `cd path &&` / `VAR=val` prefix is stripped first.
1585
# True when `command`, after dropping one leading `cd <path> &&|;`
# and any leading `VAR=value` assignments, contains one of
# SLOW_COMMAND_PATTERNS as a substring. nil/empty commands are never
# slow.
private def slow_command?(command)
  return false if command.nil? || command.empty?

  core = command.to_s
                .sub(/\Acd\s+\S+\s*(?:&&|;)\s*/, "")
                .sub(/\A(?:[A-Za-z_][A-Za-z0-9_]*=\S+\s+)+/, "")
                .lstrip

  SLOW_COMMAND_PATTERNS.any? { |needle| core.include?(needle) }
end
1599
+
1600
+ # Check if a command is obviously quick and should never use
1601
+ # fire_and_forget. Used as a runtime guard to prevent token waste.
1602
# True when `command`, after dropping one leading `cd <path> &&|;`
# (optionally preceded by whitespace) and any leading `VAR=value`
# assignments, matches one of QUICK_COMMAND_PATTERNS. nil/empty
# commands are never quick.
private def quick_command?(command)
  return false if command.nil? || command.empty?

  core = command.to_s
                .sub(/\A\s*cd\s+\S+\s*(?:&&|;)\s*/, "")
                .sub(/\A(?:[A-Za-z_][A-Za-z0-9_]*=\S+\s+)+/, "")
                .lstrip

  QUICK_COMMAND_PATTERNS.any? { |pattern| core.match?(pattern) }
end
1614
+
1615
+ # Apply per-line truncation to a cleaned (post-OutputCleaner) string.
1616
+ # If any single line exceeds MAX_LINE_CHARS, we chop it at that length
1617
+ # and append `…[line truncated: <original> chars]` so the LLM knows
1618
+ # content was elided. Critical for minified JS/CSS/JSON dumps that
1619
+ # would otherwise swallow the entire 4 KB budget with one line.
1620
# Cap every line of `text` at `max` bytes. An over-long line is cut at
# `max` bytes (any character split by the cut is scrubbed to "?") and
# gets ` …[line truncated: <original bytesize> chars]` appended.
# Returns `text` itself when it is nil, empty, or has no over-long
# line.
private def truncate_long_lines(text, max: MAX_LINE_CHARS)
  return text if text.nil? || text.empty?

  rows = text.split("\n", -1)
  return text if rows.none? { |row| row.bytesize > max }

  shortened = rows.map do |row|
    next row unless row.bytesize > max

    head = row.byteslice(0, max).to_s
    head.force_encoding(Encoding::UTF_8)
    head = head.scrub("?") unless head.valid_encoding?
    "#{head} …[line truncated: #{row.bytesize} chars]"
  end
  shortened.join("\n")
end
1638
+
1639
+ # Overflow directory: shared across sessions (and persists after
1640
+ # Octo exits) so the LLM can re-read the full output in later
1641
+ # turns. Lives under /tmp so it is naturally swept by the OS, and
1642
+ # we also best-effort prune files older than OVERFLOW_MAX_AGE_SEC
1643
+ # on each write so long-running servers don't accumulate garbage.
1644
# Name of the overflow directory created under the system temp dir.
OVERFLOW_DIR_NAME = "octo-terminal-overflow"
# Overflow files older than this many seconds are pruned (7 days).
OVERFLOW_MAX_AGE_SEC = 60 * 60 * 24 * 7
1646
+
1647
# Path of the overflow directory (`<tmpdir>/OVERFLOW_DIR_NAME`). It is
# created on first use and the path is memoized in @overflow_dir.
private def overflow_dir
  return @overflow_dir if @overflow_dir

  path = File.join(Dir.tmpdir, OVERFLOW_DIR_NAME)
  FileUtils.mkdir_p(path)
  @overflow_dir = path
end
1654
+
1655
+ # Drop overflow files older than OVERFLOW_MAX_AGE_SEC. Best-effort —
1656
+ # any error (permission, race with another process) is swallowed,
1657
+ # we'd rather keep the current command's result than crash because
1658
+ # of stale cleanup.
1659
# Delete `*.log` files in the overflow directory whose mtime is older
# than OVERFLOW_MAX_AGE_SEC. Strictly best-effort: errors on a single
# file, and errors listing the directory, are swallowed.
private def prune_old_overflow_files
  oldest_allowed = Time.now - OVERFLOW_MAX_AGE_SEC
  candidates = Dir.glob(File.join(overflow_dir, "*.log"))

  candidates.each do |candidate|
    next unless File.file?(candidate)

    begin
      File.delete(candidate) if File.mtime(candidate) < oldest_allowed
    rescue StandardError
      # ignore
    end
  end
rescue StandardError
  # ignore
end
1672
+
1673
+ # Write the full cleaned output to a sidecar file so the LLM can
1674
+ # `grep` / `head` / `tail` it in a follow-up tool call. Returns the
1675
+ # absolute path, or nil if the write failed (in which case we'll
1676
+ # just truncate without disclosure).
1677
# Write `cleaned` to a new file in the overflow directory (after
# pruning stale ones) and return its path. The name is
# `<YYYYmmdd-HHMMSS>-s<session_id or "nosid">-<6 hex chars>.log`.
# Returns nil if anything goes wrong.
private def spill_overflow_file(cleaned, session_id:)
  prune_old_overflow_files

  stamp = Time.now.strftime("%Y%m%d-%H%M%S")
  file_name = format("%s-s%s-%s.log", stamp, session_id || "nosid", SecureRandom.hex(3))
  target = File.join(overflow_dir, file_name)

  File.binwrite(target, cleaned)
  target
rescue StandardError
  nil
end
1688
+
1689
+
1690
+
1691
+ # Max visible length of a command inside the tool-call summary line.
1692
+ # Keeps the "terminal(...)" summary on a single UI row even when the
1693
+ # underlying command spans multiple lines (heredocs, multi-line ruby
1694
+ # -e blocks, etc.). The full command is still executed — only the
1695
+ # display is shortened.
1696
+ DISPLAY_COMMAND_MAX_CHARS = 80
1697
+
1698
# One-line summary of a terminal tool call for the UI. `args` may use
# symbol or string keys. Precedence: cancel (handle + kill), then
# send/check (handle + input), then query (handle only), then the
# command itself (suffixed ", async" when requested), else
# "terminal(?)".
def format_call(args)
  lookup = ->(key) { args[key] || args[key.to_s] }
  cmd, handle, inp, kill, async = %i[command handle_id input kill async].map(&lookup)

  return "terminal(cancel handle)" if handle && kill

  if handle
    return "terminal(query handle)" if inp.nil?
    return "terminal(check handle)" if inp.to_s.empty?

    snippet = inp.to_s.strip
    snippet = "#{snippet[0, 30]}..." if snippet.length > 30
    return "terminal(send #{snippet.inspect})"
  end

  return "terminal(?)" unless cmd

  shown = compact_command_for_display(cmd)
  async ? "terminal(#{shown}, async)" : "terminal(#{shown})"
end
1728
+
1729
+ # Collapse newlines and runs of whitespace into single spaces, then
1730
+ # truncate with an ellipsis so the command fits on one line in the UI.
1731
# Flatten `cmd` to a single line (every whitespace run becomes one
# space, ends trimmed). If the result is longer than
# DISPLAY_COMMAND_MAX_CHARS it is cut so that, with a trailing "...",
# it is exactly that long.
private def compact_command_for_display(cmd)
  flat = cmd.to_s.gsub(/\s+/, " ").strip
  return flat unless flat.length > DISPLAY_COMMAND_MAX_CHARS

  flat[0, DISPLAY_COMMAND_MAX_CHARS - 3] + "..."
end
1739
+
1740
+ # Number of trailing lines of output to include in the human-readable
1741
+ # display string (the result text that shows up in CLI / WebUI bubbles
1742
+ # under each tool call). Keep small so multi-poll loops stay readable.
1743
+ DISPLAY_TAIL_LINES = 6
1744
+
1745
# Human-readable result text shown under a tool call.
#
# Returns, in order of precedence:
#   - "done" for a non-Hash result;
#   - "[Blocked] <error>" / "error: <error>" / "stopped" /
#     "async task started" for blocked, failed, killed and accepted
#     results;
#   - otherwise the output tail (if any) followed by a status line:
#     "… <state>" while a handle is still open, "✓ exit=0" or
#     "✗ exit=<n>" when an exit code is known, else "done". The status
#     is prefixed with "[Safe] " for rewritten commands and suffixed
#     with the overflow file path when output was spilled.
#
# An `:exit_code` key whose value is nil means the exit status is
# unknown; it is reported as "done" rather than as a successful
# "exit=0" (nil.to_i is 0, which would otherwise read as success).
def format_result(result)
  return "done" unless result.is_a?(Hash)
  return "[Blocked] #{result[:error]}" if result[:security_blocked]
  return "error: #{result[:error]}" if result[:error]
  return "stopped" if result[:killed]

  # Async task accepted — harness will notify on completion.
  return "async task started" if result[:accepted]

  prefix = result[:security_rewrite] ? "[Safe] " : ""
  tail = display_tail(result[:output])

  status =
    if result[:handle_id]
      # still running / waiting for input
      "… #{result[:state] || 'waiting'}"
    elsif !result[:exit_code].nil?
      ec = result[:exit_code]
      ec.to_i.zero? ? "✓ exit=0" : "✗ exit=#{ec}"
    else
      "done"
    end

  status = "#{prefix}#{status}" unless prefix.empty?

  # Surface the overflow file in the UI too, so the full log is
  # visibly recoverable.
  status = "#{status} [full: #{result[:full_output_file]}]" if result[:full_output_file]

  tail.empty? ? status : "#{tail}\n#{status}"
end
1783
+
1784
# Build the structured (Hash) description of a tool result for the UI.
#
# Returns nil for non-Hash results. Error, killed and accepted (async)
# results produce a minimal Hash carrying only :type, :status and the
# relevant extra field. Every other result produces the full shape with
# the command, exit code, an output preview capped at 800 characters,
# truncation info and a :status of "running" (no exit code yet),
# "success" (exit code zero) or "failed" (anything else).
#
# @param result [Hash, Object] tool result
# @return [Hash, nil] UI payload, or nil when +result+ is not a Hash
def format_result_for_ui(result)
  return nil unless result.is_a?(Hash)
  return { type: "terminal", status: "error", error: result[:error] } if result[:error]
  return { type: "terminal", status: "killed" } if result[:killed]
  return { type: "terminal", status: "async", handle_id: result[:handle_id] } if result[:accepted]

  exit_code = result[:exit_code]
  status =
    if exit_code.nil?
      "running"
    elsif exit_code.to_i.zero?
      # Coerce like format_result does, so a non-Integer exit code such
      # as "0" is classified instead of raising NoMethodError on #zero?.
      "success"
    else
      "failed"
    end

  {
    type: "terminal",
    command: result[:original_command] || result[:rewritten_command] || "",
    exit_code: exit_code,
    output_preview: result[:output].to_s.slice(0, 800),
    output_truncated: result[:output_truncated] || false,
    full_output_file: result[:full_output_file],
    status: status
  }
end
1804
+
1805
# Return the last DISPLAY_TAIL_LINES non-blank lines of +output+, joined
# with "\n", so the user sees what actually happened in this poll rather
# than just a byte count.
#
# Callers normally pass already-cleaned text. If raw bytes arrive, or a
# byteslice cut a multi-byte character in half, string operations such
# as the regex split can raise on the invalid byte sequence and take the
# whole tool call down with them. The text is therefore re-tagged as
# UTF-8 and scrubbed (invalid bytes become "?") before any processing.
#
# @param output [#to_s, nil] raw or cleaned command output
# @return [String] the trailing visible lines, or "" when there are none
private def display_tail(output)
  return "" if output.nil?

  text = output.to_s
  # Happy path (valid UTF-8) skips the rebuild entirely.
  healthy = text.encoding == Encoding::UTF_8 && text.valid_encoding?
  unless healthy
    text = text.dup.force_encoding(Encoding::UTF_8)
    text = text.scrub("?") unless text.valid_encoding?
  end
  return "" if text.strip.empty?

  visible = text.split(/\r?\n/).reject { |line| line.strip.empty? }
  visible.empty? ? "" : visible.last(DISPLAY_TAIL_LINES).join("\n")
end
1826
+ end
1827
+ end
1828
+ end