octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
data/lib/octo/agent.rb ADDED
@@ -0,0 +1,2180 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+ require "json"
5
+ require "cgi"
6
+ require "tty-prompt"
7
+ require "set"
8
+ require_relative "utils/arguments_parser"
9
+ require_relative "utils/file_processor"
10
+ require_relative "utils/environment_detector"
11
+
12
+ # Load all agent modules
13
+ require_relative "agent/message_compressor"
14
+ require_relative "agent/message_compressor_helper"
15
+ require_relative "agent/tool_executor"
16
+ require_relative "agent/session_serializer"
17
+ require_relative "agent/skill_manager"
18
+ require_relative "agent/system_prompt_builder"
19
+ require_relative "agent/llm_caller"
20
+ require_relative "agent/time_machine"
21
+ require_relative "agent/memory_updater"
22
+ require_relative "agent/next_message_suggester"
23
+ require_relative "agent/skill_evolution"
24
+ require_relative "agent/skill_reflector"
25
+ require_relative "agent/skill_auto_creator"
26
+ require_relative "background_task_registry"
27
+
28
+ module Octo
29
+ class Agent
30
+ # Include all functionality modules
31
+ include MessageCompressorHelper
32
+ include ToolExecutor
33
+ include SessionSerializer
34
+ include SkillManager
35
+ include SystemPromptBuilder
36
+ include LlmCaller
37
+ include TimeMachine
38
+ include MemoryUpdater
39
+ include NextMessageSuggester
40
+ include SkillEvolution
41
+ include SkillReflector
42
+ include SkillAutoCreator
43
+
44
+ attr_reader :session_id, :name, :history, :iterations, :working_dir, :created_at, :total_tasks, :todos,
45
+ :cache_stats, :ui, :skill_loader, :agent_profile,
46
+ :status, :error, :updated_at, :source,
47
+ :latest_latency, # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
48
+ :reasoning_effort
49
+ attr_accessor :pinned
50
+
51
+ REASONING_EFFORTS = %w[low medium high].freeze
52
+
53
+ def permission_mode
54
+ @config&.permission_mode&.to_s || ""
55
+ end
56
+
57
+ def reasoning_effort=(value)
58
+ @reasoning_effort = normalize_reasoning_effort(value)
59
+ end
60
+
61
+ private def normalize_reasoning_effort(value)
62
+ return nil if value.nil?
63
+ str = value.to_s.strip.downcase
64
+ return nil if str.empty? || str == "off" || str == "none"
65
+ return str if REASONING_EFFORTS.include?(str)
66
+ nil
67
+ end
68
+
69
+ public
70
+
71
+ def initialize(client, config, working_dir:, ui:, profile:, session_id:, source:)
72
+ @client = client # Client for current model
73
+ @config = config.is_a?(AgentConfig) ? config : AgentConfig.new(config)
74
+ @agent_profile = AgentProfile.load(profile)
75
+ @source = source.to_sym # :manual | :cron | :channel
76
+ @tool_registry = ToolRegistry.new
77
+ @hooks = HookManager.new
78
+ @session_id = session_id
79
+ @name = ""
80
+ @pinned = false
81
+ @history = MessageHistory.new
82
+ @todos = [] # Store todos in memory
83
+ @iterations = 0
84
+ @cache_stats = {
85
+ cache_creation_input_tokens: 0,
86
+ cache_read_input_tokens: 0,
87
+ total_requests: 0,
88
+ cache_hit_requests: 0,
89
+ raw_api_usage_samples: [] # Store raw API usage for debugging
90
+ }
91
+ @start_time = nil
92
+ @working_dir = working_dir || Dir.pwd
93
+ @created_at = Time.now.iso8601
94
+ @total_tasks = 0
95
+ @previous_total_tokens = 0 # Track tokens from previous iteration for delta calculation
96
+ @latest_latency = nil # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
97
+ @reasoning_effort = nil # Per-session reasoning effort override; nil = provider default
98
+ @ui = ui # UIController for direct UI interaction
99
+ @debug_logs = [] # Debug logs for troubleshooting
100
+ @pending_injections = [] # Pending inline skill injections to flush after observe()
101
+ @pending_script_tmpdirs = [] # Decrypted-script tmpdirs to shred when agent.run completes
102
+ @pending_error_rollback = false # Deferred rollback flag set by restore_session on error
103
+ @in_run_loop = false # True while agent.run() is active (set under @state_mutex)
104
+ # Unified inbox for events that should land in @history at the next
105
+ # iteration boundary inside the run loop. Items are typed:
106
+ # {kind: :bg_notification, content:, bubble:, enqueued_at:}
107
+ # {kind: :user_msg, content:, files:, enqueued_at:}
108
+ # Drained chronologically by drain_inbox_into_history! (run loop top).
109
+ @inbox = []
110
+ @inbox_run_pending = false # Set true after enqueue_user_message decides to spawn a run; cleared at run() entry. Dedupes concurrent spawns.
111
+ @state_mutex = Mutex.new # Protects @in_run_loop, @inbox, @inbox_run_pending, @current_run_thread, @discard_threshold
112
+ @run_mutex = Mutex.new # Serializes every Agent#run invocation regardless of caller
113
+ @checkpoint_index = 0 # Tracks how many messages have been persisted to incremental log
114
+ @on_checkpoint = nil # Callback proc(messages) for incremental persistence
115
+ @current_run_thread = nil # Thread currently inside run()'s body — set under @state_mutex; used by interrupt_current_run!
116
+ @discard_threshold = nil # Time. Stale run attempts whose enqueue time predates this are dropped.
117
+
118
+ # Compression tracking
119
+ @compression_level = 0 # Tracks how many times we've compressed (for progressive summarization)
120
+ @compressed_summaries = [] # Store summaries from previous compressions for reference
121
+
122
+ # Message compressor for LLM-based intelligent compression
123
+ # Uses LLM to preserve key decisions, errors, and context while reducing token count
124
+ @message_compressor = MessageCompressor.new(@client, model: current_model)
125
+
126
+ # Skill loader for skill management
127
+ @skill_loader = SkillLoader.new(working_dir: @working_dir)
128
+
129
+ # Initialize Time Machine
130
+ init_time_machine
131
+
132
+ # Register built-in tools
133
+ register_builtin_tools
134
+
135
+ # Ensure user-space parsers are in place (~/.octo/parsers/)
136
+ Utils::ParserManager.setup!
137
+
138
+ # Ensure bundled shell scripts are in place (~/.octo/scripts/)
139
+ Utils::ScriptsManager.setup!
140
+ end
141
+
142
+ # Restore from a saved session
143
+ def self.from_session(client, config, session_data, ui: nil, profile:)
144
+ working_dir = session_data[:working_dir] || session_data["working_dir"] || Dir.pwd
145
+ original_id = session_data[:session_id] || session_data["session_id"] || Octo::SessionManager.generate_id
146
+ # Restore source from persisted data; fall back to :manual for legacy sessions
147
+ source = (session_data[:source] || session_data["source"] || "manual").to_sym
148
+ agent = new(client, config, working_dir: working_dir, ui: ui, profile: profile,
149
+ session_id: original_id, source: source)
150
+ agent.restore_session(session_data)
151
+ agent
152
+ end
153
+
154
+ def add_hook(event, &block)
155
+ @hooks.add(event, &block)
156
+ end
157
+
158
+ # Register a callback that receives newly-added messages after each
159
+ # iteration checkpoint. Used by the server layer for incremental
160
+ # persistence (.jsonl crash-recovery logs).
161
+ def on_checkpoint(&block)
162
+ @on_checkpoint = block
163
+ end
164
+
165
+ # Reset the checkpoint cursor so the next checkpoint! captures
166
+ # everything added from this point forward.
167
+ def reset_checkpoint!
168
+ @checkpoint_index = @history.to_a.size
169
+ end
170
+
171
+ # Emit any messages added to @history since the last checkpoint
172
+ # through the registered @on_checkpoint handler.
173
+ def checkpoint!
174
+ return unless @on_checkpoint
175
+
176
+ current = @history.to_a
177
+ new_msgs = current[@checkpoint_index..] || []
178
+ return if new_msgs.empty?
179
+
180
+ @on_checkpoint.call(new_msgs)
181
+ @checkpoint_index = current.size
182
+ end
183
+
184
+ # Switch this session to a different model, identified by its stable
185
+ # runtime id. Ids survive list reorders, additions, and field edits,
186
+ # which is why we no longer expose an index-based API.
187
+ # @param id [String] Model id (see AgentConfig#parse_models)
188
+ # @return [Boolean] true if switched successfully, false otherwise
189
+ def switch_model_by_id(id)
190
+ return false unless @config.switch_model_by_id(id)
191
+
192
+ rebuild_client_for_current_model!
193
+ true
194
+ end
195
+
196
+ # Rebuild the underlying Client (and dependent components) to pick up
197
+ # credentials/model name from the currently-selected model in @config.
198
+ private def rebuild_client_for_current_model!
199
+ @client = Octo::Client.new(
200
+ @config.api_key,
201
+ base_url: @config.base_url,
202
+ model: @config.model_name,
203
+ anthropic_format: @config.anthropic_format?
204
+ )
205
+ # Update message compressor with new client and model
206
+ @message_compressor = MessageCompressor.new(@client, model: current_model)
207
+
208
+ # Inject a new session context to notify the AI of the model switch
209
+ inject_session_context
210
+ end
211
+
212
+ # Change the working directory for this session
213
+ # Injects a new session context to notify the AI of the directory change
214
+ def change_working_dir(new_dir)
215
+ @working_dir = new_dir
216
+ inject_session_context
217
+ true
218
+ end
219
+
220
+ # Get list of available model names
221
+ def available_models
222
+ @config.model_names
223
+ end
224
+
225
+ # Get current model configuration info
226
+ def current_model_info
227
+ model = @config.current_model
228
+ return nil unless model
229
+
230
+ {
231
+ id: model["id"],
232
+ name: model["name"],
233
+ model: model["model"],
234
+ base_url: model["base_url"]
235
+ }
236
+ end
237
+
238
+ # Get current model name (respects any active fallback override)
239
+ private def current_model
240
+ @config.effective_model_name
241
+ end
242
+
243
+ # Rename this session. Called by auto-naming (first message) or user explicit rename.
244
+ def rename(new_name)
245
+ @name = new_name.to_s.strip
246
+ end
247
+
248
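+ # Entry point for an agent turn. Two input modes (a third path, system_notification:, injects a background-task notification instead of user input):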
249
+ #
250
+ # run("user typed this") — user message mode
251
+ # run — drain-only mode: nothing to append directly;
252
+ # the inbox drain at iteration top is expected
253
+ # to find something. If the inbox is also empty,
254
+ # the loop exits immediately (no wasted LLM call).
255
+ def run(user_input = nil, files: [], system_notification: nil, _bg_enqueued_at: nil)
256
+ # Remedy 1: bg-notification runs carry their enqueue timestamp. If a user
257
+ # interrupt has bumped @discard_threshold past that timestamp, the
258
+ # notification's "ticket" is stale — drop it before we grab the heavy
259
+ # @run_mutex (which a real user message may be waiting behind us for).
260
+ if system_notification && _bg_enqueued_at
261
+ @state_mutex.synchronize do
262
+ return if @discard_threshold && _bg_enqueued_at < @discard_threshold
263
+ end
264
+ end
265
+
266
+ # Serialize every Agent#run invocation so concurrent callers cannot
267
+ # mutate @history, @iterations, etc. simultaneously.
268
+ @run_mutex.synchronize do
269
+ if system_notification && _bg_enqueued_at
270
+ @state_mutex.synchronize do
271
+ return if @discard_threshold && _bg_enqueued_at < @discard_threshold
272
+ end
273
+ end
274
+
275
+ @state_mutex.synchronize do
276
+ @in_run_loop = true
277
+ @current_run_thread = Thread.current
278
+ # We're entering run() — any concurrent caller that observed
279
+ # @inbox_run_pending == true and decided NOT to spawn can rely
280
+ # on this run absorbing their inbox items. Clear the flag.
281
+ @inbox_run_pending = false
282
+ end
283
+
284
+ # Drain-only mode: no direct input, and nothing queued either. Don't
285
+ # bother the LLM with an empty turn.
286
+ if user_input.nil? && system_notification.nil?
287
+ empty_inbox = @state_mutex.synchronize { @inbox.empty? }
288
+ if empty_inbox
289
+ @state_mutex.synchronize do
290
+ @in_run_loop = false
291
+ @current_run_thread = nil
292
+ end
293
+ Octo::Logger.info("agent.drain_only_run_empty_inbox", session_id: @session_id)
294
+ return
295
+ end
296
+ end
297
+
298
+ # Show the "thinking" indicator as early as possible so the user gets
299
+ # immediate feedback after sending a message. Without this the UI stays
300
+ # silent during synchronous setup work (system prompt assembly, file
301
+ # parsing, history compression checks) before the first LLM call. The
302
+ # subsequent `think` call will re-emit show_progress, which is an
303
+ # idempotent update on the same progress UI element.
304
+ @ui&.show_progress
305
+
306
+ # Start new task for Time Machine
307
+ task_id = start_new_task
308
+
309
+ @start_time = Time.now
310
+ @task_truncation_count = 0 # Reset truncation counter for each task
311
+ @task_timeout_hint_injected = false # Reset read-timeout hint injection (see LlmCaller)
312
+ @task_upstream_truncation_hint_injected = false # Reset upstream-truncation hint injection (see LlmCaller)
313
+ # Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
314
+ # across tasks to correctly calculate delta tokens in each iteration
315
+ @task_start_iterations = @iterations # Track starting iterations for this task
316
+ # Track cache stats for current task
317
+ @task_cache_stats = {
318
+ cache_creation_input_tokens: 0,
319
+ cache_read_input_tokens: 0,
320
+ total_requests: 0,
321
+ cache_hit_requests: 0
322
+ }
323
+
324
+ # Deferred error rollback: if the previous session ended with an error,
325
+ # trim history back to just before that failed user message now — at the
326
+ # point the user actually sends a new message, not at restore time.
327
+ # (Trimming at restore time caused replay_history to return empty results.)
328
+ if @pending_error_rollback
329
+ @pending_error_rollback = false
330
+ last_user_index = @history.last_real_user_index
331
+ if last_user_index
332
+ @history.truncate_from(last_user_index)
333
+ @hooks.trigger(:session_rollback, {
334
+ reason: "Previous session ended with error — rolling back before new message",
335
+ rolled_back_message_index: last_user_index
336
+ })
337
+ end
338
+ end
339
+
340
+ # Add system prompt as the first message if this is the first run
341
+ if @history.empty?
342
+ system_prompt = build_system_prompt
343
+ @history.append({ role: "system", content: system_prompt })
344
+ end
345
+
346
+ # Inject session context (date + model) if not yet present or date has changed
347
+ inject_session_context_if_needed
348
+
349
+ # Inject chunk index card if archived chunks exist and index is stale
350
+ inject_chunk_index_if_needed
351
+
352
+ if system_notification
353
+ # System notification mode — triggered by background task completion.
354
+ # Skip all user input processing; inject the notification directly.
355
+ @history.append({
356
+ role: "user",
357
+ content: system_notification,
358
+ system_injected: true,
359
+ task_id: task_id,
360
+ created_at: Time.now.to_f
361
+ })
362
+ elsif user_input.nil? || user_input.empty?
363
+ # Drain-only mode: nothing to append now. The iteration drain at the
364
+ # top of the loop will pick up whatever's in @inbox (which is why we
365
+ # were started — see http_server's follow-up run spawn).
366
+ else
367
+ # Normal user message mode — files may or may not be attached.
368
+ processed = process_files_for_user_message(user_input, files)
369
+ append_processed_user_message_to_history!(processed, task_id)
370
+
371
+ # If the user typed a slash command targeting a skill with disable-model-invocation: true,
372
+ # inject the skill content as a synthetic assistant message so the LLM can act on it.
373
+ # Skills already in the system prompt (model_invocation_allowed?) are skipped.
374
+ inject_skill_command_as_assistant_message(user_input, task_id)
375
+ end
376
+
377
+ @hooks.trigger(:on_start, system_notification ? "[background task notification]" : user_input)
378
+
379
+ begin
380
+ # Track if request_user_feedback was called
381
+ awaiting_user_feedback = false
382
+ # Track if task was interrupted by user (denied tool execution)
383
+ task_interrupted = false
384
+
385
+ loop do
386
+ @iterations += 1
387
+ @hooks.trigger(:on_iteration, @iterations)
388
+
389
+ # Drain any inbox items (queued user messages) since the last
390
+ # iteration. Without this, items sit in the queue until the WHOLE
391
+ # run completes — latency drops from "minutes" to "one LLM turn".
392
+ drain_inbox_into_history!(task_id)
393
+
394
+ # Persist drained inbox messages immediately so they survive a
395
+ # crash during the upcoming think() call.
396
+ checkpoint!
397
+
398
+ # Think: LLM reasoning with tool support
399
+ response = think
400
+ @last_token_usage = response[:token_usage] if response && response[:token_usage]
401
+
402
+ # Debug: check for potential infinite loops
403
+ if @config.verbose
404
+ @ui&.log("Iteration #{@iterations}: finish_reason=#{response[:finish_reason]}, tool_calls=#{response[:tool_calls]&.size || 'nil'}", level: :debug)
405
+ end
406
+
407
+ # Skip if compression happened (response is nil)
408
+ if response.nil?
409
+ checkpoint!
410
+ next
411
+ end
412
+
413
+ # Checkpoint immediately after think() so the assistant message
414
+ # (and any tool_calls it contains) is persisted before we enter
415
+ # act(). If a crash happens between think() and the next
416
+ # iteration-boundary checkpoint, recovery can at least restore
417
+ # the model's intent.
418
+ checkpoint!
419
+
420
+ # [DIAG] Only log when finish_reason=="stop" AND tool_calls non-empty —
421
+ # the suspicious combo that indicates an upstream-truncated tool_use
422
+ # response. Normal responses produce no log line here to avoid noise.
423
+ begin
424
+ tool_calls = response[:tool_calls] || []
425
+ if response[:finish_reason] == "stop" && !tool_calls.empty?
426
+ tc_summary = tool_calls.map do |c|
427
+ args_str = c[:arguments].is_a?(String) ? c[:arguments] : c[:arguments].to_s
428
+ {
429
+ name: c[:name].to_s,
430
+ args_len: args_str.length,
431
+ args_head: args_str[0, 120]
432
+ }
433
+ end
434
+ Octo::Logger.warn("agent.think_response",
435
+ session_id: @session_id,
436
+ iteration: @iterations,
437
+ finish_reason: response[:finish_reason].to_s,
438
+ tool_calls_count: tool_calls.size,
439
+ tool_calls: tc_summary,
440
+ content_len: response[:content].to_s.length,
441
+ completion_tokens: response.dig(:token_usage, :completion_tokens),
442
+ ttft_ms: response.dig(:latency, :ttft_ms),
443
+ suspicious_truncation: true
444
+ )
445
+ end
446
+ rescue StandardError => e
447
+ Octo::Logger.warn("agent.think_response.log_failed", error: e.message)
448
+ end
449
+
450
+ # Check if done (no more tool calls needed).
451
+ #
452
+ # Defensive rule: we ONLY exit on empty/missing tool_calls.
453
+ # We used to also short-circuit on finish_reason=="stop", but
454
+ # upstream routers (OpenRouter → Anthropic/Bedrock) can return the
455
+ # contradictory combo `finish_reason=="stop" + non-empty tool_calls
456
+ # with truncated args`, which caused the agent to silently treat a
457
+ # truncated response as "task complete". Truncation is now caught
458
+ # earlier by LlmCaller#detect_upstream_truncation! (which raises
459
+ # UpstreamTruncatedError → RetryableError); this branch stays as
460
+ # a belt-and-braces guard: if that detector ever misses a new
461
+ # truncation pattern, we still won't silently exit while the model
462
+ # is mid-tool_call.
463
+ if response[:tool_calls].nil? || response[:tool_calls].empty?
464
+ # [DIAG] Pin down exactly which sub-condition triggered the task exit.
465
+ Octo::Logger.info("agent.loop_break_normal",
466
+ session_id: @session_id,
467
+ iteration: @iterations,
468
+ branch: (response[:tool_calls].nil? ? "tool_calls_nil" : "tool_calls_empty"),
469
+ finish_reason: response[:finish_reason].to_s,
470
+ tool_calls_count: (response[:tool_calls] || []).size
471
+ )
472
+ if response[:content] && !response[:content].empty?
473
+ emit_assistant_message(response[:content], reasoning_content: response[:reasoning_content])
474
+ end
475
+
476
+ # Debug: log why we're stopping
477
+ if @config.verbose && (response[:tool_calls].nil? || response[:tool_calls].empty?)
478
+ reason = response[:finish_reason] == "stop" ? "API returned finish_reason=stop" : "No tool calls in response"
479
+ @ui&.log("Stopping: #{reason}", level: :debug)
480
+ if response[:content] && response[:content].is_a?(String)
481
+ preview = response[:content].length > 200 ? response[:content][0...200] + "..." : response[:content]
482
+ @ui&.log("Response content: #{preview}", level: :debug)
483
+ end
484
+ end
485
+
486
+ # A queued user message may have landed between this think() and
487
+ # now. Don't break — loop back so the next iteration's drain
488
+ # injects it and the LLM addresses it within the same warm-cache
489
+ # run. Saves the full-context replay of a fresh run().
490
+ if inbox_pending?
491
+ Octo::Logger.info("agent.loop_continue_for_pending_inbox",
492
+ session_id: @session_id,
493
+ iteration: @iterations
494
+ )
495
+ checkpoint!
496
+ next
497
+ end
498
+
499
+ checkpoint!
500
+ break
501
+ end
502
+
503
+ # Show assistant message if there's content before tool calls
504
+ if response[:content] && !response[:content].empty?
505
+ emit_assistant_message(response[:content], reasoning_content: response[:reasoning_content])
506
+ end
507
+
508
+ # Act: Execute tool calls
509
+ action_result = act(response[:tool_calls])
510
+
511
+ # Check if request_user_feedback was called
512
+ if action_result[:awaiting_feedback]
513
+ awaiting_user_feedback = true
514
+ observe(response, action_result[:tool_results])
515
+ flush_pending_injections
516
+ checkpoint!
517
+ break
518
+ end
519
+
520
+ # Observe: Add tool results to conversation context
521
+ observe(response, action_result[:tool_results])
522
+
523
+ # Flush any inline skill injections enqueued by invoke_skill during act().
524
+ # Must happen AFTER observe() so toolResult is appended before skill instructions,
525
+ # producing a legal message sequence for all API providers (especially Bedrock).
526
+ flush_pending_injections
527
+
528
+ # Check if user denied any tool
529
+ if action_result[:denied]
530
+ task_interrupted = true
531
+ # If user provided feedback, treat it as a user question/instruction
532
+ if action_result[:feedback] && !action_result[:feedback].empty?
533
+ # Add user feedback as a new user message with system_injected marker
534
+ @history.append({
535
+ role: "user",
536
+ content: "The user has a question/feedback for you: #{action_result[:feedback]}\n\nPlease respond to the user's question/feedback before continuing with any actions.",
537
+ system_injected: true
538
+ })
539
+ checkpoint!
540
+ # Continue loop to let agent respond to feedback
541
+ next
542
+ else
543
+ # User just said "no" without feedback - stop and wait
544
+ @ui&.show_assistant_message("Tool execution was denied. Please give more instructions...", files: [])
545
+ checkpoint!
546
+ break
547
+ end
548
+ end
549
+
550
+ # Normal iteration end — persist incremental checkpoint before
551
+ # the next loop iteration so crash recovery captures this turn.
552
+ checkpoint!
553
+ end
554
+
555
+ result = build_result
556
+
557
+ # Save snapshots of modified files for Time Machine
558
+ if @modified_files_in_task && !@modified_files_in_task.empty?
559
+ save_modified_files_snapshot(@modified_files_in_task)
560
+ @modified_files_in_task = [] # Reset for next task
561
+ end
562
+
563
+ # Run skill evolution hooks after main loop completes
564
+ # Skip if task was interrupted by user (denied tool) or awaiting user feedback
565
+ # Only for main agent (not subagents) to avoid recursive evolution
566
+ unless @is_subagent || task_interrupted || awaiting_user_feedback
567
+ run_skill_evolution_hooks
568
+ end
569
+
570
+ # Run long-term memory update as a forked subagent BEFORE we print
571
+ # show_complete. Running it as a subagent (rather than inline in
572
+ # the main loop) gives us correct visual ordering structurally:
573
+ # the subagent blocks until done, its progress spinner finishes,
574
+ # and only then [OK] Task Complete is printed. No cleanup dance,
575
+ # no cross-method progress handle holding.
576
+ # Skip on interrupt / feedback / subagent (self-guarded inside too).
577
+ unless @is_subagent || task_interrupted || awaiting_user_feedback
578
+ run_memory_update_subagent
579
+ end
580
+
581
+ if @is_subagent
582
+ # Parent agent (skill_manager) prints the completion summary; skip here.
583
+ else
584
+ @ui&.show_complete(
585
+ iterations: result[:iterations],
586
+ duration: result[:duration_seconds],
587
+ cache_stats: result[:cache_stats],
588
+ awaiting_user_feedback: awaiting_user_feedback
589
+ )
590
+ # Show token usage once at task completion (not on every iteration)
591
+ @ui&.show_token_usage(@last_token_usage) if @last_token_usage
592
+ end
593
+
594
+ # Fire async ghost-text prediction for the user's next message. Must
595
+ # run AFTER show_complete (so the UI is in its idle "awaiting input"
596
+ # state) and is fire-and-forget — the suggestion arrives later via
597
+ # the UI's own +show_next_message_suggestion+ event.
598
+ # Same guards as run_memory_update_subagent: skip if interrupted,
599
+ # awaiting feedback, or running as a subagent.
600
+ unless @is_subagent || task_interrupted || awaiting_user_feedback
601
+ run_next_message_suggestion!
602
+ end
603
+
604
+ @hooks.trigger(:on_complete, result)
605
+ result
606
+ rescue Octo::AgentInterrupted
607
+ # Let CLI handle the interrupt message
608
+ raise
609
+ rescue StandardError => e
610
+ # Log complete error information to debug_logs for troubleshooting
611
+ @debug_logs << {
612
+ timestamp: Time.now.iso8601,
613
+ event: "agent_run_error",
614
+ error_class: e.class.name,
615
+ error_message: e.message,
616
+ backtrace: e.backtrace&.first(30) # Keep first 30 lines of backtrace
617
+ }
618
+ Octo::Logger.error("agent_run_error", error: e)
619
+
620
+ # 400 errors mean our request was malformed — roll back history so the bad
621
+ # message is not replayed on the next user turn.
622
+ # Other errors (auth, network, etc.) leave history intact for retry.
623
+ @pending_error_rollback = true if e.is_a?(Octo::BadRequestError)
624
+
625
+ # Build error result for session data, but let CLI handle error display
626
+ result = build_result(:error, error: e.message) # rubocop:disable Lint/UselessAssignment
627
+ raise
628
+ ensure
629
+ @state_mutex.synchronize do
630
+ @in_run_loop = false
631
+ @current_run_thread = nil
632
+ end
633
+
634
+ # Safety net: ensure any lingering progress spinner is stopped.
635
+ # Normal paths close their own spinners; this guards against exceptions
636
+ # raised between a progress slot's active/done pair.
637
+ @ui&.show_progress(phase: "done")
638
+
639
+ # Reap stale completed background tasks so the registry doesn't grow
640
+ # unboundedly across a long session.
641
+ begin
642
+ BackgroundTaskRegistry.prune_completed(max_age: 3_600, agent_session_id: @session_id)
643
+ rescue => e
644
+ Octo::Logger.error("background_task_prune_error", error: e)
645
+ end
646
+
647
+ # If an inbox item arrived during the ensure block — i.e. AFTER the
648
+ # last in-loop drain but BEFORE we released @run_mutex — handle it
649
+ # on a fresh thread so we don't block the caller.
650
+ flush_inbox_after_run
651
+ end
652
+ end # @run_mutex.synchronize
653
+ end
654
+
655
+ # Drain ALL pending inbox items into @history. Called at the top of every
656
+ # iteration inside the run loop so messages land within one LLM turn of
657
+ # arrival — never deferred until the whole run completes.
658
+ #
659
+ # Returns true if anything was drained, false if the inbox was empty.
660
private def drain_inbox_into_history!(task_id)
  # Moves every queued inbox item into @history. Consecutive background
  # notifications are coalesced into a single system_injected message;
  # user messages are appended one by one.
  #
  # task_id - stamped onto every history entry created here.
  #
  # Returns true when the drain ran to completion, false when the inbox was
  # empty or an error interrupted the drain.
  #
  # On failure only the items that were NOT yet written to @history are put
  # back at the front of @inbox. Items already appended are never re-queued,
  # so a later drain cannot duplicate them in the conversation.
  items = nil
  committed = 0 # number of leading items already handled (appended or skipped)

  @state_mutex.synchronize do
    return false if @inbox.empty?
    items = @inbox.dup
    @inbox.clear
  end

  # Coalesce consecutive bg notifications into one system_injected msg
  bg_bubbles = []
  bg_bodies = []

  # Writes the buffered notifications (if any) as one message, then records
  # that every item before index +upto+ has been dealt with.
  flush_bg = lambda do |upto|
    unless bg_bodies.empty?
      bg_bubbles.each { |b| emit_bubble_for_notification(b) }
      @history.append({
        role: "user",
        content: bg_bodies.join("\n\n"),
        system_injected: true,
        task_id: task_id,
        created_at: Time.now.to_f
      })
      bg_bubbles = []
      bg_bodies = []
    end
    committed = upto
  end

  items.each_with_index do |item, index|
    case item[:kind]
    when :bg_notification
      # Only buffered here; counted as committed once flush_bg appends it.
      bg_bubbles << item[:bubble]
      bg_bodies << format_notification_for_history(item[:content])
    when :user_msg
      flush_bg.call(index)

      created_at = item[:enqueued_at] || Time.now

      if item[:processed]
        append_processed_user_message_to_history!(item[:processed], task_id)
        # Mark as committed before touching the UI: a UI failure must not
        # cause a message that is already in history to be re-queued.
        committed = index + 1
        @ui&.show_user_message(
          item[:processed][:user_content],
          files: item[:processed][:display_files] || [],
          created_at: created_at.to_f,
          source: :web
        )
      else
        @history.append({
          role: "user",
          content: item[:content],
          task_id: task_id,
          created_at: created_at.to_f
        })
        committed = index + 1
        @total_tasks += 1
        @ui&.show_user_message(item[:content], created_at: created_at.to_f, source: :web)
      end
    else
      Octo::Logger.warn("agent.unknown_inbox_kind", kind: item[:kind])
    end
  end
  flush_bg.call(items.size)

  if items.any? { |i| i[:kind] == :user_msg }
    remaining = @state_mutex.synchronize { @inbox.count { |i| i[:kind] == :user_msg } }
    broadcast_user_message_queue_status(remaining)
  end

  Octo::Logger.info("agent.inbox_drained",
    session_id: @session_id,
    count: items.size,
    kinds: items.group_by { |i| i[:kind] }.transform_values(&:size)
  )
  true
rescue => e
  # Drain failed partway through. @inbox was cleared up front, so anything
  # not yet committed would be lost; put exactly those items back, ahead of
  # whatever arrived in the meantime. Array() covers a failure that happened
  # before +items+ was assigned.
  survivors = Array(items).drop(committed)
  unless survivors.empty?
    @state_mutex.synchronize do
      @inbox.unshift(*survivors)
    end
    Octo::Logger.warn("agent.drain_inbox_recovered",
      session_id: @session_id,
      recovered: survivors.size,
      error: e.message
    )
  end
  Octo::Logger.error("agent.drain_inbox_error",
    session_id: @session_id,
    error: e,
    survivors: survivors.size
  )
  false
end
752
+
753
+ # True if at least one item is currently queued in the inbox. Used by the
754
+ # run loop's "LLM said done — but should we really break?" check.
755
private def inbox_pending?
  # Reads the inbox size under @state_mutex and reports whether any item is queued.
  queued = @state_mutex.synchronize { @inbox.size }
  queued > 0
end
758
+
759
+ # Public: count of pending :user_msg items in the inbox.
760
+ # Used by WebSocket reconnect to replay queue status to newly subscribed tabs.
761
def inbox_user_message_count
  # Counts, under @state_mutex, the inbox entries whose :kind is :user_msg.
  @state_mutex.synchronize do
    @inbox.select { |entry| entry[:kind] == :user_msg }.size
  end
end
764
+
765
+ # Public: snapshot of pending :user_msg items in the inbox, in a format
766
+ # ready for replay via UI#show_user_message on WebSocket reconnect.
767
+ # Each entry: { content:, files:, created_at: } — files is an array of
768
+ # display-file hashes (name, data_url, mime_type).
769
def inbox_user_messages_snapshot
  # Builds, under @state_mutex, one { content:, files:, created_at: } hash per
  # queued :user_msg item. Items carrying a :processed payload take their
  # content and display files from it; plain items get an empty files list.
  @state_mutex.synchronize do
    @inbox.each_with_object([]) do |entry, snapshot|
      next unless entry[:kind] == :user_msg

      stamp = entry[:enqueued_at] || Time.now
      payload = entry[:processed]
      snapshot <<
        if payload
          {
            content: payload[:user_content],
            files: payload[:display_files] || [],
            created_at: stamp.to_f
          }
        else
          {
            content: entry[:content],
            files: [],
            created_at: stamp.to_f
          }
        end
    end
  end
end
789
+
790
+ # Set @discard_threshold to now and (best-effort) raise AgentInterrupted
791
+ # into the thread currently inside Agent#run. Called by http_server's
792
+ # interrupt_session in addition to the existing session[:thread].raise —
793
+ # the existing path only catches user-msg runs (whose thread is tracked
794
+ # in session[:thread]).
795
+ #
796
+ # Idempotent: harmless to call multiple times. Best-effort: Thread#raise
797
+ # against a thread blocked deep in a syscall may not fire immediately;
798
+ # the watchdog in http_server escalates if needed.
799
def interrupt_current_run!
  # Stamps @discard_threshold with the current time and captures the thread
  # recorded in @current_run_thread, both under @state_mutex. Returns false
  # when no thread is recorded; otherwise raises Octo::AgentInterrupted into
  # it (errors from the raise itself are swallowed) and returns true.
  runner = @state_mutex.synchronize do
    @discard_threshold = Time.now
    @current_run_thread
  end
  return false unless runner

  begin
    runner.raise(Octo::AgentInterrupted, "Interrupted by user")
  rescue StandardError
    # Thread may have just exited; nothing to do.
  end

  true
end
813
+
814
+ # True iff a thread is currently inside Agent#run (between acquiring
815
+ # @run_mutex and releasing it). Server-layer callers use this to decide
816
+ # whether a new user message can be enqueued (run in flight will drain
817
+ # it) or needs a fresh run() to be spawned (agent is idle).
818
def in_run_loop?
  # Returns the current value of @in_run_loop, read under @state_mutex.
  @state_mutex.synchronize do
    @in_run_loop
  end
end
821
+
822
+ # Queue a user message (text and/or files) into the inbox. Returns a
823
+ # tristate describing what the caller should do next:
824
+ # :running — a run is currently in flight; the in-loop drain at
825
+ # the next iteration boundary will pick this up.
826
+ # Caller does NOT spawn a new run.
827
+ # :spawn — agent is idle AND no other caller has been told to
828
+ # spawn yet. Caller IS responsible for spawning a
829
+ # drain-only run (typically via run_agent_task so the
830
+ # thread is registered for interrupt_session).
831
+ # :spawn_pending — agent is idle BUT another concurrent caller has
832
+ # already been told to spawn. Caller does NOT spawn —
833
+ # the other run will absorb this message too.
834
+ #
835
+ # Files are processed eagerly **on the caller's thread** (typically the
836
+ # HTTP-handler thread) so the processed payload is fully formed by the
837
+ # time it sits in the inbox. The agent thread's drain then just appends
838
+ # to @history — no @history mutation off-thread.
839
def enqueue_user_message(content, files: [])
  # Appends a :user_msg item to the inbox and returns what the caller should
  # do next: :running (a run is in flight), :spawn (caller must start a run;
  # @inbox_run_pending is set here) or :spawn_pending (a spawn was already
  # handed out). Files, when given, are processed before taking the mutex.
  has_attachments = files && !files.empty?
  payload = has_attachments ? process_files_for_user_message(content, files) : nil

  decision, waiting = @state_mutex.synchronize do
    @inbox << {
      kind: :user_msg,
      content: content.to_s,
      processed: payload,
      enqueued_at: Time.now
    }
    queued = @inbox.count { |entry| entry[:kind] == :user_msg }

    verdict =
      if @in_run_loop
        :running
      elsif @inbox_run_pending
        :spawn_pending
      else
        @inbox_run_pending = true
        :spawn
      end

    [verdict, queued]
  end

  # The "N waiting" hint is only broadcast when the message really waits
  # behind an in-flight run.
  broadcast_user_message_queue_status(waiting) if decision == :running

  Octo::Logger.info("agent.user_message_enqueued",
    session_id: @session_id,
    decision: decision,
    has_files: !payload.nil?,
    pending_user_msgs: waiting
  )

  decision
end
881
+
882
private def broadcast_user_message_queue_status(count)
  # Forwards the pending count to the UI when one is attached; any
  # StandardError raised by the UI is logged rather than propagated.
  begin
    @ui.nil? ? nil : @ui.update_user_message_queue_status(pending: count)
  rescue StandardError => e
    Octo::Logger.error("agent.user_queue_status_error",
      session_id: @session_id,
      error: e
    )
  end
end
890
+
891
+ # Called by BackgroundTaskRegistry when a fire-and-forget background task
892
+ # completes. Two delivery paths depending on agent state:
893
+ # - Agent is mid-run → enqueue; the per-iteration drain picks it up
894
+ # - Agent is idle → start a fresh run on the caller's thread
895
def resume_with_notification(notification_content, bubble: nil)
  # While a run is in flight (@in_run_loop), queues the notification as a
  # :bg_notification inbox item and returns nil. Otherwise emits the bubble
  # and calls run with the formatted notification on the calling thread.
  arrived_at = Time.now

  queued = @state_mutex.synchronize do
    if @in_run_loop
      @inbox << {
        kind: :bg_notification,
        content: notification_content,
        bubble: bubble,
        enqueued_at: arrived_at
      }
      Octo::Logger.info("agent.notification_queued",
        session_id: @session_id,
        queue_size: @inbox.size
      )
      true
    else
      false
    end
  end
  return if queued

  emit_bubble_for_notification(bubble)

  run(
    "",
    system_notification: format_notification_for_history(notification_content),
    _bg_enqueued_at: arrived_at
  )
end
923
+
924
+ # Drain any items queued during this run's ensure window (after the last
925
+ # in-loop drain but before @run_mutex was released).
926
private def flush_inbox_after_run
  # Looks at the inbox (first :user_msg if any, else the first item) and, on
  # a new thread, either starts a drain-only run (user message) or removes
  # the first :bg_notification and re-delivers it via
  # resume_with_notification. Returns the spawned Thread, or nil when there
  # was nothing to do.
  head = @state_mutex.synchronize do
    @inbox.find { |entry| entry[:kind] == :user_msg } || @inbox.first
  end
  return unless head

  if head[:kind] == :user_msg
    Thread.new do
      Thread.current.name = "inbox-flush-user-#{@session_id[0, 8]}"
      begin
        run
      rescue StandardError => e
        Octo::Logger.error("agent.flush_inbox_user_error",
          session_id: @session_id,
          error: e
        )
      end
    end
  elsif head[:kind] == :bg_notification
    notice = @state_mutex.synchronize do
      position = @inbox.index { |entry| entry[:kind] == :bg_notification }
      position ? @inbox.delete_at(position) : nil
    end
    return unless notice

    Thread.new do
      Thread.current.name = "inbox-flush-bg-#{@session_id[0, 8]}"
      begin
        resume_with_notification(notice[:content], bubble: notice[:bubble])
      rescue StandardError => e
        Octo::Logger.error("agent.flush_inbox_bg_error",
          session_id: @session_id,
          error: e
        )
      end
    end
  end
end
966
+
967
private def format_notification_for_history(content)
  # Wraps +content+ in the fixed system-notification banner and a
  # <task-notification> element, returning the whitespace-stripped text.
  banner = [
    "[SYSTEM NOTIFICATION - NOT USER INPUT]",
    "This is an automated background-task event, NOT a message from the user.",
    "Do NOT interpret this as user acknowledgement, confirmation, or response",
    "to any pending question.",
    ""
  ]
  envelope = ["<task-notification>", "#{content}", "</task-notification>"]

  (banner + envelope).join("\n").strip
end
979
+
980
private def emit_bubble_for_notification(bubble)
  # Shows a background-task notice built from the bubble's :command,
  # :handle_id and :status. Does nothing without a bubble or a UI; errors
  # raised by the UI are logged, not propagated.
  return if !bubble || !@ui

  notice = {
    command: bubble[:command],
    handle_id: bubble[:handle_id],
    status: bubble[:status]
  }
  @ui.show_background_task_notice(**notice)
rescue StandardError => e
  Octo::Logger.error("agent.bubble_emit_error",
    session_id: @session_id,
    error: e
  )
end
993
+
994
private def broadcast_background_tasks_snapshot
  # Sends the UI the list of running background tasks for this session, each
  # with its elapsed time in whole seconds (0 when no start time is known).
  # No-op without a UI; errors are logged, not propagated.
  return if !@ui

  reference = Time.now
  running = BackgroundTaskRegistry.list_running(agent_session_id: @session_id)
  rows = running.map do |task|
    began = task[:started_at]
    {
      handle_id: task[:handle_id],
      command: task[:command],
      started_at: began,
      elapsed: began ? (reference - began).round : 0
    }
  end

  @ui.update_background_tasks(running: rows.size, tasks: rows)
rescue StandardError => e
  Octo::Logger.error("agent.broadcast_bg_tasks_error",
    session_id: @session_id,
    error: e
  )
end
1014
+
1015
private def format_terminal_notification(task_result, tool_use_id: nil)
  # Builds the notification for a finished background terminal task.
  # Returns { content:, bubble: } where content is a newline-joined list of
  # tagged fields (only those present in task_result) plus, when other tasks
  # are still running for this session, a <sibling-tasks> section.
  # Also refreshes the UI's background-task snapshot as a side effect.
  handle_id = task_result[:handle_id]
  command = task_result[:command]
  output_file = task_result[:output_file]
  elapsed_seconds = task_result[:elapsed_seconds]
  exit_code = task_result[:exit_code]
  full_output_file = task_result[:full_output_file]

  broadcast_background_tasks_snapshot

  status, summary = derive_status_and_summary(task_result, command)

  # Optional fields evaluate to nil/false when absent and are filtered out below.
  parts = [
    handle_id && "<task-id>#{handle_id}</task-id>",
    tool_use_id && "<tool-use-id>#{tool_use_id}</tool-use-id>",
    command && "<command>#{command}</command>",
    "<status>#{status}</status>",
    exit_code && "<exit-code>#{exit_code}</exit-code>",
    elapsed_seconds && "<elapsed-seconds>#{elapsed_seconds}</elapsed-seconds>",
    output_file && "<output-file>#{output_file}</output-file>",
    full_output_file && "<full-output-file>#{full_output_file}</full-output-file>",
    "<summary>#{summary}</summary>"
  ].select { |piece| piece }

  still_running = BackgroundTaskRegistry.list_running(agent_session_id: @session_id)
  if still_running.any?
    sibling_lines = still_running.map do |other|
      label = other[:command].to_s
      label = "#{label[0, 57]}..." if label.length > 60 # keep sibling commands short
      began = other[:started_at]
      age = began ? "#{(Time.now - began).round}s" : "?"
      " <sibling-task><handle-id>#{other[:handle_id]}</handle-id><command>#{label}</command><elapsed>#{age}</elapsed></sibling-task>"
    end
    parts << "<sibling-tasks>\n#{sibling_lines.join("\n")}\n</sibling-tasks>"
  end

  {
    content: parts.join("\n"),
    bubble: { command: command, handle_id: handle_id, status: status }
  }
end
1059
+
1060
private def derive_status_and_summary(task_result, command)
  # Maps a task result to a [status, summary] pair. Precedence: cancelled,
  # then error, then exit code (0 => "success", otherwise "failed"), and
  # finally plain "success" when none of those are set.
  label = command ? "`#{command}`" : "background terminal task"
  tail = last_nonempty_line(task_result[:output])

  return ["cancelled", "#{label} cancelled by user"] if task_result[:cancelled]
  return ["error", "#{label} watcher error: #{task_result[:error]}"] if task_result[:error]

  code = task_result[:exit_code]
  return ["success", "#{label} completed"] unless code

  clean_exit = code.zero?
  outcome = clean_exit ? "exited 0" : "exited #{code}"
  text = "#{label} #{outcome}"
  # Append the last output line (inspected, so quotes/escapes are visible) when there is one.
  text = "#{text} — last: #{tail.inspect}" if tail

  [clean_exit ? "success" : "failed", text]
end
1077
+
1078
private def last_nonempty_line(output)
  # Returns the last line of +output+ that contains non-whitespace, without
  # its line terminator and cut to 200 characters plus an ellipsis when
  # longer. Returns nil for nil/empty/blank-only output.
  text = output.to_s
  return nil if text.empty?

  candidate = text.lines.reverse_each.find { |raw| !raw.strip.empty? }
  return nil unless candidate

  trimmed = candidate.chomp
  if trimmed.length > 200
    "#{trimmed[0, 200]}…"
  else
    trimmed
  end
end
1084
+
1085
+ private def think
1086
+ # Check API key before starting progress indicator
1087
+ if @client.instance_variable_get(:@api_key).nil? || @client.instance_variable_get(:@api_key).empty?
1088
+ @ui&.show_error("API key is not configured! Please run /config to set up your API key.")
1089
+ raise AgentError, "API key is not configured"
1090
+ end
1091
+
1092
+ # Ensure a thinking progress indicator is live for the duration of this
1093
+ # LLM turn. This is idempotent — if `run` already started one at task
1094
+ # entry (or a previous iteration left one running), the UI recognizes
1095
+ # the bare reentry and preserves the existing spinner.
1096
+ @ui&.show_progress
1097
+
1098
+ # Check if compression is needed
1099
+ compression_context = compress_messages_if_needed(force: false)
1100
+
1101
+ # If compression is triggered, insert compression message and handle it
1102
+ if compression_context
1103
+ # Show compression start notification
1104
+ @ui&.show_info(
1105
+ "Message history compression starting (~#{compression_context[:original_token_count]} tokens, #{compression_context[:original_message_count]} messages) - Level #{compression_context[:compression_level]}"
1106
+ )
1107
+
1108
+ compression_message = compression_context[:compression_message]
1109
+ @history.append(compression_message)
1110
+ compression_handled = false
1111
+
1112
+ # Open a dedicated quiet-style handle for the compression work.
1113
+ # This sits on top of the outer thinking progress (if any); Plan B
1114
+ # semantics detach the outer spinner until we finish here. On any
1115
+ # exception the ensure block in with_progress guarantees the
1116
+ # handle is released — no more orphan gray ticker colliding with
1117
+ # a yellow ticker (the original flicker bug).
1118
+ #
1119
+ # NOTE: safe-navigation (+&.+) with blocks silently skips the
1120
+ # block when the receiver is nil. We need the compression work to
1121
+ # run even when @ui is nil (e.g. in tests), so branch explicitly.
1122
+ begin
1123
+ if @ui
1124
+ @ui.with_progress(message: "Compressing message history...", style: :quiet) do |handle|
1125
+ response = call_llm
1126
+ handle_compression_response(response, compression_context, progress: handle)
1127
+ compression_handled = true
1128
+ end
1129
+ else
1130
+ response = call_llm
1131
+ handle_compression_response(response, compression_context)
1132
+ compression_handled = true
1133
+ end
1134
+ ensure
1135
+ # If interrupted or failed, roll back the speculative compression message
1136
+ # so it doesn't pollute future conversation turns.
1137
+ unless compression_handled
1138
+ @history.rollback_before(compression_message)
1139
+ # Also restore compression_level since compress_messages_if_needed already incremented it.
1140
+ # Failure to do so would cause the next call to start at level 2 instead of 1,
1141
+ # and more importantly would re-trigger compression on the very next think() call
1142
+ # (with the user's new message as the last entry), producing consecutive user messages
1143
+ # that confuse the LLM into echoing compression instructions.
1144
+ @compression_level -= 1
1145
+ end
1146
+ end
1147
+ return nil
1148
+ end
1149
+
1150
+ # Normal LLM call. call_llm no longer manages the progress lifecycle;
1151
+ # we keep the spinner live across the call and finalize it here so the
1152
+ # UI transitions cleanly to the assistant message that follows.
1153
+ response = nil
1154
+ begin
1155
+ response = call_llm
1156
+ rescue
1157
+ # Ensure the spinner is stopped on any error path before it bubbles up.
1158
+ @ui&.show_progress(phase: "done")
1159
+ raise
1160
+ end
1161
+
1162
+ # Handle truncated responses (when max_tokens limit is reached)
1163
+ if response[:finish_reason] == "length"
1164
+ # Count recent truncations to prevent infinite loops
1165
+ @task_truncation_count = (@task_truncation_count || 0) + 1
1166
+
1167
+ if @task_truncation_count >= 3
1168
+ # Too many truncations - task is too complex
1169
+ @ui&.show_progress(phase: "done")
1170
+ @ui&.show_error("Response truncated multiple times. Task is too complex.")
1171
+
1172
+ # Create a response that tells the user to break down the task
1173
+ error_response = {
1174
+ content: "I apologize, but this task is too complex to complete in a single response. " \
1175
+ "Please break it down into smaller steps, or reduce the amount of content to generate at once.\n\n" \
1176
+ "For example, when creating a long document:\n" \
1177
+ "1. First create the file with a basic structure\n" \
1178
+ "2. Then use edit() to add content section by section",
1179
+ finish_reason: "stop",
1180
+ tool_calls: nil
1181
+ }
1182
+
1183
+ # Add this as an assistant message so it appears in conversation
1184
+ @history.append({
1185
+ role: "assistant",
1186
+ content: error_response[:content]
1187
+ })
1188
+
1189
+ return error_response
1190
+ end
1191
+
1192
+ # Preserve the truncated assistant message (text only, drop incomplete tool_calls)
1193
+ # so the LLM sees what it attempted before. This also maintains the required
1194
+ # user/assistant alternation for Bedrock Converse API.
1195
+ truncated_text = response[:content] || ""
1196
+ truncated_text = "..." if truncated_text.strip.empty?
1197
+ truncated_msg = {
1198
+ role: "assistant",
1199
+ content: truncated_text,
1200
+ task_id: @current_task_id
1201
+ }
1202
+ # Preserve reasoning_content on truncated turns as well.
1203
+ # This is the real LLM-emitted reasoning — keeping it here lets
1204
+ # MessageHistory#to_api recognize we're in thinking mode and pad any
1205
+ # other synthetic assistant messages in the history with an empty
1206
+ # reasoning_content automatically (see message_history.rb).
1207
+ truncated_msg[:reasoning_content] = response[:reasoning_content] if response[:reasoning_content]
1208
+ @history.append(truncated_msg)
1209
+
1210
+ # Insert system message to guide LLM to retry with smaller steps
1211
+ @history.append({
1212
+ role: "user",
1213
+ content: "[SYSTEM] Your previous response was truncated because it exceeded the output token limit (max_tokens=#{@config.max_tokens}). " \
1214
+ "The incomplete tool call has been discarded. Please retry with a different approach:\n" \
1215
+ "- For long file content: create the file with a basic structure first, then use edit() to add content section by section\n" \
1216
+ "- Break down large tasks into multiple smaller tool calls\n" \
1217
+ "- Keep each tool call argument under 2000 characters\n" \
1218
+ "- Use multiple tool calls instead of one large call",
1219
+ truncated: true
1220
+ })
1221
+
1222
+ # Close the current spinner so the warning appears cleanly;
1223
+ # the recursive think() call below will reopen a new one.
1224
+ @ui&.show_progress(phase: "done")
1225
+ @ui&.show_warning("Response truncated (#{@task_truncation_count}/3). Retrying with smaller steps...")
1226
+
1227
+ # Persist the truncated assistant message and the system retry hint
1228
+ # before the recursive think() so they survive a crash mid-retry.
1229
+ checkpoint!
1230
+
1231
+ # Recursively retry
1232
+ return think
1233
+ end
1234
+
1235
+ # Add assistant response to history
1236
+ msg = { role: "assistant", task_id: @current_task_id }
1237
+ # Always include content field (some APIs require it even with tool_calls)
1238
+ # Use empty string instead of null for better compatibility
1239
+ msg[:content] = response[:content] || ""
1240
+ # Only add tool_calls if they actually exist (don't add empty arrays)
1241
+ if response[:tool_calls]&.any?
1242
+ msg[:tool_calls] = format_tool_calls_for_api(response[:tool_calls])
1243
+ end
1244
+ # Store token_usage in the message so replay_history can re-emit it
1245
+ msg[:token_usage] = response[:token_usage] if response[:token_usage]
1246
+ # Store per-message latency — this is the source of truth (session.json)
1247
+ # for all time-to-first-token / duration / throughput info. The status
1248
+ # bar signal reads the last assistant message's latency; no separate
1249
+ # config file or top-level session field is introduced.
1250
+ if response[:latency]
1251
+ msg[:latency] = response[:latency]
1252
+ @latest_latency = response[:latency]
1253
+ # Push to UI so the status-bar signal updates immediately after the
1254
+ # model finishes (before any tool execution delays the next event).
1255
+ @ui&.update_sessionbar(latency: response[:latency])
1256
+ end
1257
+ # Preserve reasoning_content from the real LLM response.
1258
+ # This is the authoritative signal used by MessageHistory#to_api to
1259
+ # detect thinking-mode providers (DeepSeek V4, Kimi K2 thinking, etc.)
1260
+ # and automatically pad any synthetic assistant messages with an empty
1261
+ # reasoning_content so every outgoing payload satisfies the provider's
1262
+ # "reasoning_content must be passed back" contract.
1263
+ msg[:reasoning_content] = response[:reasoning_content] if response[:reasoning_content]
1264
+ @history.append(msg)
1265
+
1266
+ # Close the thinking spinner before returning. The caller (run loop)
1267
+ # is about to render the assistant message and/or tool invocations,
1268
+ # which should appear after the spinner disappears.
1269
+ @ui&.show_progress(phase: "done")
1270
+
1271
+ response
1272
+ end
1273
+
1274
# Run the tool calls requested by the LLM, in order, and collect one result
# entry per call.
#
# Per call: resolve the tool name, fire the :before_tool_use hook, ask for
# permission when the call is not auto-executable, execute the tool, then
# fire :after_tool_use. A user denial stops the loop and marks every
# remaining call as denied. A StandardError raised while executing a tool
# is logged and reported as a system-injected denial.
#
# @param tool_calls [Array<Hash>, nil] calls carrying :id, :name, :arguments
# @return [Hash] { denied:, feedback:, tool_results:, awaiting_feedback: }
private def act(tool_calls)
  unless tool_calls
    return { denied: false, feedback: nil, tool_results: [], awaiting_feedback: false }
  end

  was_denied = false
  denial_feedback = nil
  collected = []
  needs_user_reply = false

  tool_calls.each_with_index do |call, position|
    # Providers may emit aliases or different casing ("read", "Read");
    # map them onto the registered tool name and log the rewrite.
    requested_name = call[:name]
    canonical_name = @tool_registry.resolve(call[:name])
    if canonical_name && canonical_name != call[:name]
      @debug_logs << {
        timestamp: Time.now.iso8601,
        event: "tool_name_resolved",
        original: requested_name,
        resolved: canonical_name
      }
      call = call.merge(name: canonical_name)
    end
    # When nothing resolves, the call is left untouched; the execution
    # section's rescue below is what reports an unknown tool.

    # Hook: before_tool_use
    hook_outcome = @hooks.trigger(:before_tool_use, call)
    if hook_outcome[:action] == :deny
      @ui&.show_warning("Tool #{call[:name]} denied by hook")
      collected << build_error_result(call, hook_outcome[:reason] || "Tool use denied by hook")
      next
    end

    if should_auto_execute?(call[:name], call[:arguments])
      # Even without a confirmation prompt, edits and writes get a preview.
      show_tool_preview(call) if %w[edit write].include?(call[:name])
    else
      verdict = confirm_tool_use?(call)
      unless verdict[:approved]
        # System-injected rejections (preview errors) were already shown,
        # so only user-initiated denials produce a warning here.
        injected_by_system = verdict[:system_injected]
        unless injected_by_system
          warning = "Tool #{call[:name]} denied"
          warning += ": #{verdict[:feedback]}" if verdict[:feedback] && !verdict[:feedback].empty?
          @ui&.show_warning(warning)
        end

        was_denied = true
        user_note = verdict[:feedback]
        denial_feedback = user_note if user_note
        collected << build_denied_result(call, user_note, injected_by_system)

        # Everything after a rejected call is denied without being asked.
        tool_calls.drop(position + 1).each do |skipped_call|
          explanation =
            if user_note && !user_note.empty?
              user_note
            else
              "Auto-denied due to user rejection of previous tool"
            end
          collected << build_denied_result(skipped_call, explanation, injected_by_system)
        end
        break
      end
    end

    # request_user_feedback is announced after execution instead (see below).
    unless call[:name] == "request_user_feedback"
      @ui&.show_tool_call(call[:name], redact_tool_args(call[:arguments]))
    end

    begin
      tool = @tool_registry.get(call[:name])

      # Parse and validate arguments (the parser can repair broken JSON).
      args = Utils::ArgumentsParser.parse_and_validate(call, @tool_registry)

      # Some tools need agent-side state injected into their arguments.
      case call[:name]
      when "todo_manager"
        args[:todos_storage] = @todos
      when "invoke_skill"
        args[:agent] = self
        args[:skill_loader] = @skill_loader
      when "undo_task", "redo_task", "list_tasks"
        args[:agent] = self
      end

      # Pass the working directory explicitly so tools need not depend on
      # Dir.chdir global state.
      args[:working_dir] = @working_dir if @working_dir

      # With a UI attached, run the tool inside a quiet progress handle.
      # quiet_on_fast_finish asks the UI to drop the progress line for tools
      # that finish quickly, leaving just the call and result lines.
      result = nil
      if @ui
        progress_text = build_tool_progress_message(call[:name], args)
        @ui.with_progress(
          message: progress_text,
          style: :quiet,
          quiet_on_fast_finish: true
        ) { result = tool.execute(**args) }
      else
        result = tool.execute(**args)
      end

      # Track modified files for Time Machine snapshots.
      track_modified_files(call[:name], args)

      if call[:name] == "terminal" && result.is_a?(Hash)
        # Async terminal start: resume the agent once the task completes.
        if result[:accepted] && result[:handle_id]
          tool_use_id = call[:id]
          BackgroundTaskRegistry.register_callback(
            handle_id: result[:handle_id],
            agent: self
          ) do |task_result|
            notification = format_terminal_notification(task_result, tool_use_id: tool_use_id)
            resume_with_notification(notification[:content], bubble: notification[:bubble])
          end
          broadcast_background_tasks_snapshot
        end

        # A killed handle changes the background-task count; refresh it now.
        broadcast_background_tasks_snapshot if result[:killed]
      end

      # Hook: after_tool_use
      @hooks.trigger(:after_tool_use, call, result)

      @ui&.update_todos(@todos.dup) if call[:name] == "todo_manager"

      if call[:name] == "request_user_feedback"
        # The raw arguments are handed to the UI so it can render the
        # question itself (e.g. as an interactive card).
        @ui&.show_tool_call(call[:name], call[:arguments])

        if @config.permission_mode == :auto_approve
          # Nobody is watching in auto_approve mode: tell the LLM to decide.
          result = result.merge(
            auto_reply: "No user is available. Please make a reasonable decision based on the context and continue."
          )
        else
          # A human is present, so the caller must wait for their answer.
          needs_user_reply = true
        end
      else
        display_text = tool.respond_to?(:format_result) ? tool.format_result(result) : result.to_s
        rich_payload = tool.respond_to?(:format_result_for_ui) ? tool.format_result_for_ui(result) : nil
        @ui&.show_tool_result(display_text, ui_payload: rich_payload)
      end

      collected << build_success_result(call, result)
    rescue StandardError => e
      # Keep the full failure context in the debug log for troubleshooting.
      @debug_logs << {
        timestamp: Time.now.iso8601,
        event: "tool_execution_error",
        tool_name: call[:name],
        tool_args: call[:arguments],
        error_class: e.class.name,
        error_message: e.message,
        backtrace: e.backtrace&.first(20)
      }
      Octo::Logger.error("tool_execution_error", tool: call[:name], error: e)

      @hooks.trigger(:on_tool_error, call, e)
      @ui&.show_tool_error(e)
      # Reported as a system-injected denial so the LLM knows it may retry.
      collected << build_denied_result(call, e.message, true)
    end
  end

  {
    denied: was_denied,
    feedback: denial_feedback,
    tool_results: collected,
    awaiting_feedback: needs_user_reply
  }
end
1489
+
1490
# Record tool results in the conversation history.
#
# The client formats the results for the active API flavour; each formatted
# message is tagged with the current task id and appended. Any result that
# carries an :image_inject payload additionally gets a follow-up
# role:"user" message holding the image as an image_url block, because
# OpenAI-compatible APIs only accept image blocks in user messages.
#
# @param response [Hash] the LLM response the tool calls came from
# @param tool_results [Array<Hash>] results produced by act
# @return [Array<Hash>, nil] tool_results, or nil when there was nothing to record
private def observe(response, tool_results)
  return if tool_results.empty?

  @client
    .format_tool_results(response, tool_results, model: current_model)
    .each { |message| @history.append(message.merge(task_id: @current_task_id)) }

  tool_results.each do |tool_result|
    payload = tool_result[:image_inject]
    next unless payload

    mime_type = payload[:mime_type]
    base64_data = payload[:base64_data]
    path = payload[:path]
    next unless mime_type && base64_data

    image_block = {
      type: "image_url",
      image_url: { url: "data:#{mime_type};base64,#{base64_data}" }
    }
    caption = path ? File.basename(path.to_s) : "image"
    image_block[:image_path] = path if path

    @history.append({
      role: "user",
      content: [{ type: "text", text: "[Image: #{caption}]" }, image_block],
      system_injected: true,
      task_id: @current_task_id
    })
  end
end
1531
+
1532
# Queue an inline skill injection; flush_pending_injections writes the
# queued entries into history later, after observe() has appended the tool
# results. Injecting during tool execution would break Bedrock's
# toolUse/toolResult pairing requirement.
# @param skill [Octo::Skill] The skill whose instructions should be injected
# @param task [String] The task description passed to the skill
def enqueue_injection(skill, task)
  pending_entry = { skill: skill, task: task }
  @pending_injections.push(pending_entry)
end
1540
+
1541
# Extension point for redacting volatile tmpdir paths from tool call
# arguments before they are shown in the UI. This implementation is a
# pass-through: it returns the arguments untouched.
# @param args [String, Hash, nil] Raw tool arguments
# @return [String, Hash, nil] The very same object that was passed in
def redact_tool_args(args)
  shown_args = args
  shown_args
end
1547
+
1548
# Write every queued inline skill injection into history, then empty the
# queue. Call this AFTER observe() so the tool result precedes the skill
# instructions, which is the message order API providers (Bedrock in
# particular) require.
private def flush_pending_injections
  return if @pending_injections.empty?

  @pending_injections.each do |pending|
    skill, task = pending.values_at(:skill, :task)
    inject_skill_as_assistant_message(skill, task, @current_task_id)
  end
  @pending_injections.clear
end
1559
+
1560
# Whether the agent is currently running, judged by @start_time being set.
# @return [Boolean]
def running?
  return false if @start_time.nil?

  true
end
1564
+
1565
# Assemble the result hash describing a finished task.
#
# @param status [Symbol] outcome marker, :success by default
# @param error [Object, nil] error details to attach, if any
# @return [Hash] status, session id, iterations spent on this task,
#   seconds since @start_time, cache stats, history and error
private def build_result(status = :success, error: nil)
  # Only the iterations since the task started count towards the task.
  iterations_at_start = @task_start_iterations || 0
  spent_iterations = @iterations - iterations_at_start
  elapsed = Time.now - @start_time
  # Task-level cache stats win over the session-wide ones when present.
  stats = @task_cache_stats || @cache_stats

  {
    status: status,
    session_id: @session_id,
    iterations: spent_iterations,
    duration_seconds: elapsed,
    cache_stats: stats,
    history: @history,
    error: error
  }
end
1578
+
1579
# Normalise tool calls into the { id:, type:, function: { name:, arguments: } }
# shape. Accepts both flat calls (:name / :arguments at the top level) and
# calls that already nest them under :function. Calls whose name or
# arguments are nil are dropped.
#
# @param tool_calls [Array<Hash>, nil]
# @return [Array<Hash>, nil] normalised calls, or nil when none survive
private def format_tool_calls_for_api(tool_calls)
  return nil unless tool_calls

  normalised = []
  tool_calls.each do |call|
    source = call[:function] || call
    tool_name = source[:name] || call[:name]
    tool_arguments = source[:arguments] || call[:arguments]
    # Malformed entries are skipped rather than sent to the API.
    next if tool_name.nil? || tool_arguments.nil?

    normalised << {
      id: call[:id],
      type: call[:type] || "function",
      function: { name: tool_name, arguments: tool_arguments }
    }
  end

  normalised.empty? ? nil : normalised
end
1601
+
1602
# Register the built-in tool set with @tool_registry. Terminal is the only
# tool constructed with an argument (the agent's session id); every other
# tool is built with no arguments. Registration order matches the list.
private def register_builtin_tools
  @tool_registry.register(Tools::Terminal.new(agent_session_id: @session_id))

  # Classes are looked up one at a time so each tool is instantiated and
  # registered before the next one is touched.
  %i[
    FileReader Write Edit Glob Grep WebSearch WebFetch TodoManager
    RequestUserFeedback InvokeSkill UndoTask RedoTask ListTasks Browser
  ].map { |tool_class| @tool_registry.register(Tools.const_get(tool_class).new) }.last
end
1619
+
1620
# Fork a subagent from this agent.
#
# The subagent receives a deep copy of the parent's config and message
# history and is created with the same working dir, UI, profile and source,
# but its own session id. Its tool set is left untouched (for cache reuse);
# forbidden tools are rejected at call time by a :before_tool_use hook.
#
# @param model [String, nil] Model name to switch to; "lite" selects the
#   lite companion of the current model; nil keeps the current model
# @param forbidden_tools [Array<String>] Tool names to reject at runtime
# @param system_prompt_suffix [String, nil] Extra instructions, appended as
#   a user message (rather than altering the system prompt) for cache reuse
# @return [Agent] New subagent instance
# @raise [AgentError] when a named model is not present in the config
def fork_subagent(model: nil, forbidden_tools: [], system_prompt_suffix: nil)
  # Work on a copy so nothing done here leaks into the parent's config.
  child_config = @config.deep_copy

  if model == "lite"
    # "lite" is a virtual role derived from the current model. When no lite
    # config can be resolved, the subagent stays on the current model.
    lite = child_config.lite_model_config_for_current
    if lite
      if lite["virtual"]
        # Preset-derived lite: applied as an overlay on the subagent's
        # config instead of mutating the shared model entries.
        child_config.apply_virtual_model_overlay!(
          "api_key" => lite["api_key"],
          "base_url" => lite["base_url"],
          "model" => lite["model"],
          "anthropic_format" => lite["anthropic_format"]
        )
      elsif lite["id"]
        # Explicitly configured lite model: a regular entry with an id.
        child_config.switch_model_by_id(lite["id"])
      end
    end
  elsif model
    # Regular lookup: first configured model with a matching name.
    match = child_config.models.find { |entry| entry["model"] == model }
    unless match && match["id"]
      raise AgentError, "Model '#{model}' not found in config. Available models: #{child_config.model_names.join(', ')}"
    end

    child_config.switch_model_by_id(match["id"])
  end

  child_client = Octo::Client.new(
    child_config.api_key,
    base_url: child_config.base_url,
    model: child_config.model_name,
    anthropic_format: child_config.anthropic_format?
  )

  child = self.class.new(
    child_client,
    child_config,
    working_dir: @working_dir,
    ui: @ui,
    profile: @agent_profile.name,
    session_id: Octo::SessionManager.generate_id,
    source: @source
  )
  child.instance_variable_set(:@is_subagent, true)

  # Carry over the token baseline so the first iteration's delta is right.
  child.instance_variable_set(:@previous_total_tokens, @previous_total_tokens)

  # The subagent works on its own deep copy of the conversation.
  history_copy = deep_clone(@history.to_a)
  child.instance_variable_set(:@history, MessageHistory.new(history_copy))

  if system_prompt_suffix
    child_history = child.history

    disabled_notice = ""
    if forbidden_tools.any?
      listing = forbidden_tools.map { |tool_name| "`#{tool_name}`" }.join(", ")
      disabled_notice = "\n\n[System Notice] The following tools are disabled in this subagent and will be rejected if called: #{listing}"
    end

    briefing =
      "CRITICAL: TASK CONTEXT SWITCH - FORKED SUBAGENT MODE\n\n" \
      "You are now running as a forked subagent — a temporary, isolated agent spawned by the parent agent " \
      "to handle a specific task. You run independently and cannot communicate back to the parent mid-task. " \
      "When you finish (i.e., you stop calling tools and return a final response), your output will be " \
      "automatically summarized and returned to the parent agent as a result so it can continue.\n\n" \
      "#{system_prompt_suffix}#{disabled_notice}"

    child_history.append({
      role: "user",
      content: briefing,
      system_injected: true,
      subagent_instructions: true
    })

    # A canned acknowledgement keeps the turn structure complete:
    # [user] constraints -> [assistant] ack -> [user] actual task.
    child_history.append({
      role: "assistant",
      content: "Understood. I am now operating as a subagent with the constraints above. Please provide the task.",
      system_injected: true
    })
  end

  # Runtime guard for forbidden tools; the tool registry itself is untouched.
  if forbidden_tools.any?
    child.add_hook(:before_tool_use) do |call|
      blocked = forbidden_tools.include?(call[:name])
      if blocked
        { action: :deny, reason: "Tool '#{call[:name]}' is forbidden in this subagent context" }
      else
        { action: :allow }
      end
    end
  end

  # Metadata read back by generate_subagent_summary: how many messages the
  # parent had at fork time (the subagent flag is asserted once more here).
  child.instance_variable_set(:@is_subagent, true)
  child.instance_variable_set(:@parent_message_count, @history.size)

  child
end
1752
+
1753
# Generate a plain-text summary of a finished subagent run.
#
# Only messages the subagent added after the fork are considered (everything
# past the @parent_message_count recorded on the subagent). The summary
# lists the iteration count, the distinct tools that were called, and the
# last non-empty assistant message.
#
# @param subagent [Agent] The subagent that completed execution
# @return [String] Summary text to insert into parent agent
def generate_subagent_summary(subagent)
  parent_count = subagent.instance_variable_get(:@parent_message_count) || 0
  new_messages = subagent.history.to_a[parent_count..] || []

  # Assistant messages store tool calls in the API shape
  # { id:, type:, function: { name:, arguments: } }, so the name lives under
  # :function. Flat { name: } entries are still accepted as a fallback;
  # entries without any name are ignored.
  tool_names = new_messages
    .select { |message| message[:role] == "assistant" && message[:tool_calls] }
    .flat_map { |message| message[:tool_calls].map { |tc| (tc[:function] || tc)[:name] || tc[:name] } }
    .compact
    .uniq

  # The final answer is the last assistant message that carries content.
  last_response = new_messages
    .reverse
    .find { |message| message[:role] == "assistant" && message[:content] && !message[:content].empty? }
    &.dig(:content)

  parts = []
  parts << "[SUBAGENT SUMMARY]"
  parts << "Completed in #{subagent.iterations} iterations"
  parts << "Tools used: #{tool_names.join(', ')}" if tool_names.any?
  parts << ""
  parts << "Results:"
  parts << (last_response || "(No response)")

  parts.join("\n")
end
1785
+
1786
# Deep-copy an object by serialising it with Marshal and loading it back.
# @param obj [Object] Object to clone (must be Marshal-dumpable)
# @return [Object] An independent copy sharing no nested objects with obj
private def deep_clone(obj)
  serialized = Marshal.dump(obj)
  Marshal.load(serialized)
end
1792
+
1793
+ # Format user content with optional images
1794
+ # PDF files are handled upstream (server injects file path into message text),
1795
+ # so this method only needs to handle images.
1796
+ # @param text [String] User's text input
1797
+ # @param images [Array<String>] Array of image file paths or data: URLs
1798
+ # @param files [Array] Unused — kept for signature compatibility
1799
+ # @return [String|Array] String if no images, Array with content blocks otherwise
1800
+ # Pure: process a user message's text + file attachments into the data
1801
+ # structures needed for @history append, WITHOUT touching @history itself.
1802
+ # Safe to call from any thread (HTTP-handler thread or agent thread) —
1803
+ # only mutates argument-local state and runs the FileProcessor subprocess.
1804
+ #
1805
+ # Returns a Hash:
1806
+ # {
1807
+ # user_content: String or content-block Array (text + vision blocks),
1808
+ # display_files: Array<{name, type, preview_path}> for replay bubbles,
1809
+ # file_prompt: String (system_injected file references for LLM, "" if none)
1810
+ # }
1811
+ #
1812
+ # The companion +append_processed_user_message_to_history!+ takes this
1813
+ # hash and does the actual append — that part MUST run on the
1814
+ # @run_mutex-holding thread.
1815
private def process_files_for_user_message(content, files)
  image_files, disk_files = partition_files(Array(files))
  vision_images, downgraded = resolve_vision_images(image_files)
  # Images that could not be kept as vision input are handled as disk files.
  pending_files = disk_files + downgraded

  # User message: the text plus the inline vision images.
  inline_images = vision_images.map { |image| { url: image[:url], path: image[:path] } }
  user_content = format_user_content(content, inline_images)

  # Run the file processor on each disk file that exists; entries without a
  # usable path are passed through untouched.
  parsed_files = pending_files.map do |file|
    path = file[:path] || file["path"]
    name = file[:name] || file["name"]
    next file unless path && File.exist?(path.to_s)

    reason = file[:downgrade_reason] || file["downgrade_reason"]
    ref = Utils::FileProcessor.process_path(path, name: name)
    {
      name: ref.name,
      type: ref.type.to_s,
      path: ref.original_path,
      preview_path: ref.preview_path,
      parse_error: ref.parse_error,
      parser_path: ref.parser_path,
      downgrade_reason: reason
    }
  end

  # Compact descriptors for replay bubbles; nameless entries are left out.
  display_files = parsed_files.each_with_object([]) do |file, shown|
    name = file[:name] || file["name"]
    next unless name

    shown << {
      name: name,
      type: file[:type] || file["type"] || "file",
      preview_path: file[:preview_path] || file["preview_path"]
    }
  end

  # The file prompt covers vision images first, then the disk files.
  image_meta = vision_images.map do |image|
    { name: image[:name], type: "image", size_bytes: image[:size_bytes], path: image[:path] }
  end
  file_prompt = build_file_prompt(image_meta + parsed_files)

  {
    user_content: user_content,
    display_files: display_files,
    file_prompt: file_prompt
  }
end
1854
+
1855
# Append an already-processed user message to @history and count it as a
# new task. When the processed data carries a non-empty file prompt, a
# second, system_injected user message holding that prompt follows.
#
# Mutates @history, so the caller MUST hold @run_mutex (i.e. call this from
# inside Agent#run).
#
# @param processed [Hash] result of process_files_for_user_message
#   (:user_content, :display_files, :file_prompt)
# @param task_id [Object] id stamped on the appended messages
private def append_processed_user_message_to_history!(processed, task_id)
  shown_files = processed[:display_files]
  user_message = {
    role: "user",
    content: processed[:user_content],
    task_id: task_id,
    created_at: Time.now.to_f,
    # An empty list is stored as nil rather than [].
    display_files: (shown_files unless shown_files.empty?)
  }
  @history.append(user_message)
  @total_tasks += 1

  file_prompt = processed[:file_prompt]
  return if file_prompt.nil? || file_prompt.empty?

  @history.append({
    role: "user",
    content: file_prompt,
    system_injected: true,
    task_id: task_id
  })
end
1879
+
1880
+ # Build the system_injected file-prompt string from an Array of file
1881
+ # metadata hashes. Returns "" if there are no files. Extracted so both
1882
+ # process_files_for_user_message and any future caller share one shape.
1883
private def build_file_prompt(all_meta_files)
  return "" if all_meta_files.empty?

  sections = all_meta_files.filter_map do |meta|
    # Metadata hashes may be keyed by Symbol or by String; try both.
    field = ->(key) { meta[key] || meta[key.to_s] }

    # Entries without a name produce no section at all.
    name = field.call(:name)
    next unless name

    original = field.call(:path)
    preview = field.call(:preview_path)
    size = field.call(:size_bytes)
    failure = field.call(:parse_error)
    parser = field.call(:parser_path)

    entry = ["[File: #{name}]", "Type: #{field.call(:type) || "file"}"]
    entry << "Size: #{format_size(size)}" if size
    entry << "Original: #{original}" if original
    entry << "Preview (Markdown): #{preview}" if preview

    note = downgrade_note_for(field.call(:downgrade_reason))
    entry << "Note: #{note}" if note

    # Only report a parse failure when no preview was produced; if the
    # parser script is known, spell out how to regenerate the preview.
    if preview.nil? && failure
      entry << "Parse failed: #{failure}"
      if parser
        target = "#{original}.preview.md"
        entry << "Action required: fix the parser at #{parser}, then run:"
        entry << " ruby #{parser} #{original} > #{target}"
        entry << "Once done, read #{target} to continue helping the user."
      end
    end

    entry.join("\n")
  end

  # One blank line between file sections.
  sections.join("\n\n")
end
1919
+
1920
+ # Partition files array into [image_files, non_image_files].
1921
+ # Image files: have mime_type starting with "image/" OR have data_url present.
1922
private def partition_files(files)
  # A file counts as an image when its MIME type starts with "image/" or
  # when it carries a data_url; keys may be Symbols or Strings.
  files.partition do |f|
    mime_type = f[:mime_type] || f["mime_type"] || ""
    mime_type.start_with?("image/") || f[:data_url] || f["data_url"]
  end
end
1936
+
1937
+ # Resolve image files to vision data_urls.
1938
+ # Files with data_url: use as-is (already compressed by frontend or adapter).
1939
+ # Files with path: convert to data_url via FileProcessor.
1940
+ #
1941
+ # Downgrade to disk file refs (with a `downgrade_reason` tag) when:
1942
+ # - :provider_no_vision — current model does not support vision input
1943
+ # (e.g. MiniMax, Kimi, DeepSeek, or octo's DeepSeek sidecar).
1944
+ # The downgrade is capability-driven and reflects the *current* model;
1945
+ # switching models takes effect on the next run with no cached state.
1946
+ # - :too_large — base64 payload exceeds MAX_IMAGE_BYTES. Downgrading here
1947
+ # keeps a hot context window from blowing up on e.g. a 20MB screenshot.
1948
+ #
1949
+ # Both reasons share the same downgrade path; `file_prompt` will later
1950
+ # emit a `Note:` line on the file entry explaining why the image isn't
1951
+ # inline, so the LLM has colocated context (no system prompt pollution).
1952
+ #
1953
+ # @return [Array<Hash>, Array<Hash>] [vision_images, downgraded_disk_files]
1954
private def resolve_vision_images(image_files)
  require "base64"
  size_limit = Utils::FileProcessor::MAX_IMAGE_BYTES
  # Asked once up front and reused for every image in this call.
  can_see_images = @config.current_model_supports?(:vision)

  inline_images = [] # Array of { url:, name:, size_bytes:, path: }
  disk_refs = []

  # Size estimate derived from the base64 payload length (3 bytes per 4 chars).
  decoded_size = ->(payload) { (payload.bytesize * 3) / 4 }

  # Send one image either to the inline list or to the downgraded list,
  # tagging downgraded entries with the reason.
  route = lambda do |url, name, mime, disk_path, byte_size|
    reason = downgrade_reason_for(can_see_images, byte_size, size_limit)
    if reason
      disk_refs << { name: name, path: disk_path, type: "image",
                     mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
    else
      inline_images << { url: url, name: name, size_bytes: byte_size, path: disk_path }
    end
  end

  image_files.each do |f|
    name = f[:name] || f["name"] || "image.jpg"
    mime = f[:mime_type] || f["mime_type"] || "image/jpeg"
    data_url = f[:data_url] || f["data_url"]
    path = f[:path] || f["path"]

    if data_url
      # Inline payload: the decoded bytes are always written to disk, so a
      # path exists whether the image stays inline or is downgraded.
      payload = data_url.split(",", 2).last.to_s
      byte_size = decoded_size.call(payload)
      saved = Utils::FileProcessor.save_image_to_disk(body: Base64.decode64(payload), mime_type: mime, filename: name)
      route.call(data_url, name, mime, saved.original_path, byte_size)
    elsif path
      begin
        loaded_url = Utils::FileProcessor.image_path_to_data_url(path)
        byte_size = decoded_size.call(loaded_url.split(",", 2).last.to_s)
        route.call(loaded_url, name, mime, path, byte_size)
      rescue => e
        # A failure on this image is logged and the image is left out of
        # both result lists.
        @ui&.log("Failed to load image #{name}: #{e.message}", level: :warn)
      end
    end
  end

  [inline_images, disk_refs]
end
2003
+
2004
+ # Decide whether an image must be downgraded to a disk ref, and if so why.
2005
+ # Precedence: provider capability is checked first — a text-only model
2006
+ # can't use the image at any size, so there's no point re-checking size.
2007
+ # @return [Symbol, nil] :provider_no_vision | :too_large | nil (keep inline)
2008
private def downgrade_reason_for(vision_supported, byte_size, max_bytes)
  # Capability wins over size; falling through both branches yields nil,
  # meaning the image can stay inline.
  if !vision_supported
    :provider_no_vision
  elsif byte_size > max_bytes
    :too_large
  end
end
2013
+
2014
+ # Human-readable note for a downgrade reason, embedded next to the file
2015
+ # entry in the file_prompt. Kept intentionally terse and factual; the LLM
2016
+ # will see this alongside the file's name/type/path so it can tell the
2017
+ # user honestly why it can't see the image.
2018
+ # @return [String, nil] note text, or nil for no note
2019
private def downgrade_note_for(reason)
  # Lookup table keyed by reason; a nil or unrecognised reason has no
  # entry and therefore yields nil (no note).
  notes = {
    provider_no_vision: "The current model does not support vision input. Image content is not visible to the model; suggest switching to a vision-capable model if the user needs image analysis.",
    too_large: "Image was too large for inline delivery and has been saved to disk. Read it with a vision-capable tool/model if needed."
  }
  notes[reason&.to_sym]
end
2027
+
2028
+ # Build user message content for LLM.
2029
+ # Returns plain String when no vision images; Array of content parts otherwise.
2030
+ #
2031
+ # vision_images: Array of String (plain url) OR Hash { url:, path: }
2032
+ # path is stored in the block so history replay can reconstruct the image
2033
+ # from the tmp file when the base64 data_url is no longer available.
2034
private def format_user_content(text, vision_images)
  images = vision_images || []
  # Without images the text is returned untouched (String or nil).
  return text if images.empty?

  # Blank text contributes no part; otherwise it leads the content array.
  text_parts = (text.nil? || text.empty?) ? [] : [{ type: "text", text: text }]

  image_parts = images.map do |img|
    # Hash entries carry :url and optionally :path; anything else is
    # treated as the URL itself.
    url, disk_path = img.is_a?(Hash) ? [img[:url], img[:path]] : [img, nil]
    part = { type: "image_url", image_url: { url: url } }
    part[:image_path] = disk_path if disk_path
    part
  end

  text_parts + image_parts
end
2052
+
2053
+ # Format byte size as human-readable string.
2054
private def format_size(bytes)
  return "0B" unless bytes

  kib = 1024
  mib = kib * kib
  # Megabytes keep one decimal place; kilobytes are rounded to a whole number.
  return "#{(bytes / 1024.0 / 1024.0).round(1)}MB" if bytes >= mib
  return "#{(bytes / 1024.0).round(0).to_i}KB" if bytes >= kib

  "#{bytes}B"
end
2064
+
2065
+ # Inject a session context message (date + model) into the conversation.
2066
+ # Only injects when:
2067
+ # 1. No context message exists yet in this session, OR
2068
+ # 2. The existing context is from a previous day (cross-day session)
2069
+ # Marked with system_injected: true so existing filters (replay_history,
2070
+ # get_recent_user_messages, etc.) automatically skip it.
2071
+ # Cache-safe: always inserted just before the current user message,
2072
+ # so no historical cache entries are ever invalidated.
2073
private def inject_session_context_if_needed
  # Compare the date recorded on the history with today's date; inject
  # only when they differ (including when no date is recorded).
  already_current = @history.last_session_context_date == Time.now.strftime("%Y-%m-%d")
  inject_session_context unless already_current
end
2081
+
2082
+ # Core method to inject session context (date, model, OS, paths).
2083
+ # Called by inject_session_context_if_needed (with date check)
2084
+ # and by switch_model (without date check, to force update).
2085
+ #
2086
+ # IMPORTANT: Skip injection when the system prompt hasn't been built yet.
2087
+ # Otherwise, appending a user message to an empty history makes
2088
+ # @history.empty? false, which causes run() to skip building the
2089
+ # system prompt entirely (see run()'s "first run" guard).
2090
+ # The injection will happen naturally in run() via
2091
+ # inject_session_context_if_needed after the system prompt is in place.
2092
private def inject_session_context
  # Nothing is appended until the history reports a system prompt.
  return unless @history.has_system_prompt?

  today = Time.now.strftime("%Y-%m-%d")
  detector = Octo::Utils::EnvironmentDetector
  os = detector.os_type
  desktop = detector.desktop_path

  # Date and model always appear; OS and Desktop only when known;
  # the working directory always comes last.
  facts = ["Today is #{Time.now.strftime('%Y-%m-%d, %A')}", "Current model: #{current_model}"]
  facts << "OS: #{detector.os_label}" unless os == :unknown
  facts << "Desktop: #{desktop}" if desktop
  facts << "Working directory: #{@working_dir}"

  @history.append({
    role: "user",
    content: "[Session context: #{facts.join('. ')}]",
    system_injected: true,
    session_context: true,
    session_date: today
  })
end
2119
+
2120
+ # Parse markdown file:// links from assistant message content.
2121
+ # Handles both regular links and inline images:
2122
+ # [Download report](file:///path/to/file.pdf)
2123
+ # ![chart](file:///path/to/chart.png)
2124
+ #
2125
+ # Returns { text: String (original content, unmodified),
2126
+ # files: Array<{name:, path:, inline:}> }
2127
private def parse_file_links(content)
  return { text: content, files: [] } if content.nil? || content.empty?

  # Capture groups: optional "!" (inline image marker), link label, and the
  # part of the URL after "file://" (percent-encoded or raw).
  files = content.scan(/(!?)\[([^\]]*)\]\(file:\/\/([^)]+)\)/).map do |bang, _label, encoded|
    # CGI.unescape performs form-style decoding, which would turn a literal
    # "+" into a space and corrupt paths such as "c++.txt". Protect plus
    # signs first so only %XX sequences (e.g. encoded Chinese filenames or
    # %20) are decoded.
    raw_path = CGI.unescape(encoded.gsub("+", "%2B"))
    path = File.expand_path(raw_path)
    Octo::Logger.info("[parse_file_links] raw=#{encoded.inspect} expanded=#{path.inspect} exist=#{File.exist?(path)}")
    { name: File.basename(raw_path), path: path, inline: bang == "!" }
  end

  # The text is returned unmodified; only the link targets are extracted.
  { text: content, files: files }
end
2142
+
2143
+ # Emit assistant message to UI, parsing any embedded file:// links first.
2144
+ #
2145
+ # Local image URL rewriting (file:// → /api/local-image) is intentionally
2146
+ # NOT done here. It is browser-specific (the Web UI runs on http://localhost
2147
+ # and cannot load file:// directly) and must stay scoped to the Web UI
2148
+ # controller. IM channel subscribers need the original file:// markdown so
2149
+ # parse_file_links can extract paths and deliver images as native attachments.
2150
private def emit_assistant_message(content, reasoning_content: nil)
  # Non-blank reasoning is placed in front of the answer inside <think> tags.
  has_reasoning = reasoning_content && !reasoning_content.to_s.strip.empty?
  full_content = has_reasoning ? "<think>\n#{reasoning_content}\n</think>\n#{content}" : content

  # Nothing to show when the combined message is nil or whitespace only.
  return if full_content.nil? || full_content.to_s.strip.empty?

  # File links are extracted from the answer text alone, not the reasoning.
  extracted = parse_file_links(content)
  @ui&.show_assistant_message(full_content, files: extracted[:files])
end
2165
+
2166
+ # Track modified files for Time Machine snapshots
2167
+ # @param tool_name [String] Name of the tool that was executed
2168
+ # @param args [Hash] Arguments passed to the tool
2169
def track_modified_files(tool_name, args)
  @modified_files_in_task ||= []
  # Only the file-writing tools are tracked.
  return unless %w[write edit].include?(tool_name)

  # Accept Symbol or String keys, and skip calls that carry no usable
  # path: File.expand_path(nil, ...) would raise TypeError, and an empty
  # string would record the working directory itself.
  file_path = args && (args[:path] || args["path"])
  return if file_path.nil? || file_path.to_s.empty?

  # Relative paths are resolved against the working directory; each file
  # is recorded once.
  full_path = File.expand_path(file_path, @working_dir)
  @modified_files_in_task << full_path unless @modified_files_in_task.include?(full_path)
end
2179
+ end
2180
+ end