octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,623 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "json"
5
+
6
+ module Octo
7
+ class Client
8
+ MAX_RETRIES = 10
9
+ RETRY_DELAY = 5 # seconds
10
+
11
def initialize(api_key, base_url:, model:, anthropic_format: false)
  @api_key, @base_url, @model = api_key, base_url, model

  # Bedrock mode is decided from the API key / model name pair.
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)

  # Resolve the provider once; the id is stored and reused for the
  # wire-format decision and the vision capability lookup below.
  @provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)

  # A known provider that prefers the Anthropic format for this model wins;
  # otherwise (unknown provider / custom base_url) the explicit constructor
  # flag decides.
  native_anthropic = @provider_id &&
                     Providers.anthropic_format_for_model?(@provider_id, @model)
  @use_anthropic_format = native_anthropic || anthropic_format

  # Looked up once at construction time and passed to the OpenAI request
  # builder as `vision_supported:`.
  @vision_supported = Providers.supports?(@provider_id, :vision, model_name: @model)
end
40
+
41
+ # Returns true when the client is using the AWS Bedrock Converse API.
42
def bedrock?
  # Plain reader: the flag is computed once in the constructor.
  @use_bedrock
end
45
+
46
+ # Returns true when requests use the Anthropic Messages wire format and the
47
+ # client is not in Bedrock mode (both are decided once, in the constructor).
48
def anthropic_format?(model = nil)
  # `model` is accepted but not consulted; both flags were fixed in the
  # constructor. A falsy format flag is returned as-is (nil or false).
  return @use_anthropic_format unless @use_anthropic_format

  # Bedrock mode takes precedence over the Anthropic wire format.
  !@use_bedrock
end
51
+
52
+ # ── Connection test ───────────────────────────────────────────────────────
53
+
54
+ # Test API connection by sending a minimal request.
55
+ # Returns { success: true } or { success: false, error: "..." }.
56
def test_connection(model:)
  # Minimal chat payload used by both the Anthropic and the OpenAI branch.
  ping_json = lambda do
    { model: model, max_tokens: 16,
      messages: [{ role: "user", content: "hi" }] }.to_json
  end

  response =
    if bedrock?
      payload = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload }
    elsif anthropic_format?
      payload = ping_json.call
      anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload }
    else
      payload = ping_json.call
      openai_connection.post("chat/completions") { |req| req.body = payload }
    end

  # Translates the HTTP response into the { success:, error: } result hash.
  handle_test_response(response)
rescue Faraday::Error => transport_error
  { success: false, error: "Connection error: #{transport_error.message}" }
rescue => unexpected
  # Anything that is not a Faraday error is logged before being reported.
  Octo::Logger.error("[test_connection] #{unexpected.class}: #{unexpected.message}", error: unexpected)
  { success: false, error: unexpected.message }
end
78
+
79
+ # ── Simple (non-agent) helpers ────────────────────────────────────────────
80
+
81
+ # Send a single string message and return the reply text.
82
def send_message(content, model:, max_tokens:)
  # Wrap the text in a one-element user conversation and delegate.
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end
86
+
87
+ # Send a messages array and return the reply text.
88
def send_messages(messages, model:, max_tokens:)
  # Each provider branch builds its own payload, posts it without tools, and
  # hands the raw HTTP response to the matching plain-text extractor.
  if bedrock?
    payload = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    reply = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }
    return parse_simple_bedrock_response(reply)
  end

  if anthropic_format?
    payload = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false, base_url: @base_url)
    reply = anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload.to_json }
    return parse_simple_anthropic_response(reply)
  end

  # OpenAI-compatible default: the canonical messages are sent as they are.
  payload = { model: model, max_tokens: max_tokens, messages: messages }
  reply = openai_connection.post("chat/completions") { |req| req.body = payload.to_json }
  parse_simple_openai_response(reply)
end
103
+
104
+ # ── Agent main path ───────────────────────────────────────────────────────
105
+
106
+ # Send messages with tool-calling support.
107
+ # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
108
+ #
109
+ # Latency measurement:
110
+ # Without `on_chunk` the request is a plain, *non-streaming* POST whose
111
+ # body is read in one shot; TTFB (time to response headers) is not exposed
112
+ # by Faraday's default adapter without extra plumbing. What we CAN measure
113
+ # cheaply — and what users actually feel — is total request duration,
114
+ # which for a non-streaming call equals the time from "hit Enter" to
115
+ # "first token visible" (since we receive everything at once).
116
+ #
117
+ # So `duration_ms` is always recorded, and `ttft_ms` falls back to it when
118
+ # no chunk callback fired (status bar uses ttft_ms as its signal metric —
119
+ # see docs). When `on_chunk` is given the request is streamed and `ttft_ms`
120
+ # is the time from request start to the first `on_chunk` invocation; the
121
+ # field name and response schema are the same in both cases.
122
+ # @param on_chunk [Proc, nil] optional streaming progress callback.
123
+ # Receives keyword args { input_tokens:, output_tokens: } with cumulative
124
+ # token counts. When nil, behaves exactly as the historical non-streaming
125
+ # path. When given but streaming is not yet wired for the active provider,
126
+ # a single synthetic invocation is fired after the response is received,
127
+ # so UI plumbing can be exercised end-to-end without the proxy work.
128
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil)
  monotonic_now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }

  use_cache = enable_caching && supports_prompt_caching?(model)
  request_messages = deep_clone(messages)

  streaming_used = false
  first_chunk_at = nil

  # Wrap the caller's callback so the arrival time of the first chunk is
  # captured; a missing callback is passed through unchanged.
  chunk_probe = on_chunk
  if on_chunk
    chunk_probe = lambda do |**counts|
      first_chunk_at ||= monotonic_now.call
      on_chunk.call(**counts)
    end
  end

  request_opts = { reasoning_effort: reasoning_effort, on_chunk: chunk_probe }

  started_at = monotonic_now.call
  response =
    if bedrock?
      streaming_used = !on_chunk.nil?
      send_bedrock_request(request_messages, model, tools, max_tokens, use_cache, **request_opts)
    elsif anthropic_format?
      streaming_used = !on_chunk.nil?
      send_anthropic_request(request_messages, model, tools, max_tokens, use_cache, **request_opts)
    else
      streaming_used = !on_chunk.nil?
      send_openai_request(request_messages, model, tools, max_tokens, use_cache, **request_opts)
    end
  finished_at = monotonic_now.call

  # Synthetic single callback for a provider that received a callback but did
  # not stream. Every branch above currently marks itself as streaming when a
  # callback is present, so this is only a safety net.
  if on_chunk && !streaming_used
    usage = response[:usage] || {}
    safe_invoke_on_chunk(
      on_chunk,
      input_tokens: usage[:prompt_tokens].to_i,
      output_tokens: usage[:completion_tokens].to_i
    )
  end

  duration_ms = ((finished_at - started_at) * 1000).round

  # Time to first chunk when one was observed, otherwise the full duration.
  ttft_ms = duration_ms
  ttft_ms = ((first_chunk_at - started_at) * 1000).round if first_chunk_at

  output_tokens = response[:usage]&.dig(:completion_tokens).to_i

  # Tokens per second is only reported for replies of at least 10 tokens.
  tps = nil
  if output_tokens >= 10 && duration_ms > 0
    tps = (output_tokens * 1000.0 / duration_ms).round(1)
  end

  response[:latency] = {
    ttft_ms: ttft_ms,
    duration_ms: duration_ms,
    output_tokens: output_tokens,
    tps: tps,
    model: model,
    measured_at: Time.now.to_f,
    streaming: streaming_used
  }
  response
end
178
+
179
+ # Format tool results into canonical messages ready to append to @messages.
180
+ # Always returns canonical format (role: "tool") regardless of API type —
181
+ # conversion to API-native happens inside each send_*_request.
182
def format_tool_results(response, tool_results, model:)
  # Nothing to format: skip the provider lookup entirely.
  return [] if tool_results.empty?

  # Choose the provider-specific formatter, then make a single call.
  # `model` is part of the signature but is not consulted here.
  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end

  formatter.format_tool_results(response, tool_results)
end
193
+
194
+ # ── Prompt-caching support ────────────────────────────────────────────────
195
+
196
+ # Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).
197
+ #
198
+ # Handles both direct model names (e.g. "claude-haiku-4-5") and
199
+ # Octo AI Bedrock proxy names with "abs-" prefix (e.g. "abs-claude-haiku-4-5").
200
+ #
201
+ # Why only Claude models:
202
+ # - MiniMax uses automatic server-side caching (no cache_control needed from client)
203
+ # - Kimi uses a proprietary prompt_cache_key param, not cache_control
204
+ # - MiMo has no documented caching API
205
+ # - Only Claude (direct, OpenRouter, or OctoAI Bedrock proxy) consumes our
206
+ # cache_control / cachePoint markers
207
def supports_prompt_caching?(model)
  # Normalise: stringify, lower-case, and drop a leading "abs-" proxy prefix.
  normalized = model.to_s.downcase.sub(/^abs-/, "")

  if normalized.include?("claude")
    # Accepts 3.5-3.9 written as "3.5" / "3-5" / "35", and any 4-9 generation
    # digit either right after "claude" or after a family name
    # (e.g. claude-haiku-4-5, claude-sonnet-4).
    normalized.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
  else
    false
  end
end
216
+
217
+
218
+ # ── Bedrock Converse request / response ───────────────────────────────────
219
+
220
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  payload = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort)

  if on_chunk
    # A chunk callback switches the call to the streaming variant.
    send_bedrock_stream_request(payload, model, on_chunk)
  else
    reply = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }

    # Reject non-200 statuses, then HTML bodies, before parsing JSON.
    raise_error(reply) unless reply.status == 200
    check_html_response(reply)
    MessageFormat::Bedrock.parse_response(safe_json_parse(reply.body, context: "LLM response"))
  end
end
231
+
232
+ # Streaming variant for Bedrock Converse.
233
+ # Posts to /model/{m}/converse-stream with stream:true; the proxy returns
234
+ # SSE frames whose `event` is the Bedrock event-type and whose `data` is
235
+ # the raw Bedrock event JSON. We accumulate frames into a synthetic
236
+ # non-streaming response and feed it back through the existing parser so
237
+ # downstream code is identical.
238
private def send_bedrock_stream_request(body, model, on_chunk)
  payload = body.merge(stream: true)
  collector = BedrockStreamAggregator.new(on_chunk: on_chunk)
  pending = "".dup

  # Every received chunk is appended to the buffer; complete SSE frames are
  # then drained from it and forwarded to the aggregator.
  feed = proc do |chunk, _bytes_received, _env|
    pending << chunk
    drain_sse_frames(pending) { |event, data| collector.handle(event, data) }
  end

  reply = bedrock_connection.post(bedrock_stream_endpoint(model)) do |req|
    req.body = payload.to_json
    req.options.on_data = feed
  end

  if reply.status != 200
    # The body was consumed by the on_data callback, so hand the buffered
    # bytes back to the response before building the error from it.
    reply.env.body = pending if reply.body.to_s.empty?
    raise_error(reply)
  end

  # The aggregated frames are parsed by the same parser as the non-streaming path.
  MessageFormat::Bedrock.parse_response(collector.to_h)
end
257
+
258
def parse_simple_bedrock_response(response)
  raise_error(response) unless response.status == 200

  parsed = safe_json_parse(response.body, context: "LLM response")

  # Content blocks live under output.message.content; a missing path is
  # treated as "no blocks".
  blocks = parsed.dig("output", "message", "content") || []

  # Keep only blocks that carry text and concatenate them in order.
  blocks.map { |block| block["text"] }.select { |text| text }.join("")
end
266
+
267
+ # ── Anthropic request / response ──────────────────────────────────────────
268
+
269
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # With caching enabled, the messages are first passed through the
  # cache-breakpoint marker.
  prepared = caching_enabled ? apply_message_caching(messages) : messages

  payload = MessageFormat::Anthropic.build_request_body(prepared, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, base_url: @base_url)

  if on_chunk
    # A chunk callback switches the call to the streaming variant.
    send_anthropic_stream_request(payload, on_chunk)
  else
    reply = anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload.to_json }

    # Reject non-200 statuses, then HTML bodies, before parsing JSON.
    raise_error(reply) unless reply.status == 200
    check_html_response(reply)
    MessageFormat::Anthropic.parse_response(safe_json_parse(reply.body, context: "LLM response"))
  end
end
283
+
284
# Streaming variant of the Anthropic Messages request.
#
# Received bytes are buffered, complete SSE frames are forwarded to an
# AnthropicStreamAggregator, and the aggregated hash is parsed once the
# request finishes.
#
# @param body [Hash] request body; `stream: true` is merged in before sending
# @param on_chunk [#call] callback handed to the aggregator
# @return the value of MessageFormat::Anthropic.parse_response
private def send_anthropic_stream_request(body, on_chunk)
  stream_body = body.merge(stream: true)
  aggregator = AnthropicStreamAggregator.new(on_chunk: on_chunk)
  sse_buf = +""

  response = anthropic_connection.post(anthropic_messages_path) do |req|
    req.headers["Accept"] = "text/event-stream"
    req.body = stream_body.to_json
    req.options.on_data = proc do |chunk, _bytes_received, _env|
      sse_buf << chunk
      drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
    end
  end

  unless response.status == 200
    # Same fallback as the Bedrock streaming path: if the response body is
    # empty (presumably because on_data consumed the payload), hand the
    # buffered bytes to raise_error so the provider's message is not lost.
    response.env.body = sse_buf if response.body.to_s.empty?
    raise_error(response)
  end
  MessageFormat::Anthropic.parse_response(aggregator.to_h)
end
301
+
302
# Extract the plain text of a non-streaming Anthropic Messages response.
#
# @param response [#status, #body] HTTP response
# @return [String] the "text" of every content block whose "type" is "text",
#   concatenated in order ("" when there is none)
# Non-200 responses are delegated to raise_error.
def parse_simple_anthropic_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  text_blocks = (payload["content"] || []).select { |block| block["type"] == "text" }
  text_blocks.map { |block| block["text"] }.join("")
end
307
+
308
+ # ── OpenAI request / response ─────────────────────────────────────────────
309
+
310
# Send a chat-completions request over the OpenAI-compatible wire format.
#
# When `on_chunk` is given the call is delegated to the streaming variant;
# otherwise a single POST is made and its JSON body is parsed.
#
# @return the value of MessageFormat::OpenAI.parse_response
def send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # With caching enabled, add cache_control markers to the messages.
  # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
  messages = apply_message_caching(messages) if caching_enabled

  body = MessageFormat::OpenAI.build_request_body(
    messages, model, tools, max_tokens, caching_enabled,
    vision_supported: @vision_supported,
    reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_openai_stream_request(body, on_chunk)
  else
    response = openai_connection.post("chat/completions") { |req| req.body = body.to_json }

    raise_error(response) if response.status != 200
    check_html_response(response)

    MessageFormat::OpenAI.parse_response(safe_json_parse(response.body, context: "LLM response"))
  end
end
330
+
331
+ # Streaming variant for OpenAI-compatible chat completions (DeepSeek/OpenRouter
332
+ # via platform/llm_proxy). Uses Faraday's on_data hook to consume SSE frames,
333
+ # accumulates them, and reconstructs the non-streaming JSON response shape so
334
+ # MessageFormat::OpenAI.parse_response works unchanged.
335
private def send_openai_stream_request(body, on_chunk)
  # include_usage asks the server to report token usage in the stream.
  stream_body = body.merge(stream: true, stream_options: { include_usage: true })
  aggregator = OpenAIStreamAggregator.new(on_chunk: on_chunk)
  sse_buf = +""

  response = openai_connection.post("chat/completions") do |req|
    req.body = stream_body.to_json
    req.options.on_data = proc do |chunk, _bytes_received, _env|
      sse_buf << chunk
      # Only the data payload is passed on; the SSE event name is ignored here.
      drain_sse_frames(sse_buf) { |_event, data| aggregator.handle(data) }
    end
  end

  unless response.status == 200
    # Same fallback as the Bedrock streaming path: if the response body is
    # empty (presumably because on_data consumed the payload), hand the
    # buffered bytes to raise_error so the provider's message is not lost.
    response.env.body = sse_buf if response.body.to_s.empty?
    raise_error(response)
  end
  MessageFormat::OpenAI.parse_response(aggregator.to_h)
end
351
+
352
# Extract the assistant text of a non-streaming chat-completions response.
#
# @param response [#status, #body] HTTP response
# @return [String, nil] choices[0].message.content, or nil when the payload
#   has no such path (missing/empty "choices", missing "message"); a malformed
#   but successful response no longer raises NoMethodError
# Non-200 responses are delegated to raise_error.
def parse_simple_openai_response(response)
  raise_error(response) unless response.status == 200

  parsed_body = safe_json_parse(response.body, context: "LLM response")
  return nil unless parsed_body.is_a?(Hash)

  choices = parsed_body["choices"]
  first_choice = choices.first if choices.is_a?(Array)
  message = first_choice["message"] if first_choice.is_a?(Hash)
  message.is_a?(Hash) ? message["content"] : nil
end
357
+
358
+ # ── Prompt caching helpers ────────────────────────────────────────────────
359
+
360
+ # Add cache_control markers to the last 2 messages in the array.
361
+ #
362
+ # Why 2 markers:
363
+ # Turn N — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
364
+ # Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
365
+ # messages[-1] is the new message (marked) → cache WRITE for Turn N+2
366
+ #
367
+ # With only 1 marker (old behavior): Turn N marks messages[-1]; in Turn N+1 that same
368
+ # message is now [-2] and carries no marker → server sees a different prefix → cache MISS.
369
+ #
370
+ # Compression instructions (system_injected: true) are skipped — we never want to cache
371
+ # those ephemeral injection messages.
372
def apply_message_caching(messages)
  return messages if messages.empty?

  # Walk backwards from the tail and keep the first two indices whose message
  # is not a compression instruction.
  marked = (0...messages.length)
           .reverse_each
           .lazy
           .reject { |position| is_compression_instruction?(messages[position]) }
           .first(2)

  messages.each_with_index.map do |message, position|
    marked.include?(position) ? add_cache_control_to_message(message) : message
  end
end
387
+
388
+ # Wrap or extend the message's content with a cache_control marker.
389
# Return a copy of `msg` whose content carries an ephemeral cache_control marker.
#
# String content becomes a one-element array holding a marked text block;
# array content gets the marker merged into its last block only; any other
# content leaves the message untouched.
def add_cache_control_to_message(msg)
  body = msg[:content]

  if body.is_a?(String)
    marked = [{ type: "text", text: body, cache_control: { type: "ephemeral" } }]
  elsif body.is_a?(Array)
    last_position = body.length - 1
    marked = body.each_with_index.map do |block, position|
      next block unless position == last_position

      block.merge(cache_control: { type: "ephemeral" })
    end
  else
    return msg
  end

  msg.merge(content: marked)
end
405
+
406
# True only for Hash messages whose :system_injected value is exactly `true`.
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end
409
+
410
+ # ── HTTP connections ──────────────────────────────────────────────────────
411
+
412
+ # Bedrock Converse API endpoint path for a given model ID.
413
def bedrock_endpoint(model)
  # The model ID is inserted as-is between "/model/" and "/converse".
  format("/model/%s/converse", model)
end
416
+
417
+ # Bedrock Converse streaming endpoint path.
418
private def bedrock_stream_endpoint(model)
  # The model ID is inserted as-is between "/model/" and "/converse-stream".
  format("/model/%s/converse-stream", model)
end
421
+
422
+ # Pull complete SSE frames out of a buffer and yield them as (event, data).
423
+ # An SSE frame ends at a blank line ("\n\n"); incomplete trailing data
424
+ # stays in the buffer for the next chunk. Frames without an explicit
425
+ # `event:` line use the default "message" type per the SSE spec.
426
private def drain_sse_frames(buf)
  # A frame ends at a blank line. Both LF ("\n\n") and CRLF ("\r\n\r\n")
  # terminators are accepted; with only "\n\n", a CRLF-delimited stream never
  # produced a frame. Plain substring search (not a regexp) is used so a
  # partially received multi-byte character in the buffer cannot raise.
  terminators = ["\n\n", "\r\n\r\n"]

  # Pick the terminator that closes the earliest frame; nil means the buffer
  # holds no complete frame, so the remainder waits for the next chunk.
  while (frame_end = terminators.map { |sep| (at = buf.index(sep)) && at + sep.length }.compact.min)
    # slice! removes the frame from `buf` in place.
    frame = buf.slice!(0, frame_end)
    event = "message"
    data_lines = []
    frame.each_line do |line|
      line = line.chomp # strips "\n" as well as "\r\n"
      if line.start_with?("event:")
        event = line.sub(/^event:\s*/, "")
      elsif line.start_with?("data:")
        data_lines << line.sub(/^data:\s*/, "")
      end
    end
    # Frames without any data line (e.g. comment-only frames) are dropped.
    next if data_lines.empty?

    yield event, data_lines.join("\n")
  end
end
443
+
444
# Memoized Faraday connection for the Bedrock endpoint at @base_url.
# Sends JSON with a Bearer token; 300s request timeout, 10s connect timeout.
def bedrock_connection
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"] = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout = 300
    conn.options.open_timeout = 10
    # Certificate verification stays on: this connection carries the API key,
    # and `verify = false` would let any on-path host impersonate the
    # endpoint. Private CAs can typically be trusted via SSL_CERT_FILE.
    conn.ssl.verify = true
    conn.adapter Faraday.default_adapter
  end
end
454
+
455
# Memoized Faraday connection for the OpenAI-compatible endpoint at @base_url.
# Sends JSON with a Bearer token; 300s request timeout, 10s connect timeout.
def openai_connection
  @openai_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"] = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout = 300
    conn.options.open_timeout = 10
    # Certificate verification stays on: this connection carries the API key,
    # and `verify = false` would let any on-path host impersonate the
    # endpoint. Private CAs can typically be trusted via SSL_CERT_FILE.
    conn.ssl.verify = true
    conn.adapter Faraday.default_adapter
  end
end
465
+
466
# Memoized Faraday connection for Anthropic-protocol endpoints at @base_url.
# Sends JSON with the x-api-key and anthropic-version headers, plus
# provider-specific headers selected by @provider_id; 300s request timeout,
# 10s connect timeout.
def anthropic_connection
  @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"] = "application/json"
    conn.headers["x-api-key"] = @api_key
    conn.headers["anthropic-version"] = "2023-06-01"
    conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
    # OpenRouter's /v1/messages endpoint authenticates with a Bearer
    # token (the OpenRouter API key), not Anthropic's x-api-key. We send
    # both so the same connection code works for direct Anthropic and
    # for OpenRouter-proxied Claude — each endpoint ignores the header
    # it doesn't recognise.
    if @provider_id == "openrouter"
      conn.headers["Authorization"] = "Bearer #{@api_key}"
    end
    # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
    # prefix whitelist limited to first-party coding agents (Kimi CLI,
    # Claude Code, Roo Code, Kilo Code, ...). Requests with the default
    # Faraday UA are rejected with HTTP 403 access_terminated_error,
    # despite a valid API key. We send a Claude Code-shaped UA here
    # because octo talks to this endpoint over the same Anthropic
    # /v1/messages protocol that Claude Code uses, so the UA matches the
    # wire-level behaviour. Hardcoding rather than exposing as a config
    # field is intentional: the only UAs known to pass the gate are the
    # whitelisted-client formats, and the project's preset registry is
    # the single source of truth for provider-specific quirks (mirroring
    # how the openrouter Bearer-fallback above is hardcoded).
    if @provider_id == "kimi-coding"
      conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
    end
    conn.options.timeout = 300
    conn.options.open_timeout = 10
    # Certificate verification stays on: this connection carries the API key,
    # and `verify = false` would let any on-path host impersonate the
    # endpoint. Private CAs can typically be trusted via SSL_CERT_FILE.
    conn.ssl.verify = true
    conn.adapter Faraday.default_adapter
  end
end
501
+
502
+ # Correct relative path for the Anthropic /v1/messages endpoint, accounting
503
+ # for whether the configured base_url already includes a "/v1" segment.
504
+ #
505
+ # Examples:
506
+ # base_url = "https://api.anthropic.com" → "v1/messages"
507
+ # base_url = "https://openrouter.ai/api/v1" → "messages"
508
+ # base_url = "https://openrouter.ai/api/v1/" → "messages"
509
+ #
510
+ # Without this, OpenRouter would receive POST /api/v1/v1/messages → 404
511
+ # (HTML error page), which bubbles up as the infamous
512
+ # "Invalid API endpoint or server error (received HTML instead of JSON)".
513
private def anthropic_messages_path
  # A base URL that already ends in "/v1" (with or without one trailing
  # slash) only needs "messages" appended; otherwise the "v1" segment is added.
  return "messages" if @base_url.to_s.match?(%r{/v1/?\z})

  "v1/messages"
end
517
+
518
+ # ── Error handling ────────────────────────────────────────────────────────
519
+
520
# Turn a connectivity-test response into a result hash.
#
# @param response [#status, #body] HTTP response
# @return [Hash] `{ success: true }` for HTTP 200, otherwise
#   `{ success: false, error: <message from extract_error_message> }`
def handle_test_response(response)
  if response.status == 200
    { success: true }
  else
    # A body that cannot be parsed as JSON is passed on as nil.
    decoded = begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
    { success: false, error: extract_error_message(decoded, response.body) }
  end
end
526
+
527
# Translate a non-successful HTTP response into the matching error class.
#
# @param response [#status, #body] HTTP response
# @raise [RetryableError] 429, 5xx, and 400s whose message mentions
#   throttling, unavailability or quota
# @raise [BadRequestError] any other 400
# @raise [AgentError] 401, 402, 403, 404 and every other status
def raise_error(response)
  # A body that cannot be parsed as JSON is passed on as nil.
  error_body = begin
    JSON.parse(response.body)
  rescue StandardError
    nil
  end
  # Coerce to String: with a nil body the extracted message can be nil, and
  # the 400 branch below would otherwise fail with NoMethodError on `match?`.
  error_message = extract_error_message(error_body, response.body).to_s

  case response.status
  when 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability issues in 400.
    # However, some proxy/relay providers do — so we inspect the message first.
    # Also, Bedrock returns ThrottlingException as 400 instead of 429.
    if error_message.match?(/ThrottlingException|unavailable|quota/i)
      hint = error_message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "[LLM] Rate limit or service issue: #{error_message}#{hint}"
    end

    # True bad request — our message was malformed. Roll back history so the
    # broken message is not replayed on the next user turn.
    raise BadRequestError, "[LLM] Client request error: #{error_message}"
  when 401 then raise AgentError, "[LLM] Invalid API key"
  when 402 then raise AgentError, "[LLM] Billing or payment issue (possibly out of credits): #{error_message}"
  when 403 then raise AgentError, "[LLM] Access denied: #{error_message}"
  when 404 then raise AgentError, "[LLM] API endpoint not found: #{error_message}"
  when 429 then raise RetryableError, "[LLM] Rate limit exceeded, please wait a moment"
  when 500..599 then raise RetryableError, "[LLM] Service temporarily unavailable (#{response.status}), retrying..."
  else raise AgentError, "[LLM] Unexpected error (#{response.status}): #{error_message}"
  end
end
553
+
554
+ # Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)
555
def check_html_response(response)
  # Leading whitespace is ignored; only the start of the body is inspected.
  leading = response.body.to_s.lstrip
  return unless leading.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")

  raise RetryableError, "[LLM] Service temporarily unavailable (received HTML error page), retrying..."
end
561
+
562
# Pick the most useful human-readable message out of an error response.
#
# Lookup order: HTML body → fixed hint; non-Hash `error_body` → raw body;
# "upstreamMessage" (when non-empty); "error"."message"; "message";
# string-valued "error"; finally the raw body cut to 200 characters.
#
# @param error_body [Hash, nil] parsed JSON body, or nil when parsing failed
# @param raw_body [String, nil] unparsed response body
# @return [String] always a String, so callers can pattern-match on it safely
def extract_error_message(error_body, raw_body)
  raw_text = raw_body.to_s

  # Same prefixes as check_html_response, so lower-case doctype and
  # upper-case <HTML> pages are recognised here too.
  if raw_text.lstrip.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  return raw_text unless error_body.is_a?(Hash)

  # to_s guards against non-String values (e.g. a numeric code) that do not
  # respond to empty?.
  upstream = error_body["upstreamMessage"].to_s
  return upstream unless upstream.empty?

  nested = error_body["error"]
  return nested["message"].to_s if nested.is_a?(Hash) && !nested["message"].nil?
  return error_body["message"].to_s unless error_body["message"].nil?
  return nested if nested.is_a?(String)

  # Keep exactly 200 characters before the ellipsis ([0..200] kept 201, so a
  # 201-character body was returned whole and still marked as truncated).
  raw_text.length > 200 ? "#{raw_text[0, 200]}..." : raw_text
end
574
+
575
+ # Parse JSON with user-friendly error messages.
576
+ # @param json_string [String] the JSON string to parse
577
+ # @param context [String] a description of what's being parsed (e.g., "LLM response")
578
+ # @return [Hash, Array] the parsed JSON
579
+ # @raise [RetryableError] if parsing fails (indicates a malformed LLM response)
580
def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError, TypeError
  # TypeError covers a nil (or otherwise non-String) body, which JSON.parse
  # rejects before parsing; it is reported like any other unusable response.
  #
  # Transform technical JSON parsing errors into user-friendly messages.
  # These are usually caused by:
  # 1. Incomplete/truncated LLM response (network issue, timeout)
  # 2. LLM service returned malformed data
  # 3. Proxy/gateway corruption
  raw = json_string.to_s
  error_detail = if raw.strip.empty?
                   "received empty response"
                 elsif raw.bytesize > 500
                   "response was truncated or malformed (#{raw.bytesize} bytes received)"
                 else
                   "response format is invalid"
                 end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                        "This usually means the AI service returned incomplete or corrupted data. " \
                        "The request will be retried automatically."
end
600
+
601
+ # ── Streaming helpers ─────────────────────────────────────────────────────
602
+
603
+ # Invoke the user's on_chunk callback in a way that never lets a callback
604
+ # error tear down the LLM request. Streaming chunks are best-effort UI
605
+ # updates; a buggy progress renderer must not abort an in-flight call.
606
private def safe_invoke_on_chunk(on_chunk, **kwargs)
  begin
    # Without a callback there is nothing to do; the result is nil.
    on_chunk.call(**kwargs) if on_chunk
  rescue StandardError => failure
    # Callback errors are logged and swallowed rather than re-raised.
    Octo::Logger.warn("[on_chunk] callback raised #{failure.class}: #{failure.message}")
  end
end
612
+
613
+ # ── Utilities ─────────────────────────────────────────────────────────────
614
+
615
# Recursively copy nested Hash/Array containers.
# Only the containers are rebuilt; every other object (including Strings and
# hash keys) is returned as the same instance.
def deep_clone(obj)
  if obj.is_a?(Hash)
    obj.map { |key, value| [key, deep_clone(value)] }.to_h
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end
622
+ end
623
+ end
@@ -0,0 +1,3 @@
1
+ You are calm, precise, and helpful. You communicate clearly and concisely.
2
+ You are honest about uncertainty and ask for clarification when needed.
3
+ You take initiative but respect the user's preferences and decisions.
@@ -0,0 +1 @@
1
+ (No user profile configured yet. To personalize, create ~/.octo/agents/USER.md)