octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,361 @@
1
+ # frozen_string_literal: true
2
+
3
module Octo
  module MessageFormat
    # Static helpers for the AWS Bedrock Converse API message format.
    #
    # The Bedrock Converse API has a completely different format from Anthropic's Messages API:
    #   - Authentication: Authorization: Bearer <ABSK...key>
    #   - Endpoint: POST /model/{modelId}/converse
    #   - Request:  { messages: [{role:, content: [{text:}]}], toolConfig: {tools: [{toolSpec:...}]}, system: [{text:}] }
    #   - Response: { output: { message: { role:, content: [{text:} or {toolUse:}] } }, stopReason:, usage: }
    #
    # Internal canonical format (same as OpenAI-style):
    #   assistant tool_calls: { role: "assistant", tool_calls: [{id:, name:, arguments:}] }
    #   tool result:          { role: "tool", tool_call_id:, content: }
    #
    # This module converts canonical format <-> Bedrock Converse API format.
    module Bedrock
      # Text substituted wherever the conversion would otherwise produce a blank
      # text block or an empty content array.
      EMPTY_TEXT_PLACEHOLDER = "..."

      # Bedrock stopReason -> canonical finish_reason. Unlisted reasons pass through.
      STOP_REASON_MAP = {
        "end_turn" => "stop",
        "tool_use" => "tool_calls",
        "max_tokens" => "length"
      }.freeze

      # Reasoning effort levels forwarded to the model; anything else is ignored.
      REASONING_EFFORTS = %w[low medium high].freeze

      # Keys that identify a block already in Bedrock-native shape; such blocks
      # are forwarded untouched instead of being stringified.
      NATIVE_BLOCK_KEYS = %i[cachePoint image document video toolUse toolResult
                             reasoningContent guardContent].freeze

      # base64 `data:` URL for an image. \A/\z anchor the whole string and /m lets
      # the payload span line breaks.
      DATA_IMAGE_URL = %r{\Adata:image/([^;]+);base64,(.*)\z}m.freeze

      # MIME subtypes seen in the wild mapped to the format names Bedrock accepts.
      IMAGE_FORMAT_ALIASES = { "jpg" => "jpeg" }.freeze

      # Schema sent for tools that declare no parameters.
      EMPTY_TOOL_SCHEMA = { type: "object", properties: {} }.freeze

      # Detect if the request should use the Bedrock Converse API.
      # Matches any of:
      #   - API key with "ABSK" prefix (native AWS Bedrock)
      #   - Model ID with "abs-" prefix (Octo AI proxy that speaks Bedrock Converse)
      #
      # A bare "octo-" key is NOT enough: that same workspace key is also
      # used for dsk-*, or-*, and other OpenAI-compatible aliases served by
      # the same Octo proxy on a different endpoint. The *model prefix* is
      # the source of truth for which upstream format the proxy expects:
      #
      #   abs-*  -> Bedrock Converse   (POST /model/{id}/converse)
      #   dsk-*  -> OpenAI-compatible  (POST /chat/completions)
      #   or-*   -> OpenAI-compatible  (POST /chat/completions)
      #   other  -> depends on base_url + explicit anthropic_format flag
      #
      # Historically this method also returned true for any "octo-" key,
      # which forced non-abs aliases into the Bedrock endpoint and produced
      # `unknown model "..."` errors. Keep the explicit-prefix rule: if you
      # add a new OpenAI-compatible alias family on the Octo proxy, it
      # will route correctly without touching this file.
      #
      # @param api_key [String, nil]
      # @param model [String, nil]
      # @return [Boolean]
      def self.bedrock_api_key?(api_key, model)
        return true if api_key.to_s.start_with?("ABSK")

        model.to_s.start_with?("abs-")
      end

      module_function

      # ── Request building ──────────────────────────────────────────────────────

      # Convert canonical @messages + tools into a Bedrock Converse API request body.
      #
      # @param messages [Array<Hash>] canonical messages (may include system)
      # @param model [String] accepted for signature parity; not referenced when
      #   building the body
      # @param tools [Array<Hash>, nil] OpenAI-style tool definitions
      # @param max_tokens [Integer] forwarded as inferenceConfig.maxTokens
      # @param caching_enabled [Boolean] when true, cachePoint blocks are appended
      #   to the last two converted messages
      # @param reasoning_effort [String, Symbol, nil] "low", "medium" or "high";
      #   any other value adds nothing to the request
      # @return [Hash] ready to serialize as JSON body
      def build_request_body(messages, model, tools, max_tokens, caching_enabled = false, reasoning_effort: nil)
        system_messages, regular_messages = messages.partition { |m| m[:role] == "system" }

        # Only consecutive tool-result messages are merged here; other same-role
        # neighbours are passed through as they are.
        api_messages = merge_consecutive_tool_results(regular_messages.map { |msg| to_api_message(msg) })

        # cachePoint blocks are injected AFTER conversion: canonical tool messages
        # become toolResult blocks, and a cachePoint must be a top-level sibling in
        # the message content array, never nested inside toolResult.content.
        api_messages = apply_api_caching(api_messages) if caching_enabled

        body = { messages: api_messages }

        system_text = system_messages.map { |m| extract_text(m[:content]) }.join("\n\n")
        body[:system] = [{ text: system_text }] unless system_text.empty?

        body[:inferenceConfig] = { maxTokens: max_tokens }
        body[:toolConfig] = { tools: tools.map { |t| to_api_tool(t) } } if tools&.any?

        extra = additional_fields_for_effort(reasoning_effort)
        body[:additionalModelRequestFields] = extra if extra

        body
      end

      # Build additionalModelRequestFields for a reasoning effort level.
      # @param effort [String, Symbol, nil]
      # @return [Hash, nil] nil when effort is absent or not a recognised level
      private_class_method def self.additional_fields_for_effort(effort)
        level = effort.to_s
        return nil unless REASONING_EFFORTS.include?(level)

        { thinking: { type: "adaptive" }, output_config: { effort: level } }
      end

      # ── Response parsing ──────────────────────────────────────────────────────

      # Parse Bedrock Converse API response into canonical internal format.
      # @param data [Hash] parsed JSON response body (string keys)
      # @return [Hash] canonical response:
      #   { content:, tool_calls:, finish_reason:, usage:, raw_api_usage: }
      def parse_response(data)
        blocks = data.dig("output", "message", "content") || []
        usage = data["usage"] || {}

        content = blocks.select { |b| b["text"] }.map { |b| b["text"] }.join

        tool_calls = blocks.select { |b| b["toolUse"] }.map do |b|
          tool_use = b["toolUse"]
          input = tool_use["input"]
          # A missing input is serialised as an empty object rather than "null".
          arguments = input.is_a?(String) ? input : (input || {}).to_json
          { id: tool_use["toolUseId"], type: "function", name: tool_use["name"], arguments: arguments }
        end

        stop_reason = data["stopReason"]
        finish_reason = STOP_REASON_MAP.fetch(stop_reason, stop_reason)

        cache_read = usage["cacheReadInputTokens"].to_i
        cache_write = usage["cacheWriteInputTokens"].to_i

        # Bedrock `inputTokens` counts non-cached input only; cache reads and
        # writes are reported separately. Normalise so that prompt_tokens includes
        # cache reads:
        #   prompt_tokens = inputTokens + cacheReadInputTokens
        # (ModelPricing.calculate_cost subtracts cache_read_tokens from
        # prompt_tokens to get the billable non-cached portion; cache_write is
        # priced on top.)
        usage_data = {
          prompt_tokens: usage["inputTokens"].to_i + cache_read,
          completion_tokens: usage["outputTokens"].to_i,
          total_tokens: usage["totalTokens"].to_i
        }
        usage_data[:cache_read_input_tokens] = cache_read if cache_read > 0
        usage_data[:cache_creation_input_tokens] = cache_write if cache_write > 0

        { content: content, tool_calls: tool_calls, finish_reason: finish_reason,
          usage: usage_data, raw_api_usage: usage }
      end

      # ── Tool result formatting ────────────────────────────────────────────────

      # Format tool results into canonical messages to append to @messages.
      # (Same as Anthropic format — canonical tool messages)
      #
      # @param response [Hash] canonical response carrying :tool_calls
      # @param tool_results [Array<Hash>, nil] entries shaped { id:, content: }
      # @return [Array<Hash>] one role: "tool" message per tool call, in call
      #   order; a call without a matching result gets a JSON error payload
      def format_tool_results(response, tool_results)
        results_by_id = Array(tool_results).each_with_object({}) { |r, h| h[r[:id]] = r }

        Array(response[:tool_calls]).map do |tc|
          result = results_by_id[tc[:id]]
          {
            role: "tool",
            tool_call_id: tc[:id],
            content: result ? result[:content] : { error: "Tool result missing" }.to_json
          }
        end
      end

      # ── Private helpers ───────────────────────────────────────────────────────

      # Convert a single canonical message to Bedrock Converse API format.
      # @param msg [Hash] canonical message
      # @return [Hash] { role:, content: [blocks] }
      private_class_method def self.to_api_message(msg)
        role = msg[:role]
        content = msg[:content]
        tool_calls = msg[:tool_calls]

        # assistant with tool_calls -> optional text block followed by toolUse blocks
        if role == "assistant" && tool_calls&.any?
          blocks = []
          blocks << { text: content } if content.is_a?(String) && !content.empty?
          tool_calls.each { |tc| blocks << to_tool_use_block(tc) }
          return { role: "assistant", content: blocks }
        end

        # canonical tool result (role: "tool") -> user message with a toolResult block.
        # An empty result gets the same placeholder as an empty regular message.
        # NOTE(review): assumes Bedrock's blank-text rejection also applies inside toolResult — confirm.
        if role == "tool"
          result_blocks = content_to_blocks(content)
          result_blocks = [{ text: EMPTY_TEXT_PLACEHOLDER }] if result_blocks.empty?
          return {
            role: "user",
            content: [{ toolResult: { toolUseId: msg[:tool_call_id], content: result_blocks } }]
          }
        end

        # regular user/assistant message
        blocks = content_to_blocks(content)
        # Bedrock rejects messages with an empty content array — use a placeholder text block.
        blocks = [{ text: EMPTY_TEXT_PLACEHOLDER }] if blocks.empty?
        { role: role, content: blocks }
      end

      # Convert one canonical tool call (flat, or nested under :function) into a
      # Bedrock toolUse block.
      private_class_method def self.to_tool_use_block(tool_call)
        func = tool_call[:function] || tool_call
        name = func[:name] || tool_call[:name]
        raw_args = func[:arguments] || tool_call[:arguments]
        { toolUse: { toolUseId: tool_call[:id], name: name, input: parse_tool_arguments(raw_args) } }
      end

      # Decode tool-call arguments into a Hash.
      # Unparseable JSON, nil, and non-object values all become {}.
      private_class_method def self.parse_tool_arguments(raw_args)
        parsed = raw_args.is_a?(String) ? JSON.parse(raw_args) : raw_args
        parsed.is_a?(Hash) ? parsed : {}
      rescue JSON::JSONError, EncodingError
        {}
      end

      # Convert content (String, Array or anything else) to a Bedrock content
      # block array. Blank text never produces a block, so the result may be empty.
      private_class_method def self.content_to_blocks(content)
        case content
        when String
          content.empty? ? [] : [{ text: content }]
        when Array
          content.map { |b| normalize_block(b) }.compact
        else
          str = content.to_s
          str.empty? ? [] : [{ text: str }]
        end
      end

      # Normalize a content block to Bedrock format.
      # @return [Hash, nil] nil when the block should be dropped (blank text, or an
      #   image that cannot be represented)
      private_class_method def self.normalize_block(block)
        return text_block(block) unless block.is_a?(Hash)

        case block[:type]
        when "text"
          text_block(block[:text])
        when "image_url"
          # Only base64 data URLs are converted; see url_to_image_block.
          url_to_image_block(block.dig(:image_url, :url) || block[:url])
        when "image"
          block # forwarded unchanged
        else
          # Bedrock-native blocks (cachePoint, image, ...) are forwarded unchanged.
          return block if NATIVE_BLOCK_KEYS.any? { |key| block[key] }

          # Fallback: use the block's text, or its string form when it has none.
          text_block(block[:text] || block.to_s)
        end
      end

      # Wrap a value as a text block, or return nil when its string form is empty.
      private_class_method def self.text_block(text)
        str = text.to_s
        str.empty? ? nil : { text: str }
      end

      # Convert an image URL to a Bedrock image block.
      # @param url [String, nil]
      # @return [Hash, nil] nil for anything that is not a base64 image data URL
      #   (remote URLs are skipped on purpose)
      private_class_method def self.url_to_image_block(url)
        match = DATA_IMAGE_URL.match(url.to_s)
        return nil unless match

        subtype = match[1].downcase
        {
          image: {
            format: IMAGE_FORMAT_ALIASES.fetch(subtype, subtype),
            # Strip line breaks so a wrapped payload is sent whole.
            source: { bytes: match[2].delete("\r\n") }
          }
        }
      end

      # Convert OpenAI-style tool definition to Bedrock toolSpec format.
      # A blank description is omitted and a missing parameter schema is replaced
      # by an empty object schema.
      private_class_method def self.to_api_tool(tool)
        func = tool[:function] || tool
        spec = { name: func[:name] }
        description = func[:description]
        spec[:description] = description unless description.to_s.empty?
        spec[:inputSchema] = { json: func[:parameters] || EMPTY_TOOL_SCHEMA }
        { toolSpec: spec }
      end

      # Extract plain text from content (String or Array).
      private_class_method def self.extract_text(content)
        case content
        when String then content
        when Array then content.map { |b| b.is_a?(Hash) ? (b[:text] || "") : b.to_s }.join("\n")
        else content.to_s
        end
      end

      # Bedrock Converse API requires strict user/assistant alternation.
      # Merge consecutive tool result messages (role: "user") into a single message.
      # Neither the input array nor the content arrays of its messages are mutated.
      private_class_method def self.merge_consecutive_tool_results(messages)
        messages.each_with_object([]) do |msg, merged|
          prev = merged.last
          if prev && tool_result_carrier?(prev) && tool_result_carrier?(msg)
            merged[-1] = prev.merge(content: prev[:content] + msg[:content])
          else
            merged << msg.dup
          end
        end
      end

      # True for a converted user message whose content holds a toolResult block.
      private_class_method def self.tool_result_carrier?(msg)
        msg[:role] == "user" && msg[:content].is_a?(Array) &&
          msg[:content].any? { |b| b[:toolResult] }
      end

      # Inject cachePoint blocks into already-converted Bedrock API format messages.
      # Marks the last 2 messages (from the tail) so Bedrock can cache the conversation
      # prefix up to those points.
      #
      # Why operate on Bedrock API format (not canonical):
      #   - tool-result canonical messages (role: "tool") become toolResult blocks inside
      #     a user message. Bedrock does NOT allow cachePoint inside toolResult.content.
      #   - After merge_consecutive_tool_results, message boundaries may differ from canonical.
      #   - Operating here guarantees cachePoint is always a top-level sibling block.
      private_class_method def self.apply_api_caching(api_messages)
        first_marked = [api_messages.length - 2, 0].max

        api_messages.each_with_index.map do |msg, idx|
          content = msg[:content]
          next msg if idx < first_marked || !content.is_a?(Array)

          # Don't double-add cachePoint if already present
          last_block = content.last
          next msg if last_block.is_a?(Hash) && last_block[:cachePoint]

          msg.merge(content: content + [{ cachePoint: { type: "default" } }])
        end
      end
    end
  end
end
@@ -0,0 +1,222 @@
1
+ # frozen_string_literal: true
2
+
3
require "json"

module Octo
  module MessageFormat
    # Static helpers for OpenAI-compatible API message format.
    #
    # The canonical internal @messages format IS OpenAI format, so this module
    # mainly handles response parsing, tool result formatting, and message
    # type identification — minimal transformation needed.
    module OpenAI
      module_function

      # ── Message type identification ───────────────────────────────────────────

      # Returns true if the message is a canonical tool result, i.e. it has
      # role "tool" and a non-nil :tool_call_id.
      #
      # @param msg [Hash] canonical message
      # @return [Boolean]
      def tool_result_message?(msg)
        msg[:role] == "tool" && !msg[:tool_call_id].nil?
      end

      # Returns the tool_call_ids referenced in a tool result message.
      #
      # @param msg [Hash] canonical message
      # @return [Array] single-element array with the id, or [] when the
      #   message is not a tool result
      def tool_call_ids(msg)
        return [] unless tool_result_message?(msg)

        [msg[:tool_call_id]]
      end

      # ── Request building ──────────────────────────────────────────────────────

      # Build an OpenAI-compatible request body.
      #
      # Every message goes through the canonical→OpenAI conversion layer
      # (normalize_message_content). For most models this is identity because
      # the internal canonical format IS OpenAI format. The conversion
      # handles one edge case: image_url content blocks are stripped
      # when vision_supported is false (e.g. DeepSeek, Kimi, MiniMax),
      # replacing them with a text placeholder so the API doesn't reject
      # the request with "unknown variant 'image_url'".
      #
      # @param messages [Array<Hash>] canonical messages
      # @param model [String]
      # @param tools [Array<Hash>, nil] OpenAI-style tool definitions; omitted
      #   from the body when nil or empty
      # @param max_tokens [Integer]
      # @param caching_enabled [Boolean] (only effective for Claude via OpenRouter)
      # @param vision_supported [Boolean] whether the target model accepts
      #   image_url content blocks (default true, conservative)
      # @param reasoning_effort [String, Symbol, nil] sent as a String under
      #   :reasoning_effort; omitted when nil or blank
      # @return [Hash]
      def build_request_body(messages, model, tools, max_tokens, caching_enabled, vision_supported: true, reasoning_effort: nil)
        api_messages = messages.map { |msg| normalize_message_content(msg, vision_supported: vision_supported) }

        body = { model: model, max_tokens: max_tokens, messages: api_messages }

        if tools&.any?
          if caching_enabled
            # Clone before tagging so the caller's tool definitions are not mutated.
            cached_tools = deep_clone(tools)
            cached_tools.last[:cache_control] = { type: "ephemeral" }
            body[:tools] = cached_tools
          else
            body[:tools] = tools
          end
        end

        effort = reasoning_effort.to_s
        body[:reasoning_effort] = effort unless effort.empty?

        body
      end

      # ── Canonical → OpenAI conversion ─────────────────────────────────────────

      # Process a single message's content through the canonical→OpenAI
      # conversion layer. Non-Array content (e.g. a String) is returned
      # untouched; for Array content each block goes through normalize_block.
      #
      # @param msg [Hash] canonical message
      # @param vision_supported [Boolean]
      # @return [Hash] message with content normalised for OpenAI API
      def normalize_message_content(msg, vision_supported:)
        content = msg[:content]
        return msg unless content.is_a?(Array)

        blocks = content_to_blocks(content, vision_supported: vision_supported)
        # Most APIs reject empty content arrays — use a placeholder text block.
        blocks = [{ type: "text", text: "..." }] if blocks.empty?
        msg.merge(content: blocks)
      end

      # Convert canonical content array to OpenAI-compatible block array.
      # Each block goes through normalize_block; nil results are dropped.
      #
      # @param content [Array<Hash>] canonical content blocks
      # @param vision_supported [Boolean]
      # @return [Array<Hash>]
      def content_to_blocks(content, vision_supported:)
        content.filter_map { |block| normalize_block(block, vision_supported: vision_supported) }
      end

      # Normalize a single canonical content block to OpenAI API format.
      #
      # Canonical text blocks pass through (with cache_control preserved).
      # image_url blocks are kept for vision-capable models and replaced
      # with a text placeholder for non-vision models (DeepSeek, Kimi, etc.).
      #
      # @param block [Hash] canonical content block
      # @param vision_supported [Boolean]
      # @return [Hash, nil] nil for empty-text blocks (dropped)
      def normalize_block(block, vision_supported:)
        return block unless block.is_a?(Hash)

        case block[:type]
        when "text"
          # Drop empty text blocks — most APIs (Anthropic, DeepSeek, etc.)
          # reject { type: "text", text: "" }.
          text = block[:text]
          return nil if text.nil? || text.empty?

          result = { type: "text", text: text }
          result[:cache_control] = block[:cache_control] if block[:cache_control]
          result
        when "image_url"
          if vision_supported
            block # Pass through — GPT-4V, Gemini, etc. accept image_url
          else
            # Replace with text placeholder so the API doesn't reject the
            # request. The model will still see the context that an image
            # was present (from file_prompt / system_injected metadata).
            { type: "text", text: "[Image content removed — current model does not support vision input]" }
          end
        else
          block # Pass through unknown block types (tool_use, tool_result, etc.)
        end
      end

      # ── Response parsing ──────────────────────────────────────────────────────

      # Parse OpenAI-compatible API response into canonical internal format.
      #
      # Only the first choice is read. A body without a "choices" array (or
      # with an empty one) still raises, so callers see a failure instead of
      # an empty result.
      #
      # @param data [Hash] parsed JSON response body (String keys)
      # @return [Hash] with :content, :tool_calls (nil when there are none),
      #   :finish_reason, :usage, :raw_api_usage and, when present,
      #   :reasoning_content
      def parse_response(data)
        choice = data["choices"].first
        message = choice["message"]
        usage = data["usage"] || {}
        raw_api_usage = usage.dup

        usage_data = {
          prompt_tokens: usage["prompt_tokens"],
          completion_tokens: usage["completion_tokens"],
          total_tokens: usage["total_tokens"]
        }

        usage_data[:api_cost] = usage["cost"] if usage["cost"]
        usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"] if usage["cache_creation_input_tokens"]
        usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"] if usage["cache_read_input_tokens"]

        # OpenRouter stores cache info under prompt_tokens_details; positive
        # values there override the top-level counters set above.
        if (details = usage["prompt_tokens_details"])
          usage_data[:cache_read_input_tokens] = details["cached_tokens"] if details["cached_tokens"].to_i > 0
          usage_data[:cache_creation_input_tokens] = details["cache_write_tokens"] if details["cache_write_tokens"].to_i > 0
        end

        result = {
          content: message["content"],
          tool_calls: parse_tool_calls(message["tool_calls"]),
          finish_reason: choice["finish_reason"],
          usage: usage_data,
          raw_api_usage: raw_api_usage
        }

        # Preserve reasoning_content (e.g. Kimi/Moonshot extended thinking)
        result[:reasoning_content] = message["reasoning_content"] if message["reasoning_content"]

        result
      end

      # ── Tool result formatting ────────────────────────────────────────────────

      # Format tool results into canonical messages to append to @messages.
      #
      # One message is produced per tool call in the response, in the same
      # order. A call with no matching result gets a JSON error payload.
      #
      # @param response [Hash] parsed response; :tool_calls may be nil (this
      #   is what parse_response returns when the model made no tool calls)
      # @param tool_results [Array<Hash>, nil] results carrying :id and :content
      # @return [Array<Hash>] canonical tool messages ([] when there are no calls)
      def format_tool_results(response, tool_results)
        results_map = (tool_results || []).each_with_object({}) { |r, h| h[r[:id]] = r }

        (response[:tool_calls] || []).map do |tc|
          result = results_map[tc[:id]]
          raw_content = result ? result[:content] : { error: "Tool result missing" }

          # OpenAI tool message content must be a String. Structured results
          # (multipart Array blocks such as a screenshot image, or a Hash
          # payload) are serialised to JSON; anything else is passed as-is.
          content =
            case raw_content
            when Array, Hash then JSON.generate(raw_content)
            else raw_content
            end

          {
            role: "tool",
            tool_call_id: tc[:id],
            content: content
          }
        end
      end

      # ── Private helpers ───────────────────────────────────────────────────────

      # Convert the raw "tool_calls" array of a response message into
      # canonical tool-call hashes.
      #
      # @param raw [Array<Hash>, nil]
      # @return [Array<Hash>, nil] nil when raw is nil or empty
      private_class_method def self.parse_tool_calls(raw)
        return nil if raw.nil? || raw.empty?

        raw.filter_map do |call|
          func = call["function"] || {}
          name = func["name"]
          arguments = func["arguments"]
          # Skip malformed tool calls where name or arguments is nil (broken API response)
          next if name.nil? || arguments.nil?

          { id: call["id"], type: call["type"], name: name, arguments: arguments }
        end
      end

      # Recursively copy nested Hashes and Arrays. Leaf values (Strings,
      # Symbols, numbers, ...) are shared with the original, not duplicated.
      #
      # @param obj [Object]
      # @return [Object]
      private_class_method def self.deep_clone(obj)
        case obj
        when Hash then obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
        when Array then obj.map { |item| deep_clone(item) }
        else obj
        end
      end
    end
  end
end