rubino-agent 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (376)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +115 -0
  4. data/.rubocop_todo.yml +955 -0
  5. data/.ruby-version +1 -0
  6. data/AGENTS.md +97 -0
  7. data/CHANGELOG.md +344 -0
  8. data/CONTRIBUTING.md +69 -0
  9. data/LICENSE +21 -0
  10. data/README.md +200 -0
  11. data/Rakefile +8 -0
  12. data/docs/agents.md +190 -0
  13. data/docs/api/v1.md +414 -0
  14. data/docs/architecture.md +177 -0
  15. data/docs/commands.md +375 -0
  16. data/docs/configuration.md +590 -0
  17. data/docs/getting-started.md +143 -0
  18. data/docs/jobs.md +332 -0
  19. data/docs/mcp.md +128 -0
  20. data/docs/memory.md +98 -0
  21. data/docs/models-and-keys.md +173 -0
  22. data/docs/oauth-providers.md +145 -0
  23. data/docs/plugins.md +195 -0
  24. data/docs/security.md +145 -0
  25. data/docs/skills.md +322 -0
  26. data/docs/tools.md +395 -0
  27. data/docs/troubleshooting.md +73 -0
  28. data/exe/rubino +9 -0
  29. data/install.sh +275 -0
  30. data/lib/rubino/active_skill.rb +50 -0
  31. data/lib/rubino/agent/agent_registry.rb +120 -0
  32. data/lib/rubino/agent/backoff_policy.rb +116 -0
  33. data/lib/rubino/agent/definition.rb +128 -0
  34. data/lib/rubino/agent/degenerate_recovery.rb +271 -0
  35. data/lib/rubino/agent/fallback_chain.rb +194 -0
  36. data/lib/rubino/agent/iteration_budget.rb +50 -0
  37. data/lib/rubino/agent/loop.rb +617 -0
  38. data/lib/rubino/agent/model_call_runner.rb +383 -0
  39. data/lib/rubino/agent/prompts/build.txt +69 -0
  40. data/lib/rubino/agent/prompts/compaction.txt +20 -0
  41. data/lib/rubino/agent/prompts/explore.txt +19 -0
  42. data/lib/rubino/agent/prompts/general.txt +20 -0
  43. data/lib/rubino/agent/prompts/plan.txt +31 -0
  44. data/lib/rubino/agent/response_validator.rb +70 -0
  45. data/lib/rubino/agent/router.rb +65 -0
  46. data/lib/rubino/agent/runner.rb +195 -0
  47. data/lib/rubino/agent/tool_executor.rb +402 -0
  48. data/lib/rubino/agent/truncation_continuation.rb +137 -0
  49. data/lib/rubino/api/middleware/auth.rb +43 -0
  50. data/lib/rubino/api/middleware/error_handler.rb +65 -0
  51. data/lib/rubino/api/middleware/json_parser.rb +100 -0
  52. data/lib/rubino/api/middleware/observability.rb +59 -0
  53. data/lib/rubino/api/middleware/rate_limit.rb +136 -0
  54. data/lib/rubino/api/operations/approvals/decide_operation.rb +49 -0
  55. data/lib/rubino/api/operations/clarifications/decide_operation.rb +44 -0
  56. data/lib/rubino/api/operations/cron_jobs/create_operation.rb +46 -0
  57. data/lib/rubino/api/operations/cron_jobs/delete_operation.rb +36 -0
  58. data/lib/rubino/api/operations/cron_jobs/list_operation.rb +55 -0
  59. data/lib/rubino/api/operations/cron_jobs/pause_operation.rb +34 -0
  60. data/lib/rubino/api/operations/cron_jobs/resume_operation.rb +34 -0
  61. data/lib/rubino/api/operations/cron_jobs/schedule_validation.rb +30 -0
  62. data/lib/rubino/api/operations/cron_jobs/show_operation.rb +32 -0
  63. data/lib/rubino/api/operations/cron_jobs/trigger_operation.rb +38 -0
  64. data/lib/rubino/api/operations/cron_jobs/update_operation.rb +42 -0
  65. data/lib/rubino/api/operations/files/read_operation.rb +40 -0
  66. data/lib/rubino/api/operations/files/upload_operation.rb +175 -0
  67. data/lib/rubino/api/operations/health_operation.rb +46 -0
  68. data/lib/rubino/api/operations/memory/delete_operation.rb +32 -0
  69. data/lib/rubino/api/operations/memory/index_operation.rb +80 -0
  70. data/lib/rubino/api/operations/memory/stats_operation.rb +28 -0
  71. data/lib/rubino/api/operations/metrics_operation.rb +18 -0
  72. data/lib/rubino/api/operations/mode/show_operation.rb +29 -0
  73. data/lib/rubino/api/operations/mode/update_operation.rb +42 -0
  74. data/lib/rubino/api/operations/models/list_operation.rb +45 -0
  75. data/lib/rubino/api/operations/oauth/connections/disconnect_operation.rb +77 -0
  76. data/lib/rubino/api/operations/oauth/connections/list_operation.rb +36 -0
  77. data/lib/rubino/api/operations/oauth/providers/callback_operation.rb +82 -0
  78. data/lib/rubino/api/operations/oauth/providers/connect_operation.rb +44 -0
  79. data/lib/rubino/api/operations/oauth/providers/list_operation.rb +35 -0
  80. data/lib/rubino/api/operations/oauth/serializer.rb +21 -0
  81. data/lib/rubino/api/operations/runs/create_operation.rb +77 -0
  82. data/lib/rubino/api/operations/runs/events_operation.rb +195 -0
  83. data/lib/rubino/api/operations/runs/stop_operation.rb +34 -0
  84. data/lib/rubino/api/operations/sessions/create_operation.rb +46 -0
  85. data/lib/rubino/api/operations/sessions/delete_operation.rb +33 -0
  86. data/lib/rubino/api/operations/sessions/index_operation.rb +82 -0
  87. data/lib/rubino/api/operations/sessions/retry_operation.rb +45 -0
  88. data/lib/rubino/api/operations/sessions/show_operation.rb +59 -0
  89. data/lib/rubino/api/operations/sessions/undo_operation.rb +38 -0
  90. data/lib/rubino/api/operations/skills/list_operation.rb +34 -0
  91. data/lib/rubino/api/operations/skills/toggle_operation.rb +40 -0
  92. data/lib/rubino/api/operations/tasks/index_operation.rb +30 -0
  93. data/lib/rubino/api/operations/tasks/serializer.rb +60 -0
  94. data/lib/rubino/api/operations/tasks/show_operation.rb +33 -0
  95. data/lib/rubino/api/operations/tasks/stop_operation.rb +47 -0
  96. data/lib/rubino/api/request.rb +54 -0
  97. data/lib/rubino/api/responses.rb +64 -0
  98. data/lib/rubino/api/router.rb +72 -0
  99. data/lib/rubino/api/schemas.rb +103 -0
  100. data/lib/rubino/api/server.rb +102 -0
  101. data/lib/rubino/api/tls.rb +108 -0
  102. data/lib/rubino/attachments/classification.rb +16 -0
  103. data/lib/rubino/attachments/classify.rb +171 -0
  104. data/lib/rubino/attachments/defang.rb +47 -0
  105. data/lib/rubino/attachments/policy.rb +36 -0
  106. data/lib/rubino/attachments/preamble.rb +120 -0
  107. data/lib/rubino/boot/encryption_key.rb +32 -0
  108. data/lib/rubino/cli/chat/bang_shell.rb +257 -0
  109. data/lib/rubino/cli/chat/completion_builder.rb +290 -0
  110. data/lib/rubino/cli/chat/idle_card_host.rb +69 -0
  111. data/lib/rubino/cli/chat/image_inbox.rb +168 -0
  112. data/lib/rubino/cli/chat/session_resolver.rb +176 -0
  113. data/lib/rubino/cli/chat_command.rb +1674 -0
  114. data/lib/rubino/cli/commands.rb +250 -0
  115. data/lib/rubino/cli/config_command.rb +96 -0
  116. data/lib/rubino/cli/doctor_command.rb +251 -0
  117. data/lib/rubino/cli/jobs_command.rb +60 -0
  118. data/lib/rubino/cli/memory_command.rb +135 -0
  119. data/lib/rubino/cli/onboarding_wizard.rb +207 -0
  120. data/lib/rubino/cli/server_command.rb +139 -0
  121. data/lib/rubino/cli/session_command.rb +125 -0
  122. data/lib/rubino/cli/setup_command.rb +107 -0
  123. data/lib/rubino/cli/skills_command.rb +85 -0
  124. data/lib/rubino/cli/tools_command.rb +81 -0
  125. data/lib/rubino/cli/trust_gate.rb +71 -0
  126. data/lib/rubino/commands/built_ins.rb +46 -0
  127. data/lib/rubino/commands/command.rb +116 -0
  128. data/lib/rubino/commands/executor.rb +550 -0
  129. data/lib/rubino/commands/handlers/agents.rb +510 -0
  130. data/lib/rubino/commands/handlers/config.rb +88 -0
  131. data/lib/rubino/commands/handlers/help.rb +148 -0
  132. data/lib/rubino/commands/handlers/jobs.rb +71 -0
  133. data/lib/rubino/commands/handlers/mcp.rb +229 -0
  134. data/lib/rubino/commands/handlers/memory.rb +200 -0
  135. data/lib/rubino/commands/handlers/sessions.rb +207 -0
  136. data/lib/rubino/commands/handlers/skills.rb +195 -0
  137. data/lib/rubino/commands/handlers/status.rb +211 -0
  138. data/lib/rubino/commands/loader.rb +90 -0
  139. data/lib/rubino/config/configuration.rb +455 -0
  140. data/lib/rubino/config/defaults.rb +569 -0
  141. data/lib/rubino/config/loader.rb +115 -0
  142. data/lib/rubino/config/reasoning_prefs.rb +67 -0
  143. data/lib/rubino/config/writer.rb +72 -0
  144. data/lib/rubino/context/compressor.rb +149 -0
  145. data/lib/rubino/context/environment_inspector.rb +176 -0
  146. data/lib/rubino/context/file_discovery.rb +45 -0
  147. data/lib/rubino/context/message_boundary.rb +39 -0
  148. data/lib/rubino/context/prompt_assembler.rb +382 -0
  149. data/lib/rubino/context/summary_builder.rb +159 -0
  150. data/lib/rubino/context/token_budget.rb +68 -0
  151. data/lib/rubino/context/tool_pair_sanitizer.rb +70 -0
  152. data/lib/rubino/database/connection.rb +77 -0
  153. data/lib/rubino/database/migrations/001_create_initial_schema.rb +156 -0
  154. data/lib/rubino/database/migrations/002_create_runs.rb +45 -0
  155. data/lib/rubino/database/migrations/003_create_skill_states.rb +15 -0
  156. data/lib/rubino/database/migrations/004_create_cron_jobs.rb +36 -0
  157. data/lib/rubino/database/migrations/005_create_oauth_connections.rb +27 -0
  158. data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +34 -0
  159. data/lib/rubino/database/migrations/007_create_messages_fts.rb +59 -0
  160. data/lib/rubino/database/migrations/008_create_memory_facts.rb +75 -0
  161. data/lib/rubino/database/migrations/009_create_memory_graph.rb +55 -0
  162. data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +20 -0
  163. data/lib/rubino/database/migrator.rb +48 -0
  164. data/lib/rubino/documents/converters/csv.rb +79 -0
  165. data/lib/rubino/documents/converters/docx.rb +129 -0
  166. data/lib/rubino/documents/converters/html.rb +28 -0
  167. data/lib/rubino/documents/converters/json.rb +35 -0
  168. data/lib/rubino/documents/converters/pdf.rb +59 -0
  169. data/lib/rubino/documents/converters/plain.rb +68 -0
  170. data/lib/rubino/documents/converters/pptx.rb +64 -0
  171. data/lib/rubino/documents/converters/xlsx.rb +62 -0
  172. data/lib/rubino/documents/converters/xml.rb +45 -0
  173. data/lib/rubino/documents/html.rb +71 -0
  174. data/lib/rubino/documents/registry.rb +68 -0
  175. data/lib/rubino/documents/table.rb +63 -0
  176. data/lib/rubino/documents.rb +50 -0
  177. data/lib/rubino/errors.rb +119 -0
  178. data/lib/rubino/files/workspace.rb +93 -0
  179. data/lib/rubino/interaction/cancel_token.rb +43 -0
  180. data/lib/rubino/interaction/clipboard_image.rb +84 -0
  181. data/lib/rubino/interaction/event_bus.rb +48 -0
  182. data/lib/rubino/interaction/events.rb +101 -0
  183. data/lib/rubino/interaction/image_input.rb +127 -0
  184. data/lib/rubino/interaction/input_queue.rb +117 -0
  185. data/lib/rubino/interaction/lifecycle.rb +299 -0
  186. data/lib/rubino/interaction/probe.rb +65 -0
  187. data/lib/rubino/interaction/state.rb +56 -0
  188. data/lib/rubino/jobs/cron_job_repository.rb +75 -0
  189. data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +32 -0
  190. data/lib/rubino/jobs/handlers/compact_session_job.rb +21 -0
  191. data/lib/rubino/jobs/handlers/distill_skill_job.rb +186 -0
  192. data/lib/rubino/jobs/handlers/extract_memory_job.rb +37 -0
  193. data/lib/rubino/jobs/handlers/summarize_session_job.rb +21 -0
  194. data/lib/rubino/jobs/queue.rb +184 -0
  195. data/lib/rubino/jobs/registry.rb +45 -0
  196. data/lib/rubino/jobs/runner.rb +79 -0
  197. data/lib/rubino/jobs/scheduler.rb +138 -0
  198. data/lib/rubino/jobs/webhook_delivery.rb +225 -0
  199. data/lib/rubino/jobs/worker.rb +59 -0
  200. data/lib/rubino/llm/adapter_factory.rb +47 -0
  201. data/lib/rubino/llm/adapter_response.rb +65 -0
  202. data/lib/rubino/llm/auxiliary_client.rb +61 -0
  203. data/lib/rubino/llm/bedrock_bearer_client.rb +235 -0
  204. data/lib/rubino/llm/content_builder.rb +55 -0
  205. data/lib/rubino/llm/credential_check.rb +93 -0
  206. data/lib/rubino/llm/error_classifier.rb +364 -0
  207. data/lib/rubino/llm/fake_provider.rb +292 -0
  208. data/lib/rubino/llm/inline_think_filter.rb +58 -0
  209. data/lib/rubino/llm/model_catalog.rb +29 -0
  210. data/lib/rubino/llm/provider_resolver.rb +48 -0
  211. data/lib/rubino/llm/reasoning_manager.rb +100 -0
  212. data/lib/rubino/llm/request.rb +56 -0
  213. data/lib/rubino/llm/ruby_llm_adapter.rb +794 -0
  214. data/lib/rubino/llm/scenario_loader.rb +68 -0
  215. data/lib/rubino/llm/scenario_selector.rb +80 -0
  216. data/lib/rubino/llm/scenarios/agent-creates-cron-failure.yml +29 -0
  217. data/lib/rubino/llm/scenarios/agent-creates-cron.yml +36 -0
  218. data/lib/rubino/llm/scenarios/analysis.yml +501 -0
  219. data/lib/rubino/llm/scenarios/complex-analysis.yml +598 -0
  220. data/lib/rubino/llm/scenarios/failure.yml +65 -0
  221. data/lib/rubino/llm/scenarios/happy-path.yml +24 -0
  222. data/lib/rubino/llm/scenarios/provider-quota-completed.yml +14 -0
  223. data/lib/rubino/llm/scenarios/wide-table.yml +121 -0
  224. data/lib/rubino/llm/scenarios/with-approvals.yml +50 -0
  225. data/lib/rubino/llm/scenarios/with-artifacts.yml +98 -0
  226. data/lib/rubino/llm/scenarios/with-clarify.yml +32 -0
  227. data/lib/rubino/llm/scenarios/with-reasoning.yml +175 -0
  228. data/lib/rubino/llm/scenarios/with-uploads.yml +104 -0
  229. data/lib/rubino/llm/thinking_support.rb +84 -0
  230. data/lib/rubino/llm/tool_bridge.rb +89 -0
  231. data/lib/rubino/logger.rb +99 -0
  232. data/lib/rubino/mcp/manager.rb +180 -0
  233. data/lib/rubino/mcp/mcp_tool_wrapper.rb +69 -0
  234. data/lib/rubino/mcp.rb +57 -0
  235. data/lib/rubino/memory/backend.rb +104 -0
  236. data/lib/rubino/memory/backends/default.rb +101 -0
  237. data/lib/rubino/memory/backends/sqlite.rb +653 -0
  238. data/lib/rubino/memory/backends.rb +53 -0
  239. data/lib/rubino/memory/deduplicator.rb +74 -0
  240. data/lib/rubino/memory/extractor.rb +85 -0
  241. data/lib/rubino/memory/flusher.rb +31 -0
  242. data/lib/rubino/memory/retriever.rb +50 -0
  243. data/lib/rubino/memory/sqlite_extraction_prompt.rb +70 -0
  244. data/lib/rubino/memory/sqlite_graph.rb +154 -0
  245. data/lib/rubino/memory/store.rb +228 -0
  246. data/lib/rubino/memory/threat_scanner.rb +68 -0
  247. data/lib/rubino/metrics.rb +175 -0
  248. data/lib/rubino/modes.rb +93 -0
  249. data/lib/rubino/oauth/connection_repository.rb +95 -0
  250. data/lib/rubino/oauth/provider/github.rb +75 -0
  251. data/lib/rubino/oauth/provider/google.rb +59 -0
  252. data/lib/rubino/oauth/provider.rb +149 -0
  253. data/lib/rubino/oauth/registry.rb +86 -0
  254. data/lib/rubino/oauth/token_encryptor.rb +87 -0
  255. data/lib/rubino/plugins/registry.rb +75 -0
  256. data/lib/rubino/plugins.rb +86 -0
  257. data/lib/rubino/run/approval_gate.rb +243 -0
  258. data/lib/rubino/run/attachment_downloader.rb +166 -0
  259. data/lib/rubino/run/event_store.rb +74 -0
  260. data/lib/rubino/run/executor.rb +383 -0
  261. data/lib/rubino/run/gate_registry.rb +39 -0
  262. data/lib/rubino/run/recorder.rb +69 -0
  263. data/lib/rubino/run/repository.rb +118 -0
  264. data/lib/rubino/run/session_approval_cache.rb +118 -0
  265. data/lib/rubino/security/allowlist_persister.rb +55 -0
  266. data/lib/rubino/security/approval_policy.rb +227 -0
  267. data/lib/rubino/security/command_allowlist.rb +24 -0
  268. data/lib/rubino/security/dangerous_patterns.rb +118 -0
  269. data/lib/rubino/security/deny_persister.rb +73 -0
  270. data/lib/rubino/security/doom_loop_detector.rb +43 -0
  271. data/lib/rubino/security/hardline_guard.rb +105 -0
  272. data/lib/rubino/security/pattern_matcher.rb +62 -0
  273. data/lib/rubino/security/prefix_deriver.rb +124 -0
  274. data/lib/rubino/security/readonly_commands.rb +211 -0
  275. data/lib/rubino/session/exporter.rb +101 -0
  276. data/lib/rubino/session/message.rb +77 -0
  277. data/lib/rubino/session/repository.rb +295 -0
  278. data/lib/rubino/session/store.rb +198 -0
  279. data/lib/rubino/session/summary_store.rb +65 -0
  280. data/lib/rubino/skills/prompt_index.rb +85 -0
  281. data/lib/rubino/skills/registry.rb +208 -0
  282. data/lib/rubino/skills/skill.rb +176 -0
  283. data/lib/rubino/skills/skill_tool.rb +215 -0
  284. data/lib/rubino/skills/state_repository.rb +37 -0
  285. data/lib/rubino/skills/toggle.rb +26 -0
  286. data/lib/rubino/tools/answer_child_tool.rb +83 -0
  287. data/lib/rubino/tools/ask_parent_tool.rb +232 -0
  288. data/lib/rubino/tools/attach_file_tool.rb +120 -0
  289. data/lib/rubino/tools/background_tasks.rb +520 -0
  290. data/lib/rubino/tools/base.rb +222 -0
  291. data/lib/rubino/tools/custom_tool_loader.rb +119 -0
  292. data/lib/rubino/tools/edit_tool.rb +122 -0
  293. data/lib/rubino/tools/git_tool.rb +71 -0
  294. data/lib/rubino/tools/github_tool.rb +233 -0
  295. data/lib/rubino/tools/glob_tool.rb +69 -0
  296. data/lib/rubino/tools/grep_tool.rb +206 -0
  297. data/lib/rubino/tools/memory_tool.rb +184 -0
  298. data/lib/rubino/tools/multi_edit_tool.rb +110 -0
  299. data/lib/rubino/tools/patch_tool.rb +260 -0
  300. data/lib/rubino/tools/probe_tool.rb +175 -0
  301. data/lib/rubino/tools/question_tool.rb +128 -0
  302. data/lib/rubino/tools/read_attachment_tool.rb +180 -0
  303. data/lib/rubino/tools/read_tool.rb +212 -0
  304. data/lib/rubino/tools/read_tracker.rb +98 -0
  305. data/lib/rubino/tools/registry.rb +166 -0
  306. data/lib/rubino/tools/result.rb +113 -0
  307. data/lib/rubino/tools/ruby_tool.rb +0 -0
  308. data/lib/rubino/tools/session_search_tool.rb +103 -0
  309. data/lib/rubino/tools/shell_input_tool.rb +96 -0
  310. data/lib/rubino/tools/shell_kill_tool.rb +76 -0
  311. data/lib/rubino/tools/shell_output_tool.rb +72 -0
  312. data/lib/rubino/tools/shell_registry.rb +158 -0
  313. data/lib/rubino/tools/shell_tail_tool.rb +118 -0
  314. data/lib/rubino/tools/shell_tool.rb +330 -0
  315. data/lib/rubino/tools/steer_tool.rb +118 -0
  316. data/lib/rubino/tools/subagent_probe.rb +89 -0
  317. data/lib/rubino/tools/summarize_file_tool.rb +182 -0
  318. data/lib/rubino/tools/task_result_tool.rb +90 -0
  319. data/lib/rubino/tools/task_stop_tool.rb +80 -0
  320. data/lib/rubino/tools/task_tool.rb +622 -0
  321. data/lib/rubino/tools/test_tool.rb +454 -0
  322. data/lib/rubino/tools/todo_tool.rb +93 -0
  323. data/lib/rubino/tools/tool_call_repository.rb +33 -0
  324. data/lib/rubino/tools/vision_tool.rb +85 -0
  325. data/lib/rubino/tools/webfetch_tool.rb +153 -0
  326. data/lib/rubino/tools/websearch_tool.rb +179 -0
  327. data/lib/rubino/tools/write_tool.rb +61 -0
  328. data/lib/rubino/trust.rb +88 -0
  329. data/lib/rubino/ui/api.rb +296 -0
  330. data/lib/rubino/ui/base.rb +252 -0
  331. data/lib/rubino/ui/bottom_composer.rb +1599 -0
  332. data/lib/rubino/ui/cli.rb +1987 -0
  333. data/lib/rubino/ui/completion_menu.rb +321 -0
  334. data/lib/rubino/ui/completion_source.rb +284 -0
  335. data/lib/rubino/ui/escape_reader.rb +169 -0
  336. data/lib/rubino/ui/indented_io.rb +88 -0
  337. data/lib/rubino/ui/input_history.rb +108 -0
  338. data/lib/rubino/ui/live_region.rb +183 -0
  339. data/lib/rubino/ui/markdown_renderer.rb +506 -0
  340. data/lib/rubino/ui/notifier.rb +163 -0
  341. data/lib/rubino/ui/null.rb +195 -0
  342. data/lib/rubino/ui/paste_store.rb +176 -0
  343. data/lib/rubino/ui/printer_base.rb +79 -0
  344. data/lib/rubino/ui/probe_wait_indicator.rb +75 -0
  345. data/lib/rubino/ui/queued_indicators.rb +66 -0
  346. data/lib/rubino/ui/status_bar.rb +100 -0
  347. data/lib/rubino/ui/stdout_proxy.rb +161 -0
  348. data/lib/rubino/ui/streaming_markdown.rb +186 -0
  349. data/lib/rubino/ui/subagent_cards.rb +134 -0
  350. data/lib/rubino/ui/subagent_view.rb +255 -0
  351. data/lib/rubino/ui.rb +21 -0
  352. data/lib/rubino/update_check.rb +187 -0
  353. data/lib/rubino/util/duration.rb +23 -0
  354. data/lib/rubino/util/hyperlink.rb +105 -0
  355. data/lib/rubino/util/output.rb +145 -0
  356. data/lib/rubino/util/secrets_mask.rb +83 -0
  357. data/lib/rubino/version.rb +5 -0
  358. data/lib/rubino/workspace.rb +85 -0
  359. data/lib/rubino-agent.rb +5 -0
  360. data/lib/rubino.rb +318 -0
  361. data/mise.toml +2 -0
  362. data/rubino-agent.gemspec +103 -0
  363. data/skills/ruby-expert/SKILL.md +67 -0
  364. data/skills/ruby-expert/references/concurrency.md +357 -0
  365. data/skills/ruby-expert/references/datetime-and-encoding.md +363 -0
  366. data/skills/ruby-expert/references/errors-and-types.md +460 -0
  367. data/skills/ruby-expert/references/gem-authoring.md +459 -0
  368. data/skills/ruby-expert/references/language-idioms.md +465 -0
  369. data/skills/ruby-expert/references/metaprogramming.md +339 -0
  370. data/skills/ruby-expert/references/oo-design.md +553 -0
  371. data/skills/ruby-expert/references/performance.md +383 -0
  372. data/skills/ruby-expert/references/rails.md +424 -0
  373. data/skills/ruby-expert/references/security.md +404 -0
  374. data/skills/ruby-expert/references/testing.md +473 -0
  375. data/skills/ruby-expert/references/tooling.md +466 -0
  376. metadata +856 -0
@@ -0,0 +1,794 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ruby_llm"
4
+ require "faraday"
5
+ require "net/http"
6
+ require_relative "tool_bridge"
7
+ require_relative "inline_think_filter"
8
+ require_relative "provider_resolver"
9
+ require_relative "reasoning_manager"
10
+ require_relative "thinking_support"
11
+
12
+ module Rubino
13
+ module LLM
# Error raised when a streaming response stays silent for longer than
# stale_chunk_timeout. (#22)
class StreamStaleError < StandardError
end

# Transport-level drops that can surface in the middle of a request.
# ErrorClassifier owns the canonical list (it is the single source of truth
# for retryability); the alias exists for the stream path's
# `rescue *STREAM_DROP_ERRORS` and for existing specs.
# faraday-net_http re-raises IOError/EOFError (and friends) as
# Faraday::ConnectionFailed, so that is the class actually observed when the
# upstream closes the socket (message often "end of file reached"). These are
# retried ONLY before the first streamed chunk — see #stream_once.
STREAM_DROP_ERRORS = ErrorClassifier::STREAM_DROP_ERRORS
25
+
26
+ # Adapter wrapping ruby_llm to isolate all LLM integration details.
27
+ # The rest of the application never calls ruby_llm directly.
28
+ class RubyLLMAdapter
29
+ attr_reader :model_id, :provider
30
+
# Builds an adapter bound to one model/provider pair.
#
# Every collaborator is optional: config falls back to Rubino.configuration,
# model_id to the configured default model, provider to #resolve_provider,
# and ui / event_bus to the Rubino-level instances. tool_executor may stay
# nil, in which case ToolBridge falls back to a direct tool.call.
#
# SLICE-7: isolate_config decides where provider settings (api keys /
# base_url / timeout) are written. A FallbackChain entry passes true and
# gets its own RubyLLM::Context, because switching providers for a fallback
# must NOT mutate the process-global RubyLLM.configure — otherwise
# concurrent sessions on the API/server path corrupt each other's provider
# config. The primary adapter (isolate_config: false) keeps writing the
# global exactly as before, so single-provider setups are unaffected.
def initialize(model_id: nil, provider: nil, config: nil, ui: nil, event_bus: nil,
               tool_executor: nil, cancel_token: nil, isolate_config: false)
  @config = config || Rubino.configuration
  @model_id = model_id || @config.model_default
  @provider = provider || resolve_provider
  @temperature = @config.model_temperature
  @ui = ui || Rubino.ui
  @event_bus = event_bus || Rubino.event_bus
  @tool_executor = tool_executor
  @cancel_token = cancel_token

  unless isolate_config
    configure_ruby_llm!
    return
  end

  @context = RubyLLM.context { |c| apply_provider_config!(c) }
end
56
+
# Front door of the LLM boundary. Accepts one LLM::Request, routes it to
# the streaming or non-streaming transport according to request.stream, and
# returns a normalized AdapterResponse. When streaming, chunks are yielded
# to the block before the same response is returned. The conversation loop
# depends on this method; #chat and #stream are the underlying transports
# and remain valid for existing callers.
#
# Graceful thinking degradation (#75): a provider on the anthropic-
# compatible path that rejects the thinking budget used to hard-error the
# user's very first prompt (the default effort is medium). When such a
# rejection is recognised, it is remembered for the session, the user is
# told once, and the same request is retried WITHOUT the budget. Re-issuing
# is safe because the rejection is a pre-stream 400 — no token reached the
# UI. Any other error is re-raised untouched.
def call(request, &)
  dispatch(request, &)
rescue StandardError => e
  if thinking_budget_rejected?(e)
    ThinkingSupport.mark_unsupported!(@provider, notify: @ui)
    dispatch(request, &)
  else
    raise
  end
end
78
+
# Non-streaming chat completion.
#
# image_paths, when present, go into ruby_llm's `with:` slot so the primary
# model ingests the bytes natively (no `vision` tool round-trip). That is
# only meaningful on the first model call of a turn — Loop strips it for
# follow-ups.
#
# In Bedrock bearer mode the request is handed to the bearer client, which
# receives only messages and tools.
def chat(messages:, tools: nil, response_format: nil, image_paths: [], prefill: nil)
  return bedrock_bearer_client.chat(messages: messages, tools: tools) if bedrock_bearer_mode?

  session = build_chat(tools: tools, response_format: response_format)
  load_history(session, messages)
  apply_prefill(session, prefill)
  reply = session.ask(last_user_content(messages), with: presence(image_paths))
  build_response(reply)
end
94
+
# Streaming chat request; chunks are yielded to the block. Inline
# <think>…</think> sentinels are routed to the :thinking channel, and
# partial content buffered before a mid-stream parse error is preserved so
# downstream code can still show what the model produced.
#
# Bedrock bearer mode: BedrockBearerClient#stream buffers the whole
# /converse response before its first emit, so a transport error can only
# fire before the first chunk — no token reached the UI. The error raises
# straight through to the runner, which re-issues a fresh request (safe, no
# double output).
#
# Otherwise: there is no retry wrapper here. Retry ownership lives in
# Agent::ModelCallRunner (Slice 4) so the same failure is never retried
# twice. What stays in #stream_once is the streaming-specific safety
# decision: a transport drop is RAISED only while nothing has been emitted
# (chunks_seen.zero?), letting the runner retry before any token reached
# the user; once a chunk has flowed, the buffered partial is RETURNED
# instead, so a drop can never be retried mid-stream.
def stream(messages:, tools: nil, response_format: nil, image_paths: [], prefill: nil, &)
  if bedrock_bearer_mode?
    bedrock_bearer_client.stream(messages: messages, tools: tools, &)
  else
    stream_once(messages: messages, tools: tools, response_format: response_format,
                image_paths: image_paths, prefill: prefill, &)
  end
end
121
+
# Looks up the current model's catalog entry (context window, etc.) in the
# ruby_llm model registry.
#
# @return [Object, nil] the registry entry, or nil when the lookup raises
def model_info
  RubyLLM.models.find(@model_id)
rescue StandardError
  # A failed lookup is not fatal: nil lets #context_window fall back to
  # its default.
  nil
end
128
+
# Returns the context window size (in tokens) for the current model.
#
# Resolution order:
#   1. the configured model_context_length override, when set;
#   2. the context window reported by the model catalog (#model_info);
#   3. a 128_000-token default when neither is available.
#
# The override is checked first so the model-catalog lookup is skipped
# entirely when its result would be discarded.
#
# @return [Integer]
def context_window
  configured = @config.model_context_length
  return configured if configured

  model_info&.context_window || 128_000
end
136
+
137
+ private
138
+
# Raw transport selection behind #call: streaming requests go to #stream
# (with the caller's block), everything else to #chat. Shared by the normal
# path and the one-shot thinking-budget retry (#75).
def dispatch(request, &)
  streaming = request.stream?
  payload = {
    messages: request.messages,
    tools: request.tools,
    image_paths: request.image_paths,
    prefill: request.prefill
  }
  return stream(**payload, &) if streaming

  chat(**payload)
end
150
+
# Whether +error+ is a provider's "thinking (budget) is not supported"
# rejection for a request that actually carried a budget (#75). Once the
# provider has been marked unsupported the budget drops to 0, so this can
# never match a second time — no retry loop.
def thinking_budget_rejected?(error)
  budget_sent = anthropic_generation_path? && thinking_budget.positive?
  budget_sent && ThinkingSupport.rejection?(error)
end
159
+
160
+ # One streaming attempt. See #stream for the retry / no-double-output
161
+ # contract. Inline <think>…</think> sentinels are routed to :thinking;
162
+ # buffered content is preserved across mid-stream parse/transport errors.
163
+ def stream_once(messages:, tools:, response_format:, image_paths:, prefill: nil, &block)
164
+ chat_instance = build_chat(tools: tools, response_format: response_format)
165
+ load_history(chat_instance, messages)
166
+ apply_prefill(chat_instance, prefill)
167
+
168
+ think_filter = InlineThinkFilter.new
169
+ buffered = +""
170
+ last_chunk_at = monotonic_now
171
+ stale_after = stale_chunk_timeout
172
+ chunks_seen = 0
173
+
174
+ # Each assistant message ruby_llm streams within this one ask() is a
175
+ # distinct content block: on a multi-step tool turn the model emits
176
+ # text → tool_use → (next message) text → … . We tag every content
177
+ # delta with the current block's id so a consumer can regroup the
178
+ # deltas that belong together instead of splitting them around the
179
+ # tool calls that interleave mid-stream. before_message bumps the id;
180
+ # after_message flushes the filter (so a buffered tail lands on THIS
181
+ # block, before the tool fires) and emits the block boundary.
182
+ message_block_id = 0
183
+
184
+ emit = lambda do |type, text|
185
+ next if text.nil? || text.empty?
186
+
187
+ buffered << text if type == :content
188
+
189
+ begin
190
+ block.call({ type: type, text: text, message_id: message_block_id })
191
+ rescue StandardError => e
192
+ # A UI/EventBus error must not abort the whole stream — log and
193
+ # keep buffering so we can still build the response. (issue #6)
194
+ log_safely(event: "llm.stream.emit_error", error: e.message, type: type)
195
+ end
196
+ end
197
+
198
+ # Guarded: prefer ruby_llm's before_message/after_message (the
199
+ # on_new_message/on_end_message names are deprecated in ruby_llm 1.x and
200
+ # dropped in 2.0); fall back to the legacy names on older builds. A chat
201
+ # (or test double) exposing neither simply gets no block boundaries and
202
+ # the consumer falls back to the legacy per-adjacency grouping. Use a
203
+ # proc (not a lambda) for the close handler so it tolerates whatever
204
+ # arity the callback invokes it with.
205
+ if chat_instance.respond_to?(:before_message)
206
+ chat_instance.before_message { message_block_id += 1 }
207
+ elsif chat_instance.respond_to?(:on_new_message)
208
+ chat_instance.on_new_message { message_block_id += 1 }
209
+ end
210
+
211
+ close_block = proc do
212
+ # Flush any tail the think-filter is still holding so it is emitted
213
+ # with THIS block's id before we close the block (and before the
214
+ # tool call that follows a tool-use message executes).
215
+ flush_filter(think_filter, &emit)
216
+ @event_bus&.emit(Interaction::Events::MESSAGE_COMPLETED, message_id: message_block_id)
217
+ end
218
+ if chat_instance.respond_to?(:after_message)
219
+ chat_instance.after_message(&close_block)
220
+ elsif chat_instance.respond_to?(:on_end_message)
221
+ chat_instance.on_end_message(&close_block)
222
+ end
223
+
224
+ begin
225
+ response = chat_instance.ask(last_user_content(messages), with: presence(image_paths)) do |chunk|
226
+ # User interrupt poll. Raised here propagates out of the streaming
227
+ # callback, ruby_llm closes the upstream connection, and Loop /
228
+ # Lifecycle catch the Interrupted exception to bail out cleanly.
229
+ @cancel_token&.check!
230
+
231
+ # Any chunk from upstream — content, thinking, or a tool-call delta —
232
+ # marks this request "committed": something came back, so a later
233
+ # drop must NOT trigger a retry (it would re-run generation and could
234
+ # re-fire a mid-stream tool call / double the output).
235
+ chunks_seen += 1
236
+ last_chunk_at = monotonic_now
237
+ check_stream_stale!(last_chunk_at, stale_after)
238
+
239
+ if chunk.respond_to?(:thinking) && chunk.thinking
240
+ thinking_text = chunk.thinking.respond_to?(:text) ? chunk.thinking.text : chunk.thinking.to_s
241
+ emit.call(:thinking, thinking_text)
242
+ end
243
+ think_filter.feed(chunk.content, &emit) if chunk.content.is_a?(String) && !chunk.content.empty?
244
+ end
245
+ rescue Rubino::Interrupted
246
+ # Flush whatever the filter has buffered, then re-raise. Loop will
247
+ # catch and persist the partial assistant message so the user sees
248
+ # what arrived before they hit Esc.
249
+ flush_filter(think_filter, &emit)
250
+ raise
251
+ rescue JSON::ParserError, StreamStaleError => e
252
+ # Preserve whatever we've buffered so far so the user sees partial
253
+ # output instead of a blank failure. (issues #12, #22)
254
+ log_safely(event: "llm.stream.partial", error: e.message, buffered_bytes: buffered.bytesize)
255
+ flush_filter(think_filter, &emit)
256
+ return partial_response(buffered)
257
+ rescue *STREAM_DROP_ERRORS => e
258
+ # A genuine transport drop (the observed M3 EOF, a connection reset, a
259
+ # read timeout, …). If NOTHING was emitted yet, re-raise so the runner
260
+ # (Agent::ModelCallRunner) can retry a fresh request — safe, no token
261
+ # reached the user. If chunks already flowed, preserve the partial and
262
+ # stop: never
263
+ # re-issue after output. ErrorClassifier classifies these as retryable.
264
+ raise if chunks_seen.zero?
265
+
266
+ log_safely(event: "llm.stream.partial_interrupted", error: e.message,
267
+ buffered_bytes: buffered.bytesize)
268
+ flush_filter(think_filter, &emit)
269
+ return partial_response(buffered)
270
+ end
271
+
272
+ # Guard flush in the same way as the per-chunk emit so a final UI error
273
+ # doesn't lose the response. (issue #21)
274
+ flush_filter(think_filter, event: "llm.stream.flush_error", &emit)
275
+ build_response(response)
276
+ end
277
+
278
+ # Flushes the think-filter, swallowing UI/flush errors so a late failure
279
+ # never loses the response (issues #6, #21).
280
def flush_filter(think_filter, event: "llm.stream.flush_error", &emit)
  begin
    # Drain whatever the filter still holds, forwarding it to the emit block.
    think_filter.flush(&emit)
  rescue StandardError => flush_error
    # Best-effort: record the failure under the caller-chosen event name
    # rather than letting it propagate to the caller.
    log_safely(event: event, error: flush_error.message)
  end
end
285
+
286
+ # Buffered-partial AdapterResponse returned when a stream is cut after at
287
+ # least one chunk (parse error, stale, or post-first-chunk transport drop).
288
+ # Flagged +interrupted+ so the Loop fails the turn (run.failed) instead of
289
+ # mistaking the truncated buffer for a finished answer (the silent
290
+ # "completed-but-empty" bug — see Rubino::StreamInterruptedError).
291
def partial_response(buffered)
  # Token counts are reported as zero and no tool calls are carried; the
  # response is explicitly marked as interrupted.
  attributes = {
    content: buffered,
    tool_calls: [],
    input_tokens: 0,
    output_tokens: 0,
    model_id: @model_id,
    interrupted: true
  }
  AdapterResponse.new(**attributes)
end
295
+
296
# Applies this adapter's provider configuration to the config object yielded
# by RubyLLM.configure.
def configure_ruby_llm!
  RubyLLM.configure do |global_config|
    apply_provider_config!(global_config)
  end
end
299
+
300
+ # The provider-config block, applied to a config target `c`. The primary
301
+ # adapter passes the process-global (RubyLLM.configure); a fallback adapter
302
+ # passes a per-call RubyLLM::Context config (SLICE-7) so the switch never
303
+ # touches the global. Identical writes either way — only the target differs.
304
def apply_provider_config!(c)
  # Opt-in wire logging: when RUBYLLM_DEBUG is set, route ruby_llm's logger
  # to a file (never stdout, which the TUI owns). The Logger is built
  # explicitly and RubyLLM's memoized @logger is cleared so an earlier
  # access cannot shadow the injected one.
  if ENV["RUBYLLM_DEBUG"]
    require "logger"
    require "fileutils"
    debug_file = debug_log_path
    FileUtils.mkdir_p(File.dirname(debug_file))
    c.logger = ::Logger.new(debug_file, progname: "RubyLLM", level: ::Logger::DEBUG)
    c.log_level = ::Logger::DEBUG
    RubyLLM.instance_variable_set(:@logger, nil)
  end

  # First-party API keys: copied only when the variable is present.
  [
    ["OPENAI_API_KEY", :openai_api_key=],
    ["ANTHROPIC_API_KEY", :anthropic_api_key=],
    ["GEMINI_API_KEY", :gemini_api_key=]
  ].each do |env_name, setter|
    value = ENV[env_name]
    c.public_send(setter, value) if value
  end

  # Bedrock IAM credentials (Mode 2 / 3): both key and secret are required.
  bedrock_key, bedrock_secret = ENV.values_at("BEDROCK_API_KEY", "BEDROCK_SECRET_KEY")
  if bedrock_key && bedrock_secret
    c.bedrock_api_key = bedrock_key
    c.bedrock_secret_key = bedrock_secret
    c.bedrock_region = ENV.fetch("BEDROCK_REGION", "us-east-1")
    session_token = ENV["BEDROCK_SESSION_TOKEN"]
    c.bedrock_session_token = session_token if session_token
  end

  settings = provider_cfg
  custom_base = settings["base_url"]

  # Compatible providers are routed through the matching first-party
  # provider with a custom base URL and the provider's own key. These writes
  # come after the ENV keys above, so they override them.
  if openai_compatible_provider?
    c.openai_api_base = custom_base if custom_base
    c.openai_api_key = openai_compatible_api_key!(settings)
  elsif anthropic_compatible_provider?
    c.anthropic_api_base = custom_base if custom_base
    c.anthropic_api_key = anthropic_compatible_api_key!(settings)
  elsif @provider == "openai" && custom_base
    c.openai_api_base = custom_base
  end

  # Retries are owned by Agent::ModelCallRunner; ruby_llm's built-in retry
  # is switched off so the two never compound.
  c.max_retries = 0

  # Request timeout, overridable via providers.<name>.request_timeout_seconds;
  # falls back to 600 seconds.
  c.request_timeout = settings["request_timeout_seconds"] || 600
end
375
+
376
+ # Returns the api_key for an openai_compatible provider, or raises a
377
+ # clear configuration error. Previously this fell back to the literal
378
+ # "default", which would hit the upstream and surface as a cryptic 401.
379
+ # (issue #3)
380
def openai_compatible_api_key!(prov_cfg)
  # Shared resolver, with OPENAI_API_KEY as the environment fallback.
  fallback_variable = "OPENAI_API_KEY"
  compatible_api_key!(prov_cfg, env_fallback: fallback_variable)
end
383
+
384
+ # Anthropic-compatible analogue of #openai_compatible_api_key!: resolves the
385
+ # provider key (config, then ANTHROPIC_API_KEY) or raises the same clear
386
+ # ConfigurationError so an arbitrary Anthropic-Messages backend (MiniMax's
387
+ # /anthropic) never silently sends an empty key and surfaces a cryptic 401.
388
def anthropic_compatible_api_key!(prov_cfg)
  # Shared resolver, with ANTHROPIC_API_KEY as the environment fallback.
  fallback_variable = "ANTHROPIC_API_KEY"
  compatible_api_key!(prov_cfg, env_fallback: fallback_variable)
end
391
+
392
# Resolves the API key for a compatible provider.
#
# Candidates are tried in order: the provider's configured "api_key", then
# the environment variable named by +env_fallback+. A blank candidate (nil,
# empty, or whitespace-only) is skipped, so an empty configured key (e.g. a
# config placeholder that expanded to nothing) no longer blocks the
# environment fallback.
#
# @param prov_cfg [Hash] provider config; only "api_key" is read
# @param env_fallback [String] name of the environment variable to fall back to
# @return [String] the first non-blank key
# @raise [Rubino::Error] when neither source yields a usable key
def compatible_api_key!(prov_cfg, env_fallback:)
  candidates = [prov_cfg["api_key"], ENV.fetch(env_fallback, nil)]
  # to_s keeps a non-String config value (e.g. a numeric YAML scalar) from
  # crashing on #empty?.
  key = candidates.map(&:to_s).find { |candidate| !candidate.strip.empty? }
  return key if key

  raise Rubino::Error,
        "Missing API key for provider '#{@provider}'. " \
        "Set providers.#{@provider}.api_key in ~/.rubino/config.yml " \
        "(e.g. ${#{@provider.to_s.upcase}_API_KEY} with the value in .env)."
end
401
+
402
+ # Resolution fallback for the direct-construction edge: AdapterFactory
403
+ # always passes a concrete provider, so this only runs when the adapter is
404
+ # built without one (tests, one-shot callers). Interpret the config
405
+ # default — including "auto" and the Bedrock-bearer override — through the
406
+ # single ProviderResolver seam rather than re-implementing it here.
407
def resolve_provider
  # Hand the configured default to ProviderResolver as the explicit provider.
  configured_default = @config.model_provider
  ProviderResolver.resolve(@model_id, explicit_provider: configured_default)
end
410
+
411
# Builds a chat for @model_id, applies generation params, and registers the
# given tools on it.
#
# tools           - optional list of tools to register (nil means none).
# response_format - optional; forwarded to the chat factory only when given.
#
# Returns the configured chat instance.
def build_chat(tools: nil, response_format: nil)
  chat_options = { model: @model_id }
  chat_options[:response_format] = response_format if response_format

  # Compatible providers are routed through the matching first-party
  # provider and skip model validation, so arbitrary model ids are accepted.
  # Any other provider skips validation only when its config asks for it.
  if openai_compatible_provider?
    chat_options.merge!(provider: :openai, assume_model_exists: true)
  elsif anthropic_compatible_provider?
    chat_options.merge!(provider: :anthropic, assume_model_exists: true)
  elsif provider_cfg["assume_model_exists"]
    chat_options[:assume_model_exists] = true
    chat_options[:provider] = @provider.to_sym if @provider
  end

  # An isolated per-call context, when present, builds the chat; otherwise
  # the global RubyLLM entry point does.
  chat_factory = @context || RubyLLM
  chat = chat_factory.chat(**chat_options)

  apply_generation_params(chat)

  # Each tool is wrapped by ToolBridge so ruby_llm can invoke it.
  bridge_options = { ui: @ui, event_bus: @event_bus, tool_executor: @tool_executor }
  Array(tools).each do |tool|
    chat.with_tool(ToolBridge.for(tool, **bridge_options))
  end

  chat
end
454
+
455
+ # Applies the request-shaping knobs ruby_llm 1.15 supports — temperature,
456
+ # max_tokens, and a thinking/reasoning budget — onto the chat instance.
457
+ # The render rules (enable manual thinking with a budget, force temp=1,
458
+ # raise max_tokens to fit budget + headroom) are a faithful port of the
459
+ # reference and live in LLM::ReasoningManager — the
460
+ # single source of truth for the wire shape. This method only RESOLVES the
461
+ # config inputs (which path, budget, ceiling, headroom, configured temp)
462
+ # and APPLIES the manager's rendered params onto the chat.
463
+ #
464
+ # Why max_tokens matters for MiniMax-M2.7: ruby_llm's anthropic provider
465
+ # defaults max_tokens to 4096 (Anthropic::Chat#build_base_payload:
466
+ # `model.max_tokens || 4096`), and with assume_model_exists the model
467
+ # carries no max_tokens — so a reasoning model can burn the whole 4096 on
468
+ # thinking tokens and return ZERO visible text (the "completed but empty"
469
+ # symptom). The manager raises the ceiling so it has room to think AND
470
+ # answer. Thinking + the aggressive ceiling are Anthropic-Messages concepts
471
+ # only safe on the anthropic-family path; for openai/ollama/etc. we leave
472
+ # token limits to the provider (apply_max_tokens: false) and only apply
473
+ # temperature.
474
+ #
475
+ # ruby_llm wiring confirmed on 1.15:
476
+ # * with_temperature(t) -> payload[:temperature] (anthropic/chat.rb add_optional_fields)
477
+ # * with_params(max_tokens: n) -> deep-merged over payload[:max_tokens] (provider.rb#complete)
478
+ # * with_thinking(budget: n) -> payload[:thinking] = {type:"enabled",
479
+ # budget_tokens:n} (anthropic/chat.rb build_thinking_payload)
480
def apply_generation_params(chat)
  on_anthropic_path = anthropic_generation_path?

  # Off the anthropic-family path the thinking budget is forced to 0 and the
  # manager is told not to apply max_tokens.
  rendered = reasoning_manager.render(
    budget: on_anthropic_path ? thinking_budget : 0,
    temperature: @temperature,
    max_tokens: max_output_tokens,
    text_headroom: text_headroom_tokens,
    apply_max_tokens: on_anthropic_path
  )

  extra_params = {}
  token_ceiling = rendered.max_tokens
  extra_params[:max_tokens] = token_ceiling unless token_ceiling.nil?

  if rendered.thinking_enabled?
    # The thinking block either rides along in params or goes through the
    # chat's own with_thinking, when the chat offers it.
    if ThinkingSupport.budget_via_params?(provider_cfg, chat)
      extra_params[:thinking] = rendered.thinking
    elsif chat.respond_to?(:with_thinking)
      chat.with_thinking(budget: rendered.thinking[:budget_tokens])
    end
  end

  temperature = rendered.temperature
  chat.with_temperature(temperature) unless temperature.nil? || !chat.respond_to?(:with_temperature)

  # One with_params call only: max_tokens and a params-routed thinking block
  # must be sent together.
  chat.with_params(**extra_params) if extra_params.any? && chat.respond_to?(:with_params)
end
505
+
506
# Memoized ReasoningManager: built on first use, reused afterwards.
def reasoning_manager
  @reasoning_manager ||= ReasoningManager.new
end
507
+
508
+ # True when generation runs through ruby_llm's anthropic provider — the
509
+ # only path where thinking budgets and the 4096 max_tokens default apply.
510
def anthropic_generation_path?
  # An anthropic-compatible provider qualifies outright.
  via_compatible = anthropic_compatible_provider?
  return via_compatible if via_compatible

  # Otherwise only the anthropic and bedrock providers qualify.
  %w[anthropic bedrock].include?(@provider.to_s)
end
514
+
515
+ # Configurable max output tokens. providers.<name>.max_tokens wins, then
516
+ # model.max_tokens, then a reasoning-model-sane default (16k vs ruby_llm's
517
+ # 4096). Returns an Integer.
518
def max_output_tokens
  # Precedence: provider setting, then model setting, then 16_384.
  limit = provider_cfg["max_tokens"]
  limit ||= @config.dig("model", "max_tokens")
  limit ||= 16_384
  limit.to_i
end
523
+
524
+ # Thinking/reasoning budget in tokens. 0 / nil disables thinking entirely.
525
+ # thinking.effort wins when set (off→0, low→4000, medium→8000, high→16000);
526
+ # otherwise providers.<name>.thinking_budget, then model.thinking_budget,
527
+ # then a medium default (8000 — the same value the reference THINKING_BUDGET
528
+ # maps "medium" to). Only meaningful for the anthropic-compatible path;
529
+ # other providers ignore with_thinking or never see it (we still set it,
530
+ # ruby_llm only renders thinking for providers that support it).
531
def thinking_budget
  # Capability gates come first: a provider flagged as unsupported, or one
  # that ThinkingSupport says cannot take a budget, always gets 0.
  thinking_blocked = ThinkingSupport.unsupported?(@provider) ||
                     !ThinkingSupport.supports?(provider_cfg, @model_id)
  return 0 if thinking_blocked

  # A configured effort level takes precedence over any numeric budget.
  effort = Config::ReasoningPrefs.effort(@config)
  return Config::ReasoningPrefs.effort_budget(effort).to_i if effort

  # Numeric budget: provider setting, then model setting, then 8000. Only
  # nil falls through, so an explicit 0 is kept.
  settings = provider_cfg
  budget = settings["thinking_budget"] if settings.key?("thinking_budget")
  budget = @config.dig("model", "thinking_budget") if budget.nil?
  (budget.nil? ? 8000 : budget).to_i
end
547
+
548
+ # Headroom (tokens) reserved for visible output on top of the thinking
549
+ # budget, so the model can think AND still answer. Mirrors the reference +4096.
550
def text_headroom_tokens
  # model.max_tokens_text_headroom when configured, else 4096.
  headroom = @config.dig("model", "max_tokens_text_headroom")
  headroom ||= 4096
  headroom.to_i
end
553
+
554
+ # Returns true when using Bedrock Bearer token (short-term API key, no secret)
555
# @return [Boolean] true only when the provider is bedrock/anthropic,
#   BEDROCK_API_KEY is set, and BEDROCK_SECRET_KEY is not set.
def bedrock_bearer_mode?
  # to_s so a Symbol provider matches too, consistent with the other
  # provider checks in this class.
  return false unless %w[bedrock anthropic].include?(@provider.to_s)

  # A bearer token is an API key WITHOUT an accompanying secret; with both
  # present the IAM credential path applies instead.
  !ENV.fetch("BEDROCK_API_KEY", nil).nil? && ENV["BEDROCK_SECRET_KEY"].nil?
end
559
+
560
+ # (The provider config hash from the config file, e.g. providers.ollama.*, is returned by #provider_cfg further down.)
561
+ # The RUBYLLM_DEBUG log path, under the resolved home (RUBINO_HOME ->
562
+ # else ~/.rubino) so an isolated/custom home is not polluted with a log
563
+ # written into the default ~/.rubino (issue #27).
564
def debug_log_path
  # <home>/logs/ruby_llm.log, where <home> comes from the config loader.
  home_dir = Rubino::Config::Loader.default_home_path
  File.join(home_dir, "logs", "ruby_llm.log")
end
567
+
568
# Looks up the config entry for the current provider by delegating to
# @config.provider_config.
def provider_cfg
  config_source = @config
  config_source.provider_config(@provider)
end
571
+
572
+ # True when the provider declares openai_compatible: true in config.
573
+ # Used for ollama, lm-studio, vllm, text-generation-webui, etc.
574
def openai_compatible_provider?
  # Strict comparison: only a literal true enables the route.
  flag = provider_cfg["openai_compatible"]
  flag == true
end
577
+
578
+ # True when the provider declares anthropic_compatible: true in config.
579
+ # Routes through ruby_llm's anthropic provider against a custom base_url
580
+ # (e.g. MiniMax's native Anthropic-Messages endpoint).
581
def anthropic_compatible_provider?
  # Strict comparison: only a literal true enables the route.
  flag = provider_cfg["anthropic_compatible"]
  flag == true
end
584
+
585
+ # True when the "hidden" render mode is active. The streaming emit no
586
+ # longer drops :thinking chunks on it — the CLI buffers them unrendered
587
+ # so Ctrl-O can reveal the last thought even in hidden mode (#76), and
588
+ # UI::API drops them at its own boundary. Still gates the bedrock-bearer
589
+ # client, which has no downstream reveal machinery.
590
def reasoning_hidden?
  # Ask the reasoning prefs for the active render mode and test for :hidden.
  render_mode = Config::ReasoningPrefs.mode(@config)
  render_mode == :hidden
end
593
+
594
+ # ── Streaming resilience helpers (issues #12, #22) ────────────────────
595
+ #
596
+ # NOTE: error-classification, backoff and api_max_retries retries moved to
597
+ # Agent::ModelCallRunner (Slice 4) — the single retry owner. The adapter no
598
+ # longer wraps calls in a retry loop; it only RAISES retryable errors (and
599
+ # pre-first-chunk stream drops) straight through for the runner to retry.
600
+
601
# Current monotonic clock reading, in seconds, as a Float.
def monotonic_now
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
604
+
605
# Stale-stream timeout: the current provider's stale_timeout_seconds, then
# the "openai" provider's, then 300.
def stale_chunk_timeout
  [@provider, "openai"].each do |provider_name|
    configured = @config.dig("providers", provider_name, "stale_timeout_seconds")
    return configured if configured
  end
  300
end
610
+
611
# Raises when more than +stale_after+ seconds have passed since
# +last_chunk_at+.
#
# The threshold is coerced to a Float once and that value is used for both
# the guard and the comparison, so a numeric String from config compares
# correctly and a fractional threshold is honoured.
#
# @param last_chunk_at [Numeric] monotonic timestamp of the last chunk
# @param stale_after [Numeric, String, nil] threshold in seconds; nil, zero,
#   or a negative value disables the check
# @return [nil] when the stream is still fresh or the check is disabled
# @raise [StreamStaleError] when the threshold has been exceeded
def check_stream_stale!(last_chunk_at, stale_after)
  threshold = stale_after.to_f
  return if threshold <= 0
  return if (monotonic_now - last_chunk_at) <= threshold

  raise StreamStaleError, "no chunk received for #{stale_after}s"
end
617
+
618
# Emits a warn-level log entry with the given fields; any StandardError
# raised while logging is swallowed and nil is returned instead.
def log_safely(**fields)
  begin
    Rubino.logger.warn(**fields)
  rescue StandardError
    # Logger may be uninitialized during early boot — swallow.
    nil
  end
end
623
+
624
+ # Returns a memoized BedrockBearerClient instance
625
def bedrock_bearer_client
  return @bedrock_bearer_client if @bedrock_bearer_client

  # Built once from the environment; the region falls back to us-east-1 and
  # reasoning is shown unless the hidden render mode is active.
  @bedrock_bearer_client = BedrockBearerClient.new(
    api_key: ENV.fetch("BEDROCK_API_KEY", nil),
    region: ENV.fetch("BEDROCK_REGION", "us-east-1"),
    model_id: @model_id,
    show_reasoning: !reasoning_hidden?,
    event_bus: @event_bus
  )
end
634
+
635
+ # Returns the content of the last message
636
def last_user_content(messages)
  # Accepts either symbol- or string-keyed message hashes.
  messages.last.then { |final| final[:content] || final["content"] }
end
640
+
641
+ # ruby_llm's `with:` treats [] as "build a Content with no attachments"
642
+ # which is technically valid but pointless — pass nil so it skips the
643
+ # Content wrapper entirely.
644
def presence(arr)
  # nil and empty collections both collapse to nil.
  return nil if arr.nil? || arr.empty?

  arr
end
647
+
648
+ # Loads conversation history into the chat instance, excluding the last message.
649
+ #
650
+ # Tool result messages MUST carry their tool_call_id when reconstructed —
651
+ # Anthropic and Bedrock validate that every tool message's id matches a
652
+ # preceding assistant toolUse block, and reject the request with a 400
653
+ # otherwise. The DB already stores the id (Session::Message#to_context
654
+ # provides it); previously it was dropped on the floor here.
655
# @param chat_instance [#messages, #with_instructions] chat to seed
# @param messages [Array<Hash>] symbol- or string-keyed message hashes; the
#   last entry is excluded (it is sent separately as the new prompt)
# @return [Array<Hash>, nil] the replayed slice, or nil when there was
#   nothing to replay
def load_history(chat_instance, messages)
  history = messages[0..-2]
  return if history.empty?

  history.each do |msg|
    role = (msg[:role] || msg["role"]).to_sym
    content = msg[:content] || msg["content"]
    tool_calls = rebuild_tool_calls(msg[:tool_calls] || msg["tool_calls"]) if role == :assistant

    # Blank turns are dropped, EXCEPT an assistant turn that carries tool
    # calls: dropping it would orphan the tool-result message that follows,
    # whose tool_call_id must match a preceding assistant tool-use block.
    next if (content.nil? || content.empty?) && tool_calls.nil?

    case role
    when :system
      chat_instance.with_instructions(content, append: true)
    when :user
      chat_instance.messages << RubyLLM::Message.new(role: role, content: content)
    when :assistant
      chat_instance.messages << RubyLLM::Message.new(
        role: role,
        content: content,
        tool_calls: tool_calls
      )
    when :tool
      chat_instance.messages << RubyLLM::Message.new(
        role: role,
        content: content,
        tool_call_id: msg[:tool_call_id] || msg["tool_call_id"]
      )
    end
  end
end
684
+
685
+ # Prefill-to-continue (Slice 5, rung 4): seat the model's own interim text
686
+ # as a TRAILING assistant message so the next completion continues from it
687
+ # instead of starting a fresh turn. The spike confirmed ruby_llm honours a
688
+ # trailing assistant message on the /anthropic path (Anthropic's native
689
+ # "assistant turn prefill"): the response stream picks up where the seed
690
+ # left off, so a thinking-only model is pushed into visible content.
691
+ #
692
+ # No-op when the seed is blank — an empty prefill would add a degenerate
693
+ # empty assistant turn that strict providers reject, so we skip it and let
694
+ # the call behave as a plain re-issue.
695
def apply_prefill(chat_instance, prefill)
  seed_text = prefill.to_s

  # A blank seed adds nothing; otherwise it is appended (unmodified) as a
  # trailing assistant message.
  unless seed_text.strip.empty?
    chat_instance.messages << RubyLLM::Message.new(role: :assistant, content: seed_text)
  end
end
701
+
702
+ # Reconstructs RubyLLM::ToolCall objects from the hashes persisted under
703
+ # assistant message metadata. Returns nil for empty/missing input so
704
+ # RubyLLM::Message treats it as a plain assistant turn.
705
def rebuild_tool_calls(raw)
  nothing_stored = raw.nil? || (raw.respond_to?(:empty?) && raw.empty?)
  return nil if nothing_stored

  # NOTE(review): this returns an Array of ToolCall objects — confirm that is
  # the shape RubyLLM::Message expects for tool_calls.
  Array(raw).map do |entry|
    # Persisted hashes may be string-keyed; normalize to symbol keys.
    attrs = entry.is_a?(Hash) ? entry.transform_keys(&:to_sym) : entry
    RubyLLM::ToolCall.new(
      id: attrs[:id],
      name: attrs[:name],
      arguments: attrs[:arguments] || {}
    )
  end
end
718
+
719
# Wraps a provider response in an AdapterResponse; a missing response maps
# to nil.
def build_response(response)
  return nil unless response

  attributes = {
    content: response.content,
    tool_calls: extract_tool_calls(response),
    input_tokens: response.input_tokens,
    output_tokens: response.output_tokens,
    model_id: @model_id,
    stop_reason: extract_stop_reason(response),
    thinking: extract_thinking(response),
    raw: response
  }
  AdapterResponse.new(**attributes)
end
733
+
734
+ # Normalize the provider's finish/stop reason to the boundary's
735
+ # :stop | :length | :tool_calls | nil vocabulary. Anthropic-compat (the
736
+ # MiniMax /anthropic path) carries it in the raw body as "stop_reason"
737
+ # ("end_turn"/"stop_sequence" ⇒ :stop, "max_tokens" ⇒ :length,
738
+ # "tool_use" ⇒ :tool_calls); OpenAI-style carries "finish_reason"
739
+ # ("stop" ⇒ :stop, "length" ⇒ :length, "tool_calls" ⇒ :tool_calls).
740
+ # Returns nil when unreachable on this path — never fabricated. The
741
+ # streaming path generally does not surface a stop reason on ruby_llm
742
+ # today (see the boundary spike), so this stays nil there.
743
# @param response [Object] provider response (see #raw_body)
# @return [Symbol, nil] the result of #normalize_stop_reason, or nil when no
#   Hash body is available
def extract_stop_reason(response)
  body = raw_body(response)
  return nil unless body.is_a?(Hash)

  reason = body["stop_reason"] || body["finish_reason"]
  if reason.nil?
    # OpenAI-style chat completions nest the reason under the first choice
    # rather than at the top level of the body.
    choices = body["choices"]
    first_choice = choices.first if choices.is_a?(Array)
    reason = first_choice["finish_reason"] if first_choice.is_a?(Hash)
  end

  normalize_stop_reason(reason)
end
749
+
750
# Maps a provider stop/finish reason string onto :stop, :length or
# :tool_calls; anything unrecognized (including nil) maps to nil.
def normalize_stop_reason(reason)
  vocabulary = {
    "end_turn" => :stop,
    "stop_sequence" => :stop,
    "stop" => :stop,
    "max_tokens" => :length,
    "length" => :length,
    "tool_use" => :tool_calls,
    "tool_calls" => :tool_calls
  }
  vocabulary[reason.to_s]
end
757
+
758
+ # The raw Anthropic/OpenAI response body hash, when ruby_llm exposes it
759
+ # (response.raw is a Faraday::Response; .body is the parsed JSON). nil on
760
+ # paths where it is unreachable (streaming, doubles, bedrock-bearer).
761
def raw_body(response)
  begin
    # Only reachable when the response exposes a raw object that in turn
    # exposes a body; every other shape yields nil.
    raw = response.respond_to?(:raw) ? response.raw : nil
    raw.body if raw && raw.respond_to?(:body)
  rescue StandardError
    nil
  end
end
769
+
770
+ # Reasoning text/summary if ruby_llm surfaced it on the message; nil
771
+ # otherwise. Kept defensive — older builds carry no reasoning field.
772
# @param response [Object] provider response
# @return [String, nil] reasoning text, or nil when none is exposed or
#   reading it fails
def extract_thinking(response)
  # #reasoning is tried first (the reader this method has always used), then
  # #thinking — the reader the streaming path uses on chunks — so a response
  # exposing only the latter is no longer reported as having no reasoning.
  trace = nil
  %i[reasoning thinking].each do |reader|
    next unless response.respond_to?(reader)

    trace = response.public_send(reader)
    break if trace
  end
  return nil unless trace

  # The value may be a plain string or an object wrapping the text.
  trace.respond_to?(:text) ? trace.text : trace.to_s
rescue StandardError
  nil
end
780
+
781
# Flattens the response's tool calls into plain hashes.
#
# Accepts either a list of tool-call objects or a Hash keyed by call id (in
# which case only the values are used), so both shapes produce the same
# output.
#
# @param response [Object] provider response
# @return [Array<Hash>] one { id:, name:, arguments: } hash per tool call;
#   empty when the response exposes none
def extract_tool_calls(response)
  return [] unless response.respond_to?(:tool_calls) && response.tool_calls

  calls = response.tool_calls
  # Iterating a Hash directly would yield [id, call] pairs, not calls.
  calls = calls.values if calls.is_a?(Hash)

  calls.map do |call|
    { id: call.id, name: call.name, arguments: call.arguments }
  end
end
792
+ end
793
+ end
794
+ end