rubino-agent 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +115 -0
- data/.rubocop_todo.yml +955 -0
- data/.ruby-version +1 -0
- data/AGENTS.md +97 -0
- data/CHANGELOG.md +344 -0
- data/CONTRIBUTING.md +69 -0
- data/LICENSE +21 -0
- data/README.md +200 -0
- data/Rakefile +8 -0
- data/docs/agents.md +190 -0
- data/docs/api/v1.md +414 -0
- data/docs/architecture.md +177 -0
- data/docs/commands.md +375 -0
- data/docs/configuration.md +590 -0
- data/docs/getting-started.md +143 -0
- data/docs/jobs.md +332 -0
- data/docs/mcp.md +128 -0
- data/docs/memory.md +98 -0
- data/docs/models-and-keys.md +173 -0
- data/docs/oauth-providers.md +145 -0
- data/docs/plugins.md +195 -0
- data/docs/security.md +145 -0
- data/docs/skills.md +322 -0
- data/docs/tools.md +395 -0
- data/docs/troubleshooting.md +73 -0
- data/exe/rubino +9 -0
- data/install.sh +275 -0
- data/lib/rubino/active_skill.rb +50 -0
- data/lib/rubino/agent/agent_registry.rb +120 -0
- data/lib/rubino/agent/backoff_policy.rb +116 -0
- data/lib/rubino/agent/definition.rb +128 -0
- data/lib/rubino/agent/degenerate_recovery.rb +271 -0
- data/lib/rubino/agent/fallback_chain.rb +194 -0
- data/lib/rubino/agent/iteration_budget.rb +50 -0
- data/lib/rubino/agent/loop.rb +617 -0
- data/lib/rubino/agent/model_call_runner.rb +383 -0
- data/lib/rubino/agent/prompts/build.txt +69 -0
- data/lib/rubino/agent/prompts/compaction.txt +20 -0
- data/lib/rubino/agent/prompts/explore.txt +19 -0
- data/lib/rubino/agent/prompts/general.txt +20 -0
- data/lib/rubino/agent/prompts/plan.txt +31 -0
- data/lib/rubino/agent/response_validator.rb +70 -0
- data/lib/rubino/agent/router.rb +65 -0
- data/lib/rubino/agent/runner.rb +195 -0
- data/lib/rubino/agent/tool_executor.rb +402 -0
- data/lib/rubino/agent/truncation_continuation.rb +137 -0
- data/lib/rubino/api/middleware/auth.rb +43 -0
- data/lib/rubino/api/middleware/error_handler.rb +65 -0
- data/lib/rubino/api/middleware/json_parser.rb +100 -0
- data/lib/rubino/api/middleware/observability.rb +59 -0
- data/lib/rubino/api/middleware/rate_limit.rb +136 -0
- data/lib/rubino/api/operations/approvals/decide_operation.rb +49 -0
- data/lib/rubino/api/operations/clarifications/decide_operation.rb +44 -0
- data/lib/rubino/api/operations/cron_jobs/create_operation.rb +46 -0
- data/lib/rubino/api/operations/cron_jobs/delete_operation.rb +36 -0
- data/lib/rubino/api/operations/cron_jobs/list_operation.rb +55 -0
- data/lib/rubino/api/operations/cron_jobs/pause_operation.rb +34 -0
- data/lib/rubino/api/operations/cron_jobs/resume_operation.rb +34 -0
- data/lib/rubino/api/operations/cron_jobs/schedule_validation.rb +30 -0
- data/lib/rubino/api/operations/cron_jobs/show_operation.rb +32 -0
- data/lib/rubino/api/operations/cron_jobs/trigger_operation.rb +38 -0
- data/lib/rubino/api/operations/cron_jobs/update_operation.rb +42 -0
- data/lib/rubino/api/operations/files/read_operation.rb +40 -0
- data/lib/rubino/api/operations/files/upload_operation.rb +175 -0
- data/lib/rubino/api/operations/health_operation.rb +46 -0
- data/lib/rubino/api/operations/memory/delete_operation.rb +32 -0
- data/lib/rubino/api/operations/memory/index_operation.rb +80 -0
- data/lib/rubino/api/operations/memory/stats_operation.rb +28 -0
- data/lib/rubino/api/operations/metrics_operation.rb +18 -0
- data/lib/rubino/api/operations/mode/show_operation.rb +29 -0
- data/lib/rubino/api/operations/mode/update_operation.rb +42 -0
- data/lib/rubino/api/operations/models/list_operation.rb +45 -0
- data/lib/rubino/api/operations/oauth/connections/disconnect_operation.rb +77 -0
- data/lib/rubino/api/operations/oauth/connections/list_operation.rb +36 -0
- data/lib/rubino/api/operations/oauth/providers/callback_operation.rb +82 -0
- data/lib/rubino/api/operations/oauth/providers/connect_operation.rb +44 -0
- data/lib/rubino/api/operations/oauth/providers/list_operation.rb +35 -0
- data/lib/rubino/api/operations/oauth/serializer.rb +21 -0
- data/lib/rubino/api/operations/runs/create_operation.rb +77 -0
- data/lib/rubino/api/operations/runs/events_operation.rb +195 -0
- data/lib/rubino/api/operations/runs/stop_operation.rb +34 -0
- data/lib/rubino/api/operations/sessions/create_operation.rb +46 -0
- data/lib/rubino/api/operations/sessions/delete_operation.rb +33 -0
- data/lib/rubino/api/operations/sessions/index_operation.rb +82 -0
- data/lib/rubino/api/operations/sessions/retry_operation.rb +45 -0
- data/lib/rubino/api/operations/sessions/show_operation.rb +59 -0
- data/lib/rubino/api/operations/sessions/undo_operation.rb +38 -0
- data/lib/rubino/api/operations/skills/list_operation.rb +34 -0
- data/lib/rubino/api/operations/skills/toggle_operation.rb +40 -0
- data/lib/rubino/api/operations/tasks/index_operation.rb +30 -0
- data/lib/rubino/api/operations/tasks/serializer.rb +60 -0
- data/lib/rubino/api/operations/tasks/show_operation.rb +33 -0
- data/lib/rubino/api/operations/tasks/stop_operation.rb +47 -0
- data/lib/rubino/api/request.rb +54 -0
- data/lib/rubino/api/responses.rb +64 -0
- data/lib/rubino/api/router.rb +72 -0
- data/lib/rubino/api/schemas.rb +103 -0
- data/lib/rubino/api/server.rb +102 -0
- data/lib/rubino/api/tls.rb +108 -0
- data/lib/rubino/attachments/classification.rb +16 -0
- data/lib/rubino/attachments/classify.rb +171 -0
- data/lib/rubino/attachments/defang.rb +47 -0
- data/lib/rubino/attachments/policy.rb +36 -0
- data/lib/rubino/attachments/preamble.rb +120 -0
- data/lib/rubino/boot/encryption_key.rb +32 -0
- data/lib/rubino/cli/chat/bang_shell.rb +257 -0
- data/lib/rubino/cli/chat/completion_builder.rb +290 -0
- data/lib/rubino/cli/chat/idle_card_host.rb +69 -0
- data/lib/rubino/cli/chat/image_inbox.rb +168 -0
- data/lib/rubino/cli/chat/session_resolver.rb +176 -0
- data/lib/rubino/cli/chat_command.rb +1674 -0
- data/lib/rubino/cli/commands.rb +250 -0
- data/lib/rubino/cli/config_command.rb +96 -0
- data/lib/rubino/cli/doctor_command.rb +251 -0
- data/lib/rubino/cli/jobs_command.rb +60 -0
- data/lib/rubino/cli/memory_command.rb +135 -0
- data/lib/rubino/cli/onboarding_wizard.rb +207 -0
- data/lib/rubino/cli/server_command.rb +139 -0
- data/lib/rubino/cli/session_command.rb +125 -0
- data/lib/rubino/cli/setup_command.rb +107 -0
- data/lib/rubino/cli/skills_command.rb +85 -0
- data/lib/rubino/cli/tools_command.rb +81 -0
- data/lib/rubino/cli/trust_gate.rb +71 -0
- data/lib/rubino/commands/built_ins.rb +46 -0
- data/lib/rubino/commands/command.rb +116 -0
- data/lib/rubino/commands/executor.rb +550 -0
- data/lib/rubino/commands/handlers/agents.rb +510 -0
- data/lib/rubino/commands/handlers/config.rb +88 -0
- data/lib/rubino/commands/handlers/help.rb +148 -0
- data/lib/rubino/commands/handlers/jobs.rb +71 -0
- data/lib/rubino/commands/handlers/mcp.rb +229 -0
- data/lib/rubino/commands/handlers/memory.rb +200 -0
- data/lib/rubino/commands/handlers/sessions.rb +207 -0
- data/lib/rubino/commands/handlers/skills.rb +195 -0
- data/lib/rubino/commands/handlers/status.rb +211 -0
- data/lib/rubino/commands/loader.rb +90 -0
- data/lib/rubino/config/configuration.rb +455 -0
- data/lib/rubino/config/defaults.rb +569 -0
- data/lib/rubino/config/loader.rb +115 -0
- data/lib/rubino/config/reasoning_prefs.rb +67 -0
- data/lib/rubino/config/writer.rb +72 -0
- data/lib/rubino/context/compressor.rb +149 -0
- data/lib/rubino/context/environment_inspector.rb +176 -0
- data/lib/rubino/context/file_discovery.rb +45 -0
- data/lib/rubino/context/message_boundary.rb +39 -0
- data/lib/rubino/context/prompt_assembler.rb +382 -0
- data/lib/rubino/context/summary_builder.rb +159 -0
- data/lib/rubino/context/token_budget.rb +68 -0
- data/lib/rubino/context/tool_pair_sanitizer.rb +70 -0
- data/lib/rubino/database/connection.rb +77 -0
- data/lib/rubino/database/migrations/001_create_initial_schema.rb +156 -0
- data/lib/rubino/database/migrations/002_create_runs.rb +45 -0
- data/lib/rubino/database/migrations/003_create_skill_states.rb +15 -0
- data/lib/rubino/database/migrations/004_create_cron_jobs.rb +36 -0
- data/lib/rubino/database/migrations/005_create_oauth_connections.rb +27 -0
- data/lib/rubino/database/migrations/006_create_webhook_deliveries.rb +34 -0
- data/lib/rubino/database/migrations/007_create_messages_fts.rb +59 -0
- data/lib/rubino/database/migrations/008_create_memory_facts.rb +75 -0
- data/lib/rubino/database/migrations/009_create_memory_graph.rb +55 -0
- data/lib/rubino/database/migrations/010_add_owner_pid_to_sessions.rb +20 -0
- data/lib/rubino/database/migrator.rb +48 -0
- data/lib/rubino/documents/converters/csv.rb +79 -0
- data/lib/rubino/documents/converters/docx.rb +129 -0
- data/lib/rubino/documents/converters/html.rb +28 -0
- data/lib/rubino/documents/converters/json.rb +35 -0
- data/lib/rubino/documents/converters/pdf.rb +59 -0
- data/lib/rubino/documents/converters/plain.rb +68 -0
- data/lib/rubino/documents/converters/pptx.rb +64 -0
- data/lib/rubino/documents/converters/xlsx.rb +62 -0
- data/lib/rubino/documents/converters/xml.rb +45 -0
- data/lib/rubino/documents/html.rb +71 -0
- data/lib/rubino/documents/registry.rb +68 -0
- data/lib/rubino/documents/table.rb +63 -0
- data/lib/rubino/documents.rb +50 -0
- data/lib/rubino/errors.rb +119 -0
- data/lib/rubino/files/workspace.rb +93 -0
- data/lib/rubino/interaction/cancel_token.rb +43 -0
- data/lib/rubino/interaction/clipboard_image.rb +84 -0
- data/lib/rubino/interaction/event_bus.rb +48 -0
- data/lib/rubino/interaction/events.rb +101 -0
- data/lib/rubino/interaction/image_input.rb +127 -0
- data/lib/rubino/interaction/input_queue.rb +117 -0
- data/lib/rubino/interaction/lifecycle.rb +299 -0
- data/lib/rubino/interaction/probe.rb +65 -0
- data/lib/rubino/interaction/state.rb +56 -0
- data/lib/rubino/jobs/cron_job_repository.rb +75 -0
- data/lib/rubino/jobs/handlers/cleanup_sessions_job.rb +32 -0
- data/lib/rubino/jobs/handlers/compact_session_job.rb +21 -0
- data/lib/rubino/jobs/handlers/distill_skill_job.rb +186 -0
- data/lib/rubino/jobs/handlers/extract_memory_job.rb +37 -0
- data/lib/rubino/jobs/handlers/summarize_session_job.rb +21 -0
- data/lib/rubino/jobs/queue.rb +184 -0
- data/lib/rubino/jobs/registry.rb +45 -0
- data/lib/rubino/jobs/runner.rb +79 -0
- data/lib/rubino/jobs/scheduler.rb +138 -0
- data/lib/rubino/jobs/webhook_delivery.rb +225 -0
- data/lib/rubino/jobs/worker.rb +59 -0
- data/lib/rubino/llm/adapter_factory.rb +47 -0
- data/lib/rubino/llm/adapter_response.rb +65 -0
- data/lib/rubino/llm/auxiliary_client.rb +61 -0
- data/lib/rubino/llm/bedrock_bearer_client.rb +235 -0
- data/lib/rubino/llm/content_builder.rb +55 -0
- data/lib/rubino/llm/credential_check.rb +93 -0
- data/lib/rubino/llm/error_classifier.rb +364 -0
- data/lib/rubino/llm/fake_provider.rb +292 -0
- data/lib/rubino/llm/inline_think_filter.rb +58 -0
- data/lib/rubino/llm/model_catalog.rb +29 -0
- data/lib/rubino/llm/provider_resolver.rb +48 -0
- data/lib/rubino/llm/reasoning_manager.rb +100 -0
- data/lib/rubino/llm/request.rb +56 -0
- data/lib/rubino/llm/ruby_llm_adapter.rb +794 -0
- data/lib/rubino/llm/scenario_loader.rb +68 -0
- data/lib/rubino/llm/scenario_selector.rb +80 -0
- data/lib/rubino/llm/scenarios/agent-creates-cron-failure.yml +29 -0
- data/lib/rubino/llm/scenarios/agent-creates-cron.yml +36 -0
- data/lib/rubino/llm/scenarios/analysis.yml +501 -0
- data/lib/rubino/llm/scenarios/complex-analysis.yml +598 -0
- data/lib/rubino/llm/scenarios/failure.yml +65 -0
- data/lib/rubino/llm/scenarios/happy-path.yml +24 -0
- data/lib/rubino/llm/scenarios/provider-quota-completed.yml +14 -0
- data/lib/rubino/llm/scenarios/wide-table.yml +121 -0
- data/lib/rubino/llm/scenarios/with-approvals.yml +50 -0
- data/lib/rubino/llm/scenarios/with-artifacts.yml +98 -0
- data/lib/rubino/llm/scenarios/with-clarify.yml +32 -0
- data/lib/rubino/llm/scenarios/with-reasoning.yml +175 -0
- data/lib/rubino/llm/scenarios/with-uploads.yml +104 -0
- data/lib/rubino/llm/thinking_support.rb +84 -0
- data/lib/rubino/llm/tool_bridge.rb +89 -0
- data/lib/rubino/logger.rb +99 -0
- data/lib/rubino/mcp/manager.rb +180 -0
- data/lib/rubino/mcp/mcp_tool_wrapper.rb +69 -0
- data/lib/rubino/mcp.rb +57 -0
- data/lib/rubino/memory/backend.rb +104 -0
- data/lib/rubino/memory/backends/default.rb +101 -0
- data/lib/rubino/memory/backends/sqlite.rb +653 -0
- data/lib/rubino/memory/backends.rb +53 -0
- data/lib/rubino/memory/deduplicator.rb +74 -0
- data/lib/rubino/memory/extractor.rb +85 -0
- data/lib/rubino/memory/flusher.rb +31 -0
- data/lib/rubino/memory/retriever.rb +50 -0
- data/lib/rubino/memory/sqlite_extraction_prompt.rb +70 -0
- data/lib/rubino/memory/sqlite_graph.rb +154 -0
- data/lib/rubino/memory/store.rb +228 -0
- data/lib/rubino/memory/threat_scanner.rb +68 -0
- data/lib/rubino/metrics.rb +175 -0
- data/lib/rubino/modes.rb +93 -0
- data/lib/rubino/oauth/connection_repository.rb +95 -0
- data/lib/rubino/oauth/provider/github.rb +75 -0
- data/lib/rubino/oauth/provider/google.rb +59 -0
- data/lib/rubino/oauth/provider.rb +149 -0
- data/lib/rubino/oauth/registry.rb +86 -0
- data/lib/rubino/oauth/token_encryptor.rb +87 -0
- data/lib/rubino/plugins/registry.rb +75 -0
- data/lib/rubino/plugins.rb +86 -0
- data/lib/rubino/run/approval_gate.rb +243 -0
- data/lib/rubino/run/attachment_downloader.rb +166 -0
- data/lib/rubino/run/event_store.rb +74 -0
- data/lib/rubino/run/executor.rb +383 -0
- data/lib/rubino/run/gate_registry.rb +39 -0
- data/lib/rubino/run/recorder.rb +69 -0
- data/lib/rubino/run/repository.rb +118 -0
- data/lib/rubino/run/session_approval_cache.rb +118 -0
- data/lib/rubino/security/allowlist_persister.rb +55 -0
- data/lib/rubino/security/approval_policy.rb +227 -0
- data/lib/rubino/security/command_allowlist.rb +24 -0
- data/lib/rubino/security/dangerous_patterns.rb +118 -0
- data/lib/rubino/security/deny_persister.rb +73 -0
- data/lib/rubino/security/doom_loop_detector.rb +43 -0
- data/lib/rubino/security/hardline_guard.rb +105 -0
- data/lib/rubino/security/pattern_matcher.rb +62 -0
- data/lib/rubino/security/prefix_deriver.rb +124 -0
- data/lib/rubino/security/readonly_commands.rb +211 -0
- data/lib/rubino/session/exporter.rb +101 -0
- data/lib/rubino/session/message.rb +77 -0
- data/lib/rubino/session/repository.rb +295 -0
- data/lib/rubino/session/store.rb +198 -0
- data/lib/rubino/session/summary_store.rb +65 -0
- data/lib/rubino/skills/prompt_index.rb +85 -0
- data/lib/rubino/skills/registry.rb +208 -0
- data/lib/rubino/skills/skill.rb +176 -0
- data/lib/rubino/skills/skill_tool.rb +215 -0
- data/lib/rubino/skills/state_repository.rb +37 -0
- data/lib/rubino/skills/toggle.rb +26 -0
- data/lib/rubino/tools/answer_child_tool.rb +83 -0
- data/lib/rubino/tools/ask_parent_tool.rb +232 -0
- data/lib/rubino/tools/attach_file_tool.rb +120 -0
- data/lib/rubino/tools/background_tasks.rb +520 -0
- data/lib/rubino/tools/base.rb +222 -0
- data/lib/rubino/tools/custom_tool_loader.rb +119 -0
- data/lib/rubino/tools/edit_tool.rb +122 -0
- data/lib/rubino/tools/git_tool.rb +71 -0
- data/lib/rubino/tools/github_tool.rb +233 -0
- data/lib/rubino/tools/glob_tool.rb +69 -0
- data/lib/rubino/tools/grep_tool.rb +206 -0
- data/lib/rubino/tools/memory_tool.rb +184 -0
- data/lib/rubino/tools/multi_edit_tool.rb +110 -0
- data/lib/rubino/tools/patch_tool.rb +260 -0
- data/lib/rubino/tools/probe_tool.rb +175 -0
- data/lib/rubino/tools/question_tool.rb +128 -0
- data/lib/rubino/tools/read_attachment_tool.rb +180 -0
- data/lib/rubino/tools/read_tool.rb +212 -0
- data/lib/rubino/tools/read_tracker.rb +98 -0
- data/lib/rubino/tools/registry.rb +166 -0
- data/lib/rubino/tools/result.rb +113 -0
- data/lib/rubino/tools/ruby_tool.rb +0 -0
- data/lib/rubino/tools/session_search_tool.rb +103 -0
- data/lib/rubino/tools/shell_input_tool.rb +96 -0
- data/lib/rubino/tools/shell_kill_tool.rb +76 -0
- data/lib/rubino/tools/shell_output_tool.rb +72 -0
- data/lib/rubino/tools/shell_registry.rb +158 -0
- data/lib/rubino/tools/shell_tail_tool.rb +118 -0
- data/lib/rubino/tools/shell_tool.rb +330 -0
- data/lib/rubino/tools/steer_tool.rb +118 -0
- data/lib/rubino/tools/subagent_probe.rb +89 -0
- data/lib/rubino/tools/summarize_file_tool.rb +182 -0
- data/lib/rubino/tools/task_result_tool.rb +90 -0
- data/lib/rubino/tools/task_stop_tool.rb +80 -0
- data/lib/rubino/tools/task_tool.rb +622 -0
- data/lib/rubino/tools/test_tool.rb +454 -0
- data/lib/rubino/tools/todo_tool.rb +93 -0
- data/lib/rubino/tools/tool_call_repository.rb +33 -0
- data/lib/rubino/tools/vision_tool.rb +85 -0
- data/lib/rubino/tools/webfetch_tool.rb +153 -0
- data/lib/rubino/tools/websearch_tool.rb +179 -0
- data/lib/rubino/tools/write_tool.rb +61 -0
- data/lib/rubino/trust.rb +88 -0
- data/lib/rubino/ui/api.rb +296 -0
- data/lib/rubino/ui/base.rb +252 -0
- data/lib/rubino/ui/bottom_composer.rb +1599 -0
- data/lib/rubino/ui/cli.rb +1987 -0
- data/lib/rubino/ui/completion_menu.rb +321 -0
- data/lib/rubino/ui/completion_source.rb +284 -0
- data/lib/rubino/ui/escape_reader.rb +169 -0
- data/lib/rubino/ui/indented_io.rb +88 -0
- data/lib/rubino/ui/input_history.rb +108 -0
- data/lib/rubino/ui/live_region.rb +183 -0
- data/lib/rubino/ui/markdown_renderer.rb +506 -0
- data/lib/rubino/ui/notifier.rb +163 -0
- data/lib/rubino/ui/null.rb +195 -0
- data/lib/rubino/ui/paste_store.rb +176 -0
- data/lib/rubino/ui/printer_base.rb +79 -0
- data/lib/rubino/ui/probe_wait_indicator.rb +75 -0
- data/lib/rubino/ui/queued_indicators.rb +66 -0
- data/lib/rubino/ui/status_bar.rb +100 -0
- data/lib/rubino/ui/stdout_proxy.rb +161 -0
- data/lib/rubino/ui/streaming_markdown.rb +186 -0
- data/lib/rubino/ui/subagent_cards.rb +134 -0
- data/lib/rubino/ui/subagent_view.rb +255 -0
- data/lib/rubino/ui.rb +21 -0
- data/lib/rubino/update_check.rb +187 -0
- data/lib/rubino/util/duration.rb +23 -0
- data/lib/rubino/util/hyperlink.rb +105 -0
- data/lib/rubino/util/output.rb +145 -0
- data/lib/rubino/util/secrets_mask.rb +83 -0
- data/lib/rubino/version.rb +5 -0
- data/lib/rubino/workspace.rb +85 -0
- data/lib/rubino-agent.rb +5 -0
- data/lib/rubino.rb +318 -0
- data/mise.toml +2 -0
- data/rubino-agent.gemspec +103 -0
- data/skills/ruby-expert/SKILL.md +67 -0
- data/skills/ruby-expert/references/concurrency.md +357 -0
- data/skills/ruby-expert/references/datetime-and-encoding.md +363 -0
- data/skills/ruby-expert/references/errors-and-types.md +460 -0
- data/skills/ruby-expert/references/gem-authoring.md +459 -0
- data/skills/ruby-expert/references/language-idioms.md +465 -0
- data/skills/ruby-expert/references/metaprogramming.md +339 -0
- data/skills/ruby-expert/references/oo-design.md +553 -0
- data/skills/ruby-expert/references/performance.md +383 -0
- data/skills/ruby-expert/references/rails.md +424 -0
- data/skills/ruby-expert/references/security.md +404 -0
- data/skills/ruby-expert/references/testing.md +473 -0
- data/skills/ruby-expert/references/tooling.md +466 -0
- metadata +856 -0
|
@@ -0,0 +1,794 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ruby_llm"
|
|
4
|
+
require "faraday"
|
|
5
|
+
require "net/http"
|
|
6
|
+
require_relative "tool_bridge"
|
|
7
|
+
require_relative "inline_think_filter"
|
|
8
|
+
require_relative "provider_resolver"
|
|
9
|
+
require_relative "reasoning_manager"
|
|
10
|
+
require_relative "thinking_support"
|
|
11
|
+
|
|
12
|
+
module Rubino
|
|
13
|
+
module LLM
|
|
14
|
+
# Raised when a stream goes silent past stale_chunk_timeout. (#22)
|
|
15
|
+
# Signals a stream that stopped producing chunks. Inherits from StandardError,
# so a bare `rescue` (or `rescue StandardError`) catches it.
class StreamStaleError < StandardError
end
|
|
16
|
+
|
|
17
|
+
# Transport-level drops that surface mid-request. The canonical list lives
|
|
18
|
+
# on ErrorClassifier (the single source of truth for retryability); aliased
|
|
19
|
+
# here for the stream-path `rescue *STREAM_DROP_ERRORS` and existing specs.
|
|
20
|
+
# faraday-net_http re-raises IOError/EOFError (and friends) as
|
|
21
|
+
# Faraday::ConnectionFailed, so that is the type we actually see for an
|
|
22
|
+
# upstream socket close (message often "end of file reached"). Retried ONLY
|
|
23
|
+
# before the first streamed chunk — see #stream_once.
|
|
24
|
+
# NOTE(review): this alias is resolved when the file is loaded, so
# ErrorClassifier must already be defined by then. None of the requires
# visible at the top of this file name it — confirm the load order
# guarantees it.
STREAM_DROP_ERRORS = ErrorClassifier::STREAM_DROP_ERRORS
|
|
25
|
+
|
|
26
|
+
# Adapter wrapping ruby_llm to isolate all LLM integration details.
|
|
27
|
+
# The rest of the application never calls ruby_llm directly.
|
|
28
|
+
class RubyLLMAdapter
|
|
29
|
+
attr_reader :model_id, :provider
|
|
30
|
+
|
|
31
|
+
# Builds an adapter. Omitted collaborators fall back to the process-wide
# Rubino defaults (configuration, ui, event bus); an omitted model falls
# back to the configured default model and an omitted provider is resolved
# via #resolve_provider. tool_executor and cancel_token stay nil when not
# supplied.
def initialize(model_id: nil, provider: nil, config: nil, ui: nil, event_bus: nil,
               tool_executor: nil, cancel_token: nil, isolate_config: false)
  @config = config || Rubino.configuration
  @model_id = model_id || @config.model_default
  @provider = provider || resolve_provider

  @cancel_token = cancel_token
  # A nil executor means ToolBridge falls back to a direct tool.call.
  @tool_executor = tool_executor
  @event_bus = event_bus || Rubino.event_bus
  @ui = ui || Rubino.ui
  @temperature = @config.model_temperature

  # SLICE-7: the primary adapter (isolate_config: false) keeps writing the
  # process-global RubyLLM configuration exactly as before, so existing
  # single-provider setups are byte-identical.
  unless isolate_config
    configure_ruby_llm!
    return
  end

  # A FallbackChain entry instead scopes its provider config (api keys /
  # base_url / timeout) into a per-adapter RubyLLM::Context. Switching
  # providers for a fallback must NOT mutate the global, or concurrent
  # sessions on the API/server path corrupt each other's provider config.
  @context = RubyLLM.context { |llm_config| apply_provider_config!(llm_config) }
end
|
|
56
|
+
|
|
57
|
+
# The single LLM boundary entry: take one
|
|
58
|
+
# LLM::Request, dispatch to the streaming vs non-streaming transport based
|
|
59
|
+
# on request.stream, and return a normalized AdapterResponse. The streaming
|
|
60
|
+
# variant yields chunks to the block then returns the same Response. This
|
|
61
|
+
# is the front door the conversation loop depends on; #chat / #stream
|
|
62
|
+
# remain as the underlying transports and stay valid for existing callers.
|
|
63
|
+
#
|
|
64
|
+
# Graceful thinking degradation (#75): a provider on the anthropic-
|
|
65
|
+
# compatible path that rejects the thinking budget used to hard-error the
|
|
66
|
+
# user's very first prompt (the default effort is medium). When the
|
|
67
|
+
# rejection is recognised, remember it for the session, tell the user
|
|
68
|
+
# once, and retry this same request WITHOUT the budget. Safe to re-issue:
|
|
69
|
+
# the rejection is a pre-stream 400, so no token reached the UI.
|
|
70
|
+
# Dispatches one request and returns whatever #dispatch returns. If the
# attempt fails with a recognised thinking-budget rejection (#75), the
# provider is marked as not supporting thinking and the same request is
# dispatched exactly once more; any other error propagates unchanged.
def call(request, &)
  begin
    dispatch(request, &)
  rescue StandardError => failure
    # Anything that is not a thinking-budget rejection is re-raised as-is.
    raise unless thinking_budget_rejected?(failure)

    # Remember the rejection (and notify the UI), then re-issue the request.
    ThinkingSupport.mark_unsupported!(@provider, notify: @ui)
    dispatch(request, &)
  end
end
|
|
78
|
+
|
|
79
|
+
# Sends a chat completion request (non-streaming). image_paths, if any,
|
|
80
|
+
# are forwarded to ruby_llm's `with:` slot so the primary model ingests
|
|
81
|
+
# the bytes natively (no `vision` tool round-trip). Only meaningful on
|
|
82
|
+
# the first model call of a turn — Loop strips it for follow-ups.
|
|
83
|
+
# Non-streaming completion. In bedrock-bearer mode the call is handed to the
# bearer client, which receives only the messages and tools. Otherwise a
# chat is built, primed with the history and optional prefill, asked the
# last user content (with any image paths attached), and the reply is
# normalised through #build_response.
def chat(messages:, tools: nil, response_format: nil, image_paths: [], prefill: nil)
  return bedrock_bearer_client.chat(messages: messages, tools: tools) if bedrock_bearer_mode?

  conversation = build_chat(tools: tools, response_format: response_format)
  load_history(conversation, messages)
  apply_prefill(conversation, prefill)

  reply = conversation.ask(last_user_content(messages), with: presence(image_paths))
  build_response(reply)
end
|
|
94
|
+
|
|
95
|
+
# Sends a streaming chat request, yielding chunks. Inline <think>…</think>
|
|
96
|
+
# sentinels are routed to the :thinking channel. Buffered partial content
|
|
97
|
+
# is preserved across mid-stream parse errors so downstream code can show
|
|
98
|
+
# whatever the model produced before the failure.
|
|
99
|
+
# Streaming completion: picks the transport and forwards the caller's block
# to it. No retrying happens at this level.
def stream(messages:, tools: nil, response_format: nil, image_paths: [], prefill: nil, &)
  if bedrock_bearer_mode?
    # BedrockBearerClient#stream buffers the entire /converse response
    # before emitting anything, so a transport error can only occur before
    # the first chunk — no token has reached the UI. The error propagates
    # to the runner, which can safely re-issue a fresh request.
    bedrock_bearer_client.stream(messages: messages, tools: tools, &)
  else
    # Retry ownership lives in Agent::ModelCallRunner (Slice 4), so the
    # same failure is never retried twice. #stream_once keeps only the
    # streaming-specific safety decision: it RAISES a transport drop when
    # nothing has been emitted yet (chunks_seen.zero?), letting the runner
    # retry without double output, and RETURNS the buffered partial once
    # any chunk has flowed, so a drop is never retried mid-stream.
    stream_once(messages: messages, tools: tools, response_format: response_format,
                image_paths: image_paths, prefill: prefill, &)
  end
end
|
|
121
|
+
|
|
122
|
+
# Returns model information (context window, etc.)
|
|
123
|
+
# Looks up the current model in RubyLLM's model registry. Any StandardError
# raised by the lookup is swallowed and reported as nil.
def model_info
  begin
    RubyLLM.models.find(@model_id)
  rescue StandardError
    nil
  end
end
|
|
128
|
+
|
|
129
|
+
# Returns the context window size for the current model
|
|
130
|
+
# Context window size for the current model.
#
# Precedence: the configured override (model_context_length) when set, then
# the context_window reported by #model_info, then a 128_000 fallback. The
# override is checked first so the model lookup is skipped entirely when a
# value is configured, and the config is read only once.
def context_window
  configured = @config.model_context_length
  return configured if configured

  model_info&.context_window || 128_000
end
|
|
136
|
+
|
|
137
|
+
private
|
|
138
|
+
|
|
139
|
+
# The raw #call dispatch (streaming vs non-streaming), shared by the
|
|
140
|
+
# normal path and the one-shot thinking-budget retry (#75).
|
|
141
|
+
# Routes a request to the matching transport: #stream (forwarding the
# caller's block) when the request asks for streaming, #chat otherwise.
# Both receive the request's messages, tools, image paths and prefill.
def dispatch(request, &)
  unless request.stream?
    return chat(messages: request.messages, tools: request.tools,
                image_paths: request.image_paths, prefill: request.prefill)
  end

  stream(messages: request.messages, tools: request.tools,
         image_paths: request.image_paths, prefill: request.prefill, &)
end
|
|
150
|
+
|
|
151
|
+
# True when +error+ is a provider's "thinking (budget) is not supported"
|
|
152
|
+
# rejection AND this request actually carried a budget (#75). Once the
|
|
153
|
+
# provider is marked unsupported the budget drops to 0, so this can never
|
|
154
|
+
# match twice — no retry loop.
|
|
155
|
+
# Truthy only when this adapter is on the anthropic generation path, the
# current thinking budget is positive, and ThinkingSupport recognises
# +error+ as a thinking rejection. The checks short-circuit in that order.
def thinking_budget_rejected?(error)
  budget_in_play = anthropic_generation_path? && thinking_budget.positive?
  budget_in_play && ThinkingSupport.rejection?(error)
end
|
|
159
|
+
|
|
160
|
+
# One streaming attempt. See #stream for the retry / no-double-output
|
|
161
|
+
# contract. Inline <think>…</think> sentinels are routed to :thinking;
|
|
162
|
+
# buffered content is preserved across mid-stream parse/transport errors.
|
|
163
|
+
def stream_once(messages:, tools:, response_format:, image_paths:, prefill: nil, &block)
|
|
164
|
+
chat_instance = build_chat(tools: tools, response_format: response_format)
|
|
165
|
+
load_history(chat_instance, messages)
|
|
166
|
+
apply_prefill(chat_instance, prefill)
|
|
167
|
+
|
|
168
|
+
think_filter = InlineThinkFilter.new
|
|
169
|
+
buffered = +""
|
|
170
|
+
last_chunk_at = monotonic_now
|
|
171
|
+
stale_after = stale_chunk_timeout
|
|
172
|
+
chunks_seen = 0
|
|
173
|
+
|
|
174
|
+
# Each assistant message ruby_llm streams within this one ask() is a
|
|
175
|
+
# distinct content block: on a multi-step tool turn the model emits
|
|
176
|
+
# text → tool_use → (next message) text → … . We tag every content
|
|
177
|
+
# delta with the current block's id so a consumer can regroup the
|
|
178
|
+
# deltas that belong together instead of splitting them around the
|
|
179
|
+
# tool calls that interleave mid-stream. before_message bumps the id;
|
|
180
|
+
# after_message flushes the filter (so a buffered tail lands on THIS
|
|
181
|
+
# block, before the tool fires) and emits the block boundary.
|
|
182
|
+
message_block_id = 0
|
|
183
|
+
|
|
184
|
+
emit = lambda do |type, text|
|
|
185
|
+
next if text.nil? || text.empty?
|
|
186
|
+
|
|
187
|
+
buffered << text if type == :content
|
|
188
|
+
|
|
189
|
+
begin
|
|
190
|
+
block.call({ type: type, text: text, message_id: message_block_id })
|
|
191
|
+
rescue StandardError => e
|
|
192
|
+
# A UI/EventBus error must not abort the whole stream — log and
|
|
193
|
+
# keep buffering so we can still build the response. (issue #6)
|
|
194
|
+
log_safely(event: "llm.stream.emit_error", error: e.message, type: type)
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
# Guarded: prefer ruby_llm's before_message/after_message (the
|
|
199
|
+
# on_new_message/on_end_message names are deprecated in ruby_llm 1.x and
|
|
200
|
+
# dropped in 2.0); fall back to the legacy names on older builds. A chat
|
|
201
|
+
# (or test double) exposing neither simply gets no block boundaries and
|
|
202
|
+
# the consumer falls back to the legacy per-adjacency grouping. Use a
|
|
203
|
+
# proc (not a lambda) for the close handler so it tolerates whatever
|
|
204
|
+
# arity the callback invokes it with.
|
|
205
|
+
if chat_instance.respond_to?(:before_message)
|
|
206
|
+
chat_instance.before_message { message_block_id += 1 }
|
|
207
|
+
elsif chat_instance.respond_to?(:on_new_message)
|
|
208
|
+
chat_instance.on_new_message { message_block_id += 1 }
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
close_block = proc do
|
|
212
|
+
# Flush any tail the think-filter is still holding so it is emitted
|
|
213
|
+
# with THIS block's id before we close the block (and before the
|
|
214
|
+
# tool call that follows a tool-use message executes).
|
|
215
|
+
flush_filter(think_filter, &emit)
|
|
216
|
+
@event_bus&.emit(Interaction::Events::MESSAGE_COMPLETED, message_id: message_block_id)
|
|
217
|
+
end
|
|
218
|
+
if chat_instance.respond_to?(:after_message)
|
|
219
|
+
chat_instance.after_message(&close_block)
|
|
220
|
+
elsif chat_instance.respond_to?(:on_end_message)
|
|
221
|
+
chat_instance.on_end_message(&close_block)
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
begin
|
|
225
|
+
response = chat_instance.ask(last_user_content(messages), with: presence(image_paths)) do |chunk|
|
|
226
|
+
# User interrupt poll. Raised here propagates out of the streaming
|
|
227
|
+
# callback, ruby_llm closes the upstream connection, and Loop /
|
|
228
|
+
# Lifecycle catch the Interrupted exception to bail out cleanly.
|
|
229
|
+
@cancel_token&.check!
|
|
230
|
+
|
|
231
|
+
# Any chunk from upstream — content, thinking, or a tool-call delta —
|
|
232
|
+
# marks this request "committed": something came back, so a later
|
|
233
|
+
# drop must NOT trigger a retry (it would re-run generation and could
|
|
234
|
+
# re-fire a mid-stream tool call / double the output).
|
|
235
|
+
chunks_seen += 1
|
|
236
|
+
last_chunk_at = monotonic_now
|
|
237
|
+
check_stream_stale!(last_chunk_at, stale_after)
|
|
238
|
+
|
|
239
|
+
if chunk.respond_to?(:thinking) && chunk.thinking
|
|
240
|
+
thinking_text = chunk.thinking.respond_to?(:text) ? chunk.thinking.text : chunk.thinking.to_s
|
|
241
|
+
emit.call(:thinking, thinking_text)
|
|
242
|
+
end
|
|
243
|
+
think_filter.feed(chunk.content, &emit) if chunk.content.is_a?(String) && !chunk.content.empty?
|
|
244
|
+
end
|
|
245
|
+
rescue Rubino::Interrupted
|
|
246
|
+
# Flush whatever the filter has buffered, then re-raise. Loop will
|
|
247
|
+
# catch and persist the partial assistant message so the user sees
|
|
248
|
+
# what arrived before they hit Esc.
|
|
249
|
+
flush_filter(think_filter, &emit)
|
|
250
|
+
raise
|
|
251
|
+
rescue JSON::ParserError, StreamStaleError => e
|
|
252
|
+
# Preserve whatever we've buffered so far so the user sees partial
|
|
253
|
+
# output instead of a blank failure. (issues #12, #22)
|
|
254
|
+
log_safely(event: "llm.stream.partial", error: e.message, buffered_bytes: buffered.bytesize)
|
|
255
|
+
flush_filter(think_filter, &emit)
|
|
256
|
+
return partial_response(buffered)
|
|
257
|
+
rescue *STREAM_DROP_ERRORS => e
|
|
258
|
+
# A genuine transport drop (the observed M3 EOF, a connection reset, a
|
|
259
|
+
# read timeout, …). If NOTHING was emitted yet, re-raise so the runner
|
|
260
|
+
# (Agent::ModelCallRunner) can retry a fresh request — safe, no token
|
|
261
|
+
# reached the user. If chunks already flowed, preserve the partial and
|
|
262
|
+
# stop: never
|
|
263
|
+
# re-issue after output. ErrorClassifier classifies these as retryable.
|
|
264
|
+
raise if chunks_seen.zero?
|
|
265
|
+
|
|
266
|
+
log_safely(event: "llm.stream.partial_interrupted", error: e.message,
|
|
267
|
+
buffered_bytes: buffered.bytesize)
|
|
268
|
+
flush_filter(think_filter, &emit)
|
|
269
|
+
return partial_response(buffered)
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
# Guard flush in the same way as the per-chunk emit so a final UI error
|
|
273
|
+
# doesn't lose the response. (issue #21)
|
|
274
|
+
flush_filter(think_filter, event: "llm.stream.flush_error", &emit)
|
|
275
|
+
build_response(response)
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
# Drains whatever the think-filter still holds through +emit+. A
# StandardError raised while flushing (for instance by the UI callback) is
# logged under +event+ rather than propagated, so a late failure never loses
# the response (issues #6, #21).
def flush_filter(think_filter, event: "llm.stream.flush_error", &emit)
  begin
    think_filter.flush(&emit)
  rescue StandardError => flush_error
    log_safely(event: event, error: flush_error.message)
  end
end
|
|
285
|
+
|
|
286
|
+
# Wraps the text buffered so far in an AdapterResponse for a stream that was
# cut after at least one chunk (parse error, stale stream, or a transport
# drop after the first chunk). The response is flagged +interrupted+ so the
# Loop fails the turn (run.failed) instead of mistaking the truncated buffer
# for a finished answer (the silent "completed-but-empty" bug — see
# Rubino::StreamInterruptedError).
def partial_response(buffered)
  attributes = {
    content: buffered,
    tool_calls: [],
    input_tokens: 0,
    output_tokens: 0,
    model_id: @model_id,
    interrupted: true
  }
  AdapterResponse.new(**attributes)
end
|
|
295
|
+
|
|
296
|
+
# Runs the provider-config block against the config object yielded by
# RubyLLM.configure.
def configure_ruby_llm!
  RubyLLM.configure do |global_config|
    apply_provider_config!(global_config)
  end
end
|
|
299
|
+
|
|
300
|
+
# The provider-config block, applied to a config target `c`. The primary
|
|
301
|
+
# adapter passes the process-global (RubyLLM.configure); a fallback adapter
|
|
302
|
+
# passes a per-call RubyLLM::Context config (SLICE-7) so the switch never
|
|
303
|
+
# touches the global. Identical writes either way — only the target differs.
|
|
304
|
+
def apply_provider_config!(c)
  # RUBYLLM_DEBUG: send ruby_llm's request/response dump to a log file —
  # NEVER stdout, which the TUI is running on. Use it to verify what
  # `tools: [...]` and `messages: [...]` actually go on the wire when a
  # provider misbehaves (e.g. emits roleplay markdown instead of tool_calls).
  if ENV["RUBYLLM_DEBUG"]
    require "logger"
    require "fileutils"
    debug_file = debug_log_path
    FileUtils.mkdir_p(File.dirname(debug_file))
    # Inject an explicit Logger so ruby_llm's lazy
    # `@logger ||= config.logger || Logger.new(...)` picks it up, then reset
    # the memo: a prior RubyLLM.logger access memoizes against the config of
    # that moment and would otherwise shadow the injected logger.
    c.logger = ::Logger.new(debug_file, progname: "RubyLLM", level: ::Logger::DEBUG)
    c.log_level = ::Logger::DEBUG
    RubyLLM.instance_variable_set(:@logger, nil)
  end

  # Stock provider keys: each one is copied only when its variable is set.
  openai_key = ENV["OPENAI_API_KEY"]
  c.openai_api_key = openai_key if openai_key
  anthropic_key = ENV["ANTHROPIC_API_KEY"]
  c.anthropic_api_key = anthropic_key if anthropic_key
  gemini_key = ENV["GEMINI_API_KEY"]
  c.gemini_api_key = gemini_key if gemini_key

  # Bedrock IAM credentials (Mode 2 / 3): need both the key and the secret.
  bedrock_key = ENV["BEDROCK_API_KEY"]
  bedrock_secret = ENV["BEDROCK_SECRET_KEY"]
  if bedrock_key && bedrock_secret
    c.bedrock_api_key = bedrock_key
    c.bedrock_secret_key = bedrock_secret
    c.bedrock_region = ENV.fetch("BEDROCK_REGION", "us-east-1")
    session_token = ENV["BEDROCK_SESSION_TOKEN"]
    c.bedrock_session_token = session_token if session_token
  end

  settings = provider_cfg
  base_url = settings["base_url"]

  # Any provider may declare openai_compatible: true to route through the
  # OpenAI provider with a custom base_url and API key. Symmetrically,
  # anthropic_compatible: true routes through the Anthropic provider — used
  # for backends exposing a native Anthropic-Messages endpoint (e.g.
  # MiniMax's /anthropic), which avoids the OpenAI-endpoint quirks
  # (no-[DONE] stream close, string-shaped errors).
  if openai_compatible_provider?
    c.openai_api_base = base_url if base_url
    c.openai_api_key = openai_compatible_api_key!(settings)
  elsif anthropic_compatible_provider?
    c.anthropic_api_base = base_url if base_url
    c.anthropic_api_key = anthropic_compatible_api_key!(settings)
  elsif @provider == "openai" && base_url
    c.openai_api_base = base_url
  end

  # Retry/backoff is OWNED by Agent::ModelCallRunner (token-gated,
  # full-jitter, safe for streaming), so ruby_llm's built-in faraday-retry
  # (default max=3) is switched off: on 1.15 it retries POST and RE-INVOKES
  # the stream on_data handler on a drop -> double-output to the UI, and it
  # would multiply with the runner's retries into a retry storm. Single
  # source of truth.
  c.max_retries = 0

  # ruby_llm maps request_timeout -> Faraday options.timeout, which the
  # net_http adapter applies as Net::HTTP read_timeout: a PER-READ socket
  # inactivity timer that RESETS on every received chunk (NOT a total). This
  # one knob is therefore both the first-token and the inter-token idle
  # bound — the same mechanism the OpenAI/Anthropic SDKs rely on. Size it to
  # the slowest expected gap (a cold model load before the first token); a
  # truly silent socket then fails within this many seconds as a
  # Net::ReadTimeout (-> Faraday) and is retried pre-first-token by the
  # runner. Override per backend with providers.<name>.request_timeout_seconds
  # (e.g. raise it for a large local Ollama that cold-loads for minutes).
  c.request_timeout = settings["request_timeout_seconds"] || 600
end
|
|
375
|
+
|
|
376
|
+
# Resolves the api_key for an openai_compatible provider (config first, then
# OPENAI_API_KEY) or raises a clear configuration error. It used to fall
# back to the literal "default", which would hit the upstream and surface as
# a cryptic 401. (issue #3)
def openai_compatible_api_key!(prov_cfg)
  fallback_variable = "OPENAI_API_KEY"
  compatible_api_key!(prov_cfg, env_fallback: fallback_variable)
end
|
|
383
|
+
|
|
384
|
+
# Anthropic-compatible counterpart of #openai_compatible_api_key!: resolves
# the provider key (config, then ANTHROPIC_API_KEY) or raises the same clear
# configuration error, so an arbitrary Anthropic-Messages backend (MiniMax's
# /anthropic) never silently sends an empty key and surfaces a cryptic 401.
def anthropic_compatible_api_key!(prov_cfg)
  fallback_variable = "ANTHROPIC_API_KEY"
  compatible_api_key!(prov_cfg, env_fallback: fallback_variable)
end
|
|
391
|
+
|
|
392
|
+
# Shared key lookup for the *_compatible providers: prefers the provider's
# configured "api_key", then the +env_fallback+ environment variable.
# Raises Rubino::Error when neither yields a non-empty key.
def compatible_api_key!(prov_cfg, env_fallback:)
  candidate = prov_cfg["api_key"] || ENV[env_fallback]

  if candidate.nil? || candidate.empty?
    raise Rubino::Error,
          "Missing API key for provider '#{@provider}'. " \
          "Set providers.#{@provider}.api_key in ~/.rubino/config.yml " \
          "(e.g. ${#{@provider.to_s.upcase}_API_KEY} with the value in .env)."
  end

  candidate
end
|
|
401
|
+
|
|
402
|
+
# Resolution fallback for the direct-construction edge: AdapterFactory
|
|
403
|
+
# always passes a concrete provider, so this only runs when the adapter is
|
|
404
|
+
# built without one (tests, one-shot callers). Interpret the config
|
|
405
|
+
# default — including "auto" and the Bedrock-bearer override — through the
|
|
406
|
+
# single ProviderResolver seam rather than re-implementing it here.
|
|
407
|
+
def resolve_provider
  # The configured default (possibly "auto") is handed to the resolver as-is.
  configured_default = @config.model_provider
  ProviderResolver.resolve(@model_id, explicit_provider: configured_default)
end
|
|
410
|
+
|
|
411
|
+
# Builds a ruby_llm chat for @model_id: picks the provider routing, applies
# the generation params, and registers +tools+ (if any). Returns the chat.
def build_chat(tools: nil, response_format: nil)
  chat_options = { model: @model_id }
  chat_options[:response_format] = response_format if response_format

  settings = provider_cfg

  # OpenAI-compatible providers (ollama, lm-studio, vllm, etc.) route through
  # the openai provider; Anthropic-compatible ones (MiniMax /anthropic, etc.)
  # route through the anthropic provider. Both skip model validation so an
  # arbitrary model id (e.g. MiniMax-M2.7) is accepted without a
  # model-registry entry.
  forced_provider =
    if openai_compatible_provider?
      :openai
    elsif anthropic_compatible_provider?
      :anthropic
    end

  if forced_provider
    chat_options.update(provider: forced_provider, assume_model_exists: true)
  elsif settings["assume_model_exists"]
    chat_options[:assume_model_exists] = true
    chat_options[:provider] = @provider.to_sym if @provider
  end

  # SLICE-7: a fallback adapter built with isolate_config: true carries a
  # per-call RubyLLM::Context, so its provider config (base_url/keys/timeout)
  # never leaks into the process-global. The chat is built from that context;
  # the primary adapter (@context nil) uses the global RubyLLM.chat.
  chat_source = @context || RubyLLM
  chat = chat_source.chat(**chat_options)

  apply_generation_params(chat)

  # ToolBridge wraps each Rubino tool so ruby_llm can call it. With a
  # ToolExecutor, execution goes through the full pipeline (approval,
  # truncation, audit recording); otherwise the bridge calls tool.call()
  # directly (used in tests/one-shot mode).
  Array(tools).each do |tool|
    bridged = ToolBridge.for(tool, ui: @ui, event_bus: @event_bus,
                                   tool_executor: @tool_executor)
    chat.with_tool(bridged)
  end

  chat
end
|
|
454
|
+
|
|
455
|
+
# Applies the request-shaping knobs ruby_llm 1.15 supports — temperature,
|
|
456
|
+
# max_tokens, and a thinking/reasoning budget — onto the chat instance.
|
|
457
|
+
# The render rules (enable manual thinking with a budget, force temp=1,
|
|
458
|
+
# raise max_tokens to fit budget + headroom) are a faithful port of the
|
|
459
|
+
# reference and live in LLM::ReasoningManager — the
|
|
460
|
+
# single source of truth for the wire shape. This method only RESOLVES the
|
|
461
|
+
# config inputs (which path, budget, ceiling, headroom, configured temp)
|
|
462
|
+
# and APPLIES the manager's rendered params onto the chat.
|
|
463
|
+
#
|
|
464
|
+
# Why max_tokens matters for MiniMax-M2.7: ruby_llm's anthropic provider
|
|
465
|
+
# defaults max_tokens to 4096 (Anthropic::Chat#build_base_payload:
|
|
466
|
+
# `model.max_tokens || 4096`), and with assume_model_exists the model
|
|
467
|
+
# carries no max_tokens — so a reasoning model can burn the whole 4096 on
|
|
468
|
+
# thinking tokens and return ZERO visible text (the "completed but empty"
|
|
469
|
+
# symptom). The manager raises the ceiling so it has room to think AND
|
|
470
|
+
# answer. Thinking + the aggressive ceiling are Anthropic-Messages concepts
|
|
471
|
+
# only safe on the anthropic-family path; for openai/ollama/etc. we leave
|
|
472
|
+
# token limits to the provider (apply_max_tokens: false) and only apply
|
|
473
|
+
# temperature.
|
|
474
|
+
#
|
|
475
|
+
# ruby_llm wiring confirmed on 1.15:
|
|
476
|
+
# * with_temperature(t) -> payload[:temperature] (anthropic/chat.rb add_optional_fields)
|
|
477
|
+
# * with_params(max_tokens: n) -> deep-merged over payload[:max_tokens] (provider.rb#complete)
|
|
478
|
+
# * with_thinking(budget: n) -> payload[:thinking] = {type:"enabled",
|
|
479
|
+
# budget_tokens:n} (anthropic/chat.rb build_thinking_payload)
|
|
480
|
+
def apply_generation_params(chat)
  on_anthropic_path = anthropic_generation_path?
  # A thinking budget is only requested on the anthropic-family path.
  budget = on_anthropic_path ? thinking_budget : 0

  rendered = reasoning_manager.render(
    budget: budget,
    temperature: @temperature,
    max_tokens: max_output_tokens,
    text_headroom: text_headroom_tokens,
    apply_max_tokens: on_anthropic_path
  )

  extra_params = {}
  token_cap = rendered.max_tokens
  extra_params[:max_tokens] = token_cap unless token_cap.nil?

  if rendered.thinking_enabled?
    if ThinkingSupport.budget_via_params?(provider_cfg, chat)
      extra_params[:thinking] = rendered.thinking
    elsif chat.respond_to?(:with_thinking)
      chat.with_thinking(budget: rendered.thinking[:budget_tokens])
    end
  end

  temperature = rendered.temperature
  chat.with_temperature(temperature) if !temperature.nil? && chat.respond_to?(:with_temperature)

  # One with_params call only — ruby_llm REPLACES @params on every call, so
  # max_tokens and a params-routed thinking block must travel together.
  chat.with_params(**extra_params) if !extra_params.empty? && chat.respond_to?(:with_params)
end
|
|
505
|
+
|
|
506
|
+
# Memoized ReasoningManager, built on first use.
def reasoning_manager
  @reasoning_manager ||= ReasoningManager.new
end
|
|
507
|
+
|
|
508
|
+
# Whether generation runs through ruby_llm's anthropic provider — the only
# path where thinking budgets and the 4096 max_tokens default apply. Holds
# for anthropic_compatible providers and for the "anthropic" / "bedrock"
# providers.
def anthropic_generation_path?
  compatible = anthropic_compatible_provider?
  return compatible if compatible

  case @provider.to_s
  when "anthropic", "bedrock" then true
  else false
  end
end
|
|
514
|
+
|
|
515
|
+
# Configurable max output tokens, as an Integer. Lookup order:
# providers.<name>.max_tokens, then model.max_tokens, then a
# reasoning-model-sane default (16k, versus ruby_llm's 4096).
def max_output_tokens
  configured = provider_cfg["max_tokens"] || @config.dig("model", "max_tokens")
  (configured || 16_384).to_i
end
|
|
523
|
+
|
|
524
|
+
# Thinking/reasoning budget in tokens. 0 / nil disables thinking entirely.
|
|
525
|
+
# thinking.effort wins when set (off→0, low→4000, medium→8000, high→16000);
|
|
526
|
+
# otherwise providers.<name>.thinking_budget, then model.thinking_budget,
|
|
527
|
+
# then a medium default (8000 — the same value the reference THINKING_BUDGET
|
|
528
|
+
# maps "medium" to). Only meaningful for the anthropic-compatible path;
|
|
529
|
+
# other providers ignore with_thinking or never see it (we still set it,
|
|
530
|
+
# ruby_llm only renders thinking for providers that support it).
|
|
531
|
+
def thinking_budget
  # Two capability gates come before any configured value:
  #  * a provider that rejected the budget earlier this session never gets
  #    sent one again (#75);
  #  * a provider configured/known to mishandle an ACCEPTED budget never
  #    gets sent one at all (#2) — capability beats the requested effort.
  if ThinkingSupport.unsupported?(@provider) || !ThinkingSupport.supports?(provider_cfg, @model_id)
    return 0
  end

  effort = Config::ReasoningPrefs.effort(@config)
  return Config::ReasoningPrefs.effort_budget(effort).to_i if effort

  # No effort set: provider budget, then model budget, then 8000. Only nil
  # falls through, so an explicit 0 is kept.
  settings = provider_cfg
  budget = settings["thinking_budget"] if settings.key?("thinking_budget")
  budget = @config.dig("model", "thinking_budget") if budget.nil?
  budget = 8000 if budget.nil?
  budget.to_i
end
|
|
547
|
+
|
|
548
|
+
# Tokens reserved for visible output on top of the thinking budget, so the
# model can think AND still answer. Read from model.max_tokens_text_headroom;
# defaults to 4096, mirroring the reference +4096.
def text_headroom_tokens
  headroom = @config.dig("model", "max_tokens_text_headroom")
  headroom ||= 4096
  headroom.to_i
end
|
|
553
|
+
|
|
554
|
+
# Truthy when a Bedrock Bearer token is in use (short-term API key, no
# secret): the provider is "bedrock" or "anthropic", BEDROCK_API_KEY is set
# and BEDROCK_SECRET_KEY is not.
def bedrock_bearer_mode?
  return false unless %w[bedrock anthropic].include?(@provider)

  api_key = ENV["BEDROCK_API_KEY"]
  api_key && !ENV.key?("BEDROCK_SECRET_KEY")
end
|
|
559
|
+
|
|
560
|
+
# NOTE: the provider config hash from the config file (e.g. providers.ollama.*)
# is returned by #provider_cfg, defined after #debug_log_path.
|
|
561
|
+
# Path of the RUBYLLM_DEBUG log. It lives under the resolved home
# (RUBINO_HOME -> else ~/.rubino), so an isolated/custom home is not
# polluted with a log written into the default ~/.rubino (issue #27).
def debug_log_path
  home_dir = Rubino::Config::Loader.default_home_path
  File.join(home_dir, "logs", "ruby_llm.log")
end
|
|
567
|
+
|
|
568
|
+
# Config entry for the current @provider, as returned by
# @config.provider_config (e.g. the providers.ollama.* settings).
def provider_cfg
  provider_name = @provider
  @config.provider_config(provider_name)
end
|
|
571
|
+
|
|
572
|
+
# Whether the provider declares openai_compatible: true in config (only a
# literal true counts). Used for ollama, lm-studio, vllm,
# text-generation-webui, etc.
def openai_compatible_provider?
  flag = provider_cfg["openai_compatible"]
  flag == true
end
|
|
577
|
+
|
|
578
|
+
# Whether the provider declares anthropic_compatible: true in config (only a
# literal true counts). Such providers are routed through ruby_llm's
# anthropic provider against a custom base_url (e.g. MiniMax's native
# Anthropic-Messages endpoint).
def anthropic_compatible_provider?
  flag = provider_cfg["anthropic_compatible"]
  flag == true
end
|
|
584
|
+
|
|
585
|
+
# True when the "hidden" render mode is active. The streaming emit no
|
|
586
|
+
# longer drops :thinking chunks on it — the CLI buffers them unrendered
|
|
587
|
+
# so Ctrl-O can reveal the last thought even in hidden mode (#76), and
|
|
588
|
+
# UI::API drops them at its own boundary. Still gates the bedrock-bearer
|
|
589
|
+
# client, which has no downstream reveal machinery.
|
|
590
|
+
def reasoning_hidden?
  render_mode = Config::ReasoningPrefs.mode(@config)
  render_mode == :hidden
end
|
|
593
|
+
|
|
594
|
+
# ── Streaming resilience helpers (issues #12, #22) ────────────────────
|
|
595
|
+
#
|
|
596
|
+
# NOTE: error-classification, backoff and api_max_retries retries moved to
|
|
597
|
+
# Agent::ModelCallRunner (Slice 4) — the single retry owner. The adapter no
|
|
598
|
+
# longer wraps calls in a retry loop; it only RAISES retryable errors (and
|
|
599
|
+
# pre-first-chunk stream drops) straight through for the runner to retry.
|
|
600
|
+
|
|
601
|
+
# Current reading of the monotonic clock, in float seconds (the unit is
# spelled out; it is also Process.clock_gettime's default).
def monotonic_now
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
|
|
604
|
+
|
|
605
|
+
# Stale-stream timeout in seconds: providers.<current>.stale_timeout_seconds,
# then providers.openai.stale_timeout_seconds, then 300.
def stale_chunk_timeout
  [@provider, "openai"].each do |provider_name|
    configured = @config.dig("providers", provider_name, "stale_timeout_seconds")
    return configured if configured
  end

  300
end
|
|
610
|
+
|
|
611
|
+
# Raises StreamStaleError when more than +stale_after+ seconds have passed
# since +last_chunk_at+ (a #monotonic_now reading). A +stale_after+ whose
# integer value is zero or negative disables the check.
def check_stream_stale!(last_chunk_at, stale_after)
  watchdog_enabled = stale_after.to_i.positive?
  return unless watchdog_enabled

  idle_for = monotonic_now - last_chunk_at
  raise StreamStaleError, "no chunk received for #{stale_after}s" unless idle_for <= stale_after
end
|
|
617
|
+
|
|
618
|
+
# Best-effort warning log: forwards +fields+ to Rubino.logger.warn and
# returns nil instead of raising if that fails with a StandardError.
def log_safely(**fields)
  logger = Rubino.logger
  logger.warn(**fields)
rescue StandardError
  # Logger may be uninitialized during early boot — swallow.
  nil
end
|
|
623
|
+
|
|
624
|
+
# Memoized BedrockBearerClient, built on first use from BEDROCK_API_KEY,
# BEDROCK_REGION (default "us-east-1"), the adapter's model id, the
# reasoning visibility setting and the event bus.
def bedrock_bearer_client
  return @bedrock_bearer_client if @bedrock_bearer_client

  @bedrock_bearer_client = BedrockBearerClient.new(
    api_key: ENV["BEDROCK_API_KEY"],
    region: ENV.fetch("BEDROCK_REGION", "us-east-1"),
    model_id: @model_id,
    show_reasoning: !reasoning_hidden?,
    event_bus: @event_bus
  )
end
|
|
634
|
+
|
|
635
|
+
# Content of the final entry in +messages+, read under the :content key
# first and the "content" key second.
def last_user_content(messages)
  final_message = messages.last
  symbol_keyed = final_message[:content]
  symbol_keyed || final_message["content"]
end
|
|
640
|
+
|
|
641
|
+
# Maps nil and empty collections to nil and returns anything else unchanged.
# ruby_llm's `with:` treats [] as "build a Content with no attachments",
# which is technically valid but pointless — passing nil makes it skip the
# Content wrapper entirely.
def presence(arr)
  return nil if arr.nil?
  return nil if arr.empty?

  arr
end
|
|
647
|
+
|
|
648
|
+
# Loads conversation history into the chat instance, excluding the last
# message (the caller sends that one itself).
#
# Tool result messages MUST carry their tool_call_id when reconstructed —
# Anthropic and Bedrock validate that every tool message's id matches a
# preceding assistant toolUse block, and reject the request with a 400
# otherwise. The DB already stores the id (Session::Message#to_context
# provides it); previously it was dropped on the floor here.
#
# For the same reason an assistant turn that carries tool calls is kept even
# when its text content is nil/empty: dropping it would leave the following
# tool result without its matching toolUse block. All other messages with
# blank content are still skipped.
#
# @param chat_instance the chat object: receives #with_instructions for
#   system messages and has messages appended to #messages
# @param messages [Array<Hash>] context messages with symbol or string keys
#   (role, content, tool_calls, tool_call_id)
# @return [Array<Hash>, nil] the loaded history slice, or nil when there is
#   nothing before the last message
def load_history(chat_instance, messages)
  history = messages[0..-2]
  return if history.empty?

  history.each do |msg|
    role = (msg[:role] || msg["role"]).to_sym
    content = msg[:content] || msg["content"]
    # Only assistant turns can carry tool calls; nil means "none".
    tool_calls = rebuild_tool_calls(msg[:tool_calls] || msg["tool_calls"]) if role == :assistant
    next if tool_calls.nil? && (content.nil? || content.empty?)

    case role
    when :system
      chat_instance.with_instructions(content, append: true)
    when :user
      chat_instance.messages << RubyLLM::Message.new(role: role, content: content)
    when :assistant
      chat_instance.messages << RubyLLM::Message.new(
        role: role,
        content: content,
        tool_calls: tool_calls
      )
    when :tool
      chat_instance.messages << RubyLLM::Message.new(
        role: role,
        content: content,
        tool_call_id: msg[:tool_call_id] || msg["tool_call_id"]
      )
    end
  end
end
|
|
684
|
+
|
|
685
|
+
# Prefill-to-continue (Slice 5, rung 4): seat the model's own interim text
|
|
686
|
+
# as a TRAILING assistant message so the next completion continues from it
|
|
687
|
+
# instead of starting a fresh turn. The spike confirmed ruby_llm honours a
|
|
688
|
+
# trailing assistant message on the /anthropic path (Anthropic's native
|
|
689
|
+
# "assistant turn prefill"): the response stream picks up where the seed
|
|
690
|
+
# left off, so a thinking-only model is pushed into visible content.
|
|
691
|
+
#
|
|
692
|
+
# No-op when the seed is blank — an empty prefill would add a degenerate
|
|
693
|
+
# empty assistant turn that strict providers reject, so we skip it and let
|
|
694
|
+
# the call behave as a plain re-issue.
|
|
695
|
+
def apply_prefill(chat_instance, prefill)
  seed_text = prefill.to_s

  # A blank seed is skipped: nothing is appended to the chat.
  unless seed_text.strip.empty?
    chat_instance.messages << RubyLLM::Message.new(role: :assistant, content: seed_text)
  end
end
|
|
701
|
+
|
|
702
|
+
# Rebuilds RubyLLM::ToolCall objects from the hashes persisted under
# assistant message metadata (symbol or string keys). Returns nil for
# empty/missing input so RubyLLM::Message treats it as a plain assistant
# turn.
#
# NOTE(review): this returns an Array; confirm the installed ruby_llm accepts
# an Array for Message#tool_calls — some versions appear to keep tool calls
# in a Hash keyed by call id.
def rebuild_tool_calls(raw)
  return nil if raw.nil?
  return nil if raw.respond_to?(:empty?) && raw.empty?

  Array(raw).map do |entry|
    fields = entry.is_a?(Hash) ? entry.transform_keys(&:to_sym) : entry
    RubyLLM::ToolCall.new(
      id: fields[:id],
      name: fields[:name],
      arguments: fields[:arguments] || {}
    )
  end
end
|
|
718
|
+
|
|
719
|
+
# Converts a ruby_llm response into an AdapterResponse (content, tool calls,
# token counts, model id, stop reason, thinking text and the raw response).
# Returns nil when there is no response.
def build_response(response)
  return unless response

  attributes = {
    content: response.content,
    tool_calls: extract_tool_calls(response),
    input_tokens: response.input_tokens,
    output_tokens: response.output_tokens,
    model_id: @model_id,
    stop_reason: extract_stop_reason(response),
    thinking: extract_thinking(response),
    raw: response
  }
  AdapterResponse.new(**attributes)
end
|
|
733
|
+
|
|
734
|
+
# Normalizes the provider's finish/stop reason to the boundary's
# :stop | :length | :tool_calls | nil vocabulary. Anthropic-compat (the
# MiniMax /anthropic path) carries it in the raw body as "stop_reason"
# ("end_turn"/"stop_sequence" ⇒ :stop, "max_tokens" ⇒ :length,
# "tool_use" ⇒ :tool_calls); OpenAI-style carries "finish_reason"
# ("stop" ⇒ :stop, "length" ⇒ :length, "tool_calls" ⇒ :tool_calls).
#
# Lookup order: top-level "stop_reason", top-level "finish_reason", then the
# first entry of "choices" — OpenAI Chat Completions bodies nest
# finish_reason there rather than at the top level.
#
# Returns nil when unreachable on this path — never fabricated. The
# streaming path generally does not surface a stop reason on ruby_llm
# today (see the boundary spike), so this stays nil there.
#
# @param response the ruby_llm response (or a double)
# @return [Symbol, nil]
def extract_stop_reason(response)
  body = raw_body(response)
  return nil unless body.is_a?(Hash)

  reason = body["stop_reason"] || body["finish_reason"]
  reason ||= first_choice_finish_reason(body["choices"])
  normalize_stop_reason(reason)
end

# finish_reason of the first element of an OpenAI-style "choices" array, or
# nil when +choices+ is not shaped that way.
def first_choice_finish_reason(choices)
  return nil unless choices.is_a?(Array)

  first_choice = choices.first
  first_choice["finish_reason"] if first_choice.is_a?(Hash)
end
|
|
749
|
+
|
|
750
|
+
# Maps a provider stop/finish reason (any object; compared via #to_s) onto
# :stop, :length or :tool_calls. Unknown or missing reasons give nil.
def normalize_stop_reason(reason)
  label = reason.to_s

  if %w[end_turn stop_sequence stop].include?(label)
    :stop
  elsif %w[max_tokens length].include?(label)
    :length
  elsif %w[tool_use tool_calls].include?(label)
    :tool_calls
  end
end
|
|
757
|
+
|
|
758
|
+
# The raw Anthropic/OpenAI response body hash, when ruby_llm exposes it
# (response.raw is a Faraday::Response; .body is the parsed JSON). nil on
# paths where it is unreachable (streaming, doubles, bedrock-bearer) and
# whenever reading it raises a StandardError.
def raw_body(response)
  raw = response.respond_to?(:raw) ? response.raw : nil
  return nil unless raw && raw.respond_to?(:body)

  raw.body
rescue StandardError
  nil
end
|
|
769
|
+
|
|
770
|
+
# Reasoning text/summary if ruby_llm surfaced it on the message; nil
# otherwise. Kept defensive — older builds carry no reasoning field.
#
# Reads +response.reasoning+ first and falls back to +response.thinking+,
# the reader this adapter's streaming path already uses on chunks. A value
# that responds to #text contributes its text; anything else is stringified.
#
# @param response the ruby_llm response (or a double)
# @return [String, nil] nil when neither reader yields a value or reading
#   raises a StandardError
def extract_thinking(response)
  thought = response.reasoning if response.respond_to?(:reasoning)
  thought ||= response.thinking if response.respond_to?(:thinking)
  return nil unless thought

  thought.respond_to?(:text) ? thought.text : thought.to_s
rescue StandardError
  nil
end
|
|
780
|
+
|
|
781
|
+
# Flattens the response's tool calls into plain hashes with :id, :name and
# :arguments. Returns [] when the response has no tool_calls reader or it
# yields nil.
#
# Accepts either a list of tool-call objects or a Hash whose values are the
# tool-call objects (ruby_llm may key tool calls by call id — with a Hash,
# mapping the pairs directly would call #id on a [key, value] Array).
#
# @param response the ruby_llm response (or a double)
# @return [Array<Hash>]
def extract_tool_calls(response)
  return [] unless response.respond_to?(:tool_calls) && response.tool_calls

  calls = response.tool_calls
  calls = calls.values if calls.is_a?(Hash)

  calls.map do |call|
    { id: call.id, name: call.name, arguments: call.arguments }
  end
end
|
|
792
|
+
end
|
|
793
|
+
end
|
|
794
|
+
end
|