octo-agent 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clacky/skills/commit/SKILL.md +423 -0
- data/.clacky/skills/gem-release/SKILL.md +199 -0
- data/.clacky/skills/gem-release/scripts/release.sh +304 -0
- data/.clacky/skills/oss-upload/SKILL.md +47 -0
- data/.octorules +106 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +76 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +92 -0
- data/Dockerfile +28 -0
- data/LICENSE.txt +22 -0
- data/POSITIONING.md +46 -0
- data/README.md +134 -0
- data/README_CN.md +134 -0
- data/Rakefile +34 -0
- data/benchmark/fixtures/sample_project/Gemfile +3 -0
- data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
- data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
- data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
- data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
- data/benchmark/results/EVALUATION_REPORT.md +165 -0
- data/benchmark/results/baseline_20260511_174424.json +128 -0
- data/benchmark/results/report_20260511_175256.json +271 -0
- data/benchmark/results/report_20260511_175444.json +271 -0
- data/benchmark/results/treatment_20260511_175103.json +130 -0
- data/benchmark/runner.rb +441 -0
- data/bin/octo +7 -0
- data/docs/agent-first-ui-design.md +77 -0
- data/docs/billing-system.md +318 -0
- data/docs/channel-architecture.md +235 -0
- data/docs/engineering-article.md +343 -0
- data/docs/session-skill-invocation.md +69 -0
- data/docs/time_machine_design.md +247 -0
- data/docs/ui2-architecture.md +124 -0
- data/homebrew/README.md +96 -0
- data/homebrew/openocto.rb +24 -0
- data/lib/octo/agent/hook_manager.rb +61 -0
- data/lib/octo/agent/llm_caller.rb +800 -0
- data/lib/octo/agent/memory_updater.rb +246 -0
- data/lib/octo/agent/message_compressor.rb +225 -0
- data/lib/octo/agent/message_compressor_helper.rb +869 -0
- data/lib/octo/agent/next_message_suggester.rb +215 -0
- data/lib/octo/agent/session_serializer.rb +685 -0
- data/lib/octo/agent/skill_auto_creator.rb +114 -0
- data/lib/octo/agent/skill_evolution.rb +61 -0
- data/lib/octo/agent/skill_manager.rb +466 -0
- data/lib/octo/agent/skill_reflector.rb +89 -0
- data/lib/octo/agent/system_prompt_builder.rb +101 -0
- data/lib/octo/agent/time_machine.rb +214 -0
- data/lib/octo/agent/tool_executor.rb +454 -0
- data/lib/octo/agent/tool_registry.rb +150 -0
- data/lib/octo/agent.rb +2180 -0
- data/lib/octo/agent_config.rb +989 -0
- data/lib/octo/agent_profile.rb +112 -0
- data/lib/octo/anthropic_stream_aggregator.rb +137 -0
- data/lib/octo/background_task_registry.rb +324 -0
- data/lib/octo/banner.rb +34 -0
- data/lib/octo/bedrock_stream_aggregator.rb +137 -0
- data/lib/octo/block_font.rb +331 -0
- data/lib/octo/cli.rb +968 -0
- data/lib/octo/client.rb +623 -0
- data/lib/octo/default_agents/SOUL.md +3 -0
- data/lib/octo/default_agents/USER.md +1 -0
- data/lib/octo/default_agents/base_prompt.md +66 -0
- data/lib/octo/default_agents/coding/profile.yml +2 -0
- data/lib/octo/default_agents/coding/system_prompt.md +67 -0
- data/lib/octo/default_agents/general/profile.yml +2 -0
- data/lib/octo/default_agents/general/system_prompt.md +16 -0
- data/lib/octo/default_parsers/doc_parser.rb +69 -0
- data/lib/octo/default_parsers/docx_parser.rb +188 -0
- data/lib/octo/default_parsers/pdf_parser.rb +120 -0
- data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/octo/default_parsers/pptx_parser.rb +140 -0
- data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
- data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
- data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
- data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
- data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
- data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
- data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
- data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
- data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
- data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
- data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
- data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
- data/lib/octo/default_skills/onboard/SKILL.md +578 -0
- data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
- data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
- data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
- data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
- data/lib/octo/default_skills/personal-website/publish.rb +235 -0
- data/lib/octo/default_skills/product-help/SKILL.md +123 -0
- data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
- data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
- data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
- data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
- data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
- data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
- data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
- data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
- data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
- data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
- data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
- data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
- data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
- data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
- data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
- data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
- data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
- data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
- data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
- data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
- data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
- data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
- data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
- data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
- data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
- data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
- data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
- data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
- data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
- data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
- data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
- data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
- data/lib/octo/idle_compression_timer.rb +115 -0
- data/lib/octo/json_ui_controller.rb +204 -0
- data/lib/octo/message_format/anthropic.rb +409 -0
- data/lib/octo/message_format/bedrock.rb +361 -0
- data/lib/octo/message_format/open_ai.rb +222 -0
- data/lib/octo/message_history.rb +373 -0
- data/lib/octo/openai_stream_aggregator.rb +130 -0
- data/lib/octo/plain_ui_controller.rb +166 -0
- data/lib/octo/providers.rb +534 -0
- data/lib/octo/server/browser_manager.rb +397 -0
- data/lib/octo/server/channel/adapters/base.rb +82 -0
- data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
- data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
- data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
- data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
- data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
- data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
- data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
- data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
- data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
- data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
- data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
- data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
- data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
- data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
- data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
- data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
- data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
- data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
- data/lib/octo/server/channel/channel_config.rb +178 -0
- data/lib/octo/server/channel/channel_manager.rb +468 -0
- data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
- data/lib/octo/server/channel.rb +33 -0
- data/lib/octo/server/discover.rb +77 -0
- data/lib/octo/server/epipe_safe_io.rb +105 -0
- data/lib/octo/server/http_server.rb +3554 -0
- data/lib/octo/server/scheduler.rb +317 -0
- data/lib/octo/server/server_master.rb +325 -0
- data/lib/octo/server/session_registry.rb +431 -0
- data/lib/octo/server/web_ui_controller.rb +487 -0
- data/lib/octo/session_manager.rb +385 -0
- data/lib/octo/skill.rb +466 -0
- data/lib/octo/skill_loader.rb +328 -0
- data/lib/octo/tools/base.rb +118 -0
- data/lib/octo/tools/browser.rb +625 -0
- data/lib/octo/tools/edit.rb +165 -0
- data/lib/octo/tools/file_reader.rb +549 -0
- data/lib/octo/tools/glob.rb +162 -0
- data/lib/octo/tools/grep.rb +356 -0
- data/lib/octo/tools/invoke_skill.rb +96 -0
- data/lib/octo/tools/list_tasks.rb +54 -0
- data/lib/octo/tools/redo_task.rb +41 -0
- data/lib/octo/tools/request_user_feedback.rb +84 -0
- data/lib/octo/tools/security.rb +333 -0
- data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
- data/lib/octo/tools/terminal/persistent_session.rb +268 -0
- data/lib/octo/tools/terminal/safe_rm.sh +106 -0
- data/lib/octo/tools/terminal/session_manager.rb +213 -0
- data/lib/octo/tools/terminal.rb +1828 -0
- data/lib/octo/tools/todo_manager.rb +374 -0
- data/lib/octo/tools/trash_manager.rb +388 -0
- data/lib/octo/tools/undo_task.rb +35 -0
- data/lib/octo/tools/web_fetch.rb +242 -0
- data/lib/octo/tools/web_search.rb +260 -0
- data/lib/octo/tools/write.rb +77 -0
- data/lib/octo/ui2/block_font.rb +10 -0
- data/lib/octo/ui2/components/base_component.rb +163 -0
- data/lib/octo/ui2/components/command_suggestions.rb +290 -0
- data/lib/octo/ui2/components/common_component.rb +96 -0
- data/lib/octo/ui2/components/inline_input.rb +226 -0
- data/lib/octo/ui2/components/input_area.rb +1338 -0
- data/lib/octo/ui2/components/message_component.rb +99 -0
- data/lib/octo/ui2/components/modal_component.rb +419 -0
- data/lib/octo/ui2/components/todo_area.rb +149 -0
- data/lib/octo/ui2/components/tool_component.rb +107 -0
- data/lib/octo/ui2/components/welcome_banner.rb +139 -0
- data/lib/octo/ui2/layout_manager.rb +807 -0
- data/lib/octo/ui2/line_editor.rb +363 -0
- data/lib/octo/ui2/markdown_renderer.rb +100 -0
- data/lib/octo/ui2/output_buffer.rb +370 -0
- data/lib/octo/ui2/progress_handle.rb +362 -0
- data/lib/octo/ui2/progress_indicator.rb +55 -0
- data/lib/octo/ui2/screen_buffer.rb +273 -0
- data/lib/octo/ui2/terminal_detector.rb +119 -0
- data/lib/octo/ui2/theme_manager.rb +85 -0
- data/lib/octo/ui2/themes/base_theme.rb +105 -0
- data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
- data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
- data/lib/octo/ui2/thinking_verbs.rb +26 -0
- data/lib/octo/ui2/ui_controller.rb +1625 -0
- data/lib/octo/ui2/view_renderer.rb +177 -0
- data/lib/octo/ui2.rb +40 -0
- data/lib/octo/ui_interface.rb +154 -0
- data/lib/octo/utils/arguments_parser.rb +191 -0
- data/lib/octo/utils/browser_detector.rb +195 -0
- data/lib/octo/utils/encoding.rb +92 -0
- data/lib/octo/utils/environment_detector.rb +140 -0
- data/lib/octo/utils/file_ignore_helper.rb +170 -0
- data/lib/octo/utils/file_processor.rb +601 -0
- data/lib/octo/utils/gitignore_parser.rb +154 -0
- data/lib/octo/utils/limit_stack.rb +152 -0
- data/lib/octo/utils/logger.rb +124 -0
- data/lib/octo/utils/login_shell.rb +72 -0
- data/lib/octo/utils/model_pricing.rb +646 -0
- data/lib/octo/utils/parser_manager.rb +165 -0
- data/lib/octo/utils/path_helper.rb +15 -0
- data/lib/octo/utils/scripts_manager.rb +59 -0
- data/lib/octo/utils/string_matcher.rb +158 -0
- data/lib/octo/utils/trash_directory.rb +112 -0
- data/lib/octo/utils/workspace_rules.rb +46 -0
- data/lib/octo/version.rb +5 -0
- data/lib/octo/web/app.css +7141 -0
- data/lib/octo/web/app.js +543 -0
- data/lib/octo/web/apple-touch-icon.png +0 -0
- data/lib/octo/web/auth.js +150 -0
- data/lib/octo/web/channels.js +276 -0
- data/lib/octo/web/datepicker.js +205 -0
- data/lib/octo/web/favicon.png +0 -0
- data/lib/octo/web/i18n.js +1073 -0
- data/lib/octo/web/icon-512.png +0 -0
- data/lib/octo/web/icon-dark.svg +25 -0
- data/lib/octo/web/icon.svg +29 -0
- data/lib/octo/web/index.html +871 -0
- data/lib/octo/web/marked.min.js +69 -0
- data/lib/octo/web/onboard.js +491 -0
- data/lib/octo/web/profile.js +442 -0
- data/lib/octo/web/sessions.js +4421 -0
- data/lib/octo/web/settings.js +913 -0
- data/lib/octo/web/sidebar.js +32 -0
- data/lib/octo/web/skills.js +885 -0
- data/lib/octo/web/tasks.js +297 -0
- data/lib/octo/web/theme.js +105 -0
- data/lib/octo/web/trash.js +343 -0
- data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
- data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
- data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/katex.min.css +1 -0
- data/lib/octo/web/vendor/katex/katex.min.js +1 -0
- data/lib/octo/web/version.js +449 -0
- data/lib/octo/web/weixin-qr.html +209 -0
- data/lib/octo/web/ws-dispatcher.js +357 -0
- data/lib/octo/web/ws.js +128 -0
- data/lib/octo.rb +145 -0
- data/scripts/build/build.sh +329 -0
- data/scripts/build/lib/apt.sh +56 -0
- data/scripts/build/lib/brew.sh +89 -0
- data/scripts/build/lib/colors.sh +17 -0
- data/scripts/build/lib/gem.sh +95 -0
- data/scripts/build/lib/mise.sh +125 -0
- data/scripts/build/lib/network.sh +157 -0
- data/scripts/build/lib/os.sh +57 -0
- data/scripts/build/lib/shell.sh +37 -0
- data/scripts/build/src/install.sh.cc +174 -0
- data/scripts/build/src/install_browser.sh.cc +101 -0
- data/scripts/build/src/install_full.sh.cc +290 -0
- data/scripts/build/src/install_rails_deps.sh.cc +145 -0
- data/scripts/build/src/install_system_deps.sh.cc +123 -0
- data/scripts/build/src/uninstall.sh.cc +101 -0
- data/scripts/install.ps1 +532 -0
- data/scripts/install.sh +567 -0
- data/scripts/install_browser.sh +479 -0
- data/scripts/install_full.sh +838 -0
- data/scripts/install_rails_deps.sh +746 -0
- data/scripts/install_system_deps.sh +518 -0
- data/scripts/uninstall.sh +287 -0
- data/sig/octo.rbs +4 -0
- metadata +614 -0
data/lib/octo/client.rb
ADDED
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module Octo
|
|
7
|
+
class Client
|
|
8
|
+
MAX_RETRIES = 10
|
|
9
|
+
RETRY_DELAY = 5 # seconds
|
|
10
|
+
|
|
11
|
+
def initialize(api_key, base_url:, model:, anthropic_format: false)
  # Stores the credentials, endpoint and model, then derives once the flags
  # that later pick the wire format (Bedrock / Anthropic / OpenAI) and decide
  # whether image content may be sent.
  @api_key  = api_key
  @base_url = base_url
  @model    = model

  # Bedrock detection is delegated to the Bedrock format module, which
  # inspects the key and the model name (ABSK key prefix or abs- model prefix).
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)

  # A single provider lookup feeds both the api-format and capability checks.
  # The id is kept so connection headers can be tuned per provider later.
  @provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)

  # A known provider may prefer the Anthropic format for this model; when it
  # does not (or the provider is unknown) the explicit constructor flag wins.
  auto_anthropic = @provider_id &&
                   Providers.anthropic_format_for_model?(@provider_id, @model)
  @use_anthropic_format = auto_anthropic || anthropic_format

  # Vision capability is resolved once; the OpenAI conversion layer receives
  # it so image blocks can be stripped for models that reject them.
  @vision_supported = Providers.supports?(@provider_id, :vision, model_name: @model)
end
|
|
40
|
+
|
|
41
|
+
# Returns true when the client is using the AWS Bedrock Converse API.
|
|
42
|
+
def bedrock?
  # Reports the Bedrock flag that was computed in the constructor.
  @use_bedrock
end
|
|
45
|
+
|
|
46
|
+
# Returns true when the client is talking directly to the Anthropic API
|
|
47
|
+
# (determined at construction time via the anthropic_format flag).
|
|
48
|
+
def anthropic_format?(model = nil)
  # Anthropic format applies only when it was selected at construction time
  # and the client is not on the Bedrock path. The `model` argument is
  # accepted for call-site compatibility and is not consulted.
  return @use_anthropic_format unless @use_anthropic_format

  !@use_bedrock
end
|
|
51
|
+
|
|
52
|
+
# ── Connection test ───────────────────────────────────────────────────────
|
|
53
|
+
|
|
54
|
+
# Test API connection by sending a minimal request.
|
|
55
|
+
# Returns { success: true } or { success: false, error: "..." }.
|
|
56
|
+
def test_connection(model:)
  # Sends a tiny "hi" request (16 max tokens) through whichever backend this
  # client uses and hands the HTTP response to handle_test_response.
  # Any raised error is converted into { success: false, error: "..." }.
  response =
    if bedrock?
      probe = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = probe }
    else
      # Anthropic-format and OpenAI-format endpoints accept the same probe payload.
      use_anthropic = anthropic_format?
      probe = { model: model, max_tokens: 16,
                messages: [{ role: "user", content: "hi" }] }.to_json
      if use_anthropic
        anthropic_connection.post(anthropic_messages_path) { |req| req.body = probe }
      else
        openai_connection.post("chat/completions") { |req| req.body = probe }
      end
    end

  handle_test_response(response)
rescue Faraday::Error => e
  # Transport-level failures get a dedicated prefix.
  { success: false, error: "Connection error: #{e.message}" }
rescue => e
  # Anything else is logged before being reported to the caller.
  Octo::Logger.error("[test_connection] #{e.class}: #{e.message}", error: e)
  { success: false, error: e.message }
end
|
|
78
|
+
|
|
79
|
+
# ── Simple (non-agent) helpers ────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
# Send a single string message and return the reply text.
|
|
82
|
+
def send_message(content, model:, max_tokens:)
  # Wraps a single string as a one-element user conversation and delegates
  # to send_messages, returning whatever it returns.
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end
|
|
86
|
+
|
|
87
|
+
# Send a messages array and return the reply text.
|
|
88
|
+
def send_messages(messages, model:, max_tokens:)
  # Posts a tool-less conversation to the active backend and returns the
  # result of that backend's parse_simple_* helper.
  if bedrock?
    request = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    reply = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = request.to_json }
    return parse_simple_bedrock_response(reply)
  end

  if anthropic_format?
    request = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false, base_url: @base_url)
    reply = anthropic_connection.post(anthropic_messages_path) { |req| req.body = request.to_json }
    return parse_simple_anthropic_response(reply)
  end

  # Default: OpenAI-style chat completions with the messages passed as-is.
  request = { model: model, max_tokens: max_tokens, messages: messages }
  reply = openai_connection.post("chat/completions") { |req| req.body = request.to_json }
  parse_simple_openai_response(reply)
end
|
|
103
|
+
|
|
104
|
+
# ── Agent main path ───────────────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
# Send messages with tool-calling support.
|
|
107
|
+
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
|
|
108
|
+
#
|
|
109
|
+
# Latency measurement:
|
|
110
|
+
# Because the current HTTP path is *non-streaming* (plain POST, response
|
|
111
|
+
# body read in one shot), TTFB (time to response headers) is not exposed
|
|
112
|
+
# by Faraday's default adapter without extra plumbing. What we CAN measure
|
|
113
|
+
# cheaply — and what users actually feel — is total request duration,
|
|
114
|
+
# which for a non-streaming call equals the time from "hit Enter" to
|
|
115
|
+
# "first token visible" (since we receive everything at once).
|
|
116
|
+
#
|
|
117
|
+
# So we record `duration_ms` as the authoritative number and alias it to
|
|
118
|
+
# `ttft_ms` for downstream consumers (status bar uses ttft_ms as its
|
|
119
|
+
# signal metric — see docs). When we migrate to streaming later, this
|
|
120
|
+
# same `ttft_ms` field will start carrying the *actual* first-token
|
|
121
|
+
# latency without any schema change.
|
|
122
|
+
# @param on_chunk [Proc, nil] optional streaming progress callback.
|
|
123
|
+
# Receives keyword args { input_tokens:, output_tokens: } with cumulative
|
|
124
|
+
# token counts. When nil, behaves exactly as the historical non-streaming
|
|
125
|
+
# path. When given but streaming is not yet wired for the active provider,
|
|
126
|
+
# a single synthetic invocation is fired after the response is received,
|
|
127
|
+
# so UI plumbing can be exercised end-to-end without the proxy work.
|
|
128
|
+
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil)
  # Sends a tool-enabled request through the active backend (Bedrock,
  # Anthropic-format or OpenAI-format) and returns that backend's response
  # hash with a :latency entry added.
  #
  # messages         - conversation; a deep clone is sent, the argument is not mutated here.
  # enable_caching   - honoured only when supports_prompt_caching?(model) agrees.
  # reasoning_effort - forwarded unchanged to the backend request builder.
  # on_chunk         - optional progress callback, invoked with keyword args.
  #                    When given, every backend takes its streaming path.
  caching_enabled = enable_caching && supports_prompt_caching?(model)
  cloned = deep_clone(messages)

  # All three backends stream whenever a callback is supplied, so the flag
  # depends only on on_chunk. (The former "synthetic callback" fallback for
  # non-streaming backends could never run and has been removed.)
  streaming_used = !on_chunk.nil?

  # Wrap the callback so the arrival time of the first chunk is recorded.
  first_chunk_at = nil
  wrapped_on_chunk = on_chunk && lambda do |**kwargs|
    first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
    on_chunk.call(**kwargs)
  end

  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  response =
    if bedrock?
      send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    elsif anthropic_format?
      send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    else
      send_openai_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    end
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  duration_ms = ((t1 - t0) * 1000).round
  # Without any chunk (non-streaming call) the total duration doubles as TTFT.
  ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
  output_tokens = response[:usage]&.dig(:completion_tokens).to_i
  # Tokens/second is reported only for replies of at least 10 tokens and a
  # non-zero duration; otherwise it is nil.
  tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil

  response[:latency] = {
    ttft_ms: ttft_ms,
    duration_ms: duration_ms,
    output_tokens: output_tokens,
    tps: tps,
    model: model,
    measured_at: Time.now.to_f,
    streaming: streaming_used
  }
  response
end
|
|
178
|
+
|
|
179
|
+
# Format tool results into canonical messages ready to append to @messages.
|
|
180
|
+
# Always returns canonical format (role: "tool") regardless of API type —
|
|
181
|
+
# conversion to API-native happens inside each send_*_request.
|
|
182
|
+
def format_tool_results(response, tool_results, model:)
  # Delegates tool-result formatting to the MessageFormat module that matches
  # the active backend. An empty result list short-circuits to [].
  # `model` is part of the signature but is not used here.
  return [] if tool_results.empty?

  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end
  formatter.format_tool_results(response, tool_results)
end
|
|
193
|
+
|
|
194
|
+
# ── Prompt-caching support ────────────────────────────────────────────────
|
|
195
|
+
|
|
196
|
+
# Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).
|
|
197
|
+
#
|
|
198
|
+
# Handles both direct model names (e.g. "claude-haiku-4-5") and
|
|
199
|
+
# Octo AI Bedrock proxy names with "abs-" prefix (e.g. "abs-claude-haiku-4-5").
|
|
200
|
+
#
|
|
201
|
+
# Why only Claude models:
|
|
202
|
+
# - MiniMax uses automatic server-side caching (no cache_control needed from client)
|
|
203
|
+
# - Kimi uses a proprietary prompt_cache_key param, not cache_control
|
|
204
|
+
# - MiMo has no documented caching API
|
|
205
|
+
# - Only Claude (direct, OpenRouter, or OctoAI Bedrock proxy) consumes our
|
|
206
|
+
# cache_control / cachePoint markers
|
|
207
|
+
def supports_prompt_caching?(model)
  # Decides from the model name alone whether cache markers should be sent.
  # The name is lower-cased and a leading "abs-" proxy prefix is dropped;
  # it must then mention "claude" and carry a 3.5+ or 4+ generation marker
  # in one of the supported spellings, e.g.
  #   claude-3.5-sonnet-...  claude-3-7-sonnet  claude-haiku-4-5  claude-sonnet-4-6
  normalized = model.to_s.downcase.sub(/^abs-/, "")

  normalized.include?("claude") &&
    normalized.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
end
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# ── Bedrock Converse request / response ───────────────────────────────────
|
|
219
|
+
|
|
220
|
+
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # Builds the Bedrock request body and either streams it (when on_chunk is
  # given) or performs one POST and parses the JSON reply.
  payload = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort)

  if on_chunk
    send_bedrock_stream_request(payload, model, on_chunk)
  else
    http_response = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }

    # Non-200 replies go to raise_error; the HTML check runs before JSON parsing.
    raise_error(http_response) if http_response.status != 200
    check_html_response(http_response)
    MessageFormat::Bedrock.parse_response(
      safe_json_parse(http_response.body, context: "LLM response")
    )
  end
end
|
|
231
|
+
|
|
232
|
+
# Streaming variant for Bedrock Converse.
|
|
233
|
+
# Posts to /model/{m}/converse-stream with stream:true; the proxy returns
|
|
234
|
+
# SSE frames whose `event` is the Bedrock event-type and whose `data` is
|
|
235
|
+
# the raw Bedrock event JSON. We accumulate frames into a synthetic
|
|
236
|
+
# non-streaming response and feed it back through the existing parser so
|
|
237
|
+
# downstream code is identical.
|
|
238
|
+
private def send_bedrock_stream_request(body, model, on_chunk)
  # Streaming Bedrock call: posts `body` with stream: true to the stream
  # endpoint, routes each drained SSE frame into a BedrockStreamAggregator,
  # and parses the aggregate with the regular Bedrock response parser.
  streaming_payload = body.merge(stream: true)
  aggregator = BedrockStreamAggregator.new(on_chunk: on_chunk)
  pending = +""

  # Called for every chunk of the response body as it arrives.
  chunk_handler = proc do |chunk, _bytes_received, _env|
    pending << chunk
    drain_sse_frames(pending) { |event, data| aggregator.handle(event, data) }
  end

  response = bedrock_connection.post(bedrock_stream_endpoint(model)) do |req|
    req.body = streaming_payload.to_json
    req.options.on_data = chunk_handler
  end

  if response.status != 200
    # Give raise_error the buffered bytes when the response body is empty.
    response.env.body = pending if response.body.to_s.empty?
    raise_error(response)
  end

  MessageFormat::Bedrock.parse_response(aggregator.to_h)
end
|
|
257
|
+
|
|
258
|
+
def parse_simple_bedrock_response(response)
  # Returns the concatenated text of a Bedrock reply. Non-200 responses are
  # handed to raise_error; content blocks without a "text" value are skipped.
  raise_error(response) if response.status != 200

  parsed = safe_json_parse(response.body, context: "LLM response")
  blocks = parsed.dig("output", "message", "content") || []
  texts = blocks.map { |block| block["text"] }.select(&:itself)
  texts.join("")
end
|
|
266
|
+
|
|
267
|
+
# ── Anthropic request / response ──────────────────────────────────────────
|
|
268
|
+
|
|
269
|
+
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # Builds the Anthropic-format request body and either streams it (when
  # on_chunk is given) or performs one POST and parses the JSON reply.

  # With caching on, the messages are first passed through apply_message_caching.
  messages = apply_message_caching(messages) if caching_enabled

  payload = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, base_url: @base_url)

  if on_chunk
    send_anthropic_stream_request(payload, on_chunk)
  else
    http_response = anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload.to_json }

    # Non-200 replies go to raise_error; the HTML check runs before JSON parsing.
    raise_error(http_response) if http_response.status != 200
    check_html_response(http_response)
    MessageFormat::Anthropic.parse_response(
      safe_json_parse(http_response.body, context: "LLM response")
    )
  end
end
|
|
283
|
+
|
|
284
|
+
# Streaming variant of the Anthropic Messages call.
# Sends the body with stream: true, feeds every received chunk through
# drain_sse_frames into an AnthropicStreamAggregator, and finally parses the
# aggregated hash with MessageFormat::Anthropic.parse_response.
#
# body     - request body hash (stream: true is merged in here)
# on_chunk - callback handed to the aggregator
# Raises whatever raise_error raises for a non-200 status.
private def send_anthropic_stream_request(body, on_chunk)
  stream_body = body.merge(stream: true)
  aggregator = AnthropicStreamAggregator.new(on_chunk: on_chunk)
  sse_buf = +""

  response = anthropic_connection.post(anthropic_messages_path) do |req|
    req.headers["Accept"] = "text/event-stream"
    req.body = stream_body.to_json
    req.options.on_data = proc do |chunk, _bytes_received, _env|
      sse_buf << chunk
      drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
    end
  end

  unless response.status == 200
    # With on_data set the payload is delivered to the callback, so
    # response.body can be empty. Restore the buffered text (same handling
    # as the Bedrock streaming path) so raise_error can report the
    # provider's actual error message.
    response.env.body = sse_buf if response.body.to_s.empty?
    raise_error(response)
  end
  MessageFormat::Anthropic.parse_response(aggregator.to_h)
end
|
|
301
|
+
|
|
302
|
+
# Extract the plain-text reply from a non-streaming Anthropic response.
# Non-200 statuses go to raise_error; otherwise the "text" of every content
# block whose "type" is "text" is concatenated.
def parse_simple_anthropic_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  text_parts = []
  (payload["content"] || []).each do |block|
    text_parts << block["text"] if block["type"] == "text"
  end
  text_parts.join("")
end
|
|
307
|
+
|
|
308
|
+
# ── OpenAI request / response ─────────────────────────────────────────────
|
|
309
|
+
|
|
310
|
+
# Build and send an OpenAI-compatible chat completion request.
# When on_chunk is given the call is delegated to the streaming variant;
# otherwise a single POST is made and the JSON body is parsed through
# MessageFormat::OpenAI.parse_response.
def send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # cache_control markers are applied here as well: OpenRouter proxies Claude
  # with the same cache_control field convention as Anthropic direct.
  messages = apply_message_caching(messages) if caching_enabled

  format_options = { vision_supported: @vision_supported, reasoning_effort: reasoning_effort }
  body = MessageFormat::OpenAI.build_request_body(
    messages, model, tools, max_tokens, caching_enabled, **format_options
  )

  if on_chunk
    send_openai_stream_request(body, on_chunk)
  else
    response = openai_connection.post("chat/completions") do |req|
      req.body = body.to_json
    end

    raise_error(response) if response.status != 200
    check_html_response(response)

    payload = safe_json_parse(response.body, context: "LLM response")
    MessageFormat::OpenAI.parse_response(payload)
  end
end
|
|
330
|
+
|
|
331
|
+
# Streaming variant for OpenAI-compatible chat completions (DeepSeek/OpenRouter
|
|
332
|
+
# via platform/llm_proxy). Uses Faraday's on_data hook to consume SSE frames,
|
|
333
|
+
# accumulates them, and reconstructs the non-streaming JSON response shape so
|
|
334
|
+
# MessageFormat::OpenAI.parse_response works unchanged.
|
|
335
|
+
private def send_openai_stream_request(body, on_chunk)
  # include_usage asks the server to append a usage frame to the stream.
  stream_body = body.merge(stream: true, stream_options: { include_usage: true })
  aggregator = OpenAIStreamAggregator.new(on_chunk: on_chunk)
  sse_buf = +""

  response = openai_connection.post("chat/completions") do |req|
    req.body = stream_body.to_json
    req.options.on_data = proc do |chunk, _bytes_received, _env|
      sse_buf << chunk
      # The aggregator only needs the data payload; the event name is unused.
      drain_sse_frames(sse_buf) { |_event, data| aggregator.handle(data) }
    end
  end

  unless response.status == 200
    # With on_data set the payload is delivered to the callback, so
    # response.body can be empty. Restore the buffered text (same handling
    # as the Bedrock streaming path) so raise_error can report the
    # provider's actual error message.
    response.env.body = sse_buf if response.body.to_s.empty?
    raise_error(response)
  end
  MessageFormat::OpenAI.parse_response(aggregator.to_h)
end
|
|
351
|
+
|
|
352
|
+
# Extract the assistant text from a non-streaming OpenAI-style response.
# Non-200 statuses go to raise_error. Returns choices[0].message.content, or
# nil when that path is absent (missing/empty "choices", no "message")
# instead of failing with NoMethodError on a structurally odd 200 response.
def parse_simple_openai_response(response)
  raise_error(response) unless response.status == 200

  parsed_body = safe_json_parse(response.body, context: "LLM response")
  choices = parsed_body["choices"] if parsed_body.is_a?(Hash)
  first_choice = choices.first if choices.is_a?(Array)
  message = first_choice["message"] if first_choice.is_a?(Hash)
  message.is_a?(Hash) ? message["content"] : nil
end
|
|
357
|
+
|
|
358
|
+
# ── Prompt caching helpers ────────────────────────────────────────────────
|
|
359
|
+
|
|
360
|
+
# Add cache_control markers to the last 2 messages in the array.
|
|
361
|
+
#
|
|
362
|
+
# Why 2 markers:
|
|
363
|
+
# Turn N — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
|
|
364
|
+
# Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
|
|
365
|
+
# messages[-1] is the new message (marked) → cache WRITE for Turn N+2
|
|
366
|
+
#
|
|
367
|
+
# With only 1 marker (old behavior): Turn N marks messages[-1]; in Turn N+1 that same
|
|
368
|
+
# message is now [-2] and carries no marker → server sees a different prefix → cache MISS.
|
|
369
|
+
#
|
|
370
|
+
# Compression instructions (system_injected: true) are skipped — we never want to cache
|
|
371
|
+
# those ephemeral injection messages.
|
|
372
|
+
def apply_message_caching(messages)
  return messages if messages.empty?

  # Scan from the tail and keep the first two positions that are not
  # compression instructions; the scan stops once two are found.
  marked_positions = (messages.length - 1).downto(0).lazy
                                          .reject { |pos| is_compression_instruction?(messages[pos]) }
                                          .first(2)

  # Return a new array in which only the chosen messages carry the marker.
  messages.each_with_index.map do |message, pos|
    if marked_positions.include?(pos)
      add_cache_control_to_message(message)
    else
      message
    end
  end
end
|
|
387
|
+
|
|
388
|
+
# Wrap or extend the message's content with a cache_control marker.
|
|
389
|
+
def add_cache_control_to_message(msg)
  marker = { cache_control: { type: "ephemeral" } }
  body = msg[:content]

  if body.is_a?(String)
    # Plain text is wrapped into a single marked text block.
    msg.merge(content: [{ type: "text", text: body }.merge(marker)])
  elsif body.is_a?(Array)
    # Only the final block of an existing block list receives the marker.
    final_pos = body.length - 1
    marked_blocks = body.each_with_index.map do |part, pos|
      pos == final_pos ? part.merge(marker) : part
    end
    msg.merge(content: marked_blocks)
  else
    # Any other content shape is returned untouched.
    msg
  end
end
|
|
405
|
+
|
|
406
|
+
# True only for Hash messages whose :system_injected flag is exactly true.
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end
|
|
409
|
+
|
|
410
|
+
# ── HTTP connections ──────────────────────────────────────────────────────
|
|
411
|
+
|
|
412
|
+
# Bedrock Converse API endpoint path for a given model ID.
|
|
413
|
+
def bedrock_endpoint(model)
  # The model ID is placed into the path as-is.
  format("/model/%s/converse", model)
end
|
|
416
|
+
|
|
417
|
+
# Bedrock Converse streaming endpoint path.
|
|
418
|
+
private def bedrock_stream_endpoint(model)
  # The model ID is placed into the path as-is.
  format("/model/%s/converse-stream", model)
end
|
|
421
|
+
|
|
422
|
+
# Pull complete SSE frames out of a buffer and yield them as (event, data).
|
|
423
|
+
# An SSE frame ends at a blank line ("\n\n"); incomplete trailing data
|
|
424
|
+
# stays in the buffer for the next chunk. Frames without an explicit
|
|
425
|
+
# `event:` line use the default "message" type per the SSE spec.
|
|
426
|
+
private def drain_sse_frames(buf)
  # A frame ends at a blank line. Servers and proxies may terminate lines
  # with CRLF as well as LF, so both spellings of the blank line are
  # accepted; matching only "\n\n" would leave a CRLF stream undrained.
  frame_terminator = /\r?\n\r?\n/

  while (boundary = frame_terminator.match(buf))
    # Remove the complete frame (terminator included) from the buffer;
    # whatever follows stays there for the next chunk.
    frame = buf.slice!(0, boundary.end(0))
    event = "message"
    data_lines = []

    frame.each_line do |line|
      line = line.chomp # drops "\n" and "\r\n" alike
      if line.start_with?("event:")
        event = line.sub(/^event:\s*/, "")
      elsif line.start_with?("data:")
        data_lines << line.sub(/^data:\s*/, "")
      end
    end

    # Frames without any data line (comments, keep-alives) are not yielded.
    next if data_lines.empty?

    yield event, data_lines.join("\n")
  end
end
|
|
443
|
+
|
|
444
|
+
# Memoized Faraday connection used for Bedrock requests.
# Sends JSON with a Bearer token built from @api_key against @base_url, with
# a 300 s request timeout and a 10 s connect timeout.
def bedrock_connection
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"] = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout = 300
    conn.options.open_timeout = 10
    # Every request carries the API key, so the server certificate must be
    # verified; otherwise any on-path attacker can read the credential.
    # Setting OCTO_SSL_NO_VERIFY=1 is an explicit opt-out that restores the
    # earlier unverified behaviour (e.g. for a self-signed gateway).
    conn.ssl.verify = ENV["OCTO_SSL_NO_VERIFY"] != "1"
    conn.adapter Faraday.default_adapter
  end
end
|
|
454
|
+
|
|
455
|
+
# Memoized Faraday connection used for OpenAI-compatible requests.
# Sends JSON with a Bearer token built from @api_key against @base_url, with
# a 300 s request timeout and a 10 s connect timeout.
def openai_connection
  @openai_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"] = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout = 300
    conn.options.open_timeout = 10
    # Every request carries the API key, so the server certificate must be
    # verified; otherwise any on-path attacker can read the credential.
    # Setting OCTO_SSL_NO_VERIFY=1 is an explicit opt-out that restores the
    # earlier unverified behaviour (e.g. for a self-signed gateway).
    conn.ssl.verify = ENV["OCTO_SSL_NO_VERIFY"] != "1"
    conn.adapter Faraday.default_adapter
  end
end
|
|
465
|
+
|
|
466
|
+
# Memoized Faraday connection used for Anthropic-protocol requests.
# Always sends x-api-key and anthropic-version; adds provider-specific
# headers depending on @provider_id (see the comments below).
def anthropic_connection
  @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"] = "application/json"
    conn.headers["x-api-key"] = @api_key
    conn.headers["anthropic-version"] = "2023-06-01"
    conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
    # OpenRouter's /v1/messages endpoint authenticates with a Bearer
    # token (the OpenRouter API key), not Anthropic's x-api-key. We send
    # both so the same connection code works for direct Anthropic and
    # for OpenRouter-proxied Claude — each endpoint ignores the header
    # it doesn't recognise.
    if @provider_id == "openrouter"
      conn.headers["Authorization"] = "Bearer #{@api_key}"
    end
    # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
    # prefix whitelist limited to first-party coding agents (Kimi CLI,
    # Claude Code, Roo Code, Kilo Code, ...). Requests with the default
    # Faraday UA are rejected with HTTP 403 access_terminated_error,
    # despite a valid API key. We send a Claude Code-shaped UA here
    # because octo talks to this endpoint over the same Anthropic
    # /v1/messages protocol that Claude Code uses, so the UA matches the
    # wire-level behaviour. Hardcoding rather than exposing as a config
    # field is intentional: the only UAs known to pass the gate are the
    # whitelisted-client formats, and the project's preset registry is
    # the single source of truth for provider-specific quirks (mirroring
    # how the openrouter Bearer-fallback above is hardcoded).
    if @provider_id == "kimi-coding"
      conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
    end
    conn.options.timeout = 300
    conn.options.open_timeout = 10
    # Every request carries the API key, so the server certificate must be
    # verified; otherwise any on-path attacker can read the credential.
    # Setting OCTO_SSL_NO_VERIFY=1 is an explicit opt-out that restores the
    # earlier unverified behaviour (e.g. for a self-signed gateway).
    conn.ssl.verify = ENV["OCTO_SSL_NO_VERIFY"] != "1"
    conn.adapter Faraday.default_adapter
  end
end
|
|
501
|
+
|
|
502
|
+
# Correct relative path for the Anthropic /v1/messages endpoint, accounting
|
|
503
|
+
# for whether the configured base_url already includes a "/v1" segment.
|
|
504
|
+
#
|
|
505
|
+
# Examples:
|
|
506
|
+
# base_url = "https://api.anthropic.com" → "v1/messages"
|
|
507
|
+
# base_url = "https://openrouter.ai/api/v1" → "messages"
|
|
508
|
+
# base_url = "https://openrouter.ai/api/v1/" → "messages"
|
|
509
|
+
#
|
|
510
|
+
# Without this, OpenRouter would receive POST /api/v1/v1/messages → 404
|
|
511
|
+
# (HTML error page), which bubbles up as the infamous
|
|
512
|
+
# "Invalid API endpoint or server error (received HTML instead of JSON)".
|
|
513
|
+
private def anthropic_messages_path
  # Ignore one trailing slash before checking for an existing "/v1" suffix.
  normalized = @base_url.to_s.chomp("/")
  return "messages" if normalized.end_with?("/v1")

  "v1/messages"
end
|
|
517
|
+
|
|
518
|
+
# ── Error handling ────────────────────────────────────────────────────────
|
|
519
|
+
|
|
520
|
+
# Turn a connectivity-test response into a result hash:
# { success: true } for HTTP 200, otherwise { success: false, error: ... }
# with the message taken from extract_error_message.
def handle_test_response(response)
  if response.status == 200
    { success: true }
  else
    # Best-effort parse: a body that cannot be parsed is treated as "no JSON".
    parsed = begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
    { success: false, error: extract_error_message(parsed, response.body) }
  end
end
|
|
526
|
+
|
|
527
|
+
# Translate a non-successful HTTP response into the matching error class.
#
# response - object responding to #status and #body
# Raises RetryableError for throttling/availability problems (429, 5xx and
# 400s whose message mentions ThrottlingException/unavailable/quota),
# BadRequestError for other 400s, AgentError for everything else.
def raise_error(response)
  # Best-effort parse: a body that cannot be parsed is treated as "no JSON".
  error_body = begin
    JSON.parse(response.body)
  rescue StandardError
    nil
  end
  # The extracted message can be nil (e.g. nil body) or a non-String JSON
  # value; normalise it so the pattern checks below cannot fail themselves.
  error_message = extract_error_message(error_body, response.body).to_s

  case response.status
  when 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability issues in 400.
    # However, some proxy/relay providers do — so we inspect the message first.
    # Also, Bedrock returns ThrottlingException as 400 instead of 429.
    if error_message.match?(/ThrottlingException|unavailable|quota/i)
      hint = error_message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "[LLM] Rate limit or service issue: #{error_message}#{hint}"
    end

    # True bad request — our message was malformed. Roll back history so the
    # broken message is not replayed on the next user turn.
    raise BadRequestError, "[LLM] Client request error: #{error_message}"
  when 401 then raise AgentError, "[LLM] Invalid API key"
  when 402 then raise AgentError, "[LLM] Billing or payment issue (possibly out of credits): #{error_message}"
  when 403 then raise AgentError, "[LLM] Access denied: #{error_message}"
  when 404 then raise AgentError, "[LLM] API endpoint not found: #{error_message}"
  when 429 then raise RetryableError, "[LLM] Rate limit exceeded, please wait a moment"
  when 500..599 then raise RetryableError, "[LLM] Service temporarily unavailable (#{response.status}), retrying..."
  else raise AgentError, "[LLM] Unexpected error (#{response.status}): #{error_message}"
  end
end
|
|
553
|
+
|
|
554
|
+
# Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)
|
|
555
|
+
def check_html_response(response)
  html_openers = ["<!DOCTYPE", "<!doctype", "<html", "<HTML"]
  # Leading whitespace is ignored before looking at how the body starts.
  leading = response.body.to_s.lstrip
  return unless html_openers.any? { |opener| leading.start_with?(opener) }

  raise RetryableError, "[LLM] Service temporarily unavailable (received HTML error page), retrying..."
end
|
|
561
|
+
|
|
562
|
+
# Pick the most useful human-readable message out of an error response.
#
# error_body - parsed JSON (Hash) or anything else when parsing failed
# raw_body   - the unparsed response body
# Returns a fixed explanation for HTML bodies, raw_body itself when
# error_body is not a Hash, otherwise the first available of:
# upstreamMessage, error.message, message, error (String), or the first
# 201 characters of raw_body followed by "..." when it is longer than 200.
def extract_error_message(error_body, raw_body)
  # Case-insensitive so "<!doctype html>" / "<HTML>" pages are recognised
  # too, matching the variants check_html_response looks for.
  if raw_body.is_a?(String) && raw_body.match?(/\A\s*<(?:!doctype|html)/i)
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  return raw_body unless error_body.is_a?(Hash)

  # Only a non-empty String counts; other JSON types fall through to the
  # next candidate instead of failing on #empty?.
  upstream = error_body["upstreamMessage"]
  return upstream if upstream.is_a?(String) && !upstream.empty?

  nested = error_body["error"]
  return nested["message"] if nested.is_a?(Hash) && !nested["message"].nil?
  return error_body["message"] unless error_body["message"].nil?
  return nested if nested.is_a?(String)

  text = raw_body.to_s
  text.length > 200 ? "#{text[0..200]}..." : text
end
|
|
574
|
+
|
|
575
|
+
# Parse JSON with user-friendly error messages.
|
|
576
|
+
# @param json_string [String] the JSON string to parse
|
|
577
|
+
# @param context [String] a description of what's being parsed (e.g., "LLM response")
|
|
578
|
+
# @return [Hash, Array] the parsed JSON
|
|
579
|
+
# @raise [RetryableError] if parsing fails (indicates a malformed LLM response)
|
|
580
|
+
def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError, TypeError
  # TypeError covers a nil (or otherwise non-String) body, which JSON.parse
  # rejects before parsing starts; it is reported like any other unusable
  # response instead of escaping as a raw TypeError.
  #
  # Transform technical JSON parsing errors into user-friendly messages.
  # These are usually caused by:
  # 1. Incomplete/truncated LLM response (network issue, timeout)
  # 2. LLM service returned malformed data
  # 3. Proxy/gateway corruption
  raw = json_string.to_s
  error_detail = if raw.strip.empty?
                   "received empty response"
                 elsif raw.bytesize > 500
                   "response was truncated or malformed (#{raw.bytesize} bytes received)"
                 else
                   "response format is invalid"
                 end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                        "This usually means the AI service returned incomplete or corrupted data. " \
                        "The request will be retried automatically."
end
|
|
600
|
+
|
|
601
|
+
# ── Streaming helpers ─────────────────────────────────────────────────────
|
|
602
|
+
|
|
603
|
+
# Invoke the user's on_chunk callback in a way that never lets a callback
|
|
604
|
+
# error tear down the LLM request. Streaming chunks are best-effort UI
|
|
605
|
+
# updates; a buggy progress renderer must not abort an in-flight call.
|
|
606
|
+
private def safe_invoke_on_chunk(on_chunk, **kwargs)
  begin
    # No callback configured: nothing to do.
    on_chunk.call(**kwargs) if on_chunk
  rescue StandardError => error
    # A failing callback is logged and otherwise ignored.
    Octo::Logger.warn("[on_chunk] callback raised #{error.class}: #{error.message}")
  end
end
|
|
612
|
+
|
|
613
|
+
# ── Utilities ─────────────────────────────────────────────────────────────
|
|
614
|
+
|
|
615
|
+
# Recursively copy nested Hash/Array containers.
# Any other object (strings, numbers, ...) is returned as the same instance.
def deep_clone(obj)
  if obj.is_a?(Hash)
    copy = {}
    obj.each { |key, value| copy[key] = deep_clone(value) }
    copy
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end
|
|
622
|
+
end
|
|
623
|
+
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
(No user profile configured yet. To personalize, create ~/.octo/agents/USER.md)
|