octo-agent 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clacky/skills/commit/SKILL.md +423 -0
- data/.clacky/skills/gem-release/SKILL.md +199 -0
- data/.clacky/skills/gem-release/scripts/release.sh +304 -0
- data/.clacky/skills/oss-upload/SKILL.md +47 -0
- data/.octorules +106 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +76 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +92 -0
- data/Dockerfile +28 -0
- data/LICENSE.txt +22 -0
- data/POSITIONING.md +46 -0
- data/README.md +134 -0
- data/README_CN.md +134 -0
- data/Rakefile +34 -0
- data/benchmark/fixtures/sample_project/Gemfile +3 -0
- data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
- data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
- data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
- data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
- data/benchmark/results/EVALUATION_REPORT.md +165 -0
- data/benchmark/results/baseline_20260511_174424.json +128 -0
- data/benchmark/results/report_20260511_175256.json +271 -0
- data/benchmark/results/report_20260511_175444.json +271 -0
- data/benchmark/results/treatment_20260511_175103.json +130 -0
- data/benchmark/runner.rb +441 -0
- data/bin/octo +7 -0
- data/docs/agent-first-ui-design.md +77 -0
- data/docs/billing-system.md +318 -0
- data/docs/channel-architecture.md +235 -0
- data/docs/engineering-article.md +343 -0
- data/docs/session-skill-invocation.md +69 -0
- data/docs/time_machine_design.md +247 -0
- data/docs/ui2-architecture.md +124 -0
- data/homebrew/README.md +96 -0
- data/homebrew/openocto.rb +24 -0
- data/lib/octo/agent/hook_manager.rb +61 -0
- data/lib/octo/agent/llm_caller.rb +800 -0
- data/lib/octo/agent/memory_updater.rb +246 -0
- data/lib/octo/agent/message_compressor.rb +225 -0
- data/lib/octo/agent/message_compressor_helper.rb +869 -0
- data/lib/octo/agent/next_message_suggester.rb +215 -0
- data/lib/octo/agent/session_serializer.rb +685 -0
- data/lib/octo/agent/skill_auto_creator.rb +114 -0
- data/lib/octo/agent/skill_evolution.rb +61 -0
- data/lib/octo/agent/skill_manager.rb +466 -0
- data/lib/octo/agent/skill_reflector.rb +89 -0
- data/lib/octo/agent/system_prompt_builder.rb +101 -0
- data/lib/octo/agent/time_machine.rb +214 -0
- data/lib/octo/agent/tool_executor.rb +454 -0
- data/lib/octo/agent/tool_registry.rb +150 -0
- data/lib/octo/agent.rb +2180 -0
- data/lib/octo/agent_config.rb +989 -0
- data/lib/octo/agent_profile.rb +112 -0
- data/lib/octo/anthropic_stream_aggregator.rb +137 -0
- data/lib/octo/background_task_registry.rb +324 -0
- data/lib/octo/banner.rb +34 -0
- data/lib/octo/bedrock_stream_aggregator.rb +137 -0
- data/lib/octo/block_font.rb +331 -0
- data/lib/octo/cli.rb +968 -0
- data/lib/octo/client.rb +623 -0
- data/lib/octo/default_agents/SOUL.md +3 -0
- data/lib/octo/default_agents/USER.md +1 -0
- data/lib/octo/default_agents/base_prompt.md +66 -0
- data/lib/octo/default_agents/coding/profile.yml +2 -0
- data/lib/octo/default_agents/coding/system_prompt.md +67 -0
- data/lib/octo/default_agents/general/profile.yml +2 -0
- data/lib/octo/default_agents/general/system_prompt.md +16 -0
- data/lib/octo/default_parsers/doc_parser.rb +69 -0
- data/lib/octo/default_parsers/docx_parser.rb +188 -0
- data/lib/octo/default_parsers/pdf_parser.rb +120 -0
- data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/octo/default_parsers/pptx_parser.rb +140 -0
- data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
- data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
- data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
- data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
- data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
- data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
- data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
- data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
- data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
- data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
- data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
- data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
- data/lib/octo/default_skills/onboard/SKILL.md +578 -0
- data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
- data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
- data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
- data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
- data/lib/octo/default_skills/personal-website/publish.rb +235 -0
- data/lib/octo/default_skills/product-help/SKILL.md +123 -0
- data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
- data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
- data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
- data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
- data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
- data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
- data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
- data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
- data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
- data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
- data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
- data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
- data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
- data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
- data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
- data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
- data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
- data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
- data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
- data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
- data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
- data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
- data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
- data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
- data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
- data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
- data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
- data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
- data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
- data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
- data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
- data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
- data/lib/octo/idle_compression_timer.rb +115 -0
- data/lib/octo/json_ui_controller.rb +204 -0
- data/lib/octo/message_format/anthropic.rb +409 -0
- data/lib/octo/message_format/bedrock.rb +361 -0
- data/lib/octo/message_format/open_ai.rb +222 -0
- data/lib/octo/message_history.rb +373 -0
- data/lib/octo/openai_stream_aggregator.rb +130 -0
- data/lib/octo/plain_ui_controller.rb +166 -0
- data/lib/octo/providers.rb +534 -0
- data/lib/octo/server/browser_manager.rb +397 -0
- data/lib/octo/server/channel/adapters/base.rb +82 -0
- data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
- data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
- data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
- data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
- data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
- data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
- data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
- data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
- data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
- data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
- data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
- data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
- data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
- data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
- data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
- data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
- data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
- data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
- data/lib/octo/server/channel/channel_config.rb +178 -0
- data/lib/octo/server/channel/channel_manager.rb +468 -0
- data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
- data/lib/octo/server/channel.rb +33 -0
- data/lib/octo/server/discover.rb +77 -0
- data/lib/octo/server/epipe_safe_io.rb +105 -0
- data/lib/octo/server/http_server.rb +3554 -0
- data/lib/octo/server/scheduler.rb +317 -0
- data/lib/octo/server/server_master.rb +325 -0
- data/lib/octo/server/session_registry.rb +431 -0
- data/lib/octo/server/web_ui_controller.rb +487 -0
- data/lib/octo/session_manager.rb +385 -0
- data/lib/octo/skill.rb +466 -0
- data/lib/octo/skill_loader.rb +328 -0
- data/lib/octo/tools/base.rb +118 -0
- data/lib/octo/tools/browser.rb +625 -0
- data/lib/octo/tools/edit.rb +165 -0
- data/lib/octo/tools/file_reader.rb +549 -0
- data/lib/octo/tools/glob.rb +162 -0
- data/lib/octo/tools/grep.rb +356 -0
- data/lib/octo/tools/invoke_skill.rb +96 -0
- data/lib/octo/tools/list_tasks.rb +54 -0
- data/lib/octo/tools/redo_task.rb +41 -0
- data/lib/octo/tools/request_user_feedback.rb +84 -0
- data/lib/octo/tools/security.rb +333 -0
- data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
- data/lib/octo/tools/terminal/persistent_session.rb +268 -0
- data/lib/octo/tools/terminal/safe_rm.sh +106 -0
- data/lib/octo/tools/terminal/session_manager.rb +213 -0
- data/lib/octo/tools/terminal.rb +1828 -0
- data/lib/octo/tools/todo_manager.rb +374 -0
- data/lib/octo/tools/trash_manager.rb +388 -0
- data/lib/octo/tools/undo_task.rb +35 -0
- data/lib/octo/tools/web_fetch.rb +242 -0
- data/lib/octo/tools/web_search.rb +260 -0
- data/lib/octo/tools/write.rb +77 -0
- data/lib/octo/ui2/block_font.rb +10 -0
- data/lib/octo/ui2/components/base_component.rb +163 -0
- data/lib/octo/ui2/components/command_suggestions.rb +290 -0
- data/lib/octo/ui2/components/common_component.rb +96 -0
- data/lib/octo/ui2/components/inline_input.rb +226 -0
- data/lib/octo/ui2/components/input_area.rb +1338 -0
- data/lib/octo/ui2/components/message_component.rb +99 -0
- data/lib/octo/ui2/components/modal_component.rb +419 -0
- data/lib/octo/ui2/components/todo_area.rb +149 -0
- data/lib/octo/ui2/components/tool_component.rb +107 -0
- data/lib/octo/ui2/components/welcome_banner.rb +139 -0
- data/lib/octo/ui2/layout_manager.rb +807 -0
- data/lib/octo/ui2/line_editor.rb +363 -0
- data/lib/octo/ui2/markdown_renderer.rb +100 -0
- data/lib/octo/ui2/output_buffer.rb +370 -0
- data/lib/octo/ui2/progress_handle.rb +362 -0
- data/lib/octo/ui2/progress_indicator.rb +55 -0
- data/lib/octo/ui2/screen_buffer.rb +273 -0
- data/lib/octo/ui2/terminal_detector.rb +119 -0
- data/lib/octo/ui2/theme_manager.rb +85 -0
- data/lib/octo/ui2/themes/base_theme.rb +105 -0
- data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
- data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
- data/lib/octo/ui2/thinking_verbs.rb +26 -0
- data/lib/octo/ui2/ui_controller.rb +1625 -0
- data/lib/octo/ui2/view_renderer.rb +177 -0
- data/lib/octo/ui2.rb +40 -0
- data/lib/octo/ui_interface.rb +154 -0
- data/lib/octo/utils/arguments_parser.rb +191 -0
- data/lib/octo/utils/browser_detector.rb +195 -0
- data/lib/octo/utils/encoding.rb +92 -0
- data/lib/octo/utils/environment_detector.rb +140 -0
- data/lib/octo/utils/file_ignore_helper.rb +170 -0
- data/lib/octo/utils/file_processor.rb +601 -0
- data/lib/octo/utils/gitignore_parser.rb +154 -0
- data/lib/octo/utils/limit_stack.rb +152 -0
- data/lib/octo/utils/logger.rb +124 -0
- data/lib/octo/utils/login_shell.rb +72 -0
- data/lib/octo/utils/model_pricing.rb +646 -0
- data/lib/octo/utils/parser_manager.rb +165 -0
- data/lib/octo/utils/path_helper.rb +15 -0
- data/lib/octo/utils/scripts_manager.rb +59 -0
- data/lib/octo/utils/string_matcher.rb +158 -0
- data/lib/octo/utils/trash_directory.rb +112 -0
- data/lib/octo/utils/workspace_rules.rb +46 -0
- data/lib/octo/version.rb +5 -0
- data/lib/octo/web/app.css +7141 -0
- data/lib/octo/web/app.js +543 -0
- data/lib/octo/web/apple-touch-icon.png +0 -0
- data/lib/octo/web/auth.js +150 -0
- data/lib/octo/web/channels.js +276 -0
- data/lib/octo/web/datepicker.js +205 -0
- data/lib/octo/web/favicon.png +0 -0
- data/lib/octo/web/i18n.js +1073 -0
- data/lib/octo/web/icon-512.png +0 -0
- data/lib/octo/web/icon-dark.svg +25 -0
- data/lib/octo/web/icon.svg +29 -0
- data/lib/octo/web/index.html +871 -0
- data/lib/octo/web/marked.min.js +69 -0
- data/lib/octo/web/onboard.js +491 -0
- data/lib/octo/web/profile.js +442 -0
- data/lib/octo/web/sessions.js +4421 -0
- data/lib/octo/web/settings.js +913 -0
- data/lib/octo/web/sidebar.js +32 -0
- data/lib/octo/web/skills.js +885 -0
- data/lib/octo/web/tasks.js +297 -0
- data/lib/octo/web/theme.js +105 -0
- data/lib/octo/web/trash.js +343 -0
- data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
- data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
- data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/katex.min.css +1 -0
- data/lib/octo/web/vendor/katex/katex.min.js +1 -0
- data/lib/octo/web/version.js +449 -0
- data/lib/octo/web/weixin-qr.html +209 -0
- data/lib/octo/web/ws-dispatcher.js +357 -0
- data/lib/octo/web/ws.js +128 -0
- data/lib/octo.rb +145 -0
- data/scripts/build/build.sh +329 -0
- data/scripts/build/lib/apt.sh +56 -0
- data/scripts/build/lib/brew.sh +89 -0
- data/scripts/build/lib/colors.sh +17 -0
- data/scripts/build/lib/gem.sh +95 -0
- data/scripts/build/lib/mise.sh +125 -0
- data/scripts/build/lib/network.sh +157 -0
- data/scripts/build/lib/os.sh +57 -0
- data/scripts/build/lib/shell.sh +37 -0
- data/scripts/build/src/install.sh.cc +174 -0
- data/scripts/build/src/install_browser.sh.cc +101 -0
- data/scripts/build/src/install_full.sh.cc +290 -0
- data/scripts/build/src/install_rails_deps.sh.cc +145 -0
- data/scripts/build/src/install_system_deps.sh.cc +123 -0
- data/scripts/build/src/uninstall.sh.cc +101 -0
- data/scripts/install.ps1 +532 -0
- data/scripts/install.sh +567 -0
- data/scripts/install_browser.sh +479 -0
- data/scripts/install_full.sh +838 -0
- data/scripts/install_rails_deps.sh +746 -0
- data/scripts/install_system_deps.sh +518 -0
- data/scripts/uninstall.sh +287 -0
- data/sig/octo.rbs +4 -0
- metadata +614 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "open3"
|
|
5
|
+
require "timeout"
|
|
6
|
+
require "tmpdir"
|
|
7
|
+
require "shellwords"
|
|
8
|
+
require "yaml"
|
|
9
|
+
require "base64"
|
|
10
|
+
require "fileutils"
|
|
11
|
+
require "securerandom"
|
|
12
|
+
require_relative "base"
|
|
13
|
+
|
|
14
|
+
module Octo
|
|
15
|
+
module Tools
|
|
16
|
+
# Browser tool — controls the user's real Chromium-based browser (Chrome 146+)
|
|
17
|
+
# via the Chrome DevTools MCP server (chrome-devtools-mcp).
|
|
18
|
+
#
|
|
19
|
+
# Architecture: uses the existing-session driver (Chrome MCP).
|
|
20
|
+
# chrome-devtools-mcp --autoConnect --experimentalStructuredContent
|
|
21
|
+
# --experimental-page-id-routing
|
|
22
|
+
#
|
|
23
|
+
# Communication: MCP stdio JSON-RPC 2.0 over a *persistent* (daemon) process.
|
|
24
|
+
# The MCP server process is started once, kept alive across all tool calls,
|
|
25
|
+
# and only restarted when the process dies unexpectedly.
|
|
26
|
+
#
|
|
27
|
+
# pageId is intentionally NOT passed to most MCP calls — the MCP server
|
|
28
|
+
# maintains its own selected page state. Only focus/close actions need pageId.
|
|
29
|
+
# When the selected page has been closed, mcp_call automatically retries once.
|
|
30
|
+
class Browser < Base
|
|
31
|
+
self.tool_name = "browser"
|
|
32
|
+
self.tool_description = <<~DESC.strip
|
|
33
|
+
Control user's real Chrome (146+) for web automation. Prefer web_fetch/web_search for read-only pages.
|
|
34
|
+
Actions: snapshot | act | open | navigate | tabs | focus | close | screenshot | status.
|
|
35
|
+
Always snapshot(interactive:true) before act. screenshot is EXPENSIVE — use ref= for a single element.
|
|
36
|
+
act kinds: click, dblclick, type, fill, press, hover, scroll, drag, select, wait, evaluate, click_at (coord fallback).
|
|
37
|
+
DESC
|
|
38
|
+
self.tool_category = "web"
|
|
39
|
+
self.tool_parameters = {
|
|
40
|
+
type: "object",
|
|
41
|
+
properties: {
|
|
42
|
+
action: {
|
|
43
|
+
type: "string",
|
|
44
|
+
enum: %w[snapshot act open navigate tabs focus close screenshot status]
|
|
45
|
+
},
|
|
46
|
+
kind: {
|
|
47
|
+
type: "string",
|
|
48
|
+
enum: %w[click dblclick type fill press hover drag select scroll wait evaluate click_at],
|
|
49
|
+
description: "act: interaction kind"
|
|
50
|
+
},
|
|
51
|
+
ref: { type: "string", description: "element ref from snapshot (e.g. 'e1'); screenshot: single element" },
|
|
52
|
+
text: { type: "string", description: "act type/fill text" },
|
|
53
|
+
key: { type: "string", description: "act press key (e.g. 'Enter')" },
|
|
54
|
+
direction: { type: "string", enum: %w[up down left right], description: "act scroll" },
|
|
55
|
+
amount: { type: "integer", description: "act scroll pixels" },
|
|
56
|
+
ms: { type: "integer", description: "act wait ms" },
|
|
57
|
+
selector: { type: "string", description: "act wait CSS selector" },
|
|
58
|
+
js: { type: "string", description: "act evaluate JS" },
|
|
59
|
+
target_ref: { type: "string", description: "act drag destination ref" },
|
|
60
|
+
values: { type: "array", items: { type: "string" }, description: "act select options" },
|
|
61
|
+
x: { type: "number", description: "click_at x px" },
|
|
62
|
+
y: { type: "number", description: "click_at y px" },
|
|
63
|
+
url: { type: "string", description: "open/navigate URL" },
|
|
64
|
+
target_id: { type: "string", description: "focus/close tab id" },
|
|
65
|
+
interactive: { type: "boolean", description: "snapshot: interactive only" },
|
|
66
|
+
compact: { type: "boolean", description: "snapshot: compact" },
|
|
67
|
+
depth: { type: "integer", description: "snapshot: max depth" },
|
|
68
|
+
full_page: { type: "boolean", description: "screenshot: full page" }
|
|
69
|
+
},
|
|
70
|
+
required: ["action"]
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
MIN_CHROME_MAJOR = 146
|
|
74
|
+
MCP_HANDSHAKE_TIMEOUT = 10
|
|
75
|
+
MCP_CALL_TIMEOUT = 60
|
|
76
|
+
MIN_NODE_MAJOR = 20
|
|
77
|
+
MAX_SNAPSHOT_CHARS = 4000
|
|
78
|
+
MAX_LLM_OUTPUT_CHARS = 6000
|
|
79
|
+
|
|
80
|
+
# Tool entry point: validate the browser configuration, then dispatch the action.
#
# The configuration gate (config file present and enabled) is skipped for
# action "status" and for action "act" with kind "evaluate".
#
# @param action [String, Symbol] requested browser action
# @param profile [Object, nil] accepted but not used by this method
# @param working_dir [Object, nil] accepted but not used by this method
# @param opts [Hash] remaining action arguments, forwarded to execute_user_browser
# @return [Hash] the action result, or { error: String } when a gate rejects the
#   call or any StandardError is raised along the way
def execute(action:, profile: nil, working_dir: nil, **opts)
  status_request = action.to_s == "status"
  gate_skipped = status_request ||
                 (action.to_s == "act" && (opts[:kind] || opts["kind"]).to_s == "evaluate")

  if !gate_skipped
    if !File.exist?(BROWSER_CONFIG_PATH)
      return browser_not_setup_error
    elsif !browser_enabled?
      return browser_disabled_error
    end
  end

  execute_user_browser(action, opts)
rescue StandardError => e
  # Every failure is reported to the caller as data rather than raised.
  { error: classify_browser_error(e) }
end
|
|
91
|
+
|
|
92
|
+
# Short label describing a tool call, e.g. "browser(snapshot)".
#
# @param args [Hash] call arguments; the action is read from :action, then "action"
# @return [String] "browser(<action>)", using "browser" when no action is given
def format_call(args)
  label = [args[:action], args["action"]].find { |candidate| candidate } || "browser"
  "browser(#{label})"
end
|
|
96
|
+
|
|
97
|
+
# One-line summary of a tool result.
#
# @param result [Hash] result hash; :error takes precedence over :output
# @return [String] "[Error] ..." with the error text cut by the 0..80 slice,
#   "[OK] N lines" when :output is present, otherwise "[OK] Done"
def format_result(result)
  failure = result[:error]
  if failure
    "[Error] #{failure.to_s.slice(0..80)}"
  elsif (body = result[:output])
    "[OK] #{body.to_s.lines.length} lines"
  else
    "[OK] Done"
  end
end
|
|
102
|
+
|
|
103
|
+
# Build the structured payload for the UI layer.
#
# @param result [Hash] result hash from a browser action
# @return [Hash, nil] nil when the result carries :error; otherwise a hash with
#   :type, :action, :url, :title and a :content_preview holding the first
#   500 characters of :output
def format_result_for_ui(result)
  return if result[:error]

  payload = { type: "browser" }
  payload[:action] = result[:action].to_s
  payload[:url] = result[:url]
  payload[:title] = result[:title]
  payload[:content_preview] = result[:output].to_s.slice(0, 500)
  payload
end
|
|
114
|
+
|
|
115
|
+
# Shape a browser result for the language model.
#
# - Error results are returned untouched.
# - A screenshot carrying :image_data becomes a caption plus an :image_inject
#   payload (mime type defaults to "image/png").
# - Everything else becomes a compact hash whose :stdout is the output text,
#   passed through compress_snapshot for snapshots and then truncate_output
#   with the per-action character limit; nil entries are dropped.
#
# @param result [Hash] result hash from a browser action
# @return [Hash]
def format_result_for_llm(result)
  return result if result[:error]

  action = result[:action].to_s
  snapshot = action == "snapshot"

  unless action == "screenshot" && result[:image_data]
    body = result[:output].to_s
    body = compress_snapshot(body) if snapshot
    limit = snapshot ? MAX_SNAPSHOT_CHARS : MAX_LLM_OUTPUT_CHARS

    summary = {
      action: action,
      success: result[:success],
      stdout: truncate_output(body, limit),
      profile: result[:profile]
    }
    return summary.compact
  end

  full_res_path = result[:original_path]
  shrunk_path = result[:compressed_path]

  caption = "Screenshot captured."
  if full_res_path || shrunk_path
    # Both lines are listed whenever at least one of the two paths is known.
    caption = [
      caption,
      "- Original (full resolution): #{full_res_path || 'unavailable'}",
      "- Compressed (800px, sent to AI): #{shrunk_path || 'unavailable'}"
    ].join("\n")
  end

  {
    content_string: caption,
    image_inject: {
      mime_type: result[:mime_type] || "image/png",
      base64_data: result[:image_data],
      path: shrunk_path || full_res_path
    }
  }
end
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
BROWSER_CONFIG_PATH = File.expand_path("~/.octo/browser.yml").freeze
|
|
156
|
+
|
|
157
|
+
BROWSER_DIAGNOSIS_HINT = <<~HINT.strip.freeze
|
|
158
|
+
Inform the user and ask if they'd like to run a diagnosis.
|
|
159
|
+
If yes, invoke the browser-setup skill with subcommand "doctor".
|
|
160
|
+
HINT
|
|
161
|
+
|
|
162
|
+
# Cause 1+2: Chrome not running, or Remote Debugging disabled (MCP can't distinguish them)
|
|
163
|
+
BROWSER_NOT_CONNECTED_HINT = <<~HINT.strip.freeze
|
|
164
|
+
Chrome is not reachable. Possible causes:
|
|
165
|
+
1. Chrome is not running — ask the user to open Chrome.
|
|
166
|
+
2. Remote Debugging is disabled — enable via chrome://inspect/#remote-debugging.
|
|
167
|
+
HINT
|
|
168
|
+
|
|
169
|
+
# Cause 3: MCP daemon crashed or failed to start
|
|
170
|
+
BROWSER_DAEMON_HINT = <<~HINT.strip.freeze
|
|
171
|
+
The browser MCP daemon crashed or failed to start. It may recover automatically on the next action.
|
|
172
|
+
If it keeps failing, ask the user to restart Octo.
|
|
173
|
+
HINT
|
|
174
|
+
|
|
175
|
+
# Cause 4: Chrome long-session unresponsiveness
|
|
176
|
+
BROWSER_RESTART_HINT = <<~HINT.strip.freeze
|
|
177
|
+
Chrome has become unresponsive. This often happens after Chrome has been running for a long time.
|
|
178
|
+
Ask the user to restart Chrome, then retry the action.
|
|
179
|
+
HINT
|
|
180
|
+
|
|
181
|
+
# Classify a browser error and return the message handed back to the AI.
#
# Connectivity failures (causes 1-4) get a cause-specific hint followed by the
# diagnosis offer. Any other error (wrong params, stale element, page closed,
# etc.) passes through as a plain "Browser error: ..." so the AI can self-correct.
#
# Matching is by substring on the exception message, first match wins:
#   1. "handshake timed out"            -> daemon hint (cause 3)
#   2. "timed out after"                -> restart-Chrome hint (cause 4)
#   3. "Could not connect to Chrome"    -> not-connected hint (causes 1+2)
#   4. "Chrome MCP tool" / "Chrome MCP initialize" -> daemon hint (cause 3)
#
# The handshake check runs before the generic timeout check on purpose: a
# message such as "... handshake timed out after 10s" contains both phrases and
# must be reported as a daemon failure, not as an unresponsive Chrome.
# NOTE(review): the exact wording of the handshake error is raised elsewhere —
# confirm it matches "handshake timed out".
#
# @param e [Exception] the error raised while running a browser action
# @return [String] message for the AI
private def classify_browser_error(e)
  msg = e.message.to_s

  hint =
    if msg.include?("handshake timed out")
      BROWSER_DAEMON_HINT
    elsif msg.include?("timed out after")
      BROWSER_RESTART_HINT
    elsif msg.include?("Could not connect to Chrome")
      BROWSER_NOT_CONNECTED_HINT
    elsif msg.include?("Chrome MCP tool") || msg.include?("Chrome MCP initialize")
      BROWSER_DAEMON_HINT
    end

  return "Browser error: #{msg}" unless hint

  "Browser error: #{msg}\n\n#{hint}\n\n#{BROWSER_DIAGNOSIS_HINT}"
end
|
|
205
|
+
|
|
206
|
+
# Whether the user has switched the browser tool on.
#
# Reads BROWSER_CONFIG_PATH and returns true only when the parsed document is
# a Hash whose "enabled" entry is exactly `true`.
# NOTE(review): YAMLCompat is a project helper defined elsewhere — presumably a
# wrapper around YAML.safe_load; confirm. Date is referenced here but "date"
# is not among this file's visible requires — confirm it is loaded elsewhere.
#
# @return [Boolean]
private def browser_enabled?
  raw_yaml = File.read(BROWSER_CONFIG_PATH)
  settings = YAMLCompat.safe_load(raw_yaml, permitted_classes: [Date, Time, Symbol])
  return false unless settings.is_a?(Hash)

  settings["enabled"] == true
end
|
|
210
|
+
|
|
211
|
+
# Rejection returned when the browser config file does not exist.
#
# @return [Hash] { error: String } telling the AI to offer the browser-setup skill
private def browser_not_setup_error
  rejection = <<~MSG
    The browser tool is not configured. This tool call has been rejected to protect user experience.

    Ask the user if they'd like to set up the browser, then invoke the browser-setup skill to guide them through the setup. Retry this tool call after setup is complete.
  MSG

  { error: rejection }
end
|
|
220
|
+
|
|
221
|
+
# Rejection returned when the config file exists but the tool is not enabled.
#
# @return [Hash] { error: String } telling the AI how the user can re-enable the tool
private def browser_disabled_error
  rejection = <<~MSG
    The browser tool is disabled by the user. This tool call has been rejected.

    Inform the user that they have disabled the browser tool. They can re-enable it from settings or by running "/browser-setup".
  MSG

  { error: rejection }
end
|
|
230
|
+
|
|
231
|
+
# -----------------------------------------------------------------------
|
|
232
|
+
# Action dispatch
|
|
233
|
+
# -----------------------------------------------------------------------
|
|
234
|
+
|
|
235
|
+
# Dispatch a browser action to the MCP server and wrap the outcome in a result hash.
#
# Option values are looked up under the symbol key first, then the string key.
# Successful results carry :action, success: true and profile: "user" plus
# action-specific fields; validation failures and unknown actions return
# { error: String } without calling the MCP server.
#
# @param action [String, Symbol] one of tabs, snapshot, open, navigate, focus,
#   close, act, screenshot, status
# @param opts [Hash] action arguments
# @return [Hash]
private def execute_user_browser(action, opts)
  read_opt = ->(name) { opts[name] || opts[name.to_s] }
  verb = action.to_s

  case verb
  when "tabs", "status"
    # Both actions list the open pages; only the summary text differs.
    tab_list = extract_pages(mcp_call("list_pages"))
    summary = verb == "tabs" ? format_tabs(tab_list) : "Browser running. #{tab_list.size} tab(s) open."
    { action: verb, success: true, profile: "user", output: summary, tabs: tab_list }

  when "snapshot"
    tree = extract_snapshot(mcp_call("take_snapshot"))
    rendered = build_ai_snapshot(tree,
                                 interactive: read_opt.call(:interactive),
                                 compact: read_opt.call(:compact),
                                 max_depth: read_opt.call(:depth))
    { action: "snapshot", success: true, profile: "user", output: rendered }

  when "open", "navigate"
    url = require_url(opts)
    # require_url hands back a Hash when validation failed; pass it straight through.
    return url if url.is_a?(Hash)

    if verb == "open"
      mcp_call("new_page", { url: url })
      { action: "open", success: true, profile: "user", url: url, output: "Opened: #{url}" }
    else
      mcp_call("navigate_page", { type: "url", url: url })
      { action: "navigate", success: true, profile: "user", url: url, output: "Navigated to: #{url}" }
    end

  when "focus", "close"
    target_id = read_opt.call(:target_id)
    if target_id.nil? || target_id.to_s.empty?
      return { error: "target_id is required for #{verb}. Use action=tabs to list open tabs." }
    end

    page_id = target_id.to_i
    if verb == "focus"
      mcp_call("select_page", { pageId: page_id, bringToFront: true })
      { action: "focus", success: true, profile: "user", output: "Focused tab #{target_id}" }
    else
      mcp_call("close_page", { pageId: page_id })
      { action: "close", success: true, profile: "user", output: "Closed tab #{target_id}" }
    end

  when "act" then do_user_act(opts)
  when "screenshot" then do_user_screenshot(opts)
  else
    { error: "Action '#{action}' is not supported." }
  end
end
|
|
289
|
+
|
|
290
|
+
# Perform one interaction ("act") in the user's browser through Chrome MCP.
#
# opts accepts symbol or string keys:
#   kind       - interaction type; defaults to "click"
#   ref        - element uid (required for click, dblclick, fill, type, hover, drag, select)
#   text       - value for fill/type
#   key        - key name for press (defaults to "Enter")
#   target_ref - drop-target uid for drag (required)
#   values     - candidate values for select (only the first is used)
#   direction  - "up", "down", "left" or "right" for scroll (defaults to "down")
#   amount     - scroll distance (defaults to 300)
#   ms         - milliseconds to sleep for wait
#   selector   - text handed to the MCP "wait_for" tool for wait
#   js         - JavaScript expression for evaluate
#   x, y       - coordinates for click_at
#
# Returns a Hash: { action: "act", success: true, profile: "user", output: ... }
# on success, or { error: "..." } when a required parameter is missing or the
# kind is unknown. Errors raised by mcp_call propagate to the caller.
private def do_user_act(opts)
  kind = (opts[:kind] || opts["kind"] || "click").to_s
  ref  = opts[:ref] || opts["ref"]

  case kind
  when "click", "dblclick"
    uid = require_ref(ref)
    return uid if uid.is_a?(Hash)
    args = { uid: uid }
    args[:dblClick] = true if kind == "dblclick"
    mcp_call("click", args)

  when "fill", "type"
    uid = require_ref(ref)
    return uid if uid.is_a?(Hash)
    mcp_call("fill", { uid: uid, value: opts[:text] || opts["text"] || "" })

  when "press"
    mcp_call("press_key", { key: opts[:key] || opts["key"] || "Enter" })

  when "hover"
    uid = require_ref(ref)
    return uid if uid.is_a?(Hash)
    mcp_call("hover", { uid: uid })

  when "drag"
    uid = require_ref(ref)
    return uid if uid.is_a?(Hash)
    # Validate the drop target up front instead of sending an empty uid to MCP.
    target_uid = (opts[:target_ref] || opts["target_ref"]).to_s
    return { error: "target_ref is required for drag (snapshot first to get refs)" } if target_uid.empty?
    mcp_call("drag", { from_uid: uid, to_uid: target_uid })

  when "select"
    uid = require_ref(ref)
    return uid if uid.is_a?(Hash)
    values = Array(opts[:values] || opts["values"] || [])
    # Selection goes through the "fill" tool using the first requested value.
    mcp_call("fill", { uid: uid, value: values.first.to_s })

  when "scroll"
    # to_s so a Symbol direction (:up) is matched by the string comparisons below.
    direction = (opts[:direction] || opts["direction"] || "down").to_s
    amount    = (opts[:amount] || opts["amount"] || 300).to_i
    dx = case direction; when "right" then amount; when "left" then -amount; else 0; end
    dy = case direction; when "down" then amount; when "up" then -amount; else 0; end
    # dx/dy are Integers, so interpolating them into the script is safe.
    mcp_call("evaluate_script", { function: "() => { window.scrollBy(#{dx}, #{dy}) }" })

  when "wait"
    ms  = opts[:ms] || opts["ms"]
    sel = opts[:selector] || opts["selector"]
    if ms
      # Kernel#sleep raises ArgumentError for a negative interval; clamp at zero.
      wait_ms = [ms.to_i, 0].max
      sleep(wait_ms / 1000.0)
      return { action: "act", success: true, profile: "user", output: "Waited #{wait_ms}ms" }
    elsif sel
      mcp_call("wait_for", { text: [sel] })
    else
      sleep(1)
    end

  when "evaluate"
    js = (opts[:js] || opts["js"]).to_s
    # An empty expression would produce the invalid script "return ()".
    return { error: "js is required for evaluate" } if js.strip.empty?
    pages   = extract_pages(mcp_call("list_pages"))
    sel     = pages.find { |p| p[:selected] }
    # Prefer the selected tab; otherwise fall back to the first listed tab.
    page_id = sel ? sel[:id] : (pages.first && pages.first[:id])
    eval_args = { function: "() => { return (#{js}) }" }
    eval_args[:pageId] = page_id if page_id
    result = mcp_call("evaluate_script", eval_args)
    return { action: "act", success: true, profile: "user", output: extract_message(result).to_s }

  when "click_at"
    x = opts[:x] || opts["x"]
    y = opts[:y] || opts["y"]
    return { error: "click_at requires x and y coordinates" } unless x && y
    result = mcp_call("click_at", { x: x.to_f, y: y.to_f })
    return { action: "act", success: true, profile: "user", output: extract_message(result).to_s }

  else
    return { error: "Unknown act kind: #{kind}" }
  end

  { action: "act", success: true, profile: "user", output: "#{kind} completed." }
end
|
|
368
|
+
|
|
369
|
+
# Maximum width (pixels) passed to the downscaler for screenshots returned inline.
SCREENSHOT_MAX_WIDTH        = 800
# Largest base64 payload (bytes) that may be returned inline after downscaling.
SCREENSHOT_MAX_BASE64_BYTES = 150_000

# Capture a screenshot through the MCP "take_screenshot" tool.
#
# opts accepts symbol or string keys:
#   full_page - truthy to request a full-page capture (defaults to false)
#   ref       - element uid to capture instead of the page (optional)
#
# Returns a Hash with action: "screenshot". On success with image data it
# carries :image_data (downscaled base64 PNG), :mime_type, :original_path and
# :compressed_path (either path is nil when saving to disk failed). When the
# response has no image block, the response text (or a default message) is
# returned as :output. success is false when the MCP response is not a Hash
# or the downscaled image still exceeds SCREENSHOT_MAX_BASE64_BYTES.
private def do_user_screenshot(opts)
  full_page = opts[:full_page] || opts["full_page"] || false
  uid       = opts[:ref] || opts["ref"]

  call_args = { format: "png", fullPage: full_page }
  call_args[:uid] = uid if uid
  result = mcp_call("take_screenshot", call_args)

  # Guard against a malformed response, matching the extract_* helpers.
  unless result.is_a?(Hash)
    return { action: "screenshot", success: false, profile: "user",
             output: "Screenshot failed: unexpected response from browser." }
  end

  image_block = Array(result["content"]).find { |b| b.is_a?(Hash) && b["type"] == "image" }

  unless image_block
    text = extract_text_content(result)
    return { action: "screenshot", success: true, profile: "user",
             output: text.empty? ? "Screenshot captured." : text }
  end

  # Save original (full-resolution) PNG to disk before any downscaling
  original_path = save_screenshot_to_disk(image_block["data"], suffix: "original")

  image_data = png_downscale_base64(image_block["data"], SCREENSHOT_MAX_WIDTH)

  if image_data.bytesize > SCREENSHOT_MAX_BASE64_BYTES
    size_kb = image_data.bytesize / 1024
    return { action: "screenshot", success: false, profile: "user",
             output: "Screenshot too large after resize (#{size_kb}KB). Use action=snapshot instead." }
  end

  # Save the downscaled PNG as well so its path can be referenced later.
  compressed_path = save_screenshot_to_disk(image_data, suffix: "compressed")

  { action: "screenshot", success: true, profile: "user",
    image_data: image_data, mime_type: "image/png",
    original_path: original_path, compressed_path: compressed_path,
    output: "Screenshot captured." }
end
|
|
407
|
+
|
|
408
|
+
# Downscale a base64-encoded PNG by delegating to
# Octo::Utils::FileProcessor.downscale_image_base64 with the "image/png" MIME
# type and the given max_width. Returns whatever that helper returns.
private def png_downscale_base64(b64, max_width)
  processor = Octo::Utils::FileProcessor
  processor.downscale_image_base64(b64, "image/png", max_width: max_width)
end
|
|
413
|
+
|
|
414
|
+
# Decode a base64 PNG and write it under "<tmpdir>/octo-uploads".
#
# base64_data - strictly encoded base64 String (decoded with strict_decode64).
# suffix      - optional label (e.g. "original" or "compressed") appended to
#               the file name as "_<suffix>".
#
# The file is named "screenshot_<YYYYmmdd_HHMMSS>_<8 hex chars><_suffix>.png".
# Returns the written path, or nil when anything raises (the failure is
# reported through Octo::Logger.error).
private def save_screenshot_to_disk(base64_data, suffix: nil)
  target_dir = File.join(Dir.tmpdir, "octo-uploads")
  FileUtils.mkdir_p(target_dir)

  stamp = Time.now.strftime("%Y%m%d_%H%M%S")
  token = SecureRandom.hex(4)
  tail  = suffix ? "_#{suffix}" : ""

  File.join(target_dir, "screenshot_#{stamp}_#{token}#{tail}.png").tap do |destination|
    File.binwrite(destination, Base64.strict_decode64(base64_data))
  end
rescue => e
  Octo::Logger.error("screenshot_save_failed", error: e.message)
  nil
end
|
|
432
|
+
|
|
433
|
+
# -----------------------------------------------------------------------
|
|
434
|
+
# Chrome MCP
|
|
435
|
+
# -----------------------------------------------------------------------
|
|
436
|
+
|
|
437
|
+
# Forward a tool call to Octo::BrowserManager.instance.mcp_call.
#
# No retry is performed here. When the delegate raises a RuntimeError whose
# message contains "selected page has been closed", it is re-raised as a
# RuntimeError with a hint telling the caller to open a new tab and retry;
# any other RuntimeError is re-raised unchanged.
private def mcp_call(tool_name, arguments = {})
  Octo::BrowserManager.instance.mcp_call(tool_name, arguments)
rescue RuntimeError => e
  # Bare raise re-raises the exception currently being handled.
  raise unless e.message.include?("selected page has been closed")

  raise RuntimeError, "The browser tab was closed. Use action=open to open a new tab, then retry."
end
|
|
447
|
+
|
|
448
|
+
# -----------------------------------------------------------------------
|
|
449
|
+
# MCP response extractors
|
|
450
|
+
# -----------------------------------------------------------------------
|
|
451
|
+
|
|
452
|
+
# Turn a "list_pages" MCP response into an Array of
# { id:, url:, selected: } Hashes.
#
# Uses result["structuredContent"]["pages"] when that is an Array; otherwise
# the response's text content is parsed line by line. A non-Hash result
# yields [].
private def extract_pages(result)
  return [] unless result.is_a?(Hash)

  structured = result["structuredContent"]
  listed = structured["pages"] if structured.is_a?(Hash)
  return parse_pages_from_text(extract_text_content(result)) unless listed.is_a?(Array)

  listed.map do |page|
    # Only a literal true counts as selected.
    { id: page["id"], url: page["url"], selected: page["selected"] == true }
  end
end
|
|
464
|
+
|
|
465
|
+
# Pull the snapshot tree out of an MCP response.
#
# Returns result["structuredContent"]["snapshot"] when that is a Hash;
# otherwise the response's text content is parsed as JSON. Returns {} for a
# non-Hash result or when the fallback parse raises.
private def extract_snapshot(result)
  return {} unless result.is_a?(Hash)

  structured = result["structuredContent"]
  tree = structured["snapshot"] if structured.is_a?(Hash)
  return tree if tree.is_a?(Hash)

  begin
    JSON.parse(extract_text_content(result))
  rescue StandardError
    {}
  end
end
|
|
477
|
+
|
|
478
|
+
# Return the human-readable message of an MCP response as a String.
#
# Prefers a truthy result["structuredContent"]["message"] (converted with
# to_s); otherwise falls back to the response's text content. A non-Hash
# result yields "".
private def extract_message(result)
  return "" unless result.is_a?(Hash)

  structured = result["structuredContent"]
  message = structured.is_a?(Hash) ? structured["message"] : nil
  message ? message.to_s : extract_text_content(result)
end
|
|
486
|
+
|
|
487
|
+
# Join every String "text" field found in result["content"] with newlines.
#
# Entries that are not Hashes, or whose "text" is not a String, are ignored.
# Returns "" for a non-Hash result or when no text entries exist.
private def extract_text_content(result)
  return "" unless result.is_a?(Hash)

  text_entries = Array(result["content"]).select do |item|
    item.is_a?(Hash) && item["text"].is_a?(String)
  end
  text_entries.map { |item| item["text"] }.join("\n")
end
|
|
493
|
+
|
|
494
|
+
# Parse a plain-text tab listing into { id:, url:, selected: } Hashes.
#
# Each recognised line looks like "<number>: <url>" with an optional trailing
# "[selected]" marker (case-insensitive). Lines that do not match are skipped.
private def parse_pages_from_text(text)
  text.each_line.each_with_object([]) do |row, pages|
    found = /^\s*(\d+):\s+(.+?)(?:\s+\[(selected)\])?\s*$/i.match(row)
    next if found.nil?

    number, address, marker = found.captures
    pages << { id: number.to_i, url: address.strip, selected: !marker.nil? }
  end
end
|
|
501
|
+
|
|
502
|
+
# Render tab Hashes ({ id:, url:, selected: }) as one "<id>: <url>" line per
# tab, appending " [selected]" to selected tabs. Returns "No open tabs." when
# the list is empty.
private def format_tabs(pages)
  return "No open tabs." if pages.empty?

  rows = pages.map do |tab|
    marker = tab[:selected] ? " [selected]" : ""
    "#{tab[:id]}: #{tab[:url]}#{marker}"
  end
  rows.join("\n")
end
|
|
506
|
+
|
|
507
|
+
# -----------------------------------------------------------------------
|
|
508
|
+
# Snapshot rendering
|
|
509
|
+
# -----------------------------------------------------------------------
|
|
510
|
+
|
|
511
|
+
# Roles kept when a snapshot is rendered in interactive-only mode; nodes with
# these roles get a [ref=...] tag whenever they carry an id.
INTERACTIVE_ROLES = %w[
  button
  link
  textbox
  checkbox
  radio
  select
  combobox
  menuitem
  option
  tab
  switch
  searchbox
  spinbutton
  slider
  menuitemcheckbox
  menuitemradio
].freeze

# Container roles that compact rendering hides when the node has no name.
STRUCTURAL_ROLES = %w[generic none presentation group region section].freeze

# Content roles that get a [ref=...] tag only when the node has a name.
CONTENT_ROLES = %w[heading paragraph text statictext image img listitem term definition].freeze
|
|
525
|
+
|
|
526
|
+
# Render a snapshot tree as an indented text outline.
#
# node        - root Hash of the tree; anything else (or an empty Hash) yields "".
# interactive - forwarded to visit_node (render only interactive roles).
# compact     - forwarded to visit_node (hide unnamed structural nodes).
# max_depth   - forwarded to visit_node (nil means no depth limit).
#
# Returns the rendered lines joined with "\n". The ref table that visit_node
# fills in is not returned to the caller.
private def build_ai_snapshot(node, interactive: false, compact: false, max_depth: nil)
  return "" if !node.is_a?(Hash) || node.empty?

  rendered = []
  visit_node(node, 0, rendered, {}, interactive: interactive, compact: compact, max_depth: max_depth)
  rendered.join("\n")
end
|
|
534
|
+
|
|
535
|
+
# Depth-first renderer for one snapshot node and its descendants.
#
# node        - Hash with optional "role", "name", "id", "value",
#               "description" and "children" keys; non-Hash nodes are skipped.
# depth       - indentation level of this node (two spaces per level).
# lines       - Array that rendered lines are appended to.
# refs        - Hash filled with uid => { role:, name: } for every tagged node.
# interactive - when true, only INTERACTIVE_ROLES nodes are rendered.
# compact     - when true, unnamed STRUCTURAL_ROLES nodes are not rendered.
# max_depth   - stop descending once depth exceeds this value (nil = no limit).
#
# A node that is filtered out does not consume an indentation level: its
# children are rendered at the node's own depth.
private def visit_node(node, depth, lines, refs, interactive:, compact:, max_depth:)
  # Malformed children (nil, strings, ...) are ignored instead of raising.
  return unless node.is_a?(Hash)
  return if max_depth && depth > max_depth

  role = node["role"].to_s.downcase.strip
  role = "generic" if role.empty?
  name = node["name"].to_s.strip
  uid  = node["id"].to_s.strip
  val  = node["value"]
  desc = node["description"].to_s.strip

  render = true
  render = false if interactive && !INTERACTIVE_ROLES.include?(role)
  render = false if compact && STRUCTURAL_ROLES.include?(role) && name.empty?

  if render
    line = "#{" " * (depth * 2)}- #{role}"
    line += " \"#{escape_quoted(name)}\"" unless name.empty?

    # Tag interactive nodes, and named content nodes, so they can be referenced later.
    if !uid.empty? && (INTERACTIVE_ROLES.include?(role) ||
                       (CONTENT_ROLES.include?(role) && !name.empty?))
      refs[uid] = { role: role, name: name }
      line += " [ref=#{uid}]"
    end

    line += " value=\"#{escape_quoted(val.to_s)}\"" unless val.nil? || val.to_s.empty?
    line += " description=\"#{escape_quoted(desc)}\"" unless desc.empty?
    lines << line
  end

  child_depth = render ? depth + 1 : depth
  Array(node["children"]).each do |child|
    visit_node(child, child_depth, lines, refs, interactive: interactive, compact: compact, max_depth: max_depth)
  end
end
|
|
569
|
+
|
|
570
|
+
# Escape a value for embedding inside a double-quoted snapshot attribute.
#
# Backslashes and double quotes are each prefixed with a backslash. The block
# form of gsub is used on purpose: in a replacement *string*, "\\\\" is read
# as one literal backslash, so gsub("\\", "\\\\") leaves backslashes unescaped.
#
# str - any object; converted with to_s (nil becomes "").
# Returns the escaped String.
private def escape_quoted(str)
  str.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }
end
|
|
573
|
+
|
|
574
|
+
# -----------------------------------------------------------------------
|
|
575
|
+
# Parameter helpers
|
|
576
|
+
# -----------------------------------------------------------------------
|
|
577
|
+
|
|
578
|
+
# Fetch the mandatory "url" option (symbol or string key).
#
# The value is converted with to_s and stripped, so a blank or
# whitespace-only URL is rejected and a non-String value cannot raise on
# empty?. Callers distinguish the outcomes with is_a?(Hash).
#
# Returns the URL String, or { error: "..." } when it is missing or blank.
private def require_url(opts)
  url = (opts[:url] || opts["url"]).to_s.strip
  return { error: "url is required for this action" } if url.empty?

  url
end
|
|
583
|
+
|
|
584
|
+
# Validate an element reference taken from a snapshot.
#
# Returns ref converted to a String, or { error: "..." } when ref is nil or
# its string form is empty. Callers distinguish the outcomes with is_a?(Hash).
private def require_ref(ref)
  uid = ref.to_s
  if uid.empty?
    { error: "ref is required for this act kind (snapshot first to get refs)" }
  else
    uid
  end
end
|
|
588
|
+
|
|
589
|
+
# -----------------------------------------------------------------------
|
|
590
|
+
# Output helpers
|
|
591
|
+
# -----------------------------------------------------------------------
|
|
592
|
+
|
|
593
|
+
# Strip low-value lines from a rendered snapshot.
#
# Removes lines that (after trimming) start with "- /url:", "/url:",
# "- /placeholder:" or "/placeholder:", and lines that are just a bare
# "- img". When anything was removed, a trailing
# "[snapshot compressed: N lines removed]" note is appended.
# Empty input is returned unchanged.
private def compress_snapshot(output)
  return output if output.empty?

  kept, dropped = output.lines.partition do |raw|
    trimmed = raw.strip
    noise = trimmed.start_with?("- /url:", "/url:", "- /placeholder:", "/placeholder:") ||
            trimmed == "- img" || trimmed.match?(/\A-\s+img\s*\z/)
    !noise
  end

  kept << "\n[snapshot compressed: #{dropped.size} lines removed]\n" unless dropped.empty?
  kept.join
end
|
|
608
|
+
|
|
609
|
+
# Shorten output to roughly max_chars characters, cutting on line boundaries.
#
# 150 characters of the budget are reserved for the trailing marker. Whole
# lines are kept, in order, while they fit in the remaining budget. If not
# even the first line fits, a character-sliced prefix of that line is kept
# so the caller still sees some content instead of only the marker.
#
# Returns output unchanged when it already fits; otherwise the kept text
# followed by "... [truncated: X/Y lines shown] ...", where X counts complete
# lines only.
private def truncate_output(output, max_chars)
  return output if output.length <= max_chars

  lines     = output.lines
  available = max_chars - 150

  used = 0
  kept = lines.take_while do |line|
    used += line.length
    used <= available
  end

  shown = kept.join
  # Single over-long line: keep as much of it as the budget allows.
  shown = lines.first[0, available] if kept.empty? && available.positive? && !lines.empty?

  "#{shown}\n... [truncated: #{kept.size}/#{lines.size} lines shown] ..."
end
|
|
623
|
+
end
|
|
624
|
+
end
|
|
625
|
+
end
|