octo-agent 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clacky/skills/commit/SKILL.md +423 -0
- data/.clacky/skills/gem-release/SKILL.md +199 -0
- data/.clacky/skills/gem-release/scripts/release.sh +304 -0
- data/.clacky/skills/oss-upload/SKILL.md +47 -0
- data/.octorules +106 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +76 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +92 -0
- data/Dockerfile +28 -0
- data/LICENSE.txt +22 -0
- data/POSITIONING.md +46 -0
- data/README.md +134 -0
- data/README_CN.md +134 -0
- data/Rakefile +34 -0
- data/benchmark/fixtures/sample_project/Gemfile +3 -0
- data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
- data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
- data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
- data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
- data/benchmark/results/EVALUATION_REPORT.md +165 -0
- data/benchmark/results/baseline_20260511_174424.json +128 -0
- data/benchmark/results/report_20260511_175256.json +271 -0
- data/benchmark/results/report_20260511_175444.json +271 -0
- data/benchmark/results/treatment_20260511_175103.json +130 -0
- data/benchmark/runner.rb +441 -0
- data/bin/octo +7 -0
- data/docs/agent-first-ui-design.md +77 -0
- data/docs/billing-system.md +318 -0
- data/docs/channel-architecture.md +235 -0
- data/docs/engineering-article.md +343 -0
- data/docs/session-skill-invocation.md +69 -0
- data/docs/time_machine_design.md +247 -0
- data/docs/ui2-architecture.md +124 -0
- data/homebrew/README.md +96 -0
- data/homebrew/openocto.rb +24 -0
- data/lib/octo/agent/hook_manager.rb +61 -0
- data/lib/octo/agent/llm_caller.rb +800 -0
- data/lib/octo/agent/memory_updater.rb +246 -0
- data/lib/octo/agent/message_compressor.rb +225 -0
- data/lib/octo/agent/message_compressor_helper.rb +869 -0
- data/lib/octo/agent/next_message_suggester.rb +215 -0
- data/lib/octo/agent/session_serializer.rb +685 -0
- data/lib/octo/agent/skill_auto_creator.rb +114 -0
- data/lib/octo/agent/skill_evolution.rb +61 -0
- data/lib/octo/agent/skill_manager.rb +466 -0
- data/lib/octo/agent/skill_reflector.rb +89 -0
- data/lib/octo/agent/system_prompt_builder.rb +101 -0
- data/lib/octo/agent/time_machine.rb +214 -0
- data/lib/octo/agent/tool_executor.rb +454 -0
- data/lib/octo/agent/tool_registry.rb +150 -0
- data/lib/octo/agent.rb +2180 -0
- data/lib/octo/agent_config.rb +989 -0
- data/lib/octo/agent_profile.rb +112 -0
- data/lib/octo/anthropic_stream_aggregator.rb +137 -0
- data/lib/octo/background_task_registry.rb +324 -0
- data/lib/octo/banner.rb +34 -0
- data/lib/octo/bedrock_stream_aggregator.rb +137 -0
- data/lib/octo/block_font.rb +331 -0
- data/lib/octo/cli.rb +968 -0
- data/lib/octo/client.rb +623 -0
- data/lib/octo/default_agents/SOUL.md +3 -0
- data/lib/octo/default_agents/USER.md +1 -0
- data/lib/octo/default_agents/base_prompt.md +66 -0
- data/lib/octo/default_agents/coding/profile.yml +2 -0
- data/lib/octo/default_agents/coding/system_prompt.md +67 -0
- data/lib/octo/default_agents/general/profile.yml +2 -0
- data/lib/octo/default_agents/general/system_prompt.md +16 -0
- data/lib/octo/default_parsers/doc_parser.rb +69 -0
- data/lib/octo/default_parsers/docx_parser.rb +188 -0
- data/lib/octo/default_parsers/pdf_parser.rb +120 -0
- data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/octo/default_parsers/pptx_parser.rb +140 -0
- data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
- data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
- data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
- data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
- data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
- data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
- data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
- data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
- data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
- data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
- data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
- data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
- data/lib/octo/default_skills/onboard/SKILL.md +578 -0
- data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
- data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
- data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
- data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
- data/lib/octo/default_skills/personal-website/publish.rb +235 -0
- data/lib/octo/default_skills/product-help/SKILL.md +123 -0
- data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
- data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
- data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
- data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
- data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
- data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
- data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
- data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
- data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
- data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
- data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
- data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
- data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
- data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
- data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
- data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
- data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
- data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
- data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
- data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
- data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
- data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
- data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
- data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
- data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
- data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
- data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
- data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
- data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
- data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
- data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
- data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
- data/lib/octo/idle_compression_timer.rb +115 -0
- data/lib/octo/json_ui_controller.rb +204 -0
- data/lib/octo/message_format/anthropic.rb +409 -0
- data/lib/octo/message_format/bedrock.rb +361 -0
- data/lib/octo/message_format/open_ai.rb +222 -0
- data/lib/octo/message_history.rb +373 -0
- data/lib/octo/openai_stream_aggregator.rb +130 -0
- data/lib/octo/plain_ui_controller.rb +166 -0
- data/lib/octo/providers.rb +534 -0
- data/lib/octo/server/browser_manager.rb +397 -0
- data/lib/octo/server/channel/adapters/base.rb +82 -0
- data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
- data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
- data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
- data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
- data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
- data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
- data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
- data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
- data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
- data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
- data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
- data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
- data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
- data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
- data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
- data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
- data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
- data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
- data/lib/octo/server/channel/channel_config.rb +178 -0
- data/lib/octo/server/channel/channel_manager.rb +468 -0
- data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
- data/lib/octo/server/channel.rb +33 -0
- data/lib/octo/server/discover.rb +77 -0
- data/lib/octo/server/epipe_safe_io.rb +105 -0
- data/lib/octo/server/http_server.rb +3554 -0
- data/lib/octo/server/scheduler.rb +317 -0
- data/lib/octo/server/server_master.rb +325 -0
- data/lib/octo/server/session_registry.rb +431 -0
- data/lib/octo/server/web_ui_controller.rb +487 -0
- data/lib/octo/session_manager.rb +385 -0
- data/lib/octo/skill.rb +466 -0
- data/lib/octo/skill_loader.rb +328 -0
- data/lib/octo/tools/base.rb +118 -0
- data/lib/octo/tools/browser.rb +625 -0
- data/lib/octo/tools/edit.rb +165 -0
- data/lib/octo/tools/file_reader.rb +549 -0
- data/lib/octo/tools/glob.rb +162 -0
- data/lib/octo/tools/grep.rb +356 -0
- data/lib/octo/tools/invoke_skill.rb +96 -0
- data/lib/octo/tools/list_tasks.rb +54 -0
- data/lib/octo/tools/redo_task.rb +41 -0
- data/lib/octo/tools/request_user_feedback.rb +84 -0
- data/lib/octo/tools/security.rb +333 -0
- data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
- data/lib/octo/tools/terminal/persistent_session.rb +268 -0
- data/lib/octo/tools/terminal/safe_rm.sh +106 -0
- data/lib/octo/tools/terminal/session_manager.rb +213 -0
- data/lib/octo/tools/terminal.rb +1828 -0
- data/lib/octo/tools/todo_manager.rb +374 -0
- data/lib/octo/tools/trash_manager.rb +388 -0
- data/lib/octo/tools/undo_task.rb +35 -0
- data/lib/octo/tools/web_fetch.rb +242 -0
- data/lib/octo/tools/web_search.rb +260 -0
- data/lib/octo/tools/write.rb +77 -0
- data/lib/octo/ui2/block_font.rb +10 -0
- data/lib/octo/ui2/components/base_component.rb +163 -0
- data/lib/octo/ui2/components/command_suggestions.rb +290 -0
- data/lib/octo/ui2/components/common_component.rb +96 -0
- data/lib/octo/ui2/components/inline_input.rb +226 -0
- data/lib/octo/ui2/components/input_area.rb +1338 -0
- data/lib/octo/ui2/components/message_component.rb +99 -0
- data/lib/octo/ui2/components/modal_component.rb +419 -0
- data/lib/octo/ui2/components/todo_area.rb +149 -0
- data/lib/octo/ui2/components/tool_component.rb +107 -0
- data/lib/octo/ui2/components/welcome_banner.rb +139 -0
- data/lib/octo/ui2/layout_manager.rb +807 -0
- data/lib/octo/ui2/line_editor.rb +363 -0
- data/lib/octo/ui2/markdown_renderer.rb +100 -0
- data/lib/octo/ui2/output_buffer.rb +370 -0
- data/lib/octo/ui2/progress_handle.rb +362 -0
- data/lib/octo/ui2/progress_indicator.rb +55 -0
- data/lib/octo/ui2/screen_buffer.rb +273 -0
- data/lib/octo/ui2/terminal_detector.rb +119 -0
- data/lib/octo/ui2/theme_manager.rb +85 -0
- data/lib/octo/ui2/themes/base_theme.rb +105 -0
- data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
- data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
- data/lib/octo/ui2/thinking_verbs.rb +26 -0
- data/lib/octo/ui2/ui_controller.rb +1625 -0
- data/lib/octo/ui2/view_renderer.rb +177 -0
- data/lib/octo/ui2.rb +40 -0
- data/lib/octo/ui_interface.rb +154 -0
- data/lib/octo/utils/arguments_parser.rb +191 -0
- data/lib/octo/utils/browser_detector.rb +195 -0
- data/lib/octo/utils/encoding.rb +92 -0
- data/lib/octo/utils/environment_detector.rb +140 -0
- data/lib/octo/utils/file_ignore_helper.rb +170 -0
- data/lib/octo/utils/file_processor.rb +601 -0
- data/lib/octo/utils/gitignore_parser.rb +154 -0
- data/lib/octo/utils/limit_stack.rb +152 -0
- data/lib/octo/utils/logger.rb +124 -0
- data/lib/octo/utils/login_shell.rb +72 -0
- data/lib/octo/utils/model_pricing.rb +646 -0
- data/lib/octo/utils/parser_manager.rb +165 -0
- data/lib/octo/utils/path_helper.rb +15 -0
- data/lib/octo/utils/scripts_manager.rb +59 -0
- data/lib/octo/utils/string_matcher.rb +158 -0
- data/lib/octo/utils/trash_directory.rb +112 -0
- data/lib/octo/utils/workspace_rules.rb +46 -0
- data/lib/octo/version.rb +5 -0
- data/lib/octo/web/app.css +7141 -0
- data/lib/octo/web/app.js +543 -0
- data/lib/octo/web/apple-touch-icon.png +0 -0
- data/lib/octo/web/auth.js +150 -0
- data/lib/octo/web/channels.js +276 -0
- data/lib/octo/web/datepicker.js +205 -0
- data/lib/octo/web/favicon.png +0 -0
- data/lib/octo/web/i18n.js +1073 -0
- data/lib/octo/web/icon-512.png +0 -0
- data/lib/octo/web/icon-dark.svg +25 -0
- data/lib/octo/web/icon.svg +29 -0
- data/lib/octo/web/index.html +871 -0
- data/lib/octo/web/marked.min.js +69 -0
- data/lib/octo/web/onboard.js +491 -0
- data/lib/octo/web/profile.js +442 -0
- data/lib/octo/web/sessions.js +4421 -0
- data/lib/octo/web/settings.js +913 -0
- data/lib/octo/web/sidebar.js +32 -0
- data/lib/octo/web/skills.js +885 -0
- data/lib/octo/web/tasks.js +297 -0
- data/lib/octo/web/theme.js +105 -0
- data/lib/octo/web/trash.js +343 -0
- data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
- data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
- data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/katex.min.css +1 -0
- data/lib/octo/web/vendor/katex/katex.min.js +1 -0
- data/lib/octo/web/version.js +449 -0
- data/lib/octo/web/weixin-qr.html +209 -0
- data/lib/octo/web/ws-dispatcher.js +357 -0
- data/lib/octo/web/ws.js +128 -0
- data/lib/octo.rb +145 -0
- data/scripts/build/build.sh +329 -0
- data/scripts/build/lib/apt.sh +56 -0
- data/scripts/build/lib/brew.sh +89 -0
- data/scripts/build/lib/colors.sh +17 -0
- data/scripts/build/lib/gem.sh +95 -0
- data/scripts/build/lib/mise.sh +125 -0
- data/scripts/build/lib/network.sh +157 -0
- data/scripts/build/lib/os.sh +57 -0
- data/scripts/build/lib/shell.sh +37 -0
- data/scripts/build/src/install.sh.cc +174 -0
- data/scripts/build/src/install_browser.sh.cc +101 -0
- data/scripts/build/src/install_full.sh.cc +290 -0
- data/scripts/build/src/install_rails_deps.sh.cc +145 -0
- data/scripts/build/src/install_system_deps.sh.cc +123 -0
- data/scripts/build/src/uninstall.sh.cc +101 -0
- data/scripts/install.ps1 +532 -0
- data/scripts/install.sh +567 -0
- data/scripts/install_browser.sh +479 -0
- data/scripts/install_full.sh +838 -0
- data/scripts/install_rails_deps.sh +746 -0
- data/scripts/install_system_deps.sh +518 -0
- data/scripts/uninstall.sh +287 -0
- data/sig/octo.rbs +4 -0
- metadata +614 -0
|
@@ -0,0 +1,601 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "tmpdir"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require "securerandom"
|
|
6
|
+
require "stringio"
|
|
7
|
+
|
|
8
|
+
require_relative "parser_manager"
|
|
9
|
+
require "zip"
|
|
10
|
+
|
|
11
|
+
module Octo
|
|
12
|
+
module Utils
|
|
13
|
+
# File processing pipeline.
|
|
14
|
+
#
|
|
15
|
+
# Two entry points:
|
|
16
|
+
# FileProcessor.save(body:, filename:)
|
|
17
|
+
# → Store raw bytes to disk only. Returns { name:, path: }.
|
|
18
|
+
# Used by http_server and channel adapters — no parsing here.
|
|
19
|
+
#
|
|
20
|
+
# FileProcessor.process_path(path, name: nil)
|
|
21
|
+
# → Parse an already-saved file. Returns FileRef (with preview_path or parse_error).
|
|
22
|
+
# Used by agent.run when building the file prompt.
|
|
23
|
+
#
|
|
24
|
+
# (FileProcessor.process = save + process_path in one call, for convenience.)
|
|
25
|
+
module FileProcessor
|
|
26
|
+
# Directory under the system temp dir that uploaded files are written to.
UPLOAD_DIR = File.join(Dir.tmpdir, "octo-uploads").freeze

# Size ceilings in bytes.
MAX_FILE_BYTES = 32 * 1024**2 # 32 MB
MAX_IMAGE_BYTES = 5 * 1024**2 # 5 MB

# Alias used by FileReader tool
MAX_FILE_SIZE = MAX_FILE_BYTES

# Images wider than this (in pixels) are downscaled before being sent to the LLM.
IMAGE_MAX_WIDTH = 800
# Hard limit for images that can't be resized: Anthropic/Bedrock vision API supports up to 5MB
IMAGE_MAX_BASE64_BYTES = 5_000_000

# Extensions that are treated as binary without inspecting the file content.
BINARY_EXTENSIONS = [
  %w[.png .jpg .jpeg .gif .webp .bmp .tiff .ico .svg],
  %w[.pdf],
  %w[.zip .gz .tgz .tar .rar .7z],
  %w[.exe .dll .so .dylib],
  %w[.mp3 .mp4 .avi .mov .mkv .wav .flac],
  %w[.ttf .otf .woff .woff2],
  %w[.db .sqlite .bin .dat]
].flatten.freeze

GLOB_ALLOWED_BINARY_EXTENSIONS =
  %w[.pdf .doc .docx .ppt .pptx .xls .xlsx .odt .odp .ods].freeze

LLM_BINARY_EXTENSIONS = %w[.png .jpg .jpeg .gif .webp .pdf].freeze

# Extension => MIME type; anything not listed is handled by the callers' fallback.
MIME_TYPES = {
  ".png" => "image/png",
  ".jpg" => "image/jpeg",
  ".jpeg" => "image/jpeg",
  ".gif" => "image/gif",
  ".webp" => "image/webp",
  ".pdf" => "application/pdf"
}.freeze

# Extension => FileRef type symbol. Declared per type and expanded into a flat
# lookup table; the resulting key order is docx, doc, xlsx, ... txt, log.
FILE_TYPES = {
  document: %w[.docx .doc],
  spreadsheet: %w[.xlsx .xls],
  presentation: %w[.pptx .ppt],
  pdf: %w[.pdf],
  zip: %w[.zip .gz .tgz .tar .rar .7z],
  image: %w[.png .jpg .jpeg .gif .webp],
  csv: %w[.csv],
  text: %w[.md .markdown .txt .log]
}.each_with_object({}) { |(kind, exts), table| exts.each { |e| table[e] = kind } }.freeze

# Plain-text extensions whose raw content can be embedded directly as the
# preview (no external parser needed). Kept conservative to avoid pulling
# in huge source files by mistake.
TEXT_PREVIEW_EXTENSIONS = %w[.md .markdown .txt .log].freeze
|
|
79
|
+
|
|
80
|
+
# FileRef: result of process / process_path.
|
|
81
|
+
FileRef = Struct.new(
  :name, :type, :original_path, :preview_path, :parse_error, :parser_path,
  keyword_init: true
) do
  # True only when no preview was produced AND an error was recorded.
  # A ref with neither a preview nor an error does not count as failed.
  def parse_failed?
    return false unless preview_path.nil?

    !parse_error.nil?
  end
end
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Public API
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
# Store raw bytes to disk — no parsing.
|
|
92
|
+
# Used by http_server upload endpoint and channel adapters.
|
|
93
|
+
#
|
|
94
|
+
# @return [Hash] { name: String, path: String }
|
|
95
|
+
def self.save(body:, filename:)
  FileUtils.mkdir_p(UPLOAD_DIR)
  sanitize_filename(filename).then do |clean_name|
    # Stored name is "<16 random hex chars>_<sanitized name>" inside UPLOAD_DIR.
    target = File.join(UPLOAD_DIR, "#{SecureRandom.hex(8)}_#{clean_name}")
    File.binwrite(target, body)
    { name: clean_name, path: target }
  end
end
|
|
102
|
+
|
|
103
|
+
# Parse an already-saved file and return a FileRef.
|
|
104
|
+
# Called by agent.run for each disk file before building the prompt.
|
|
105
|
+
#
|
|
106
|
+
# @param path [String] Path to the file on disk
|
|
107
|
+
# @param name [String] Display name (defaults to basename)
|
|
108
|
+
# @return [FileRef]
|
|
109
|
+
def self.process_path(path, name: nil)
  name ||= File.basename(path.to_s)
  # The compound ".tar.gz" suffix is recognised on the display name so the file
  # is listed as a tarball, not a bare gzip. Every other extension is taken
  # from the on-disk path.
  ext =
    if name.to_s.downcase.end_with?(".tar.gz")
      ".tar.gz"
    else
      File.extname(path.to_s).downcase
    end
  type = FILE_TYPES[ext] || :file

  case ext
  when ".zip"
    # Report read/write failures through parse_error, exactly like the tarball
    # branch below, so callers always get a FileRef back instead of an exception.
    begin
      preview_content = parse_zip_listing(File.binread(path))
      preview_path = save_preview(preview_content, path)
      FileRef.new(name: name, type: :zip, original_path: path, preview_path: preview_path)
    rescue => e
      FileRef.new(name: name, type: :zip, original_path: path, parse_error: e.message)
    end

  when ".tar", ".tar.gz", ".tgz", ".gz"
    # Archive listing for tarballs and gzip'd files: the preview is a file-tree
    # listing rather than extracted content.
    begin
      preview_content = parse_tar_listing(path, ext)
      preview_path = save_preview(preview_content, path)
      FileRef.new(name: name, type: :zip, original_path: path, preview_path: preview_path)
    rescue => e
      FileRef.new(name: name, type: :zip, original_path: path, parse_error: e.message)
    end

  when ".png", ".jpg", ".jpeg", ".gif", ".webp"
    # Images get no preview file; only the original path is recorded.
    FileRef.new(name: name, type: :image, original_path: path)

  when ".csv"
    # CSV is plain text: the file itself is the preview. No parser, no copy.
    FileRef.new(name: name, type: :csv, original_path: path, preview_path: path)

  when *TEXT_PREVIEW_EXTENSIONS
    # Markdown / plain text / log: preview_path points at the original file.
    FileRef.new(name: name, type: :text, original_path: path, preview_path: path)

  else
    # Everything else goes through ParserManager, which reports success or
    # failure in its result hash.
    result = Utils::ParserManager.parse(path)
    if result[:success]
      preview_path = save_preview(result[:text], path)
      FileRef.new(name: name, type: type, original_path: path, preview_path: preview_path)
    else
      FileRef.new(name: name, type: type, original_path: path,
                  parse_error: result[:error], parser_path: result[:parser_path])
    end
  end
end
|
|
164
|
+
|
|
165
|
+
# Save + parse in one call (convenience method).
|
|
166
|
+
#
|
|
167
|
+
# @return [FileRef]
|
|
168
|
+
def self.process(body:, filename:)
  # Persist first, then parse the stored copy under its sanitized name.
  stored_name, stored_path = save(body: body, filename: filename).values_at(:name, :path)
  process_path(stored_path, name: stored_name)
end
|
|
172
|
+
|
|
173
|
+
# Save raw image bytes to disk and return a FileRef.
|
|
174
|
+
# Used by agent when an image exceeds MAX_IMAGE_BYTES and must be downgraded to disk.
|
|
175
|
+
def self.save_image_to_disk(body:, mime_type:, filename: "image.jpg")
  # The storage steps (mkdir, sanitize, random prefix, binwrite) are the same as
  # .save, so reuse it instead of keeping a second copy in sync.
  # mime_type is part of the signature but is not consulted here.
  stored = save(body: body, filename: filename)
  FileRef.new(name: stored[:name], type: :image, original_path: stored[:path])
end
|
|
182
|
+
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
# File type helpers (used by tools and agent)
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
# Heuristic binary check: true for a known binary extension, otherwise true
# when a NUL byte appears within the first 512 bytes of the file.
# Any error raised while inspecting the path yields false.
def self.binary_file_path?(path)
  return true if BINARY_EXTENSIONS.include?(File.extname(path).downcase)

  head = File.binread(path, 512).to_s # binread returns nil for an empty file
  head.include?("\x00")
rescue
  false
end
|
|
194
|
+
|
|
195
|
+
# True when the path's lowercased extension is listed in
# GLOB_ALLOWED_BINARY_EXTENSIONS.
def self.glob_allowed_binary?(path)
  suffix = File.extname(path).downcase
  GLOB_ALLOWED_BINARY_EXTENSIONS.include?(suffix)
end
|
|
198
|
+
|
|
199
|
+
# True when the path's lowercased extension is listed in LLM_BINARY_EXTENSIONS.
def self.supported_binary_file?(path)
  suffix = File.extname(path).downcase
  LLM_BINARY_EXTENSIONS.include?(suffix)
end
|
|
202
|
+
|
|
203
|
+
# Extension-based MIME lookup; the second argument is accepted but ignored.
# Unknown extensions map to "application/octet-stream".
def self.detect_mime_type(path, _data = nil)
  MIME_TYPES.fetch(File.extname(path).downcase, "application/octet-stream")
end
|
|
206
|
+
|
|
207
|
+
# Downscale a base64-encoded image so its width is at most max_width pixels.
|
|
208
|
+
#
|
|
209
|
+
# Strategy:
|
|
210
|
+
# PNG → chunky_png (pure Ruby, always available as gem dependency)
|
|
211
|
+
# other formats (JPG/WEBP/GIF) → sips on macOS, `convert` (ImageMagick) on Linux
|
|
212
|
+
# fallback (no CLI tool) → return as-is, but raise if larger than IMAGE_MAX_BASE64_BYTES
|
|
213
|
+
#
|
|
214
|
+
# @param b64 [String] base64-encoded image data
|
|
215
|
+
# @param mime_type [String] e.g. "image/png", "image/jpeg", "image/webp"
|
|
216
|
+
# @param max_width [Integer] maximum output width in pixels (default: IMAGE_MAX_WIDTH)
|
|
217
|
+
# @return [String] base64-encoded (possibly downscaled) image data
|
|
218
|
+
def self.downscale_image_base64(b64, mime_type, max_width: IMAGE_MAX_WIDTH)
  require "base64"

  # PNG goes through the chunky_png helper; every other type through the CLI helper.
  resized =
    mime_type == "image/png" ? downscale_png_chunky(b64, max_width) : downscale_via_cli(b64, mime_type, max_width)
  return resized if resized

  # Neither helper produced a result: pass the input through untouched unless
  # it is over the hard API size limit.
  return b64 unless b64.bytesize > IMAGE_MAX_BASE64_BYTES

  raise ArgumentError,
        "Image too large to send (#{b64.bytesize / 1024}KB > #{IMAGE_MAX_BASE64_BYTES / 1_000_000}MB). " \
        "Install ImageMagick (`brew install imagemagick`) to enable automatic resizing."
end
|
|
239
|
+
|
|
240
|
+
# Read a file and return it base64-encoded together with basic metadata.
#
# @param path [String] file to encode
# @return [Hash] { format:, mime_type:, size_bytes:, base64_data: } where
#   format is the extension without its dot and size_bytes is the on-disk size
# @raise [ArgumentError] when the file is larger than MAX_FILE_BYTES
def self.file_to_base64(path)
  require "base64"
  suffix = File.extname(path).downcase
  byte_count = File.size(path)
  raise ArgumentError, "File too large: #{path}" if byte_count > MAX_FILE_BYTES

  declared_mime = MIME_TYPES[suffix] || "application/octet-stream"
  bytes = File.binread(path)
  # For images, let the content decide the MIME type; the extension is only the fallback.
  mime =
    if declared_mime.start_with?("image/")
      detect_image_mime_type(bytes, declared_mime)
    else
      declared_mime
    end
  encoded = Base64.strict_encode64(bytes)
  # Images are downscaled before being handed to the LLM.
  encoded = downscale_image_base64(encoded, mime) if mime.start_with?("image/")
  { format: suffix[1..], mime_type: mime, size_bytes: byte_count, base64_data: encoded }
end
|
|
254
|
+
|
|
255
|
+
# Build a "data:<mime>;base64,<payload>" URL for an image on disk.
#
# @param path [String] image file
# @return [String] data URL
# @raise [ArgumentError] when the file is missing or larger than MAX_IMAGE_BYTES
def self.image_path_to_data_url(path)
  raise ArgumentError, "Image file not found: #{path}" unless File.exist?(path)

  byte_count = File.size(path)
  if byte_count > MAX_IMAGE_BYTES
    raise ArgumentError, "Image too large (#{byte_count / 1024}KB > #{MAX_IMAGE_BYTES / 1024}KB): #{path}"
  end

  require "base64"
  # The extension only provides a fallback guess; the bytes decide below.
  suffix = File.extname(path).downcase.delete(".")
  guessed_mime = {
    "jpg" => "image/jpeg",
    "jpeg" => "image/jpeg",
    "png" => "image/png",
    "gif" => "image/gif",
    "webp" => "image/webp"
  }.fetch(suffix) { "image/#{suffix}" }

  bytes = File.binread(path)
  mime = detect_image_mime_type(bytes, guessed_mime)
  # Downscale before sending to the LLM.
  payload = downscale_image_base64(Base64.strict_encode64(bytes), mime)
  "data:#{mime};base64,#{payload}"
end
|
|
279
|
+
|
|
280
|
+
# ---------------------------------------------------------------------------
|
|
281
|
+
# Private helpers
|
|
282
|
+
# ---------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
# Build a Markdown listing of the entries in a ZIP archive held in memory.
# Best effort: any error while reading the archive is folded into the
# returned text instead of being raised.
#
# @param body [String] raw ZIP bytes
# @return [String] "# ZIP Contents" heading followed by one "- name (N bytes)" line per entry
def self.parse_zip_listing(body)
  listing = ["# ZIP Contents\n"]
  Zip::InputStream.open(StringIO.new(body)) do |stream|
    while (item = stream.get_next_entry)
      byte_count = item.size
      suffix = byte_count ? " (#{byte_count} bytes)" : ""
      listing << "- #{item.name}#{suffix}"
    end
  end
  listing.join("\n")
rescue => e
  "# ZIP Contents\n(could not list entries: #{e.message})"
end
|
|
296
|
+
|
|
297
|
+
# List entries in a tarball or gzip file.
|
|
298
|
+
#
|
|
299
|
+
# Handles:
|
|
300
|
+
# .tar → raw tar reader
|
|
301
|
+
# .tar.gz/.tgz → gunzip stream + tar reader
|
|
302
|
+
# .gz → single gzipped file → show original filename + uncompressed size
|
|
303
|
+
def self.parse_tar_listing(path, ext)
  # Loaded lazily: only needed when an archive preview is actually requested.
  require "rubygems/package"
  require "zlib"

  # Reads a tar stream from +io+ and returns one "- [dir] name (N bytes)"
  # line per entry. Shared by the .tar, .tar.gz/.tgz and .gz branches.
  tar_entry_lines = lambda do |io|
    entries = []
    Gem::Package::TarReader.new(io) do |tar|
      tar.each do |entry|
        kind = entry.directory? ? "[dir] " : ""
        size = entry.header.size ? " (#{entry.header.size} bytes)" : ""
        entries << "- #{kind}#{entry.full_name}#{size}"
      end
    end
    entries
  end

  # Same as above, but gunzips +path+ on the fly first.
  gzipped_tar_entry_lines = lambda do
    File.open(path, "rb") do |file|
      Zlib::GzipReader.wrap(file) { |gz| tar_entry_lines.call(gz) }
    end
  end

  case ext
  when ".tar"
    entries = File.open(path, "rb") { |file| tar_entry_lines.call(file) }
    ["# TAR Contents\n", *entries].join("\n")

  when ".tar.gz", ".tgz"
    ["# TAR.GZ Contents\n", *gzipped_tar_entry_lines.call].join("\n")

  when ".gz"
    # Could be a gzipped tar with a misleading extension, or a single-file gzip.
    # Try tar first; on failure (or zero entries) fall back to single-file metadata.
    entries = begin
      gzipped_tar_entry_lines.call
    rescue StandardError
      []
    end
    return ["# TAR.GZ Contents\n", *entries].join("\n") unless entries.empty?

    # Single-file gzip: report the original filename (if recorded) and the
    # compressed/uncompressed sizes. Reading stops once more than 64MB has been
    # inflated so a pathological input cannot run unbounded; the preview only
    # needs a size estimate, not the content.
    limit = 64 * 1024 * 1024
    original_name = nil
    uncompressed = 0
    File.open(path, "rb") do |file|
      Zlib::GzipReader.wrap(file) do |gz|
        original_name = gz.orig_name
        while (chunk = gz.read(1024 * 1024))
          uncompressed += chunk.bytesize
          break if uncompressed > limit
        end
      end
    end

    [
      "# GZIP Contents\n",
      "- Original filename: #{original_name || "(not recorded)"}",
      "- Compressed size: #{File.size(path)} bytes",
      "- Uncompressed size: #{uncompressed} bytes#{uncompressed > limit ? " (truncated)" : ""}"
    ].join("\n")
  end
  # Any other +ext+ matches no branch and yields nil.
rescue => e
  # Best-effort preview: describe the failure instead of raising.
  "# Archive Contents\n(could not list entries: #{e.message})"
end
|
|
386
|
+
|
|
387
|
+
# Persist a generated Markdown preview next to the uploaded files.
#
# @param content [String] preview text to write
# @param original_path [String, #to_s] path of the file being previewed; only
#   its basename is used to name the preview
# @return [String] path of the written "<hex>_<name>.preview.md" file
def self.save_preview(content, original_path)
  # Always write previews into UPLOAD_DIR to avoid polluting the user's
  # working directory with .preview.md sidecar files. On-disk files outside
  # that dir (e.g. project files opened by file_reader) still land in
  # UPLOAD_DIR so the user's tree stays clean.
  FileUtils.mkdir_p(UPLOAD_DIR)

  # Replace characters that are unsafe on common filesystems. The backslash is
  # included so the set matches the one sanitize_filename strips.
  safe_name = File.basename(original_path.to_s).gsub(/[\/\\:*?"<>|\x00]/, "_")

  # Random prefix keeps previews of same-named files from overwriting each other.
  dest = File.join(UPLOAD_DIR, "#{SecureRandom.hex(8)}_#{safe_name}.preview.md")
  File.write(dest, content)
  dest
end
|
|
399
|
+
|
|
400
|
+
# Reduce a client-supplied filename to a safe basename.
#
# @param name [String, #to_s] filename as received (may include directories)
# @return [String] sanitized basename, or "upload" when nothing is left
def self.sanitize_filename(name)
  # Keep Unicode letters/digits (including CJK), ASCII word chars, dots, hyphens, spaces.
  # Only strip characters that are unsafe on common filesystems: / \ : * ? " < > | \0
  # to_utf8 first: HTTP multipart headers arrive as ASCII-8BIT on Ruby 2.6,
  # and regex matching against ASCII-8BIT raises "invalid byte sequence in UTF-8".
  utf8_name = Octo::Utils::Encoding.to_utf8(name.to_s)
  base = File.basename(utf8_name).gsub(/[\/\\:*?"<>|\x00]/, '_').strip
  base.empty? ? 'upload' : base
end
|
|
410
|
+
|
|
411
|
+
# Detect the actual image MIME type from raw binary data by inspecting
|
|
412
|
+
# magic bytes, ignoring the file extension. Falls back to extension-based
|
|
413
|
+
# detection when magic bytes don't match any known format.
|
|
414
|
+
#
|
|
415
|
+
# Handles: PNG, JPEG, GIF, WEBP, BMP, TIFF
|
|
416
|
+
#
|
|
417
|
+
# @param data [String] raw binary data (first 12 bytes is sufficient)
|
|
418
|
+
# @param fallback_mime [String] MIME type from extension, used as fallback
|
|
419
|
+
# @return [String] detected MIME type (e.g. "image/png", "image/jpeg")
|
|
420
|
+
def self.detect_image_mime_type(data, fallback_mime = "image/png")
  # Fewer than 4 bytes cannot be matched against the 4-byte signatures below.
  return fallback_mime if data.nil? || data.bytesize < 4

  # Only the first 12 bytes are ever inspected. Slice by *byte* offset before
  # expanding to Integers: String#bytes on a whole image would allocate one
  # array slot per byte, and character-based indexing would pick the wrong
  # bytes if +data+ is not tagged as binary.
  header = data.byteslice(0, 12).bytes
  magic = header.first(4)

  if magic == [0x89, 0x50, 0x4E, 0x47]
    # PNG: \x89 P N G \r \n \x1a \n
    "image/png"
  elsif header.first(3) == [0xFF, 0xD8, 0xFF]
    # JPEG: \xFF \xD8 \xFF
    "image/jpeg"
  elsif magic == [0x47, 0x49, 0x46, 0x38]
    # GIF: "GIF8" prefix of GIF87a / GIF89a
    "image/gif"
  elsif magic == [0x52, 0x49, 0x46, 0x46] && header[8, 4] == [0x57, 0x45, 0x42, 0x50]
    # WEBP: "RIFF" <4-byte size> "WEBP" — needs the full 12 bytes; a shorter
    # header yields nil or a short slice here and does not match.
    "image/webp"
  elsif header.first(2) == [0x42, 0x4D]
    # BMP: "BM"
    "image/bmp"
  elsif magic == [0x49, 0x49, 0x2A, 0x00] || magic == [0x4D, 0x4D, 0x00, 0x2A]
    # TIFF: II*\x00 (little-endian) or MM\x00* (big-endian)
    "image/tiff"
  else
    fallback_mime
  end
end
|
|
450
|
+
|
|
451
|
+
# ---------------------------------------------------------------------------
|
|
452
|
+
# Image downscale helpers (private)
|
|
453
|
+
# ---------------------------------------------------------------------------
|
|
454
|
+
|
|
455
|
+
# Downscale a PNG using chunky_png (pure Ruby — always available).
|
|
456
|
+
# Returns downscaled base64, the original base64 if already within max_width,
# or nil if the image cannot be decoded or resampled.
|
|
457
|
+
def self.downscale_png_chunky(b64, max_width)
  # Loaded lazily so the dependency is only paid for when a PNG is processed.
  require "chunky_png"
  require "base64"

  image = ChunkyPNG::Image.from_blob(Base64.strict_decode64(b64))
  # Already narrow enough: hand back the caller's string untouched.
  return b64 if image.width <= max_width

  src_w = image.width
  src_h = image.height
  # Keep the aspect ratio. Clamp to 1px so an extremely wide, short image
  # does not round down to a zero-height target.
  dst_h = [(src_h * max_width.to_f / src_w).round, 1].max
  image.resample_nearest_neighbor!(max_width, dst_h)

  result = Base64.strict_encode64(image.to_blob)
  Octo::Logger.debug("image_downscaled",
                     format: "png",
                     from: "#{src_w}x#{src_h} (#{b64.bytesize / 1024}KB)",
                     to: "#{max_width}x#{dst_h} (#{result.bytesize / 1024}KB)")
  result
rescue => e
  # Decode/resample failure is non-fatal: log it and signal "no result" with nil.
  Octo::Logger.debug("image_downscale_skipped", format: "png", reason: e.message)
  nil
end
|
|
478
|
+
|
|
479
|
+
# Downscale a non-PNG image using CLI tools:
|
|
480
|
+
# macOS → sips (built-in, no extra deps)
|
|
481
|
+
# Linux → convert (ImageMagick, must be installed)
|
|
482
|
+
# Returns downscaled base64, or nil if no tool is available.
|
|
483
|
+
def self.downscale_via_cli(b64, mime_type, max_width)
  require "base64"
  require "tmpdir"

  # Derive a file extension from the MIME subtype so the CLI tool can infer
  # the format ("image/jpeg" → "jpg").
  ext = mime_type.split("/").last
  ext = "jpg" if ext == "jpeg"

  # Work inside a throwaway directory; it is removed when the block exits.
  Dir.mktmpdir("octo-img") do |dir|
    input = File.join(dir, "input.#{ext}")
    output = File.join(dir, "output.#{ext}")
    File.binwrite(input, Base64.strict_decode64(b64))

    before_kb = b64.bytesize / 1024

    success =
      if RUBY_PLATFORM.include?("darwin")
        # macOS: sips ships with the OS.
        system("sips", "-Z", max_width.to_s, input, "--out", output,
               out: File::NULL, err: File::NULL)
      elsif system("which convert > /dev/null 2>&1")
        # Linux/other: ImageMagick convert; the "N x>" geometry only shrinks.
        system("convert", input, "-resize", "#{max_width}x>",
               output, out: File::NULL, err: File::NULL)
      else
        # No resize tool installed.
        false
      end

    # Treat a missing or empty output file the same as a failed command.
    return nil unless success && File.exist?(output) && File.size(output) > 0

    result = Base64.strict_encode64(File.binread(output))
    Octo::Logger.debug("image_downscaled",
                       format: ext,
                       from: "#{before_kb}KB",
                       to: "#{result.bytesize / 1024}KB (max #{max_width}px wide)")
    result
  end
rescue => e
  # Non-fatal: log and return nil.
  Octo::Logger.debug("image_downscale_skipped", mime: mime_type, reason: e.message)
  nil
end
|
|
525
|
+
|
|
526
|
+
# Image extensions that can be inlined as data URLs in markdown content.
|
|
527
|
+
LOCAL_IMAGE_EXTENSIONS = %w[.png .jpg .jpeg .gif .webp].freeze
|
|
528
|
+
|
|
529
|
+
# Replace local image paths in markdown content with base64 data URLs.
|
|
530
|
+
#
|
|
531
|
+
# Handles both `file:///path/to/img.png` and bare `/path/to/img.png` in
|
|
532
|
+
# markdown image syntax `![alt](/path/to/img.png)`.
|
|
533
|
+
#
|
|
534
|
+
# @param content [String] markdown text potentially containing local image references
|
|
535
|
+
# @return [String] content with local images replaced by data URLs
|
|
536
|
+
def self.inline_local_images(content)
  return content if content.nil? || content.empty?

  # Capture groups: 1 = "![alt]", 2 = optional "file://" scheme, 3 = absolute path.
  content.gsub(%r{(!\[[^\]]*\])\((file://)?(/[^)]+)\)}) do
    found = Regexp.last_match
    full_match = found[0]
    prefix = found[1]

    # Percent-decode the path. CGI.unescape is form-decoding and would turn a
    # literal "+" into a space, so protect "+" first; otherwise files such as
    # "a+b.png" are never found on disk.
    path = CGI.unescape(found[3].gsub("+", "%2B"))
    ext = File.extname(path).downcase

    # Leave anything that is not an existing local image exactly as written.
    next full_match unless LOCAL_IMAGE_EXTENSIONS.include?(ext) && File.exist?(path)

    begin
      data_url = image_path_to_data_url(path)
      Octo::Logger.info("file_processor.inline_local_images", path: path, size: File.size(path))
      "#{prefix}(#{data_url})"
    rescue StandardError => e
      # Conversion failed (e.g. image too large): keep the original reference.
      Octo::Logger.warn("file_processor.inline_local_images.failed", path: path, error: e.message)
      full_match
    end
  end
end
|
|
561
|
+
|
|
562
|
+
private_class_method :parse_zip_listing, :parse_tar_listing, :save_preview, :sanitize_filename,
|
|
563
|
+
:downscale_png_chunky, :downscale_via_cli
|
|
564
|
+
|
|
565
|
+
# -------------------------------------------------------------------------
|
|
566
|
+
# Local image URL rewriting
|
|
567
|
+
# -------------------------------------------------------------------------
|
|
568
|
+
|
|
569
|
+
# Rewrite local image paths in markdown content to use the /api/local-image proxy.
|
|
570
|
+
#
|
|
571
|
+
# Matches two patterns inside `![alt](...)`:
#   1. file:// URLs        → ![alt](file:///path/to/img.png)
#   2. bare absolute paths → ![alt](/path/to/img.png)
|
|
574
|
+
#
|
|
575
|
+
# https:// URLs and non-image files are left untouched.
|
|
576
|
+
#
|
|
577
|
+
# @param content [String, nil] markdown text
|
|
578
|
+
# @return [String, nil] rewritten content (or original if nothing matched)
|
|
579
|
+
def self.rewrite_local_image_urls(content)
  return content if content.nil? || content.empty?

  # Capture groups: 1 = alt text, 2 = href (optional "file://" + absolute path).
  content.gsub(/!\[([^\]]*)\]\(((?:file:\/\/)?\/[^)]+)\)/) do |match|
    alt = Regexp.last_match(1)
    href = Regexp.last_match(2)

    # Extract the filesystem path from the href. "+" is protected before
    # decoding because CGI.unescape is form-decoding and would otherwise turn
    # a literal "+" in a filename into a space.
    path = CGI.unescape(href.sub(%r{\Afile://}, "").gsub("+", "%2B"))
    ext = File.extname(path).downcase

    # Remote URLs never match the pattern; non-image or missing files are
    # returned unchanged.
    next match unless LOCAL_IMAGE_EXTENSIONS.include?(ext) && File.exist?(path)

    # Point the image at the local-image proxy, passing the original href
    # URL-encoded. Previously this branch produced an empty string, which
    # deleted the image and left `alt`/`encoded` unused.
    # NOTE(review): the query parameter name `path` is assumed — confirm it
    # against the /api/local-image handler.
    encoded = CGI.escape(href)
    "![#{alt}](/api/local-image?path=#{encoded})"
  end
end
|
|
599
|
+
end
|
|
600
|
+
end
|
|
601
|
+
end
|