octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,601 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tmpdir"
4
+ require "fileutils"
5
+ require "securerandom"
6
+ require "stringio"
7
+
8
+ require_relative "parser_manager"
9
+ require "zip"
10
+
11
+ module Octo
12
+ module Utils
13
+ # File processing pipeline.
14
+ #
15
+ # Two entry points:
16
+ # FileProcessor.save(body:, filename:)
17
+ # → Store raw bytes to disk only. Returns { name:, path: }.
18
+ # Used by http_server and channel adapters — no parsing here.
19
+ #
20
+ # FileProcessor.process_path(path, name: nil)
21
+ # → Parse an already-saved file. Returns FileRef (with preview_path or parse_error).
22
+ # Used by agent.run when building the file prompt.
23
+ #
24
+ # (FileProcessor.process = save + process_path in one call, for convenience.)
25
+ module FileProcessor
26
+ UPLOAD_DIR = File.join(Dir.tmpdir, "octo-uploads").freeze
27
+ MAX_FILE_BYTES = 32 * 1024 * 1024 # 32 MB
28
+ MAX_IMAGE_BYTES = 5 * 1024 * 1024 # 5 MB
29
+
30
+ # Alias used by FileReader tool
31
+ MAX_FILE_SIZE = MAX_FILE_BYTES
32
+
33
+ # Images wider than this will be downscaled before sending to LLM (pixels)
34
+ IMAGE_MAX_WIDTH = 800
35
+ # Hard limit for images that can't be resized: Anthropic/Bedrock vision API supports up to 5MB
36
+ IMAGE_MAX_BASE64_BYTES = 5_000_000
37
+
38
+ BINARY_EXTENSIONS = %w[
39
+ .png .jpg .jpeg .gif .webp .bmp .tiff .ico .svg
40
+ .pdf
41
+ .zip .gz .tgz .tar .rar .7z
42
+ .exe .dll .so .dylib
43
+ .mp3 .mp4 .avi .mov .mkv .wav .flac
44
+ .ttf .otf .woff .woff2
45
+ .db .sqlite .bin .dat
46
+ ].freeze
47
+
48
+ GLOB_ALLOWED_BINARY_EXTENSIONS = %w[
49
+ .pdf .doc .docx .ppt .pptx .xls .xlsx .odt .odp .ods
50
+ ].freeze
51
+
52
+ LLM_BINARY_EXTENSIONS = %w[.png .jpg .jpeg .gif .webp .pdf].freeze
53
+
54
+ MIME_TYPES = {
55
+ ".png" => "image/png",
56
+ ".jpg" => "image/jpeg",
57
+ ".jpeg" => "image/jpeg",
58
+ ".gif" => "image/gif",
59
+ ".webp" => "image/webp",
60
+ ".pdf" => "application/pdf"
61
+ }.freeze
62
+
63
+ FILE_TYPES = {
64
+ ".docx" => :document, ".doc" => :document,
65
+ ".xlsx" => :spreadsheet, ".xls" => :spreadsheet,
66
+ ".pptx" => :presentation, ".ppt" => :presentation,
67
+ ".pdf" => :pdf,
68
+ ".zip" => :zip, ".gz" => :zip, ".tgz" => :zip, ".tar" => :zip, ".rar" => :zip, ".7z" => :zip,
69
+ ".png" => :image, ".jpg" => :image, ".jpeg" => :image,
70
+ ".gif" => :image, ".webp" => :image,
71
+ ".csv" => :csv,
72
+ ".md" => :text, ".markdown" => :text, ".txt" => :text, ".log" => :text
73
+ }.freeze
74
+
75
+ # Plain-text extensions whose raw content can be embedded directly as the
76
+ # preview (no external parser needed). Kept conservative to avoid pulling
77
+ # in huge source files by mistake.
78
+ TEXT_PREVIEW_EXTENSIONS = %w[.md .markdown .txt .log].freeze
79
+
80
# FileRef: value object returned by process / process_path.
#
# Members (all keyword-initialised, any may be left nil):
#   name, type, original_path — identify the file that was handled
#   preview_path              — where a readable preview lives, when one exists
#   parse_error, parser_path  — failure details, when parsing did not succeed
FileRef = Struct.new(
  :name, :type, :original_path, :preview_path, :parse_error, :parser_path,
  keyword_init: true
) do
  # True only when an error was recorded AND no preview is available.
  def parse_failed?
    !parse_error.nil? && preview_path.nil?
  end
end
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Public API
89
+ # ---------------------------------------------------------------------------
90
+
91
# Persist raw bytes under UPLOAD_DIR without attempting to parse them.
# Used by http_server upload endpoint and channel adapters.
#
# The stored file name is the sanitized filename prefixed with 16 random hex
# characters, so repeated uploads of the same name do not collide.
#
# @param body [String] raw bytes to write
# @param filename [String] client-supplied name (passed through sanitize_filename)
# @return [Hash] { name: String, path: String }
def self.save(body:, filename:)
  FileUtils.mkdir_p(UPLOAD_DIR)
  clean_name = sanitize_filename(filename)
  unique_name = "#{SecureRandom.hex(8)}_#{clean_name}"
  target = File.join(UPLOAD_DIR, unique_name)
  File.binwrite(target, body)
  { name: clean_name, path: target }
end
102
+
103
# Parse an already-saved file and return a FileRef.
# Called by agent.run for each disk file before building the prompt.
#
# Dispatch is by lowercase extension:
#   .zip                      → entry listing saved as a preview
#   .tar / .tar.gz / .tgz / .gz → archive listing saved as a preview
#   images                    → no preview, only the original path
#   .csv and plain-text types → the file itself is the preview
#   anything else             → handed to Utils::ParserManager
#
# Archive failures never raise: both the zip and the tarball branch report
# problems (unreadable file, preview that cannot be written, ...) through
# FileRef#parse_error so one bad attachment cannot abort the caller.
#
# @param path [String] Path to the file on disk
# @param name [String, nil] Display name (defaults to the basename of path)
# @return [FileRef]
def self.process_path(path, name: nil)
  name ||= File.basename(path.to_s)
  # Use compound extension for .tar.gz so it's treated as a tarball, not gzip.
  # File.extname alone would only report ".gz".
  ext =
    if name.to_s.downcase.end_with?(".tar.gz")
      ".tar.gz"
    else
      File.extname(path.to_s).downcase
    end

  case ext
  when ".zip"
    # Same failure contract as the tarball branch below: previously a missing
    # or unreadable zip (or a failed preview write) raised out of this method.
    begin
      listing = parse_zip_listing(File.binread(path))
      FileRef.new(name: name, type: :zip, original_path: path,
                  preview_path: save_preview(listing, path))
    rescue => e
      FileRef.new(name: name, type: :zip, original_path: path, parse_error: e.message)
    end

  when ".tar", ".tar.gz", ".tgz", ".gz"
    # Archive listing for tarballs and gzip'd files. Provides the LLM a
    # file-tree preview so it can decide whether to ask the user to
    # extract them (via the shell tool).
    begin
      listing = parse_tar_listing(path, ext)
      FileRef.new(name: name, type: :zip, original_path: path,
                  preview_path: save_preview(listing, path))
    rescue => e
      FileRef.new(name: name, type: :zip, original_path: path, parse_error: e.message)
    end

  when ".png", ".jpg", ".jpeg", ".gif", ".webp"
    FileRef.new(name: name, type: :image, original_path: path)

  when ".csv"
    # CSV is plain text — the file itself IS the preview. No parser, no copy.
    FileRef.new(name: name, type: :csv, original_path: path, preview_path: path)

  when *TEXT_PREVIEW_EXTENSIONS
    # Markdown / plain text / log: the file itself IS the preview.
    # No parser needed, no tmpdir copy — just point preview_path at the original.
    FileRef.new(name: name, type: :text, original_path: path, preview_path: path)

  else
    # Unknown extensions fall back to the generic :file type.
    type = FILE_TYPES[ext] || :file
    result = Utils::ParserManager.parse(path)
    if result[:success]
      FileRef.new(name: name, type: type, original_path: path,
                  preview_path: save_preview(result[:text], path))
    else
      FileRef.new(name: name, type: type, original_path: path,
                  parse_error: result[:error], parser_path: result[:parser_path])
    end
  end
end
164
+
165
# Convenience wrapper: store the bytes with save, then parse the stored file
# with process_path, using the sanitized name as the display name.
#
# @param body [String] raw file bytes
# @param filename [String] client-supplied file name
# @return [FileRef]
def self.process(body:, filename:)
  stored_name, stored_path = save(body: body, filename: filename).values_at(:name, :path)
  process_path(stored_path, name: stored_name)
end
172
+
173
# Write raw image bytes into UPLOAD_DIR and describe the stored file.
# Used by agent when an image exceeds MAX_IMAGE_BYTES and must be downgraded to disk.
#
# @param body [String] raw image bytes
# @param mime_type [String] required keyword; not read by this method
# @param filename [String] desired file name (passed through sanitize_filename)
# @return [FileRef] type :image, original_path pointing at the stored file
def self.save_image_to_disk(body:, mime_type:, filename: "image.jpg")
  FileUtils.mkdir_p(UPLOAD_DIR)
  clean_name = sanitize_filename(filename)
  unique_name = "#{SecureRandom.hex(8)}_#{clean_name}"
  target = File.join(UPLOAD_DIR, unique_name)
  File.binwrite(target, body)
  FileRef.new(name: clean_name, type: :image, original_path: target)
end
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # File type helpers (used by tools and agent)
185
+ # ---------------------------------------------------------------------------
186
+
187
# Decide whether a path should be treated as binary.
#
# A listed extension (BINARY_EXTENSIONS) answers immediately without touching
# the disk; otherwise the first 512 bytes are sniffed for a NUL byte.
# Any error (missing file, unreadable path, non-string argument) yields false.
#
# @param path [String]
# @return [Boolean]
def self.binary_file_path?(path)
  BINARY_EXTENSIONS.include?(File.extname(path).downcase) ||
    File.binread(path, 512).to_s.include?("\x00")
rescue
  false
end
194
+
195
# True when the path's lowercase extension is listed in
# GLOB_ALLOWED_BINARY_EXTENSIONS.
#
# @param path [String]
# @return [Boolean]
def self.glob_allowed_binary?(path)
  extension = File.extname(path).downcase
  GLOB_ALLOWED_BINARY_EXTENSIONS.include?(extension)
end
198
+
199
# True when the path's lowercase extension is listed in LLM_BINARY_EXTENSIONS.
#
# @param path [String]
# @return [Boolean]
def self.supported_binary_file?(path)
  extension = File.extname(path).downcase
  LLM_BINARY_EXTENSIONS.include?(extension)
end
202
+
203
# Map a path's lowercase extension to a MIME type via MIME_TYPES.
# Extensions that are not listed resolve to "application/octet-stream".
#
# @param path [String]
# @param _data [Object] ignored; accepted so callers may pass file bytes
# @return [String]
def self.detect_mime_type(path, _data = nil)
  extension = File.extname(path).downcase
  MIME_TYPES.fetch(extension, "application/octet-stream")
end
206
+
207
# Shrink a base64-encoded image so it is at most max_width pixels wide.
#
# Resizing is delegated:
#   "image/png"      → downscale_png_chunky
#   any other type   → downscale_via_cli
# When the chosen helper returns nil/false (nothing could resize the image)
# the input is handed back untouched, unless it exceeds
# IMAGE_MAX_BASE64_BYTES, in which case sending it is refused.
#
# @param b64 [String] base64-encoded image data
# @param mime_type [String] e.g. "image/png", "image/jpeg", "image/webp"
# @param max_width [Integer] maximum output width in pixels (default: IMAGE_MAX_WIDTH)
# @return [String] base64-encoded (possibly downscaled) image data
# @raise [ArgumentError] when no resize happened and b64 is over the size limit
def self.downscale_image_base64(b64, mime_type, max_width: IMAGE_MAX_WIDTH)
  require "base64"

  resized =
    mime_type == "image/png" ? downscale_png_chunky(b64, max_width) : downscale_via_cli(b64, mime_type, max_width)
  return resized if resized

  # Nothing resized the image: small payloads pass through unchanged.
  return b64 unless b64.bytesize > IMAGE_MAX_BASE64_BYTES

  kilobytes = b64.bytesize / 1024
  megabyte_cap = IMAGE_MAX_BASE64_BYTES / 1_000_000
  raise ArgumentError,
        "Image too large to send (#{kilobytes}KB > #{megabyte_cap}MB). " \
        "Install ImageMagick (`brew install imagemagick`) to enable automatic resizing."
end
239
+
240
# Read a file and return it base64-encoded together with basic metadata.
#
# For extensions that MIME_TYPES maps to an image type, the real format is
# taken from detect_image_mime_type (the extension is only a fallback) and the
# encoded payload is passed through downscale_image_base64.
#
# @param path [String]
# @return [Hash] { format:, mime_type:, size_bytes:, base64_data: } where
#   format is the lowercase extension without its dot (nil when there is none)
# @raise [ArgumentError] when the file is larger than MAX_FILE_BYTES
def self.file_to_base64(path)
  require "base64"

  extension = File.extname(path).downcase
  byte_count = File.size(path)
  raise ArgumentError, "File too large: #{path}" if byte_count > MAX_FILE_BYTES

  declared_mime = MIME_TYPES[extension] || "application/octet-stream"
  bytes = File.binread(path)
  # Detect actual image format from magic bytes (ignore misleading extensions)
  mime =
    if declared_mime.start_with?("image/")
      detect_image_mime_type(bytes, declared_mime)
    else
      declared_mime
    end

  encoded = Base64.strict_encode64(bytes)
  # Downscale images before sending to LLM to reduce token cost
  encoded = downscale_image_base64(encoded, mime) if mime.start_with?("image/")

  { format: extension[1..], mime_type: mime, size_bytes: byte_count, base64_data: encoded }
end
254
+
255
# Build a "data:<mime>;base64,<payload>" URL for an image on disk.
#
# The extension only supplies a fallback MIME guess; the final type comes
# from detect_image_mime_type, and the payload is passed through
# downscale_image_base64 before being embedded.
#
# @param path [String]
# @return [String] data URL
# @raise [ArgumentError] when the file does not exist or is larger than MAX_IMAGE_BYTES
def self.image_path_to_data_url(path)
  raise ArgumentError, "Image file not found: #{path}" unless File.exist?(path)

  byte_count = File.size(path)
  if byte_count > MAX_IMAGE_BYTES
    raise ArgumentError, "Image too large (#{byte_count / 1024}KB > #{MAX_IMAGE_BYTES / 1024}KB): #{path}"
  end

  require "base64"

  # Extension-based guess as fallback only
  suffix = File.extname(path).downcase.delete(".")
  known_mimes = {
    "jpg" => "image/jpeg",
    "jpeg" => "image/jpeg",
    "png" => "image/png",
    "gif" => "image/gif",
    "webp" => "image/webp"
  }
  guessed_mime = known_mimes.fetch(suffix) { "image/#{suffix}" }

  bytes = File.binread(path)
  # Detect actual image format from magic bytes (ignore misleading extensions)
  mime = detect_image_mime_type(bytes, guessed_mime)
  # Downscale images before sending to LLM to reduce token cost
  payload = downscale_image_base64(Base64.strict_encode64(bytes), mime)
  "data:#{mime};base64,#{payload}"
end
279
+
280
+ # ---------------------------------------------------------------------------
281
+ # Private helpers
282
+ # ---------------------------------------------------------------------------
283
+
284
# Render a Markdown-style listing of the entries in a ZIP archive.
#
# The archive is read from memory via Zip::InputStream; one "- name (N bytes)"
# line is produced per entry. Any error while reading is folded into the
# returned text instead of being raised.
#
# @param body [String] raw bytes of the ZIP file
# @return [String] listing text, always starting with "# ZIP Contents"
def self.parse_zip_listing(body)
  listing = ["# ZIP Contents\n"]
  Zip::InputStream.open(StringIO.new(body)) do |stream|
    entry = stream.get_next_entry
    while entry
      byte_count = entry.size
      suffix = byte_count ? " (#{byte_count} bytes)" : ""
      listing << "- #{entry.name}#{suffix}"
      entry = stream.get_next_entry
    end
  end
  listing.join("\n")
rescue => e
  "# ZIP Contents\n(could not list entries: #{e.message})"
end
296
+
297
# List entries in a tarball or gzip file.
#
# Handles:
#   .tar          → raw tar reader
#   .tar.gz/.tgz  → gunzip stream + tar reader
#   .gz           → tried as a gzipped tarball first; if that yields no
#                   entries or fails, reported as a single gzipped file
#                   (original filename + compressed/uncompressed sizes)
#
# Any other ext returns nil. Errors are never raised: they are folded into
# the returned text under an "# Archive Contents" header.
#
# @param path [String] archive on disk
# @param ext [String] lowercase extension selecting the strategy
# @return [String, nil] listing text
def self.parse_tar_listing(path, ext)
  require "rubygems/package"
  require "zlib"

  case ext
  when ".tar"
    lines = ["# TAR Contents\n"]
    File.open(path, "rb") { |file| collect_tar_entry_lines(file, lines) }
    lines.join("\n")

  when ".tar.gz", ".tgz"
    lines = ["# TAR.GZ Contents\n"]
    File.open(path, "rb") do |file|
      Zlib::GzipReader.wrap(file) { |gz| collect_tar_entry_lines(gz, lines) }
    end
    lines.join("\n")

  when ".gz"
    # Could be gzipped-tar with a misleading extension, or a single-file gzip.
    # Try tar first; on failure, fall back to single-file metadata.
    begin
      lines = ["# TAR.GZ Contents\n"]
      File.open(path, "rb") do |file|
        Zlib::GzipReader.wrap(file) { |gz| collect_tar_entry_lines(gz, lines) }
      end
      # More than the header line means at least one tar entry was read.
      return lines.join("\n") if lines.size > 1
    rescue StandardError
      # fall through to single-file gzip handling
    end

    gzip_single_file_summary(path)
  end
rescue => e
  "# Archive Contents\n(could not list entries: #{e.message})"
end

# Internal helper: append one "- [dir] name (N bytes)" line per tar entry
# read from io to lines. The "[dir] " marker is only added for directories.
def self.collect_tar_entry_lines(io, lines)
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      kind = entry.directory? ? "[dir] " : ""
      size = entry.header.size ? " (#{entry.header.size} bytes)" : ""
      lines << "- #{kind}#{entry.full_name}#{size}"
    end
  end
end

# Internal helper: describe a single-file gzip — the original filename (if
# recorded in the gzip header) plus compressed and uncompressed sizes.
def self.gzip_single_file_summary(path)
  # Read fully to get the uncompressed size. Guarded: stop after 64MB
  # to avoid blowing memory on pathological inputs — the preview only
  # needs a size estimate, not the content.
  limit = 64 * 1024 * 1024
  original_name = nil
  uncompressed = 0
  File.open(path, "rb") do |file|
    Zlib::GzipReader.wrap(file) do |gz|
      original_name = gz.orig_name
      while (chunk = gz.read(1024 * 1024))
        uncompressed += chunk.bytesize
        break if uncompressed > limit
      end
    end
  end

  truncated = uncompressed > limit ? " (truncated)" : ""
  [
    "# GZIP Contents\n",
    "- Original filename: #{original_name || "(not recorded)"}",
    "- Compressed size: #{File.size(path)} bytes",
    "- Uncompressed size: #{uncompressed} bytes#{truncated}"
  ].join("\n")
end
386
+
387
# Write preview text for a file into UPLOAD_DIR and return the new file's path.
#
# Previews always go to UPLOAD_DIR rather than next to the original file, so
# on-disk files outside that directory (e.g. project files opened by
# file_reader) do not get .preview.md sidecar files in the user's tree.
#
# @param content [String] preview text to write
# @param original_path [#to_s] path of the file being previewed; only its
#   basename is used, with filesystem-unsafe characters replaced by "_"
# @return [String] path of the written "<16 hex chars>_<name>.preview.md" file
def self.save_preview(content, original_path)
  FileUtils.mkdir_p(UPLOAD_DIR)
  # Same unsafe set as sanitize_filename: / \ : * ? " < > | and NUL. The
  # backslash must be written as "\\" inside the character class; "\:" only
  # matches a colon and used to let backslashes through.
  safe_name = File.basename(original_path.to_s).gsub(/[\/\\:*?"<>|\x00]/, "_")
  dest = File.join(UPLOAD_DIR, "#{SecureRandom.hex(8)}_#{safe_name}.preview.md")
  File.write(dest, content)
  dest
end
399
+
400
# Turn a client-supplied file name into one that is safe to use on disk.
#
# Unicode letters/digits (including CJK), ASCII word characters, dots, hyphens
# and spaces are kept. Only characters that are unsafe on common filesystems
# are replaced with "_": / \ : * ? " < > | and NUL.
#
# @param name [#to_s] file name as received
# @return [String] the cleaned base name, or "upload" when nothing is left
def self.sanitize_filename(name)
  # Convert to UTF-8 before matching: HTTP multipart headers arrive as
  # ASCII-8BIT on Ruby 2.6, and regex matching against ASCII-8BIT raises
  # "invalid byte sequence in UTF-8".
  utf8_name = Octo::Utils::Encoding.to_utf8(name.to_s)
  cleaned = File.basename(utf8_name).gsub(/[\/\\:\*?"<>|\x00]/, '_').strip
  return 'upload' if cleaned.empty?

  cleaned
end
410
+
411
# Detect the actual image MIME type from raw binary data by inspecting
# magic bytes, ignoring the file extension. Falls back to the supplied
# MIME type when the magic bytes don't match any known format.
#
# Handles: PNG, JPEG, GIF, WEBP, BMP, TIFF
#
# Only the first 12 bytes are examined, and every comparison is done on byte
# values, so the result does not depend on the string's encoding tag or size.
#
# @param data [String, nil] raw image data (the first 12 bytes are sufficient)
# @param fallback_mime [String] MIME type from extension, used as fallback
# @return [String] detected MIME type (e.g. "image/png", "image/jpeg"), or
#   fallback_mime when data is nil, shorter than 4 bytes, or unrecognized
def self.detect_image_mime_type(data, fallback_mime = "image/png")
  return fallback_mime if data.nil? || data.bytesize < 4

  # Slice by bytes before converting: avoids building an Integer array for the
  # whole image and keeps offsets correct for non-binary-tagged strings.
  head = data.byteslice(0, 12).bytes

  case
  # PNG: \x89 P N G \r \n \x1a \n
  when head[0, 4] == [0x89, 0x50, 0x4E, 0x47]
    "image/png"
  # JPEG: \xFF \xD8 \xFF
  when head[0, 3] == [0xFF, 0xD8, 0xFF]
    "image/jpeg"
  # GIF: GIF87a or GIF89a
  when head[0, 4] == [0x47, 0x49, 0x46, 0x38]
    "image/gif"
  # WEBP: RIFF .... WEBP (bytes 8..11; only present when at least 12 bytes were given)
  when head[0, 4] == [0x52, 0x49, 0x46, 0x46] && head[8, 4] == [0x57, 0x45, 0x42, 0x50]
    "image/webp"
  # BMP: BM
  when head[0, 2] == [0x42, 0x4D]
    "image/bmp"
  # TIFF: II*\x00 (little-endian) or MM\x00* (big-endian)
  when head[0, 4] == [0x49, 0x49, 0x2A, 0x00] || head[0, 4] == [0x4D, 0x4D, 0x00, 0x2A]
    "image/tiff"
  else
    fallback_mime
  end
end
450
+
451
+ # ---------------------------------------------------------------------------
452
+ # Image downscale helpers (private)
453
+ # ---------------------------------------------------------------------------
454
+
455
# Downscale a PNG using chunky_png (a pure-Ruby library).
#
# @param b64 [String] strict base64 of the PNG data
# @param max_width [Integer] maximum width in pixels
# @return [String, nil] base64 of the downscaled PNG; the original b64 when
#   the image is already within max_width; nil when downscaling was skipped
#   (chunky_png unavailable, undecodable input, or any other failure)
def self.downscale_png_chunky(b64, max_width)
  require "chunky_png"
  require "base64"
  image = ChunkyPNG::Image.from_blob(Base64.strict_decode64(b64))
  return b64 if image.width <= max_width

  src_w, src_h = image.width, image.height
  # Keep the aspect ratio, but never let a very wide, flat image round down
  # to a zero-pixel height.
  dst_h = [(src_h * max_width.to_f / src_w).round, 1].max
  image.resample_nearest_neighbor!(max_width, dst_h)
  before_kb = b64.bytesize / 1024
  result = Base64.strict_encode64(image.to_blob)
  after_kb = result.bytesize / 1024
  Octo::Logger.debug("image_downscaled",
                     format: "png",
                     from: "#{src_w}x#{src_h} (#{before_kb}KB)",
                     to: "#{max_width}x#{dst_h} (#{after_kb}KB)")
  result
rescue LoadError, StandardError => e
  # LoadError is not a StandardError, so it has to be named explicitly for a
  # missing library to count as "skipped" instead of crashing the caller.
  Octo::Logger.debug("image_downscale_skipped", format: "png", reason: e.message)
  nil
end
478
+
479
# Downscale a non-PNG image using CLI tools:
#   macOS → sips (built-in, no extra deps)
#   other → ImageMagick: `convert`, or `magick` when `convert` cannot be run
#
# The image is written to a temporary directory, resized there, and read
# back; the directory is removed when the method returns.
#
# @param b64 [String] strict base64 of the image data
# @param mime_type [String] MIME type such as "image/jpeg"; the part after
#   "/" is used as the temp file extension ("jpeg" becomes "jpg")
# @param max_width [Integer] size limit in pixels passed to the tool
# @return [String, nil] base64 of the tool's output, or nil when no tool is
#   available, the tool fails, or any error occurs
def self.downscale_via_cli(b64, mime_type, max_width)
  require "base64"
  require "tmpdir"

  ext = mime_type.split("/").last
  ext = "jpg" if ext == "jpeg"

  Dir.mktmpdir("octo-img") do |dir|
    input  = File.join(dir, "input.#{ext}")
    output = File.join(dir, "output.#{ext}")
    File.binwrite(input, Base64.strict_decode64(b64))

    before_kb = b64.bytesize / 1024

    success =
      if RUBY_PLATFORM.include?("darwin")
        system("sips", "-Z", max_width.to_s, input, "--out", output,
               out: File::NULL, err: File::NULL)
      else
        # No `which` probe: system returns nil when the executable cannot be
        # run, which doubles as the availability check and does not depend on
        # a shell, `which`, or /dev/null being present.
        resize_args = [input, "-resize", "#{max_width}x>", output]
        ran = system("convert", *resize_args, out: File::NULL, err: File::NULL)
        # Some ImageMagick 7 installs ship only the `magick` entry point.
        ran = system("magick", *resize_args, out: File::NULL, err: File::NULL) if ran.nil?
        ran
      end

    return nil unless success && File.exist?(output) && File.size(output) > 0

    result = Base64.strict_encode64(File.binread(output))
    after_kb = result.bytesize / 1024
    Octo::Logger.debug("image_downscaled",
                       format: ext,
                       from: "#{before_kb}KB",
                       to: "#{after_kb}KB (max #{max_width}px wide)")
    result
  end
rescue => e
  Octo::Logger.debug("image_downscale_skipped", mime: mime_type, reason: e.message)
  nil
end
525
+
526
# Lower-case file extensions (with leading dot) of images that may be
# referenced from markdown content and inlined as data URLs.
LOCAL_IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".gif", ".webp"].freeze
528
+
529
# Replace local image paths in markdown content with data URLs.
#
# Handles both `file:///path/to/img.png` and bare `/path/to/img.png` in
# markdown image syntax `![alt](src)`. A reference is replaced only when its
# extension is listed in LOCAL_IMAGE_EXTENSIONS and the file exists; anything
# else — including references whose conversion raises — is left untouched.
#
# Percent-escapes in the path are decoded. A "+" is first read as an encoded
# space (form decoding, as before); if no such file exists it is retried as
# a literal "+", so files like "a+b.png" are found too.
#
# @param content [String, nil] markdown text potentially containing local image references
# @return [String, nil] content with local images replaced by data URLs
#   (nil/empty input is returned as is)
def self.inline_local_images(content)
  return content if content.nil? || content.empty?

  content.gsub(%r{(!\[[^\]]*\])\((file://)?(/[^)]+)\)}) do |full_match|
    # Read the captures before any other pattern match can replace them.
    captures = Regexp.last_match
    prefix   = captures[1]
    raw_path = captures[3]

    candidates = [CGI.unescape(raw_path)]
    candidates << CGI.unescape(raw_path.gsub("+", "%2B")) if raw_path.include?("+")
    path = candidates.find do |candidate|
      LOCAL_IMAGE_EXTENSIONS.include?(File.extname(candidate).downcase) && File.exist?(candidate)
    end
    next full_match unless path

    begin
      data_url = image_path_to_data_url(path)
      Octo::Logger.info("file_processor.inline_local_images", path: path, size: File.size(path))
      "#{prefix}(#{data_url})"
    rescue StandardError => e
      Octo::Logger.warn("file_processor.inline_local_images.failed", path: path, error: e.message)
      full_match
    end
  end
end
561
+
562
# Internal helpers defined above; hidden from callers outside this scope.
private_class_method(
  :parse_zip_listing,
  :parse_tar_listing,
  :save_preview,
  :sanitize_filename,
  :downscale_png_chunky,
  :downscale_via_cli
)
564
+
565
+ # -------------------------------------------------------------------------
566
+ # Local image URL rewriting
567
+ # -------------------------------------------------------------------------
568
+
569
# Rewrite local image paths in markdown content to use the /api/local-image proxy.
#
# Matches two patterns inside `![alt](url)`:
#   1. file:// URLs        → ![alt](/api/local-image?path=<escaped file:///abs/path.png>)
#   2. bare absolute paths → ![alt](/api/local-image?path=<escaped /abs/path.png>)
#
# The original href (scheme included) is passed through CGI.escape to form the
# `path` query value. https:// URLs, non-image files and files that do not
# exist are left untouched.
#
# Percent-escapes in the path are decoded for the existence check. A "+" is
# first read as an encoded space (form decoding, as before); if no such file
# exists it is retried as a literal "+", so files like "a+b.png" qualify too.
#
# @param content [String, nil] markdown text
# @return [String, nil] rewritten content (or original if nothing matched)
def self.rewrite_local_image_urls(content)
  return content if content.nil? || content.empty?

  content.gsub(/!\[([^\]]*)\]\(((?:file:\/\/)?\/[^)]+)\)/) do |match|
    # Read the captures before the calls below run their own pattern matches.
    alt  = Regexp.last_match(1)
    href = Regexp.last_match(2)

    # Extract the filesystem path from the href
    raw_path = href.sub(%r{\Afile://}, "")
    candidates = [CGI.unescape(raw_path)]
    candidates << CGI.unescape(raw_path.gsub("+", "%2B")) if raw_path.include?("+")

    local_image = candidates.any? do |path|
      LOCAL_IMAGE_EXTENSIONS.include?(File.extname(path).downcase) && File.exist?(path)
    end

    if local_image
      "![#{alt}](/api/local-image?path=#{CGI.escape(href)})"
    else
      match # return original match unchanged
    end
  end
end
599
+ end
600
+ end
601
+ end