octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,549 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+ require_relative "../utils/file_processor"
5
+
6
+ module Octo
7
+ module Tools
8
+ class FileReader < Base
9
+ self.tool_name = "file_reader"
10
+ self.tool_description = "Read contents of a file from the filesystem. Supports text files, images (PNG/JPG/GIF/WEBP), and documents (PDF/DOCX/XLSX/PPTX — auto-converted to text via parsers, with OCR fallback for scanned PDFs)."
11
+ self.tool_category = "file_system"
12
+ self.tool_parameters = {
13
+ type: "object",
14
+ properties: {
15
+ path: {
16
+ type: "string",
17
+ description: "Absolute or relative path to the file"
18
+ },
19
+ max_lines: {
20
+ type: "integer",
21
+ description: "Maximum number of lines to read from start (default: 1000)",
22
+ default: 1000
23
+ },
24
+ start_line: {
25
+ type: "integer",
26
+ description: "Start line number (1-indexed, e.g., 100 reads from line 100)"
27
+ },
28
+ end_line: {
29
+ type: "integer",
30
+ description: "End line number (1-indexed, e.g., 200 reads up to line 200)"
31
+ }
32
+ },
33
+ required: ["path"]
34
+ }
35
+
36
+
37
+
38
+ # Maximum text file size (1MB)
39
+ MAX_TEXT_FILE_SIZE = 1 * 1024 * 1024
40
+
41
+ # Maximum content size to return: 60,000 characters (described as ~10,000 tokens in the truncation notice)
42
+ MAX_CONTENT_CHARS = 60_000
43
+
44
+ # Maximum characters per line (prevent single huge lines from bloating tokens)
45
+ MAX_LINE_CHARS = 1000
46
+
47
+ def execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil)
48
+ # Expand path relative to working_dir when provided
49
+ expanded_path = expand_path(path, working_dir: working_dir)
50
+
51
+ unless File.exist?(expanded_path)
52
+ return {
53
+ path: expanded_path,
54
+ content: nil,
55
+ error: "File not found: #{expanded_path}"
56
+ }
57
+ end
58
+
59
+ # If path is a directory, list its first-level contents (similar to filetree)
60
+ if File.directory?(expanded_path)
61
+ return list_directory_contents(expanded_path)
62
+ end
63
+
64
+ unless File.file?(expanded_path)
65
+ return {
66
+ path: expanded_path,
67
+ content: nil,
68
+ error: "Path is not a file: #{expanded_path}"
69
+ }
70
+ end
71
+
72
+ begin
73
+ # Delegate to FileProcessor for file type dispatch. FileProcessor is
74
+ # the single source of truth for how a file becomes a readable form
75
+ # (parser-extracted text, image base64, archive listing, plain text).
76
+ # FileReader here only shapes the result for the LLM.
77
+ ref = Utils::FileProcessor.process_path(expanded_path)
78
+
79
+ case ref.type
80
+ when :image
81
+ # Images go to LLM as base64 via the image_inject sidecar channel.
82
+ return handle_image_file(expanded_path)
83
+
84
+ when :pdf, :document, :spreadsheet, :presentation
85
+ # Parser-backed document formats. FileProcessor has already
86
+ # produced a preview markdown file (or set parse_error on failure).
87
+ if ref.preview_path && File.exist?(ref.preview_path)
88
+ return read_text_file(
89
+ expanded_path,
90
+ max_lines: max_lines,
91
+ start_line: start_line,
92
+ end_line: end_line,
93
+ source_path: ref.preview_path,
94
+ parsed_from: ref.type
95
+ )
96
+ else
97
+ return build_parser_failure_result(expanded_path, ref)
98
+ end
99
+
100
+ when :text, :csv, :zip
101
+ # FileProcessor already produced a preview (raw text copy for
102
+ # text/csv, archive listing for zip/tar). Read the preview with
103
+ # normal line-range + truncation rules.
104
+ source = (ref.preview_path && File.exist?(ref.preview_path)) ? ref.preview_path : expanded_path
105
+ return read_text_file(
106
+ expanded_path,
107
+ max_lines: max_lines,
108
+ start_line: start_line,
109
+ end_line: end_line,
110
+ source_path: source
111
+ )
112
+
113
+ else
114
+ # Unknown / :file — could be an unrecognised source file, a binary
115
+ # blob, or anything else. Fall back to:
116
+ # 1. If FileProcessor.binary_file_path? says it's binary → report unsupported.
117
+ # 2. Otherwise → read as plain text (covers .rb, .py, .js, .log, etc.).
118
+ if Utils::FileProcessor.binary_file_path?(expanded_path)
119
+ return handle_unsupported_binary(expanded_path, ref)
120
+ end
121
+
122
+ return read_text_file(
123
+ expanded_path,
124
+ max_lines: max_lines,
125
+ start_line: start_line,
126
+ end_line: end_line
127
+ )
128
+ end
129
+ rescue StandardError => e
130
+ {
131
+ path: expanded_path,
132
+ content: nil,
133
+ error: "Error reading file: #{e.message}"
134
+ }
135
+ end
136
+ end
137
+
138
+ # Read a plain-text file with line-range selection and token-budget
139
+ # truncation. The source of the text can be:
140
+ # - the original file itself (source_path omitted, so it defaults to display_path)
141
+ # - a parser-generated preview.md for documents (source_path = ref.preview_path)
142
+ # The reported `path` is always the original file so the LLM sees a
143
+ # consistent identity.
144
+ private def read_text_file(display_path, max_lines:, start_line:, end_line:, source_path: nil, parsed_from: nil)
145
+ source_path ||= display_path
146
+
147
+ file_size = File.size(source_path)
148
+ if file_size > MAX_TEXT_FILE_SIZE
149
+ return {
150
+ path: display_path,
151
+ content: nil,
152
+ size_bytes: file_size,
153
+ error: "Text file too large: #{format_file_size(file_size)} (max: #{format_file_size(MAX_TEXT_FILE_SIZE)}). Please use grep tool to search within this file instead."
154
+ }
155
+ end
156
+
157
+ # Read text file with optional line range.
158
+ # Scrub invalid UTF-8 bytes (e.g. GBK-encoded files) so downstream
159
+ # JSON.generate / history persistence won't blow up later.
160
+ all_lines = File.readlines(source_path).map! { |line| safe_utf8(line) }
161
+ total_lines = all_lines.size
162
+
163
+ # Calculate start index (convert 1-indexed to 0-indexed)
164
+ start_idx = start_line ? [start_line - 1, 0].max : 0
165
+
166
+ # Calculate end index based on parameters
167
+ if end_line
168
+ end_idx = [end_line - 1, total_lines - 1].min
169
+ elsif start_line
170
+ calculated_end_line = start_line + max_lines - 1
171
+ end_idx = [calculated_end_line - 1, total_lines - 1].min
172
+ else
173
+ end_idx = [max_lines - 1, total_lines - 1].min
174
+ end
175
+
176
+ if total_lines == 0
177
+ return {
178
+ path: display_path,
179
+ content: "",
180
+ lines_read: 0,
181
+ total_lines: 0,
182
+ truncated: false,
183
+ start_line: start_line,
184
+ end_line: end_line,
185
+ parsed_from: parsed_from&.to_s,
186
+ source_path: (source_path != display_path ? source_path : nil),
187
+ error: nil
188
+ }
189
+ end
190
+
191
+ # Check if start_line exceeds file length first
192
+ if start_idx >= total_lines
193
+ return {
194
+ path: display_path,
195
+ content: nil,
196
+ lines_read: 0,
197
+ error: "Invalid line range: start_line #{start_line} exceeds total lines (#{total_lines})"
198
+ }
199
+ end
200
+
201
+ # Validate range
202
+ if start_idx > end_idx
203
+ return {
204
+ path: display_path,
205
+ content: nil,
206
+ lines_read: 0,
207
+ error: "Invalid line range: start_line #{start_line} > end_line #{end_line || (start_line + max_lines)}"
208
+ }
209
+ end
210
+
211
+ lines = all_lines[start_idx..end_idx] || []
212
+
213
+ # Truncate individual lines that are too long
214
+ lines = lines.map do |line|
215
+ if line.length > MAX_LINE_CHARS
216
+ line[0...MAX_LINE_CHARS] + "... [Line truncated - #{line.length} chars]\n"
217
+ else
218
+ line
219
+ end
220
+ end
221
+
222
+ content = lines.join
223
+ truncated = end_idx < (total_lines - 1)
224
+
225
+ # Truncate total content if it exceeds maximum size
226
+ if content.length > MAX_CONTENT_CHARS
227
+ content = content[0...MAX_CONTENT_CHARS] +
228
+ "\n\n[Content truncated - exceeded #{MAX_CONTENT_CHARS} characters (~10,000 tokens)]" +
229
+ "\nUse start_line/end_line parameters to read specific sections, or grep tool to search for keywords."
230
+ truncated = true
231
+ end
232
+
233
+ {
234
+ path: display_path,
235
+ content: content,
236
+ lines_read: lines.size,
237
+ total_lines: total_lines,
238
+ truncated: truncated,
239
+ start_line: start_line,
240
+ end_line: end_line,
241
+ parsed_from: parsed_from&.to_s,
242
+ source_path: (source_path != display_path ? source_path : nil),
243
+ error: nil
244
+ }
245
+ end
246
+
247
+ def format_call(args)
248
+ path = args[:path] || args['path']
249
+ "Read(#{Utils::PathHelper.safe_basename(path)})"
250
+ end
251
+
252
+ def format_result(result)
253
+ return result[:error] if result[:error]
254
+
255
+ # Handle directory listing
256
+ if result[:is_directory] || result['is_directory']
257
+ entries = result[:entries_count] || result['entries_count'] || 0
258
+ dirs = result[:directories_count] || result['directories_count'] || 0
259
+ files = result[:files_count] || result['files_count'] || 0
260
+ return "Listed #{entries} entries (#{dirs} directories, #{files} files)"
261
+ end
262
+
263
+ # Handle binary file
264
+ if result[:binary] || result['binary']
265
+ format_type = result[:format] || result['format'] || 'unknown'
266
+ size = result[:size_bytes] || result['size_bytes'] || 0
267
+
268
+ # Check if it has base64 data (LLM-compatible format)
269
+ if result[:base64_data] || result['base64_data']
270
+ size_warning = size > Utils::FileProcessor::MAX_FILE_SIZE ? " (WARNING: large file)" : ""
271
+ return "Binary file (#{format_type}, #{format_file_size(size)}) - sent to LLM#{size_warning}"
272
+ else
273
+ return "Binary file (#{format_type}, #{format_file_size(size)}) - cannot be read as text"
274
+ end
275
+ end
276
+
277
+ # Handle text file reading (including parser-extracted documents)
278
+ lines = result[:lines_read] || result['lines_read'] || 0
279
+ truncated = result[:truncated] || result['truncated']
280
+ parsed_from = result[:parsed_from] || result['parsed_from']
281
+ suffix = parsed_from ? " (from #{parsed_from})" : ""
282
+ "Read #{lines} lines#{suffix}#{truncated ? ' (truncated)' : ''}"
283
+ end
284
+
285
+ def format_result_for_ui(result)
286
+ return nil if result[:error]
287
+
288
+ if result[:is_directory] || result['is_directory']
289
+ entries = result[:entries] || []
290
+ return {
291
+ type: "file_list",
292
+ path: result[:path],
293
+ entries: entries.map { |e| { name: e, is_dir: e.end_with?("/") } },
294
+ total: result[:entries_count] || entries.size
295
+ }
296
+ end
297
+
298
+ if result[:binary] || result['binary']
299
+ return {
300
+ type: "file_read",
301
+ path: result[:path],
302
+ is_binary: true,
303
+ format: result[:format],
304
+ size_bytes: result[:size_bytes]
305
+ }
306
+ end
307
+
308
+ {
309
+ type: "file_read",
310
+ path: result[:path],
311
+ lines_read: result[:lines_read],
312
+ total_lines: result[:total_lines],
313
+ truncated: result[:truncated] || false,
314
+ content_preview: result[:content]&.slice(0, 500),
315
+ language: detect_language(result[:path])
316
+ }
317
+ end
318
+
319
+ LANG_MAP = {
320
+ ".rb" => "ruby", ".py" => "python", ".js" => "javascript",
321
+ ".ts" => "typescript", ".jsx" => "jsx", ".tsx" => "tsx",
322
+ ".java" => "java", ".go" => "go", ".rs" => "rust",
323
+ ".c" => "c", ".cpp" => "cpp", ".h" => "c",
324
+ ".cs" => "csharp", ".php" => "php", ".swift" => "swift",
325
+ ".kt" => "kotlin", ".scala" => "scala", ".r" => "r",
326
+ ".sh" => "bash", ".zsh" => "bash", ".bash" => "bash",
327
+ ".yaml" => "yaml", ".yml" => "yaml", ".json" => "json",
328
+ ".xml" => "xml", ".html" => "html", ".css" => "css",
329
+ ".scss" => "scss", ".sass" => "scss", ".less" => "less",
330
+ ".md" => "markdown", ".sql" => "sql", ".dockerfile" => "dockerfile",
331
+ ".gemfile" => "ruby", ".rake" => "ruby"
332
+ }.freeze
333
+
334
+ private def detect_language(path)
335
+ ext = File.extname(path.to_s).downcase
336
+ LANG_MAP[ext]
337
+ end
338
+
339
# Format a tool result for the LLM - handles both text and binary (image) content.
# This method is called by the agent to format tool results before sending to LLM.
#
# @param result [Hash] result hash with symbol keys (:path, :content, :binary,
#   :base64_data, :mime_type, :error, :is_directory, ...)
# @return [Hash, String]
#   - binary + base64 image: Hash with :type "text", :text and an :image_inject sidecar
#   - binary + base64 non-image: Hash with :type "document" (defensive fallback)
#   - error / nil content / directory listing: +result+ itself, untouched
#   - text content: a plain String "<header>\n\n<content>"
def format_result_for_llm(result)
  # For LLM-compatible binary files with base64 data (images only — documents
  # are converted to text upstream via FileProcessor parsers).
  if result[:binary] && result[:base64_data]
    max_size = Utils::FileProcessor::MAX_FILE_SIZE
    description_lines = [
      "File: #{result[:path]}",
      "Type: #{result[:format]}",
      "Size: #{format_file_size(result[:size_bytes])}"
    ]
    if result[:size_bytes] > max_size
      description_lines << "WARNING: Large file (>#{max_size / 1024}KB) - may consume significant tokens"
    end
    description = description_lines.join("\n")

    # For images: return a plain-text tool result + a sidecar `image_inject`
    # payload that the agent will append as a follow-up `role: "user"` message.
    #
    # WHY: OpenAI-compatible APIs (including OpenRouter/Gemini) only accept
    # image_url content blocks inside `role: "user"` messages, NOT inside
    # `role: "tool"` messages. Putting base64 in a tool message causes it to
    # be JSON-encoded as a plain string, which the tokeniser treats as text —
    # blowing up token counts by 20-40x (observed: ~115k tokens for a 124 KB jpg).
    #
    # The agent detects `:image_inject` in the tool result after observe() and
    # appends a `role: "user"` system_injected message containing the image block.
    if result[:mime_type]&.start_with?("image/")
      return {
        type: "text",
        text: description,
        image_inject: {
          mime_type: result[:mime_type],
          base64_data: result[:base64_data],
          path: result[:path]
        }
      }
    end

    # No non-image binary type should reach here anymore — documents now
    # go through the parser + text path. Keep this as a defensive fallback.
    return {
      type: "document",
      path: result[:path],
      format: result[:format],
      size_bytes: result[:size_bytes],
      mime_type: result[:mime_type],
      description: description
    }
  end

  # For error cases, return hash as-is
  return result if result[:error] || result[:content].nil?

  # For directory listings, return as-is (no raw file content to preserve)
  return result if result[:is_directory]

  # For plain text files (and parser-extracted documents): return a plain
  # string so the agent sends it directly to the LLM without JSON-encoding
  # (avoids \" / \n escaping).
  header_parts = ["File: #{result[:path]}"]
  header_parts << "[extracted from #{result[:parsed_from]}]" if result[:parsed_from]
  header_parts << "(lines #{result[:start_line]}-#{result[:end_line]})" if result[:start_line]
  # Only report line counts when both are known; otherwise the header would
  # contain a meaningless "[/ lines]" fragment.
  if result[:lines_read] && result[:total_lines]
    header_parts << "[#{result[:lines_read]}/#{result[:total_lines]} lines]"
  end
  header_parts << "[TRUNCATED]" if result[:truncated]

  "#{header_parts.join(' ')}\n\n#{result[:content]}"
end
404
+
405
# Handle an image file: convert it to base64 via Utils::FileProcessor and
# return the raw result hash. Used by execute() for :image type files.
#
# The returned hash carries :binary, :mime_type and :base64_data; it does not
# contain an image_inject sidecar itself — format_result_for_llm builds that
# from these fields.
#
# @param path [String] path of the image file
# @return [Hash] on success: :path, :binary, :format, :mime_type, :size_bytes,
#   :base64_data, error: nil. When file_to_base64 raises ArgumentError:
#   :path, :binary, :format (from the extension), :size_bytes, content: nil
#   and :error set to the exception message.
private def handle_image_file(path)
  encoded = Utils::FileProcessor.file_to_base64(path)
  {
    path: path,
    binary: true,
    format: encoded[:format],
    mime_type: encoded[:mime_type],
    size_bytes: encoded[:size_bytes],
    base64_data: encoded[:base64_data],
    error: nil
  }
rescue ArgumentError => e
  # File too large or unreadable. File.size would raise again for an
  # unreadable/missing file and mask the original error, so use File.size?
  # (nil on failure) and fall back to 0.
  ext = File.extname(path).downcase
  {
    path: path,
    binary: true,
    format: ext.empty? ? "unknown" : ext[1..-1],
    size_bytes: File.size?(path) || 0,
    content: nil,
    error: e.message
  }
end
433
+
434
# Build the error result for a binary file that has no parser and is not an
# image, so the LLM is told plainly that it needs a different approach.
#
# @param path [String] path of the binary file
# @param ref [Object, nil] accepted for call-site compatibility; not read here
# @return [Hash] :path, :binary, :format (extension without the dot, or
#   "unknown"), :size_bytes, content: nil and a human-readable :error
private def handle_unsupported_binary(path, ref = nil)
  byte_count = File.size(path)
  extension = File.extname(path).downcase
  format_name = extension.empty? ? "unknown" : extension[1..-1]
  size_label = format_file_size(byte_count)

  {
    path: path,
    binary: true,
    format: format_name,
    size_bytes: byte_count,
    content: nil,
    error: "Binary file detected. This format cannot be read as text. File size: #{size_label}"
  }
end
448
+
449
# Build an error result when the parser for a supported document format
# failed. The LLM receives the parser path so it can fix and retry, matching
# the behaviour of the file-upload pipeline (agent.rb's file_prompt).
#
# @param path [String] path of the document that failed to parse
# @param ref [#parser_path, #parse_error] object exposing the parser script
#   path and the parser's error message (either may be nil)
# @return [Hash] :path, :binary, :format, :size_bytes, content: nil,
#   :parser_path, :parse_error and a multi-line :error message
private def build_parser_failure_result(path, ref)
  # Stdlib; required here because this method is the only user and the
  # file's top-level requires are not guaranteed to include it.
  require "shellwords"

  ext = File.extname(path).downcase
  # File.size? returns nil (instead of raising) when the file cannot be stat'ed.
  file_size = File.size?(path) || 0

  message_lines = ["Failed to extract text from #{ext.empty? ? 'file' : ext[1..-1].upcase}."]
  message_lines << "Parser error: #{ref.parse_error}" if ref.parse_error
  if ref.parser_path
    expected_preview = "#{path}.preview.md"
    # The command is meant to be pasted into a shell, so every path is
    # escaped; paths containing only safe characters are left unchanged.
    command = [
      "ruby",
      Shellwords.escape(ref.parser_path),
      Shellwords.escape(path),
      ">",
      Shellwords.escape(expected_preview)
    ].join(" ")
    message_lines << "Parser script: #{ref.parser_path}"
    message_lines << "To fix: edit the parser, then run: #{command}"
    message_lines << "After a successful parse, re-run file_reader on this file."
  end

  {
    path: path,
    binary: true,
    format: ext.empty? ? "unknown" : ext[1..-1],
    size_bytes: file_size,
    content: nil,
    parser_path: ref.parser_path,
    parse_error: ref.parse_error,
    error: message_lines.join("\n")
  }
end
474
+
475
# Thin delegator: forwards both arguments unchanged to
# Utils::FileProcessor.detect_mime_type and returns whatever it returns.
private def detect_mime_type(path, data)
  processor = Utils::FileProcessor
  processor.detect_mime_type(path, data)
end
478
+
479
# Render a byte count as a human-readable size string.
#
# Values below 1024 are shown as "<n> bytes", values below 1024 * 1024 as
# KB, and everything else as MB; KB/MB figures are rounded to two decimals.
#
# @param bytes [Numeric] size in bytes
# @return [String] e.g. "512 bytes", "1.5 KB", "5.5 MB"
private def format_file_size(bytes)
  kilobyte = 1024
  megabyte = kilobyte * kilobyte

  return "#{bytes} bytes" if bytes < kilobyte
  return "#{(bytes / kilobyte.to_f).round(2)} KB" if bytes < megabyte

  "#{(bytes / megabyte.to_f).round(2)} MB"
end
488
+
489
+
490
# List first-level directory contents (files and directories).
#
# Directories are listed first (with a trailing "/"), then files; each group
# is sorted separately.
#
# @param path [String] directory to list
# @return [Hash] on success: :path, :content ("Directory listing:\n" plus one
#   indented line per entry), :entries_count, :directories_count,
#   :files_count, is_directory: true, error: nil. On any StandardError:
#   :path, content: nil and :error with the exception message.
private def list_directory_contents(path)
  raw_names = Dir.entries(path).reject { |name| name == "." || name == ".." }

  # Classify with the raw on-disk names. Scrubbing first would change any
  # name containing invalid bytes, so the File.directory? check would look
  # at a path that does not exist and report directories as files.
  dir_names, file_names = raw_names.partition do |name|
    File.directory?(File.join(path, name))
  end

  # Scrub names only for display — filenames on disk may contain non-UTF-8
  # bytes (e.g. GBK/Shift-JIS names on macOS/Linux) which would poison history.
  directories = dir_names.map { |name| "#{safe_utf8(name)}/" }.sort
  files = file_names.map { |name| safe_utf8(name) }.sort
  all_entries = directories + files

  # Format as a tree-like structure
  content = all_entries.map { |entry| "  #{entry}" }.join("\n")

  {
    path: path,
    content: "Directory listing:\n#{content}",
    entries_count: all_entries.size,
    directories_count: directories.size,
    files_count: files.size,
    is_directory: true,
    error: nil
  }
rescue StandardError => e
  {
    path: path,
    content: nil,
    error: "Error reading directory: #{e.message}"
  }
end
537
+
538
# Make a string safe for JSON.generate (session replay, API responses) by
# converting it to UTF-8 and substituting U+FFFD (�) for bytes that are
# invalid or have no UTF-8 mapping.
#
# nil and strings that are already valid UTF-8 are handed back as-is (the
# same object, no copy).
#
# @param str [String, nil]
# @return [String, nil]
private def safe_utf8(str)
  already_clean = str.nil? || (str.encoding == Encoding::UTF_8 && str.valid_encoding?)

  if already_clean
    str
  else
    str.encode("UTF-8", invalid: :replace, undef: :replace, replace: "\u{FFFD}")
  end
end
547
+ end
548
+ end
549
+ end