octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,331 @@
1
+ #!/usr/bin/env python3
2
+ """Run the eval + improve loop until all pass or max iterations reached.
3
+
4
+ Combines run_eval.py and improve_description.py in a loop, tracking history
5
+ and returning the best description found. Supports train/test split to prevent
6
+ overfitting.
7
+
8
+ Octo adaptation:
9
+ - Queries execute serially (no parallel workers; --num-workers ignored)
10
+ - Model comes from ~/.octo/config.yml (--model is kept for compat but ignored)
11
+ - Skill dir: ~/.octo/skills/
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import random
17
+ import sys
18
+ import tempfile
19
+ import time
20
+ import webbrowser
21
+ from pathlib import Path
22
+
23
+ from scripts.generate_report import generate_html
24
+ from scripts.improve_description import improve_description
25
+ from scripts.run_eval import find_project_root, run_eval
26
+ from scripts.utils import parse_skill_md
27
+
28
+
29
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    Entries are partitioned by the truthiness of ``e["should_trigger"]``; each
    partition is shuffled independently and its first
    ``max(1, int(len(partition) * holdout))`` entries go to the test set, the
    remainder to the train set.

    Args:
        eval_set: Eval entries; every entry must have a ``should_trigger`` key.
        holdout: Fraction of each partition to hold out for testing.
        seed: Seed for the shuffle, so the same input yields the same split.

    Returns:
        ``(train_set, test_set)`` as two new lists; ``eval_set`` is not mutated.

    Note:
        Because of the ``max(1, ...)`` floor, a partition containing a single
        entry ends up entirely in the test set.
    """
    # A private generator keeps the split reproducible without reseeding the
    # process-wide ``random`` module state as a side effect. For a given seed
    # it yields the same shuffle order as seeding the module-level generator.
    rng = random.Random(seed)

    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    n_trigger_test = max(1, int(len(trigger) * holdout))
    n_no_trigger_test = max(1, int(len(no_trigger) * holdout))

    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
46
+
47
+
48
def _print_eval_stats(label: str, results: list[dict], elapsed: float) -> None:
    """Print aggregate and per-query trigger statistics for one result list to stderr.

    Each result is expected to carry ``should_trigger``, ``triggers``, ``runs``,
    ``pass`` and ``query`` keys (the keys read below).
    """
    pos = [r for r in results if r["should_trigger"]]
    neg = [r for r in results if not r["should_trigger"]]
    tp = sum(r["triggers"] for r in pos)
    pos_runs = sum(r["runs"] for r in pos)
    fn = pos_runs - tp
    fp = sum(r["triggers"] for r in neg)
    neg_runs = sum(r["runs"] for r in neg)
    tn = neg_runs - fp
    total = tp + tn + fp + fn
    # With no positive predictions / no positive runs the ratio is undefined;
    # report 100% rather than dividing by zero.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
    accuracy = (tp + tn) / total if total > 0 else 0.0
    print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
    for r in results:
        status = "PASS" if r["pass"] else "FAIL"
        rate_str = f"{r['triggers']}/{r['runs']}"
        print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)


def run_loop(
    eval_set: list[dict],
    skill_path: Path,
    description_override: str | None,
    timeout: int,
    max_iterations: int,
    runs_per_query: int,
    trigger_threshold: float,
    holdout: float,
    verbose: bool,
    live_report_path: Path | None = None,
    log_dir: Path | None = None,
    # Legacy params (kept for API compat, ignored in Octo)
    num_workers: int = 1,
    model: str = "",
) -> dict:
    """Run the eval + improvement loop.

    Each iteration evaluates the current description against train + test
    queries, records the outcome in ``history``, and (unless every train query
    passed or the iteration budget is exhausted) asks ``improve_description``
    for a new description based on train results only.

    Args:
        eval_set: Eval entries with at least ``query`` and ``should_trigger``.
        skill_path: Skill directory containing ``SKILL.md``.
        description_override: Starting description; falls back to the one
            parsed from ``SKILL.md`` when falsy.
        timeout: Passed through to ``run_eval``.
        max_iterations: Maximum number of eval rounds; must be >= 1.
        runs_per_query: Passed through to ``run_eval``.
        trigger_threshold: Passed through to ``run_eval``.
        holdout: Fraction held out as a test set; ``<= 0`` disables the split.
        verbose: Print progress to stderr.
        live_report_path: If set, an auto-refreshing HTML report is rewritten
            there after every iteration.
        log_dir: Passed through to ``improve_description``.
        num_workers: Unused here.
        model: Forwarded to ``improve_description``.

    Returns:
        A dict with the exit reason, original/best/final descriptions, scores,
        split sizes and the full per-iteration ``history``.

    Raises:
        ValueError: If ``max_iterations`` is less than 1 (no iteration would run
            and there would be no history to pick a best description from).
    """
    # Fail fast with a clear message; previously this surfaced only at the end
    # as "max() arg is an empty sequence".
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be at least 1, got {max_iterations}")

    project_root = find_project_root()
    name, original_description, content = parse_skill_md(skill_path)
    current_description = description_override or original_description

    # Split into train/test if holdout > 0
    if holdout > 0:
        train_set, test_set = split_eval_set(eval_set, holdout)
        if verbose:
            print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
    else:
        train_set = eval_set
        test_set = []

    history = []
    exit_reason = "unknown"

    for iteration in range(1, max_iterations + 1):
        if verbose:
            print(f"\n{'='*60}", file=sys.stderr)
            print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
            print(f"Description: {current_description}", file=sys.stderr)
            print(f"{'='*60}", file=sys.stderr)

        # Run eval on all queries (train + test) serially
        all_queries = train_set + test_set
        t0 = time.time()
        all_results = run_eval(
            eval_set=all_queries,
            skill_name=name,
            description=current_description,
            timeout=timeout,
            project_root=project_root,
            runs_per_query=runs_per_query,
            trigger_threshold=trigger_threshold,
        )
        eval_elapsed = time.time() - t0

        # Split results back into train/test by matching queries.
        # NOTE(review): membership is decided by query text, so a query string
        # present in both splits would be counted as train only.
        train_queries_set = {q["query"] for q in train_set}
        train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
        test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]

        train_passed = sum(1 for r in train_result_list if r["pass"])
        train_total = len(train_result_list)
        train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
        train_results = {"results": train_result_list, "summary": train_summary}

        if test_set:
            test_passed = sum(1 for r in test_result_list if r["pass"])
            test_total = len(test_result_list)
            test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
            test_results = {"results": test_result_list, "summary": test_summary}
        else:
            test_results = None
            test_summary = None

        history.append({
            "iteration": iteration,
            "description": current_description,
            "train_passed": train_summary["passed"],
            "train_failed": train_summary["failed"],
            "train_total": train_summary["total"],
            "train_results": train_results["results"],
            "test_passed": test_summary["passed"] if test_summary else None,
            "test_failed": test_summary["failed"] if test_summary else None,
            "test_total": test_summary["total"] if test_summary else None,
            "test_results": test_results["results"] if test_results else None,
            # Backward compat with report generator
            "passed": train_summary["passed"],
            "failed": train_summary["failed"],
            "total": train_summary["total"],
            "results": train_results["results"],
        })

        # Write live report if path provided
        if live_report_path:
            partial_output = {
                "original_description": original_description,
                "best_description": current_description,
                "best_score": "in progress",
                "iterations_run": len(history),
                "holdout": holdout,
                "train_size": len(train_set),
                "test_size": len(test_set),
                "history": history,
            }
            # Explicit UTF-8: descriptions and queries may contain non-ASCII
            # text that the platform default encoding cannot represent.
            live_report_path.write_text(
                generate_html(partial_output, auto_refresh=True, skill_name=name),
                encoding="utf-8",
            )

        if verbose:
            _print_eval_stats("Train", train_results["results"], eval_elapsed)
            if test_summary:
                # Train and test are evaluated in one run_eval call, so the
                # elapsed time is reported on the train line only.
                _print_eval_stats("Test ", test_results["results"], 0)

        if train_summary["failed"] == 0:
            exit_reason = f"all_passed (iteration {iteration})"
            if verbose:
                print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
            break

        if iteration == max_iterations:
            exit_reason = f"max_iterations ({max_iterations})"
            if verbose:
                print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
            break

        # Improve description based on train results
        if verbose:
            print("\nImproving description...", file=sys.stderr)

        t0 = time.time()
        # Blind history to test scores so improvement model can't overfit to them
        blinded_history = [
            {k: v for k, v in h.items() if not k.startswith("test_")}
            for h in history
        ]
        new_description = improve_description(
            skill_name=name,
            skill_content=content,
            current_description=current_description,
            eval_results=train_results,
            history=blinded_history,
            model=model,  # ignored internally; model comes from config.yml
            log_dir=log_dir,
            iteration=iteration,
        )
        improve_elapsed = time.time() - t0

        if verbose:
            print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)

        current_description = new_description

    # Find the best iteration by TEST score (or train if no test set).
    # max() returns the first maximum, so ties go to the earliest iteration.
    if test_set:
        best = max(history, key=lambda h: h["test_passed"] or 0)
        best_score = f"{best['test_passed']}/{best['test_total']}"
    else:
        best = max(history, key=lambda h: h["train_passed"])
        best_score = f"{best['train_passed']}/{best['train_total']}"

    if verbose:
        print(f"\nExit reason: {exit_reason}", file=sys.stderr)
        print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)

    return {
        "exit_reason": exit_reason,
        "original_description": original_description,
        "best_description": best["description"],
        "best_score": best_score,
        "best_train_score": f"{best['train_passed']}/{best['train_total']}",
        "best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
        "final_description": current_description,
        "iterations_run": len(history),
        "holdout": holdout,
        "train_size": len(train_set),
        "test_size": len(test_set),
        "history": history,
    }
242
+
243
+
244
def main():
    """CLI entry point: parse arguments, run the optimization loop, emit results.

    Prints the result JSON to stdout, optionally maintains a live HTML report
    (opened in the browser at start), and optionally saves ``results.json`` and
    ``report.html`` into a timestamped subdirectory of ``--results-dir``.
    Exits with status 1 if the skill directory has no ``SKILL.md``.
    """
    parser = argparse.ArgumentParser(description="Run eval + improve loop (Octo)")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override starting description")
    parser.add_argument("--timeout", type=int, default=45, help="Timeout per query in seconds")
    parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    parser.add_argument("--runs-per-query", type=int, default=1, help="Number of runs per query (serially)")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction to hold out for testing (0 to disable)")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    parser.add_argument("--report", default="auto", help="HTML report path ('auto'=temp file, 'none'=disable)")
    parser.add_argument("--results-dir", default=None, help="Save results.json + report.html to a timestamped subdir here")
    # Ignored legacy args (kept for CLI compat)
    parser.add_argument("--num-workers", type=int, default=1, help="Ignored — Octo runs serially")
    parser.add_argument("--model", default="", help="Ignored — model comes from ~/.octo/config.yml")
    args = parser.parse_args()

    # JSON interchange is UTF-8; do not depend on the platform default encoding.
    eval_set = json.loads(Path(args.eval_set).read_text(encoding="utf-8"))
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    name, _, _ = parse_skill_md(skill_path)

    # Set up live report path
    if args.report != "none":
        if args.report == "auto":
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
        else:
            live_report_path = Path(args.report)
        # Placeholder page that reloads itself until the first real report lands.
        live_report_path.write_text(
            "<html><body><h1>Starting optimization loop...</h1>"
            "<meta http-equiv='refresh' content='5'></body></html>",
            encoding="utf-8",
        )
        # webbrowser.open() is only documented for URLs; hand it a file:// URI
        # rather than a bare (possibly relative) filesystem path.
        webbrowser.open(live_report_path.resolve().as_uri())
    else:
        live_report_path = None

    # Determine output directory
    if args.results_dir:
        timestamp = time.strftime("%Y-%m-%d_%H%M%S")
        results_dir = Path(args.results_dir) / timestamp
        results_dir.mkdir(parents=True, exist_ok=True)
    else:
        results_dir = None

    log_dir = results_dir / "logs" if results_dir else None

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
        num_workers=args.num_workers,
        model=args.model,
    )

    # Output JSON
    json_output = json.dumps(output, indent=2)
    print(json_output)
    if results_dir:
        (results_dir / "results.json").write_text(json_output, encoding="utf-8")

    # Write final HTML report (rendered once, reused for both destinations)
    if live_report_path:
        final_html = generate_html(output, auto_refresh=False, skill_name=name)
        live_report_path.write_text(final_html, encoding="utf-8")
        print(f"\nReport: {live_report_path}", file=sys.stderr)

        if results_dir:
            (results_dir / "report.html").write_text(final_html, encoding="utf-8")

    if results_dir:
        print(f"Results saved to: {results_dir}", file=sys.stderr)


if __name__ == "__main__":
    main()
@@ -0,0 +1,47 @@
1
+ """Shared utilities for skill-creator scripts."""
2
+
3
+ from pathlib import Path
4
+
5
+
6
+
7
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
    """Parse a SKILL.md file, returning (name, description, full_content).

    This is a deliberately small frontmatter reader, not a YAML parser: it only
    looks at top-level ``name:`` and ``description:`` lines between the opening
    and closing ``---`` delimiters.

    Args:
        skill_path: Directory that contains the ``SKILL.md`` file.

    Returns:
        A ``(name, description, content)`` tuple. ``name`` and ``description``
        are empty strings when the corresponding key is absent; ``content`` is
        the unmodified text of the file.

    Raises:
        ValueError: If the file does not start with ``---`` or the closing
            ``---`` delimiter is missing.
    """
    content = (skill_path / "SKILL.md").read_text()
    lines = content.split("\n")

    if lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    # Index of the first closing delimiter after the opening one.
    end_idx = next(
        (idx for idx, text in enumerate(lines[1:], start=1) if text.strip() == "---"),
        None,
    )
    if end_idx is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    # YAML block scalar headers: folded (>) or literal (|), optionally followed
    # by a chomping indicator (- strip, + keep).
    block_indicators = (">", "|", ">-", "|-", ">+", "|+")

    name = ""
    description = ""
    frontmatter_lines = lines[1:end_idx]
    i = 0
    while i < len(frontmatter_lines):
        line = frontmatter_lines[i]
        if line.startswith("name:"):
            name = line[len("name:"):].strip().strip('"').strip("'")
        elif line.startswith("description:"):
            value = line[len("description:"):].strip()
            if value in block_indicators:
                continuation_lines: list[str] = []
                i += 1
                while i < len(frontmatter_lines):
                    candidate = frontmatter_lines[i]
                    stripped = candidate.strip()
                    # A non-blank line at column 0 is the next key: the block ends.
                    if stripped and not candidate.startswith((" ", "\t")):
                        break
                    # Blank lines are legal inside a block scalar; skip them
                    # instead of ending the block so later paragraphs are kept.
                    if stripped:
                        continuation_lines.append(stripped)
                    i += 1
                # The description is always collapsed to a single line,
                # regardless of folded/literal style.
                description = " ".join(continuation_lines)
                # `i` already points at the first line after the block.
                continue
            description = value.strip('"').strip("'")
        i += 1

    return name, description, content
@@ -0,0 +1,143 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # validate_skill_frontmatter.rb
5
+ #
6
+ # Validates and auto-fixes the YAML frontmatter of a SKILL.md file.
7
+ #
8
+ # Usage:
9
+ # ruby validate_skill_frontmatter.rb <path/to/SKILL.md>
10
+ #
11
+ # What it does:
12
+ # 1. Parses the frontmatter between --- delimiters
13
+ # 2. If YAML is invalid OR description is not a plain String:
14
+ # - Extracts name/description via regex fallback
15
+ # - Re-wraps description in single quotes (collapsed to one line)
16
+ # - Rewrites the frontmatter in the file
17
+ # 3. Exits 0 on success (with or without auto-fix), 1 on unrecoverable error
18
+
19
+ require "yaml"
20
+
21
# Path of the SKILL.md file to validate, taken from the first CLI argument.
path = ARGV[0]

if path.nil? || path.strip.empty?
  warn "Usage: ruby validate_skill_frontmatter.rb <path/to/SKILL.md>"
  exit 1
end

unless File.exist?(path)
  warn "File not found: #{path}"
  exit 1
end

content = File.read(path)

# Extract frontmatter block: opening "---" line, raw YAML, closing "---" line.
fm_match = content.match(/\A(---\n)(.*?)(\n---[ \t]*\n?)/m)
unless fm_match
  warn "ERROR: No frontmatter block found in #{path}"
  exit 1
end

prefix   = fm_match[1]         # "---\n"
yaml_raw = fm_match[2]         # raw YAML text
suffix   = fm_match[3]         # "\n---\n"
body     = fm_match.post_match # rest of file after frontmatter

# Attempt normal YAML parse
data = nil
begin
  data = YAML.safe_load(yaml_raw)
rescue Psych::Exception => e
  warn "YAML parse error: #{e.message}"
end

# The frontmatter is only acceptable when it is a mapping whose description is
# a plain String. Checking for Hash first avoids indexing an Array or scalar
# with a String key (which would raise or return a misleading value).
if data.is_a?(Hash) && data["description"].is_a?(String)
  puts "OK: name=#{data['name'].inspect} description_length=#{data['description'].length}"
  exit 0
end

# --- Auto-fix ---
puts "Frontmatter invalid or description broken — attempting auto-fix..."

# Regex fallback: extract name and description lines
name_match = yaml_raw.match(/^name:\s*(.+)$/)
unless name_match
  warn "ERROR: Cannot extract 'name' field from frontmatter. Manual fix required."
  exit 1
end
name_value = name_match[1].strip.gsub(/\A['"]|['"]\z/, "")

# description may be:
#   description: some text                      (unquoted)
#   description: 'some text'                    (single-quoted)
#   description: "some text"                    (double-quoted)
#   description: first line\n  continuation     (multi-line block scalar)
#
# The value runs until the next line that starts at column 0 (the next key) or
# the end of the frontmatter. This is the same boundary the "remaining" scan
# below uses, so a following key is never captured into the description and
# then emitted a second time.
desc_match = yaml_raw.match(/^description:\s*(.+?)(?=\n\S|\z)/m)
unless desc_match
  warn "ERROR: Cannot extract 'description' field from frontmatter. Manual fix required."
  exit 1
end

raw_desc = desc_match[1].strip

# Drop a block-scalar header (>, |, >-, |-, >+, |+) so the indicator itself
# does not end up inside the rewritten description text.
raw_desc = raw_desc.sub(/\A[>|][+-]?[ \t]*\n/, "")

# Strip existing outer quotes if present (simple single-line quoted values)
if raw_desc.length >= 2 && raw_desc.start_with?("'") && raw_desc.end_with?("'")
  # Undo YAML single-quote escaping ('' => ') so re-escaping below does not
  # double the quotes.
  raw_desc = raw_desc[1..-2].gsub("''", "'")
elsif raw_desc.length >= 2 && raw_desc.start_with?('"') && raw_desc.end_with?('"')
  raw_desc = raw_desc[1..-2]
end

# Collapse multi-line: strip leading whitespace from continuation lines
description_value = raw_desc.gsub(/\n\s+/, " ").strip

# Escape any single quotes inside the description value
description_value_escaped = description_value.gsub("'", "''")

# Collect the frontmatter lines that are not part of the name/description
# fields (including their indented continuation lines); these are kept as-is.
remaining = []
in_replaced_field = false
yaml_raw.each_line do |line|
  if line.match?(/^(name|description):/)
    in_replaced_field = true
    next
  end
  # Indented, non-blank line directly after name/description: continuation.
  next if in_replaced_field && line.match?(/^\s+\S/)

  in_replaced_field = false
  # Skip leading blank lines so the rebuilt block starts cleanly.
  remaining << line unless line.strip.empty? && remaining.empty?
end

# Rebuild frontmatter: name, single-quoted one-line description, then the rest.
fixed_fm = [
  "name: #{name_value}",
  "description: '#{description_value_escaped}'",
  *remaining.map(&:chomp)
].join("\n").strip

# Verify the rebuilt frontmatter BEFORE touching the file, so a failed
# auto-fix never overwrites the original with something still broken.
begin
  verify_data = YAML.safe_load(fixed_fm)
  raise "frontmatter is not a mapping" unless verify_data.is_a?(Hash)
  raise "description not a String" unless verify_data["description"].is_a?(String)
rescue StandardError => e
  warn "ERROR: Auto-fix failed, manual intervention required: #{e.message}"
  exit 1
end

File.write(path, "#{prefix}#{fixed_fm}#{suffix}#{body}")
puts "Auto-fixed and saved: #{path}"
puts "OK: name=#{verify_data['name'].inspect} description_length=#{verify_data['description'].length}"
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
module Octo
  # IdleCompressionTimer triggers memory compression after a period of inactivity.
  #
  # Both CLI and WebUI use the same agent-level compression logic; this class
  # abstracts the "wait N seconds, then compress" pattern so it can be shared.
  #
  # Usage:
  #   timer = IdleCompressionTimer.new(agent: agent, session_manager: sm) do |success|
  #     # called on the compression thread after compression finishes
  #     broadcast_update if success
  #   end
  #   timer.start   # call after each agent run completes
  #   timer.cancel  # call when new user input arrives
  class IdleCompressionTimer
    # Default seconds of inactivity before idle compression is triggered
    IDLE_DELAY = 180

    # @param agent [Octo::Agent] the agent whose messages will be compressed
    # @param session_manager [Octo::SessionManager, nil] used to persist session after compression
    # @param logger [#call, nil] optional logger lambda: ->(msg, level:) { ... }
    # @param idle_delay [Numeric] seconds to wait before compressing (defaults to IDLE_DELAY)
    # @param on_compress [Proc, nil] block called once after each compression attempt with success (bool)
    def initialize(agent:, session_manager: nil, logger: nil, idle_delay: IDLE_DELAY, &on_compress)
      @agent = agent
      @session_manager = session_manager
      @logger = logger
      @idle_delay = idle_delay
      @on_compress = on_compress

      @timer_thread = nil
      @compress_thread = nil
      @mutex = Mutex.new
    end

    # Start (or restart) the idle timer.
    # Cancels any existing timer first, then waits the idle delay before compressing.
    #
    # @return [Thread] the newly started timer thread
    def start
      cancel # reset any existing timer

      # Register the thread under the mutex so active?/cancel never observe a
      # running timer that has not been recorded yet. The thread body has to
      # take the same mutex before it can act, so @timer_thread is guaranteed
      # to be set by then.
      @mutex.synchronize do
        @timer_thread = Thread.new do
          Thread.current.name = "idle-compression-timer"
          sleep @idle_delay
          compress_after_idle
        end
      end
    end

    # Cancel the timer and any in-progress compression.
    # Raises AgentInterrupted on the compress thread and waits (up to 5 seconds)
    # for it to exit, so history rollback can complete before the caller starts
    # a new agent.run.
    def cancel
      compress_thread_to_join = nil

      @mutex.synchronize do
        @timer_thread&.kill
        if @compress_thread&.alive?
          @compress_thread.raise(Octo::AgentInterrupted, "Idle timer cancelled")
          compress_thread_to_join = @compress_thread
        end
        @timer_thread = nil
        @compress_thread = nil
      end

      # Join outside the mutex to avoid deadlock.
      compress_thread_to_join&.join(5)
    end

    # True if the timer or compression is currently active.
    #
    # @return [Boolean] always a strict true/false, like #compressing?
    def active?
      @mutex.synchronize { @timer_thread&.alive? || @compress_thread&.alive? || false }
    end

    # True only when compression work is actually in flight (not during the
    # pre-compression idle countdown). Used by callers that want to treat
    # Ctrl+C during active compression as "stop compressing" rather than
    # "exit the program".
    #
    # @return [Boolean]
    def compressing?
      @mutex.synchronize { @compress_thread&.alive? || false }
    end

    # Runs on the timer thread once the idle delay has elapsed: spawns the
    # compression worker, waits for it, then clears the bookkeeping.
    private def compress_after_idle
      worker = nil

      # Register @compress_thread inside the mutex BEFORE the thread can be
      # observed by cancel, so cancel can always find and interrupt it.
      @mutex.synchronize do
        # If cancel/start ran while we were waking up, this timer has been
        # superseded: do not start compression on its behalf.
        next unless @timer_thread.equal?(Thread.current)

        worker = Thread.new do
          Thread.current.name = "idle-compression-work"
          run_compression
        end
        @compress_thread = worker
      end
      return unless worker

      worker.join

      # Only clear state that still belongs to this timer, so a timer that was
      # restarted in the meantime is not wiped out.
      @mutex.synchronize do
        @compress_thread = nil if @compress_thread.equal?(worker)
        @timer_thread = nil if @timer_thread.equal?(Thread.current)
      end
    end

    # Performs one compression attempt and reports the outcome to the
    # on_compress callback exactly once.
    private def run_compression
      notify(perform_compression)
    end

    # Compresses and, on success, persists the session.
    #
    # @return [Object] the agent's result on success, false when the attempt
    #   was cancelled or raised
    private def perform_compression
      success = @agent.trigger_idle_compression
      @session_manager.save(@agent.to_session_data(status: :success)) if success && @session_manager
      success
    rescue Octo::AgentInterrupted
      log("Idle compression cancelled", level: :info)
      false
    rescue => e
      log("Idle compression error: #{e.message}", level: :error)
      false
    end

    # Invokes the on_compress callback. Errors raised by the callback are
    # logged here rather than triggering a second, contradictory callback.
    private def notify(success)
      @on_compress&.call(success)
    rescue Octo::AgentInterrupted
      log("Idle compression cancelled", level: :info)
    rescue => e
      log("Idle compression error: #{e.message}", level: :error)
    end

    private def log(message, level: :info)
      @logger&.call(message, level: level)
    end
  end
end