octo-agent 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/.clacky/skills/commit/SKILL.md +423 -0
  3. data/.clacky/skills/gem-release/SKILL.md +199 -0
  4. data/.clacky/skills/gem-release/scripts/release.sh +304 -0
  5. data/.clacky/skills/oss-upload/SKILL.md +47 -0
  6. data/.octorules +106 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +8 -0
  9. data/CHANGELOG.md +76 -0
  10. data/CODE_OF_CONDUCT.md +132 -0
  11. data/CONTRIBUTING.md +92 -0
  12. data/Dockerfile +28 -0
  13. data/LICENSE.txt +22 -0
  14. data/POSITIONING.md +46 -0
  15. data/README.md +134 -0
  16. data/README_CN.md +134 -0
  17. data/Rakefile +34 -0
  18. data/benchmark/fixtures/sample_project/Gemfile +3 -0
  19. data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
  20. data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
  21. data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
  22. data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
  23. data/benchmark/results/EVALUATION_REPORT.md +165 -0
  24. data/benchmark/results/baseline_20260511_174424.json +128 -0
  25. data/benchmark/results/report_20260511_175256.json +271 -0
  26. data/benchmark/results/report_20260511_175444.json +271 -0
  27. data/benchmark/results/treatment_20260511_175103.json +130 -0
  28. data/benchmark/runner.rb +441 -0
  29. data/bin/octo +7 -0
  30. data/docs/agent-first-ui-design.md +77 -0
  31. data/docs/billing-system.md +318 -0
  32. data/docs/channel-architecture.md +235 -0
  33. data/docs/engineering-article.md +343 -0
  34. data/docs/session-skill-invocation.md +69 -0
  35. data/docs/time_machine_design.md +247 -0
  36. data/docs/ui2-architecture.md +124 -0
  37. data/homebrew/README.md +96 -0
  38. data/homebrew/openocto.rb +24 -0
  39. data/lib/octo/agent/hook_manager.rb +61 -0
  40. data/lib/octo/agent/llm_caller.rb +800 -0
  41. data/lib/octo/agent/memory_updater.rb +246 -0
  42. data/lib/octo/agent/message_compressor.rb +225 -0
  43. data/lib/octo/agent/message_compressor_helper.rb +869 -0
  44. data/lib/octo/agent/next_message_suggester.rb +215 -0
  45. data/lib/octo/agent/session_serializer.rb +685 -0
  46. data/lib/octo/agent/skill_auto_creator.rb +114 -0
  47. data/lib/octo/agent/skill_evolution.rb +61 -0
  48. data/lib/octo/agent/skill_manager.rb +466 -0
  49. data/lib/octo/agent/skill_reflector.rb +89 -0
  50. data/lib/octo/agent/system_prompt_builder.rb +101 -0
  51. data/lib/octo/agent/time_machine.rb +214 -0
  52. data/lib/octo/agent/tool_executor.rb +454 -0
  53. data/lib/octo/agent/tool_registry.rb +150 -0
  54. data/lib/octo/agent.rb +2180 -0
  55. data/lib/octo/agent_config.rb +989 -0
  56. data/lib/octo/agent_profile.rb +112 -0
  57. data/lib/octo/anthropic_stream_aggregator.rb +137 -0
  58. data/lib/octo/background_task_registry.rb +324 -0
  59. data/lib/octo/banner.rb +34 -0
  60. data/lib/octo/bedrock_stream_aggregator.rb +137 -0
  61. data/lib/octo/block_font.rb +331 -0
  62. data/lib/octo/cli.rb +968 -0
  63. data/lib/octo/client.rb +623 -0
  64. data/lib/octo/default_agents/SOUL.md +3 -0
  65. data/lib/octo/default_agents/USER.md +1 -0
  66. data/lib/octo/default_agents/base_prompt.md +66 -0
  67. data/lib/octo/default_agents/coding/profile.yml +2 -0
  68. data/lib/octo/default_agents/coding/system_prompt.md +67 -0
  69. data/lib/octo/default_agents/general/profile.yml +2 -0
  70. data/lib/octo/default_agents/general/system_prompt.md +16 -0
  71. data/lib/octo/default_parsers/doc_parser.rb +69 -0
  72. data/lib/octo/default_parsers/docx_parser.rb +188 -0
  73. data/lib/octo/default_parsers/pdf_parser.rb +120 -0
  74. data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
  75. data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
  76. data/lib/octo/default_parsers/pptx_parser.rb +140 -0
  77. data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
  78. data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
  79. data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
  80. data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
  81. data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
  82. data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
  83. data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
  84. data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
  85. data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
  86. data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
  87. data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
  88. data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
  89. data/lib/octo/default_skills/onboard/SKILL.md +578 -0
  90. data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
  91. data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
  92. data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
  93. data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
  94. data/lib/octo/default_skills/personal-website/publish.rb +235 -0
  95. data/lib/octo/default_skills/product-help/SKILL.md +123 -0
  96. data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
  97. data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
  98. data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
  99. data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
  100. data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
  101. data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
  102. data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
  103. data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
  104. data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
  105. data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
  106. data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
  107. data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
  108. data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
  109. data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
  110. data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
  111. data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
  112. data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
  113. data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
  114. data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
  115. data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
  116. data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
  117. data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
  118. data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
  119. data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
  120. data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
  121. data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  122. data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  123. data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
  124. data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
  125. data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  126. data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
  127. data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
  128. data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  129. data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
  130. data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
  131. data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
  132. data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
  133. data/lib/octo/idle_compression_timer.rb +115 -0
  134. data/lib/octo/json_ui_controller.rb +204 -0
  135. data/lib/octo/message_format/anthropic.rb +409 -0
  136. data/lib/octo/message_format/bedrock.rb +361 -0
  137. data/lib/octo/message_format/open_ai.rb +222 -0
  138. data/lib/octo/message_history.rb +373 -0
  139. data/lib/octo/openai_stream_aggregator.rb +130 -0
  140. data/lib/octo/plain_ui_controller.rb +166 -0
  141. data/lib/octo/providers.rb +534 -0
  142. data/lib/octo/server/browser_manager.rb +397 -0
  143. data/lib/octo/server/channel/adapters/base.rb +82 -0
  144. data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
  145. data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
  146. data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
  147. data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
  148. data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
  149. data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
  150. data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
  151. data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
  152. data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
  153. data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
  154. data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
  155. data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
  156. data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
  157. data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
  158. data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
  159. data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
  160. data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
  161. data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
  162. data/lib/octo/server/channel/channel_config.rb +178 -0
  163. data/lib/octo/server/channel/channel_manager.rb +468 -0
  164. data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
  165. data/lib/octo/server/channel.rb +33 -0
  166. data/lib/octo/server/discover.rb +77 -0
  167. data/lib/octo/server/epipe_safe_io.rb +105 -0
  168. data/lib/octo/server/http_server.rb +3554 -0
  169. data/lib/octo/server/scheduler.rb +317 -0
  170. data/lib/octo/server/server_master.rb +325 -0
  171. data/lib/octo/server/session_registry.rb +431 -0
  172. data/lib/octo/server/web_ui_controller.rb +487 -0
  173. data/lib/octo/session_manager.rb +385 -0
  174. data/lib/octo/skill.rb +466 -0
  175. data/lib/octo/skill_loader.rb +328 -0
  176. data/lib/octo/tools/base.rb +118 -0
  177. data/lib/octo/tools/browser.rb +625 -0
  178. data/lib/octo/tools/edit.rb +165 -0
  179. data/lib/octo/tools/file_reader.rb +549 -0
  180. data/lib/octo/tools/glob.rb +162 -0
  181. data/lib/octo/tools/grep.rb +356 -0
  182. data/lib/octo/tools/invoke_skill.rb +96 -0
  183. data/lib/octo/tools/list_tasks.rb +54 -0
  184. data/lib/octo/tools/redo_task.rb +41 -0
  185. data/lib/octo/tools/request_user_feedback.rb +84 -0
  186. data/lib/octo/tools/security.rb +333 -0
  187. data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
  188. data/lib/octo/tools/terminal/persistent_session.rb +268 -0
  189. data/lib/octo/tools/terminal/safe_rm.sh +106 -0
  190. data/lib/octo/tools/terminal/session_manager.rb +213 -0
  191. data/lib/octo/tools/terminal.rb +1828 -0
  192. data/lib/octo/tools/todo_manager.rb +374 -0
  193. data/lib/octo/tools/trash_manager.rb +388 -0
  194. data/lib/octo/tools/undo_task.rb +35 -0
  195. data/lib/octo/tools/web_fetch.rb +242 -0
  196. data/lib/octo/tools/web_search.rb +260 -0
  197. data/lib/octo/tools/write.rb +77 -0
  198. data/lib/octo/ui2/block_font.rb +10 -0
  199. data/lib/octo/ui2/components/base_component.rb +163 -0
  200. data/lib/octo/ui2/components/command_suggestions.rb +290 -0
  201. data/lib/octo/ui2/components/common_component.rb +96 -0
  202. data/lib/octo/ui2/components/inline_input.rb +226 -0
  203. data/lib/octo/ui2/components/input_area.rb +1338 -0
  204. data/lib/octo/ui2/components/message_component.rb +99 -0
  205. data/lib/octo/ui2/components/modal_component.rb +419 -0
  206. data/lib/octo/ui2/components/todo_area.rb +149 -0
  207. data/lib/octo/ui2/components/tool_component.rb +107 -0
  208. data/lib/octo/ui2/components/welcome_banner.rb +139 -0
  209. data/lib/octo/ui2/layout_manager.rb +807 -0
  210. data/lib/octo/ui2/line_editor.rb +363 -0
  211. data/lib/octo/ui2/markdown_renderer.rb +100 -0
  212. data/lib/octo/ui2/output_buffer.rb +370 -0
  213. data/lib/octo/ui2/progress_handle.rb +362 -0
  214. data/lib/octo/ui2/progress_indicator.rb +55 -0
  215. data/lib/octo/ui2/screen_buffer.rb +273 -0
  216. data/lib/octo/ui2/terminal_detector.rb +119 -0
  217. data/lib/octo/ui2/theme_manager.rb +85 -0
  218. data/lib/octo/ui2/themes/base_theme.rb +105 -0
  219. data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
  220. data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
  221. data/lib/octo/ui2/thinking_verbs.rb +26 -0
  222. data/lib/octo/ui2/ui_controller.rb +1625 -0
  223. data/lib/octo/ui2/view_renderer.rb +177 -0
  224. data/lib/octo/ui2.rb +40 -0
  225. data/lib/octo/ui_interface.rb +154 -0
  226. data/lib/octo/utils/arguments_parser.rb +191 -0
  227. data/lib/octo/utils/browser_detector.rb +195 -0
  228. data/lib/octo/utils/encoding.rb +92 -0
  229. data/lib/octo/utils/environment_detector.rb +140 -0
  230. data/lib/octo/utils/file_ignore_helper.rb +170 -0
  231. data/lib/octo/utils/file_processor.rb +601 -0
  232. data/lib/octo/utils/gitignore_parser.rb +154 -0
  233. data/lib/octo/utils/limit_stack.rb +152 -0
  234. data/lib/octo/utils/logger.rb +124 -0
  235. data/lib/octo/utils/login_shell.rb +72 -0
  236. data/lib/octo/utils/model_pricing.rb +646 -0
  237. data/lib/octo/utils/parser_manager.rb +165 -0
  238. data/lib/octo/utils/path_helper.rb +15 -0
  239. data/lib/octo/utils/scripts_manager.rb +59 -0
  240. data/lib/octo/utils/string_matcher.rb +158 -0
  241. data/lib/octo/utils/trash_directory.rb +112 -0
  242. data/lib/octo/utils/workspace_rules.rb +46 -0
  243. data/lib/octo/version.rb +5 -0
  244. data/lib/octo/web/app.css +7141 -0
  245. data/lib/octo/web/app.js +543 -0
  246. data/lib/octo/web/apple-touch-icon.png +0 -0
  247. data/lib/octo/web/auth.js +150 -0
  248. data/lib/octo/web/channels.js +276 -0
  249. data/lib/octo/web/datepicker.js +205 -0
  250. data/lib/octo/web/favicon.png +0 -0
  251. data/lib/octo/web/i18n.js +1073 -0
  252. data/lib/octo/web/icon-512.png +0 -0
  253. data/lib/octo/web/icon-dark.svg +25 -0
  254. data/lib/octo/web/icon.svg +29 -0
  255. data/lib/octo/web/index.html +871 -0
  256. data/lib/octo/web/marked.min.js +69 -0
  257. data/lib/octo/web/onboard.js +491 -0
  258. data/lib/octo/web/profile.js +442 -0
  259. data/lib/octo/web/sessions.js +4421 -0
  260. data/lib/octo/web/settings.js +913 -0
  261. data/lib/octo/web/sidebar.js +32 -0
  262. data/lib/octo/web/skills.js +885 -0
  263. data/lib/octo/web/tasks.js +297 -0
  264. data/lib/octo/web/theme.js +105 -0
  265. data/lib/octo/web/trash.js +343 -0
  266. data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
  267. data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
  268. data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
  269. data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  270. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  271. data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  272. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  273. data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  274. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  275. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  276. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  277. data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  278. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  279. data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  280. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  281. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  282. data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  283. data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  284. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  285. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  286. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  287. data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  288. data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  289. data/lib/octo/web/vendor/katex/katex.min.css +1 -0
  290. data/lib/octo/web/vendor/katex/katex.min.js +1 -0
  291. data/lib/octo/web/version.js +449 -0
  292. data/lib/octo/web/weixin-qr.html +209 -0
  293. data/lib/octo/web/ws-dispatcher.js +357 -0
  294. data/lib/octo/web/ws.js +128 -0
  295. data/lib/octo.rb +145 -0
  296. data/scripts/build/build.sh +329 -0
  297. data/scripts/build/lib/apt.sh +56 -0
  298. data/scripts/build/lib/brew.sh +89 -0
  299. data/scripts/build/lib/colors.sh +17 -0
  300. data/scripts/build/lib/gem.sh +95 -0
  301. data/scripts/build/lib/mise.sh +125 -0
  302. data/scripts/build/lib/network.sh +157 -0
  303. data/scripts/build/lib/os.sh +57 -0
  304. data/scripts/build/lib/shell.sh +37 -0
  305. data/scripts/build/src/install.sh.cc +174 -0
  306. data/scripts/build/src/install_browser.sh.cc +101 -0
  307. data/scripts/build/src/install_full.sh.cc +290 -0
  308. data/scripts/build/src/install_rails_deps.sh.cc +145 -0
  309. data/scripts/build/src/install_system_deps.sh.cc +123 -0
  310. data/scripts/build/src/uninstall.sh.cc +101 -0
  311. data/scripts/install.ps1 +532 -0
  312. data/scripts/install.sh +567 -0
  313. data/scripts/install_browser.sh +479 -0
  314. data/scripts/install_full.sh +838 -0
  315. data/scripts/install_rails_deps.sh +746 -0
  316. data/scripts/install_system_deps.sh +518 -0
  317. data/scripts/uninstall.sh +287 -0
  318. data/sig/octo.rbs +4 -0
  319. metadata +614 -0
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick validation script for skills - minimal version
4
+ """
5
+
6
+ import sys
7
+ import os
8
+ import re
9
+ import yaml
10
+ from pathlib import Path
11
+
12
def validate_skill(skill_path):
    """Run basic validation on a skill directory's SKILL.md frontmatter.

    Args:
        skill_path: Path (``str`` or ``Path``) of the skill directory.

    Returns:
        A ``(valid, message)`` tuple. ``valid`` is True only when every check
        passes; ``message`` describes the first failed check, or confirms
        success.
    """
    skill_path = Path(skill_path)

    # Check SKILL.md exists
    skill_md = skill_path / 'SKILL.md'
    if not skill_md.exists():
        return False, "SKILL.md not found"

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding, and report undecodable files as a
    # validation failure instead of crashing.
    try:
        content = skill_md.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        return False, "SKILL.md is not valid UTF-8"
    if not content.startswith('---'):
        return False, "No YAML frontmatter found"

    # Extract the text between the opening and closing '---' lines
    match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL)
    if not match:
        return False, "Invalid frontmatter format"

    frontmatter_text = match.group(1)

    # Parse YAML frontmatter
    try:
        frontmatter = yaml.safe_load(frontmatter_text)
        if not isinstance(frontmatter, dict):
            return False, "Frontmatter must be a YAML dictionary"
    except yaml.YAMLError as e:
        return False, f"Invalid YAML in frontmatter: {e}"

    # Define allowed properties
    ALLOWED_PROPERTIES = {'name', 'description', 'license', 'allowed-tools', 'metadata', 'compatibility'}

    # Check for unexpected top-level properties (nested keys under metadata
    # are not inspected). YAML keys are not necessarily strings (e.g. `1: x`),
    # so stringify them before sorting/joining to avoid a TypeError.
    unexpected_keys = set(frontmatter.keys()) - ALLOWED_PROPERTIES
    if unexpected_keys:
        unexpected = ', '.join(sorted(str(key) for key in unexpected_keys))
        return False, (
            f"Unexpected key(s) in SKILL.md frontmatter: {unexpected}. "
            f"Allowed properties are: {', '.join(sorted(ALLOWED_PROPERTIES))}"
        )

    # Check required fields
    if 'name' not in frontmatter:
        return False, "Missing 'name' in frontmatter"
    if 'description' not in frontmatter:
        return False, "Missing 'description' in frontmatter"

    # Validate name.
    # NOTE(review): a present-but-empty name skips the checks below and is
    # accepted — confirm that this is intended.
    name = frontmatter.get('name', '')
    if not isinstance(name, str):
        return False, f"Name must be a string, got {type(name).__name__}"
    name = name.strip()
    if name:
        # Check naming convention (kebab-case: lowercase with hyphens)
        if not re.match(r'^[a-z0-9-]+$', name):
            return False, f"Name '{name}' should be kebab-case (lowercase letters, digits, and hyphens only)"
        if name.startswith('-') or name.endswith('-') or '--' in name:
            return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens"
        # Check name length (max 64 characters per spec)
        if len(name) > 64:
            return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."

    # Validate description
    description = frontmatter.get('description', '')
    if not isinstance(description, str):
        return False, f"Description must be a string, got {type(description).__name__}"
    description = description.strip()
    if description:
        # Check for angle brackets
        if '<' in description or '>' in description:
            return False, "Description cannot contain angle brackets (< or >)"
        # Check description length (max 1024 characters per spec)
        if len(description) > 1024:
            return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."

    # Validate compatibility field if present (optional)
    compatibility = frontmatter.get('compatibility', '')
    if compatibility:
        if not isinstance(compatibility, str):
            return False, f"Compatibility must be a string, got {type(compatibility).__name__}"
        if len(compatibility) > 500:
            return False, f"Compatibility is too long ({len(compatibility)} characters). Maximum is 500 characters."

    return True, "Skill is valid!"
95
+
96
if __name__ == "__main__":
    # Exactly one positional argument is accepted: the skill directory.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python quick_validate.py <skill_directory>")
        sys.exit(1)

    # Print the validation message and mirror the outcome in the exit status
    # (0 when valid, 1 otherwise).
    valid, message = validate_skill(cli_args[0])
    print(message)
    exit_status = 0 if valid else 1
    sys.exit(exit_status)
@@ -0,0 +1,317 @@
1
+ #!/usr/bin/env python3
2
+ """Run trigger evaluation for a skill description.
3
+
4
+ Tests whether Octo's agent triggers (invokes) a skill for a set of queries.
5
+ Runs octo agent --json in persistent mode, sends queries via stdin NDJSON,
6
+ detects {"type":"tool_call","name":"invoke_skill","args":{"skill_name":"<name>"}}
7
+ events, and returns pass/fail results as JSON.
8
+
9
+ Executes queries serially (Octo is single-agent, no parallel workers).
10
+ """
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import select
16
+ import shutil
17
+ import subprocess
18
+ import sys
19
+ import time
20
+ import uuid
21
+ from pathlib import Path
22
+
23
+ from scripts.utils import parse_skill_md
24
+
25
+
26
# Resolve the `octo` executable from PATH. When it is not installed, fall back
# to the bare command name so that launching it fails with an error naming
# "octo", rather than pointing at a path that only exists on one developer's
# machine.
OCTO_BIN = shutil.which("octo") or "octo"
# Per-user skills directory under the home directory.
SKILLS_DIR = Path.home() / ".octo" / "skills"
28
+
29
+
30
def find_project_root() -> Path:
    """Return the nearest directory at or above cwd that contains `.octo/`.

    The current working directory is checked first, then each ancestor in
    turn. When no directory on that chain has a `.octo` sub-directory, the
    current working directory itself is returned.
    """
    here = Path.cwd()
    candidates = (here, *here.parents)
    return next(
        (folder for folder in candidates if (folder / ".octo").is_dir()),
        here,
    )
37
+
38
+
39
+ def _read_ndjson_lines(proc, timeout: float) -> list[dict]:
40
+ """Read NDJSON lines from proc.stdout until timeout or process exits."""
41
+ lines = []
42
+ buffer = b""
43
+ start = time.time()
44
+ while time.time() - start < timeout:
45
+ ready = select.select([proc.stdout], [], [], 0.5)[0]
46
+ if ready:
47
+ chunk = os.read(proc.stdout.fileno(), 8192)
48
+ if chunk:
49
+ buffer += chunk
50
+ while b"\n" in buffer:
51
+ line_b, buffer = buffer.split(b"\n", 1)
52
+ line = line_b.decode("utf-8", errors="replace").strip()
53
+ if not line:
54
+ continue
55
+ try:
56
+ lines.append(json.loads(line))
57
+ except json.JSONDecodeError:
58
+ pass
59
+ if proc.poll() is not None:
60
+ # drain remaining
61
+ remaining = proc.stdout.read()
62
+ if remaining:
63
+ for line in remaining.decode("utf-8", errors="replace").splitlines():
64
+ line = line.strip()
65
+ if line:
66
+ try:
67
+ lines.append(json.loads(line))
68
+ except json.JSONDecodeError:
69
+ pass
70
+ break
71
+ return lines
72
+
73
+
74
def _iter_agent_events(proc, deadline: float):
    """Yield JSON objects parsed from ``proc.stdout`` lines until ``deadline``.

    ``deadline`` is a ``time.monotonic()`` timestamp. Iteration also stops on
    end-of-file, or once the process has exited with no output pending. Lines
    that are blank, not valid JSON, or not JSON objects are skipped.
    """
    pending = b""
    while time.monotonic() < deadline:
        if select.select([proc.stdout], [], [], 0.5)[0]:
            chunk = os.read(proc.stdout.fileno(), 8192)
            if not chunk:
                return  # EOF — avoid spinning on a closed pipe
            pending += chunk
            *complete, pending = pending.split(b"\n")
            for raw in complete:
                # Decode leniently so stray non-UTF-8 bytes cannot raise.
                text = raw.decode("utf-8", errors="replace").strip()
                if not text:
                    continue
                try:
                    event = json.loads(text)
                except json.JSONDecodeError:
                    continue
                if isinstance(event, dict):
                    yield event
        elif proc.poll() is not None:
            return


def run_single_query(
    query: str,
    skill_name: str,
    skill_description: str,
    timeout: int,
    project_root: str,
) -> bool:
    """Run a single query via octo agent --json and detect skill trigger.

    Creates a uniquely named temporary skill under ``SKILLS_DIR``, starts
    ``octo agent`` in JSON mode, sends the query on stdin, and watches stdout
    for an ``invoke_skill`` tool_call event that targets the temporary skill.

    Args:
        query: User message sent to the agent.
        skill_name: Base name of the skill; a random suffix is appended.
        skill_description: Description written into the temporary SKILL.md.
        timeout: Seconds to wait for a trigger or "complete" event.
        project_root: Directory passed to the agent via ``--path``.

    Returns:
        True if the agent invoked the temporary skill; False if it completed,
        exited, or timed out without doing so.
    """
    unique_id = uuid.uuid4().hex[:8]
    temp_skill_name = f"{skill_name}-eval-{unique_id}"
    temp_skill_dir = SKILLS_DIR / temp_skill_name

    try:
        # Write temporary skill. The description is emitted as a double-quoted
        # scalar (JSON string syntax) so that colons, '#', quotes or newlines
        # in it cannot corrupt the frontmatter.
        # NOTE(review): assumes the agent parses frontmatter as YAML — confirm.
        temp_skill_dir.mkdir(parents=True, exist_ok=True)
        quoted_description = json.dumps(skill_description, ensure_ascii=False)
        skill_md = (
            f"---\n"
            f"name: {temp_skill_name}\n"
            f"description: {quoted_description}\n"
            f"---\n\n"
            f"# {skill_name}\n\n"
            f"This skill handles: {skill_description}\n"
        )
        (temp_skill_dir / "SKILL.md").write_text(skill_md, encoding="utf-8")

        # Launch octo agent in persistent JSON mode
        proc = subprocess.Popen(
            [OCTO_BIN, "agent", "--json", "--mode", "auto_approve",
             "--path", project_root],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            bufsize=0,
        )

        try:
            # Wait up to 10 seconds for the "system" ready event. The query is
            # sent afterwards whether or not the event was seen.
            for event in _iter_agent_events(proc, time.monotonic() + 10):
                if event.get("type") == "system":
                    break

            # Send query
            msg = (json.dumps({"type": "message", "content": query}) + "\n").encode()
            proc.stdin.write(msg)
            proc.stdin.flush()

            # Read events until trigger, "complete", process exit, or timeout
            for event in _iter_agent_events(proc, time.monotonic() + timeout):
                event_type = event.get("type")
                if event_type == "tool_call" and event.get("name") == "invoke_skill":
                    args = event.get("args") or {}
                    if isinstance(args, dict) and args.get("skill_name", "") == temp_skill_name:
                        return True  # triggered — exit early
                elif event_type == "complete":
                    return False

            return False

        finally:
            # Ask the agent to exit, give it a moment to do so, and only then
            # kill it.
            try:
                proc.stdin.write((json.dumps({"type": "exit"}) + "\n").encode())
                proc.stdin.flush()
            except (OSError, ValueError):
                pass  # pipe already closed or process already gone
            try:
                proc.wait(timeout=1)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
            # Close our ends of the pipes so descriptors do not accumulate
            # across the many processes an evaluation spawns.
            for pipe in (proc.stdin, proc.stdout):
                try:
                    pipe.close()
                except OSError:
                    pass

    finally:
        # Always remove temp skill directory
        shutil.rmtree(temp_skill_dir, ignore_errors=True)
193
+
194
+
195
def run_eval(
    eval_set: list[dict],
    skill_name: str,
    description: str,
    timeout: int,
    project_root: Path,
    runs_per_query: int = 1,
    trigger_threshold: float = 0.5,
) -> dict:
    """Run the full eval set serially and return results.

    Queries are executed one after another; each run calls
    ``run_single_query`` once. A run that raises is reported on stderr and
    counted as "not triggered".

    Args:
        eval_set: Items with a ``"query"`` string and a ``"should_trigger"``
            flag. Items sharing the same query text are pooled into a single
            result, using the last such item's ``should_trigger``.
        skill_name: Name of the skill under evaluation.
        description: Skill description to test.
        timeout: Per-query timeout in seconds.
        project_root: Directory handed to the agent.
        runs_per_query: Number of runs per eval item.
        trigger_threshold: A query counts as triggering when its trigger rate
            is at least this value.

    Returns:
        A dict with ``skill_name``, ``description``, per-query ``results`` and
        a ``summary`` of total/passed/failed counts.
    """
    results = []
    query_triggers: dict[str, list[bool]] = {}
    query_items: dict[str, dict] = {}

    for item in eval_set:
        query = item["query"]
        query_items[query] = item
        outcomes = query_triggers.setdefault(query, [])

        for run_idx in range(runs_per_query):
            try:
                triggered = run_single_query(
                    query=query,
                    skill_name=skill_name,
                    skill_description=description,
                    timeout=timeout,
                    project_root=str(project_root),
                )
            except Exception as e:
                # Best effort: one failing run must not abort the whole eval.
                print(f"Warning: query failed (run {run_idx}): {e}", file=sys.stderr)
                triggered = False
            outcomes.append(triggered)

    for query, triggers in query_triggers.items():
        item = query_items[query]
        trigger_count = sum(triggers)
        # With no runs (runs_per_query <= 0) nothing was observed; report a
        # rate of 0.0 instead of dividing by zero.
        trigger_rate = trigger_count / len(triggers) if triggers else 0.0
        should_trigger = item["should_trigger"]
        if should_trigger:
            did_pass = trigger_rate >= trigger_threshold
        else:
            did_pass = trigger_rate < trigger_threshold
        results.append({
            "query": query,
            "should_trigger": should_trigger,
            "trigger_rate": trigger_rate,
            "triggers": trigger_count,
            "runs": len(triggers),
            "pass": did_pass,
        })

    passed = sum(1 for r in results if r["pass"])
    total = len(results)

    return {
        "skill_name": skill_name,
        "description": description,
        "results": results,
        "summary": {
            "total": total,
            "passed": passed,
            "failed": total - passed,
        },
    }
263
+
264
+
265
def main():
    """Parse CLI arguments, run the trigger evaluation, and print JSON results.

    Exits with status 1 when the skill directory has no SKILL.md. With
    ``--verbose``, progress and a per-query summary go to stderr; the JSON
    report always goes to stdout.
    """
    parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description (Octo)")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override description to test")
    parser.add_argument("--timeout", type=int, default=45, help="Timeout per query in seconds")
    parser.add_argument("--runs-per-query", type=int, default=1, help="Number of runs per query (serially)")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    # --num-workers kept for CLI compat but ignored (Octo is serial)
    parser.add_argument("--num-workers", type=int, default=1, help="Ignored — Octo runs serially")
    parser.add_argument("--model", default=None, help="Ignored — model comes from ~/.octo/config.yml")
    args = parser.parse_args()

    # Decode the eval set explicitly as UTF-8 so that non-ASCII queries load
    # the same way regardless of the platform's default encoding.
    eval_set = json.loads(Path(args.eval_set).read_text(encoding="utf-8"))
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    # The third value returned by parse_skill_md is not needed here.
    name, original_description, _content = parse_skill_md(skill_path)
    # An omitted (or empty) --description falls back to the skill's own.
    description = args.description or original_description
    project_root = find_project_root()

    if args.verbose:
        print(f"Evaluating skill: {name}", file=sys.stderr)
        print(f"Description: {description}", file=sys.stderr)
        print(f"Queries: {len(eval_set)}, runs-per-query: {args.runs_per_query}", file=sys.stderr)

    output = run_eval(
        eval_set=eval_set,
        skill_name=name,
        description=description,
        timeout=args.timeout,
        project_root=project_root,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
    )

    if args.verbose:
        summary = output["summary"]
        print(f"Results: {summary['passed']}/{summary['total']} passed", file=sys.stderr)
        for r in output["results"]:
            status = "PASS" if r["pass"] else "FAIL"
            rate_str = f"{r['triggers']}/{r['runs']}"
            print(f"  [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}", file=sys.stderr)

    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()