octo-agent 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clacky/skills/commit/SKILL.md +423 -0
- data/.clacky/skills/gem-release/SKILL.md +199 -0
- data/.clacky/skills/gem-release/scripts/release.sh +304 -0
- data/.clacky/skills/oss-upload/SKILL.md +47 -0
- data/.octorules +106 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +76 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +92 -0
- data/Dockerfile +28 -0
- data/LICENSE.txt +22 -0
- data/POSITIONING.md +46 -0
- data/README.md +134 -0
- data/README_CN.md +134 -0
- data/Rakefile +34 -0
- data/benchmark/fixtures/sample_project/Gemfile +3 -0
- data/benchmark/fixtures/sample_project/lib/api_handler.rb +32 -0
- data/benchmark/fixtures/sample_project/lib/order_calculator.rb +23 -0
- data/benchmark/fixtures/sample_project/lib/user_renderer.rb +20 -0
- data/benchmark/fixtures/sample_project/spec/order_calculator_spec.rb +20 -0
- data/benchmark/results/EVALUATION_REPORT.md +165 -0
- data/benchmark/results/baseline_20260511_174424.json +128 -0
- data/benchmark/results/report_20260511_175256.json +271 -0
- data/benchmark/results/report_20260511_175444.json +271 -0
- data/benchmark/results/treatment_20260511_175103.json +130 -0
- data/benchmark/runner.rb +441 -0
- data/bin/octo +7 -0
- data/docs/agent-first-ui-design.md +77 -0
- data/docs/billing-system.md +318 -0
- data/docs/channel-architecture.md +235 -0
- data/docs/engineering-article.md +343 -0
- data/docs/session-skill-invocation.md +69 -0
- data/docs/time_machine_design.md +247 -0
- data/docs/ui2-architecture.md +124 -0
- data/homebrew/README.md +96 -0
- data/homebrew/openocto.rb +24 -0
- data/lib/octo/agent/hook_manager.rb +61 -0
- data/lib/octo/agent/llm_caller.rb +800 -0
- data/lib/octo/agent/memory_updater.rb +246 -0
- data/lib/octo/agent/message_compressor.rb +225 -0
- data/lib/octo/agent/message_compressor_helper.rb +869 -0
- data/lib/octo/agent/next_message_suggester.rb +215 -0
- data/lib/octo/agent/session_serializer.rb +685 -0
- data/lib/octo/agent/skill_auto_creator.rb +114 -0
- data/lib/octo/agent/skill_evolution.rb +61 -0
- data/lib/octo/agent/skill_manager.rb +466 -0
- data/lib/octo/agent/skill_reflector.rb +89 -0
- data/lib/octo/agent/system_prompt_builder.rb +101 -0
- data/lib/octo/agent/time_machine.rb +214 -0
- data/lib/octo/agent/tool_executor.rb +454 -0
- data/lib/octo/agent/tool_registry.rb +150 -0
- data/lib/octo/agent.rb +2180 -0
- data/lib/octo/agent_config.rb +989 -0
- data/lib/octo/agent_profile.rb +112 -0
- data/lib/octo/anthropic_stream_aggregator.rb +137 -0
- data/lib/octo/background_task_registry.rb +324 -0
- data/lib/octo/banner.rb +34 -0
- data/lib/octo/bedrock_stream_aggregator.rb +137 -0
- data/lib/octo/block_font.rb +331 -0
- data/lib/octo/cli.rb +968 -0
- data/lib/octo/client.rb +623 -0
- data/lib/octo/default_agents/SOUL.md +3 -0
- data/lib/octo/default_agents/USER.md +1 -0
- data/lib/octo/default_agents/base_prompt.md +66 -0
- data/lib/octo/default_agents/coding/profile.yml +2 -0
- data/lib/octo/default_agents/coding/system_prompt.md +67 -0
- data/lib/octo/default_agents/general/profile.yml +2 -0
- data/lib/octo/default_agents/general/system_prompt.md +16 -0
- data/lib/octo/default_parsers/doc_parser.rb +69 -0
- data/lib/octo/default_parsers/docx_parser.rb +188 -0
- data/lib/octo/default_parsers/pdf_parser.rb +120 -0
- data/lib/octo/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/octo/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/octo/default_parsers/pptx_parser.rb +140 -0
- data/lib/octo/default_parsers/xlsx_parser.rb +121 -0
- data/lib/octo/default_skills/browser-setup/SKILL.md +426 -0
- data/lib/octo/default_skills/channel-manager/SKILL.md +623 -0
- data/lib/octo/default_skills/channel-manager/dingtalk_setup.rb +191 -0
- data/lib/octo/default_skills/channel-manager/discord_setup.rb +199 -0
- data/lib/octo/default_skills/channel-manager/feishu_setup.rb +574 -0
- data/lib/octo/default_skills/channel-manager/import_lark_skills.rb +97 -0
- data/lib/octo/default_skills/channel-manager/install_feishu_skills.rb +105 -0
- data/lib/octo/default_skills/channel-manager/weixin_setup.rb +274 -0
- data/lib/octo/default_skills/code-explorer/SKILL.md +36 -0
- data/lib/octo/default_skills/cron-task-creator/SKILL.md +257 -0
- data/lib/octo/default_skills/cron-task-creator/evals/evals.json +38 -0
- data/lib/octo/default_skills/onboard/SKILL.md +578 -0
- data/lib/octo/default_skills/onboard/scripts/import_external_skills.rb +413 -0
- data/lib/octo/default_skills/onboard/scripts/install_builtin_skills.rb +97 -0
- data/lib/octo/default_skills/persist-memory/SKILL.md +59 -0
- data/lib/octo/default_skills/personal-website/SKILL.md +113 -0
- data/lib/octo/default_skills/personal-website/publish.rb +235 -0
- data/lib/octo/default_skills/product-help/SKILL.md +123 -0
- data/lib/octo/default_skills/product-help/docs/agent-config.md +74 -0
- data/lib/octo/default_skills/product-help/docs/best-practices.md +49 -0
- data/lib/octo/default_skills/product-help/docs/browser-tool.md +53 -0
- data/lib/octo/default_skills/product-help/docs/built-in-skills.md +43 -0
- data/lib/octo/default_skills/product-help/docs/cli-reference.md +82 -0
- data/lib/octo/default_skills/product-help/docs/create-your-first-skill.md +47 -0
- data/lib/octo/default_skills/product-help/docs/faq.md +98 -0
- data/lib/octo/default_skills/product-help/docs/how-to-use-a-skill.md +58 -0
- data/lib/octo/default_skills/product-help/docs/installation.md +59 -0
- data/lib/octo/default_skills/product-help/docs/memory-system.md +61 -0
- data/lib/octo/default_skills/product-help/docs/octorules.md +62 -0
- data/lib/octo/default_skills/product-help/docs/session-management.md +63 -0
- data/lib/octo/default_skills/product-help/docs/skill-basics.md +55 -0
- data/lib/octo/default_skills/product-help/docs/skill-frontmatter.md +61 -0
- data/lib/octo/default_skills/product-help/docs/web-server.md +49 -0
- data/lib/octo/default_skills/product-help/docs/what-is-octo.md +37 -0
- data/lib/octo/default_skills/product-help/docs/windows-installation.md +36 -0
- data/lib/octo/default_skills/product-help/docs/writing-tips.md +53 -0
- data/lib/octo/default_skills/recall-memory/SKILL.md +65 -0
- data/lib/octo/default_skills/skill-add/SKILL.md +59 -0
- data/lib/octo/default_skills/skill-add/scripts/install_from_zip.rb +295 -0
- data/lib/octo/default_skills/skill-creator/SKILL.md +602 -0
- data/lib/octo/default_skills/skill-creator/agents/analyzer.md +274 -0
- data/lib/octo/default_skills/skill-creator/agents/comparator.md +202 -0
- data/lib/octo/default_skills/skill-creator/agents/grader.md +223 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
- data/lib/octo/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
- data/lib/octo/default_skills/skill-creator/references/schemas.md +430 -0
- data/lib/octo/default_skills/skill-creator/scripts/__init__.py +0 -0
- data/lib/octo/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- data/lib/octo/default_skills/skill-creator/scripts/generate_report.py +326 -0
- data/lib/octo/default_skills/skill-creator/scripts/improve_description.py +310 -0
- data/lib/octo/default_skills/skill-creator/scripts/quick_validate.py +103 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_eval.py +317 -0
- data/lib/octo/default_skills/skill-creator/scripts/run_loop.py +331 -0
- data/lib/octo/default_skills/skill-creator/scripts/utils.py +47 -0
- data/lib/octo/default_skills/skill-creator/scripts/validate_skill_frontmatter.rb +143 -0
- data/lib/octo/idle_compression_timer.rb +115 -0
- data/lib/octo/json_ui_controller.rb +204 -0
- data/lib/octo/message_format/anthropic.rb +409 -0
- data/lib/octo/message_format/bedrock.rb +361 -0
- data/lib/octo/message_format/open_ai.rb +222 -0
- data/lib/octo/message_history.rb +373 -0
- data/lib/octo/openai_stream_aggregator.rb +130 -0
- data/lib/octo/plain_ui_controller.rb +166 -0
- data/lib/octo/providers.rb +534 -0
- data/lib/octo/server/browser_manager.rb +397 -0
- data/lib/octo/server/channel/adapters/base.rb +82 -0
- data/lib/octo/server/channel/adapters/dingtalk/adapter.rb +314 -0
- data/lib/octo/server/channel/adapters/dingtalk/api_client.rb +391 -0
- data/lib/octo/server/channel/adapters/dingtalk/stream_client.rb +203 -0
- data/lib/octo/server/channel/adapters/discord/adapter.rb +229 -0
- data/lib/octo/server/channel/adapters/discord/api_client.rb +107 -0
- data/lib/octo/server/channel/adapters/discord/gateway_client.rb +270 -0
- data/lib/octo/server/channel/adapters/feishu/adapter.rb +320 -0
- data/lib/octo/server/channel/adapters/feishu/bot.rb +478 -0
- data/lib/octo/server/channel/adapters/feishu/file_processor.rb +36 -0
- data/lib/octo/server/channel/adapters/feishu/message_parser.rb +129 -0
- data/lib/octo/server/channel/adapters/feishu/ws_client.rb +423 -0
- data/lib/octo/server/channel/adapters/telegram/adapter.rb +375 -0
- data/lib/octo/server/channel/adapters/telegram/api_client.rb +205 -0
- data/lib/octo/server/channel/adapters/wecom/adapter.rb +148 -0
- data/lib/octo/server/channel/adapters/wecom/media_downloader.rb +115 -0
- data/lib/octo/server/channel/adapters/wecom/ws_client.rb +395 -0
- data/lib/octo/server/channel/adapters/weixin/adapter.rb +692 -0
- data/lib/octo/server/channel/adapters/weixin/api_client.rb +402 -0
- data/lib/octo/server/channel/channel_config.rb +178 -0
- data/lib/octo/server/channel/channel_manager.rb +468 -0
- data/lib/octo/server/channel/channel_ui_controller.rb +224 -0
- data/lib/octo/server/channel.rb +33 -0
- data/lib/octo/server/discover.rb +77 -0
- data/lib/octo/server/epipe_safe_io.rb +105 -0
- data/lib/octo/server/http_server.rb +3554 -0
- data/lib/octo/server/scheduler.rb +317 -0
- data/lib/octo/server/server_master.rb +325 -0
- data/lib/octo/server/session_registry.rb +431 -0
- data/lib/octo/server/web_ui_controller.rb +487 -0
- data/lib/octo/session_manager.rb +385 -0
- data/lib/octo/skill.rb +466 -0
- data/lib/octo/skill_loader.rb +328 -0
- data/lib/octo/tools/base.rb +118 -0
- data/lib/octo/tools/browser.rb +625 -0
- data/lib/octo/tools/edit.rb +165 -0
- data/lib/octo/tools/file_reader.rb +549 -0
- data/lib/octo/tools/glob.rb +162 -0
- data/lib/octo/tools/grep.rb +356 -0
- data/lib/octo/tools/invoke_skill.rb +96 -0
- data/lib/octo/tools/list_tasks.rb +54 -0
- data/lib/octo/tools/redo_task.rb +41 -0
- data/lib/octo/tools/request_user_feedback.rb +84 -0
- data/lib/octo/tools/security.rb +333 -0
- data/lib/octo/tools/terminal/output_cleaner.rb +63 -0
- data/lib/octo/tools/terminal/persistent_session.rb +268 -0
- data/lib/octo/tools/terminal/safe_rm.sh +106 -0
- data/lib/octo/tools/terminal/session_manager.rb +213 -0
- data/lib/octo/tools/terminal.rb +1828 -0
- data/lib/octo/tools/todo_manager.rb +374 -0
- data/lib/octo/tools/trash_manager.rb +388 -0
- data/lib/octo/tools/undo_task.rb +35 -0
- data/lib/octo/tools/web_fetch.rb +242 -0
- data/lib/octo/tools/web_search.rb +260 -0
- data/lib/octo/tools/write.rb +77 -0
- data/lib/octo/ui2/block_font.rb +10 -0
- data/lib/octo/ui2/components/base_component.rb +163 -0
- data/lib/octo/ui2/components/command_suggestions.rb +290 -0
- data/lib/octo/ui2/components/common_component.rb +96 -0
- data/lib/octo/ui2/components/inline_input.rb +226 -0
- data/lib/octo/ui2/components/input_area.rb +1338 -0
- data/lib/octo/ui2/components/message_component.rb +99 -0
- data/lib/octo/ui2/components/modal_component.rb +419 -0
- data/lib/octo/ui2/components/todo_area.rb +149 -0
- data/lib/octo/ui2/components/tool_component.rb +107 -0
- data/lib/octo/ui2/components/welcome_banner.rb +139 -0
- data/lib/octo/ui2/layout_manager.rb +807 -0
- data/lib/octo/ui2/line_editor.rb +363 -0
- data/lib/octo/ui2/markdown_renderer.rb +100 -0
- data/lib/octo/ui2/output_buffer.rb +370 -0
- data/lib/octo/ui2/progress_handle.rb +362 -0
- data/lib/octo/ui2/progress_indicator.rb +55 -0
- data/lib/octo/ui2/screen_buffer.rb +273 -0
- data/lib/octo/ui2/terminal_detector.rb +119 -0
- data/lib/octo/ui2/theme_manager.rb +85 -0
- data/lib/octo/ui2/themes/base_theme.rb +105 -0
- data/lib/octo/ui2/themes/hacker_theme.rb +62 -0
- data/lib/octo/ui2/themes/minimal_theme.rb +56 -0
- data/lib/octo/ui2/thinking_verbs.rb +26 -0
- data/lib/octo/ui2/ui_controller.rb +1625 -0
- data/lib/octo/ui2/view_renderer.rb +177 -0
- data/lib/octo/ui2.rb +40 -0
- data/lib/octo/ui_interface.rb +154 -0
- data/lib/octo/utils/arguments_parser.rb +191 -0
- data/lib/octo/utils/browser_detector.rb +195 -0
- data/lib/octo/utils/encoding.rb +92 -0
- data/lib/octo/utils/environment_detector.rb +140 -0
- data/lib/octo/utils/file_ignore_helper.rb +170 -0
- data/lib/octo/utils/file_processor.rb +601 -0
- data/lib/octo/utils/gitignore_parser.rb +154 -0
- data/lib/octo/utils/limit_stack.rb +152 -0
- data/lib/octo/utils/logger.rb +124 -0
- data/lib/octo/utils/login_shell.rb +72 -0
- data/lib/octo/utils/model_pricing.rb +646 -0
- data/lib/octo/utils/parser_manager.rb +165 -0
- data/lib/octo/utils/path_helper.rb +15 -0
- data/lib/octo/utils/scripts_manager.rb +59 -0
- data/lib/octo/utils/string_matcher.rb +158 -0
- data/lib/octo/utils/trash_directory.rb +112 -0
- data/lib/octo/utils/workspace_rules.rb +46 -0
- data/lib/octo/version.rb +5 -0
- data/lib/octo/web/app.css +7141 -0
- data/lib/octo/web/app.js +543 -0
- data/lib/octo/web/apple-touch-icon.png +0 -0
- data/lib/octo/web/auth.js +150 -0
- data/lib/octo/web/channels.js +276 -0
- data/lib/octo/web/datepicker.js +205 -0
- data/lib/octo/web/favicon.png +0 -0
- data/lib/octo/web/i18n.js +1073 -0
- data/lib/octo/web/icon-512.png +0 -0
- data/lib/octo/web/icon-dark.svg +25 -0
- data/lib/octo/web/icon.svg +29 -0
- data/lib/octo/web/index.html +871 -0
- data/lib/octo/web/marked.min.js +69 -0
- data/lib/octo/web/onboard.js +491 -0
- data/lib/octo/web/profile.js +442 -0
- data/lib/octo/web/sessions.js +4421 -0
- data/lib/octo/web/settings.js +913 -0
- data/lib/octo/web/sidebar.js +32 -0
- data/lib/octo/web/skills.js +885 -0
- data/lib/octo/web/tasks.js +297 -0
- data/lib/octo/web/theme.js +105 -0
- data/lib/octo/web/trash.js +343 -0
- data/lib/octo/web/vendor/hljs/highlight.min.js +1244 -0
- data/lib/octo/web/vendor/hljs/hljs-theme.css +95 -0
- data/lib/octo/web/vendor/katex/auto-render.min.js +1 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- data/lib/octo/web/vendor/katex/katex.min.css +1 -0
- data/lib/octo/web/vendor/katex/katex.min.js +1 -0
- data/lib/octo/web/version.js +449 -0
- data/lib/octo/web/weixin-qr.html +209 -0
- data/lib/octo/web/ws-dispatcher.js +357 -0
- data/lib/octo/web/ws.js +128 -0
- data/lib/octo.rb +145 -0
- data/scripts/build/build.sh +329 -0
- data/scripts/build/lib/apt.sh +56 -0
- data/scripts/build/lib/brew.sh +89 -0
- data/scripts/build/lib/colors.sh +17 -0
- data/scripts/build/lib/gem.sh +95 -0
- data/scripts/build/lib/mise.sh +125 -0
- data/scripts/build/lib/network.sh +157 -0
- data/scripts/build/lib/os.sh +57 -0
- data/scripts/build/lib/shell.sh +37 -0
- data/scripts/build/src/install.sh.cc +174 -0
- data/scripts/build/src/install_browser.sh.cc +101 -0
- data/scripts/build/src/install_full.sh.cc +290 -0
- data/scripts/build/src/install_rails_deps.sh.cc +145 -0
- data/scripts/build/src/install_system_deps.sh.cc +123 -0
- data/scripts/build/src/uninstall.sh.cc +101 -0
- data/scripts/install.ps1 +532 -0
- data/scripts/install.sh +567 -0
- data/scripts/install_browser.sh +479 -0
- data/scripts/install_full.sh +838 -0
- data/scripts/install_rails_deps.sh +746 -0
- data/scripts/install_system_deps.sh +518 -0
- data/scripts/uninstall.sh +287 -0
- data/sig/octo.rbs +4 -0
- metadata +614 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Quick validation script for skills - minimal version
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import yaml
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
def validate_skill(skill_path):
    """Run a minimal structural validation of a skill directory.

    Checks, in order, that ``SKILL.md`` exists, that it starts with a
    ``---`` delimited frontmatter block which parses to a YAML mapping, that
    only the allowed top-level keys are used, and that ``name``,
    ``description`` and the optional ``compatibility`` field satisfy their
    type, format and length limits.

    Args:
        skill_path: Skill directory, as a ``str`` or ``Path``.

    Returns:
        A ``(valid, message)`` tuple. ``message`` describes the first problem
        found, or is ``"Skill is valid!"`` when every check passes.
    """
    skill_path = Path(skill_path)

    # Check SKILL.md exists
    skill_md = skill_path / 'SKILL.md'
    if not skill_md.exists():
        return False, "SKILL.md not found"

    # Read and validate frontmatter. Decode explicitly as UTF-8 so the result
    # does not depend on the platform's locale encoding, and report undecodable
    # files through the normal (valid, message) contract instead of raising.
    try:
        content = skill_md.read_text(encoding='utf-8')
    except UnicodeDecodeError as e:
        return False, f"SKILL.md is not valid UTF-8: {e}"
    if not content.startswith('---'):
        return False, "No YAML frontmatter found"

    # Extract frontmatter: everything between the opening '---' line and the
    # next line that starts with '---' (non-greedy, spanning newlines).
    match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL)
    if not match:
        return False, "Invalid frontmatter format"

    frontmatter_text = match.group(1)

    # Parse YAML frontmatter
    try:
        frontmatter = yaml.safe_load(frontmatter_text)
        if not isinstance(frontmatter, dict):
            return False, "Frontmatter must be a YAML dictionary"
    except yaml.YAMLError as e:
        return False, f"Invalid YAML in frontmatter: {e}"

    # Define allowed properties
    ALLOWED_PROPERTIES = {'name', 'description', 'license', 'allowed-tools', 'metadata', 'compatibility'}

    # Check for unexpected properties (excluding nested keys under metadata)
    unexpected_keys = set(frontmatter.keys()) - ALLOWED_PROPERTIES
    if unexpected_keys:
        # YAML keys are not necessarily strings (e.g. ``1: x``); stringify them
        # so sorting and joining cannot raise TypeError.
        unexpected_names = sorted(str(key) for key in unexpected_keys)
        return False, (
            f"Unexpected key(s) in SKILL.md frontmatter: {', '.join(unexpected_names)}. "
            f"Allowed properties are: {', '.join(sorted(ALLOWED_PROPERTIES))}"
        )

    # Check required fields
    if 'name' not in frontmatter:
        return False, "Missing 'name' in frontmatter"
    if 'description' not in frontmatter:
        return False, "Missing 'description' in frontmatter"

    # Extract name for validation
    name = frontmatter.get('name', '')
    if not isinstance(name, str):
        return False, f"Name must be a string, got {type(name).__name__}"
    name = name.strip()
    if name:
        # Check naming convention (kebab-case: lowercase with hyphens)
        if not re.match(r'^[a-z0-9-]+$', name):
            return False, f"Name '{name}' should be kebab-case (lowercase letters, digits, and hyphens only)"
        if name.startswith('-') or name.endswith('-') or '--' in name:
            return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens"
        # Check name length (max 64 characters per spec)
        if len(name) > 64:
            return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."

    # Extract and validate description
    description = frontmatter.get('description', '')
    if not isinstance(description, str):
        return False, f"Description must be a string, got {type(description).__name__}"
    description = description.strip()
    if description:
        # Check for angle brackets
        if '<' in description or '>' in description:
            return False, "Description cannot contain angle brackets (< or >)"
        # Check description length (max 1024 characters per spec)
        if len(description) > 1024:
            return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."

    # Validate compatibility field if present (optional)
    compatibility = frontmatter.get('compatibility', '')
    if compatibility:
        if not isinstance(compatibility, str):
            return False, f"Compatibility must be a string, got {type(compatibility).__name__}"
        if len(compatibility) > 500:
            return False, f"Compatibility is too long ({len(compatibility)} characters). Maximum is 500 characters."

    return True, "Skill is valid!"
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the skill
    # directory. Prints the validation message and exits 0 when the skill is
    # valid, 1 otherwise (including on a usage error).
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python quick_validate.py <skill_directory>")
        sys.exit(1)

    valid, message = validate_skill(cli_args[0])
    print(message)
    if valid:
        sys.exit(0)
    sys.exit(1)
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Run trigger evaluation for a skill description.
|
|
3
|
+
|
|
4
|
+
Tests whether Octo's agent triggers (invokes) a skill for a set of queries.
|
|
5
|
+
Runs octo agent --json in persistent mode, sends queries via stdin NDJSON,
|
|
6
|
+
detects {"type":"tool_call","name":"invoke_skill","args":{"skill_name":"<name>"}}
|
|
7
|
+
events, and returns pass/fail results as JSON.
|
|
8
|
+
|
|
9
|
+
Executes queries serially (Octo is single-agent, no parallel workers).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import select
|
|
16
|
+
import shutil
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
import uuid
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from scripts.utils import parse_skill_md
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Executable used to launch the agent. Prefer whatever ``octo`` resolves to on
# PATH; otherwise fall back to the bare command name so a missing install
# fails with an error naming "octo" rather than a path that only exists on one
# developer's machine.
OCTO_BIN = shutil.which("octo") or "octo"
# Directory in which the temporary per-query evaluation skills are created.
SKILLS_DIR = Path.home() / ".octo" / "skills"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def find_project_root() -> Path:
    """Return the nearest directory, from cwd upwards, that holds a ``.octo`` dir.

    The current working directory is checked first, then each of its parents
    in turn. When none of them contains a ``.octo`` directory the current
    working directory itself is returned. The result is used for ``--path``.
    """
    here = Path.cwd()
    candidates = (here, *here.parents)
    return next(
        (folder for folder in candidates if (folder / ".octo").is_dir()),
        here,
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _read_ndjson_lines(proc, timeout: float) -> list[dict]:
|
|
40
|
+
"""Read NDJSON lines from proc.stdout until timeout or process exits."""
|
|
41
|
+
lines = []
|
|
42
|
+
buffer = b""
|
|
43
|
+
start = time.time()
|
|
44
|
+
while time.time() - start < timeout:
|
|
45
|
+
ready = select.select([proc.stdout], [], [], 0.5)[0]
|
|
46
|
+
if ready:
|
|
47
|
+
chunk = os.read(proc.stdout.fileno(), 8192)
|
|
48
|
+
if chunk:
|
|
49
|
+
buffer += chunk
|
|
50
|
+
while b"\n" in buffer:
|
|
51
|
+
line_b, buffer = buffer.split(b"\n", 1)
|
|
52
|
+
line = line_b.decode("utf-8", errors="replace").strip()
|
|
53
|
+
if not line:
|
|
54
|
+
continue
|
|
55
|
+
try:
|
|
56
|
+
lines.append(json.loads(line))
|
|
57
|
+
except json.JSONDecodeError:
|
|
58
|
+
pass
|
|
59
|
+
if proc.poll() is not None:
|
|
60
|
+
# drain remaining
|
|
61
|
+
remaining = proc.stdout.read()
|
|
62
|
+
if remaining:
|
|
63
|
+
for line in remaining.decode("utf-8", errors="replace").splitlines():
|
|
64
|
+
line = line.strip()
|
|
65
|
+
if line:
|
|
66
|
+
try:
|
|
67
|
+
lines.append(json.loads(line))
|
|
68
|
+
except json.JSONDecodeError:
|
|
69
|
+
pass
|
|
70
|
+
break
|
|
71
|
+
return lines
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _iter_agent_events(proc, timeout: float, pending: bytearray):
    """Yield JSON-object events read from ``proc.stdout`` as NDJSON.

    ``pending`` holds bytes that have been read but not yet consumed. It is
    mutated in place, so bytes left over when the caller stops iterating are
    picked up by a later call that passes the same buffer. Blank lines, lines
    that are not valid JSON, and JSON values that are not objects are skipped.
    Iteration ends on timeout, on EOF, or when the process has exited and no
    further output is readable.
    """
    fd = proc.stdout.fileno()
    start = time.time()
    while True:
        # Emit every complete line already buffered before waiting for more.
        newline_at = pending.find(b"\n")
        while newline_at != -1:
            raw = bytes(pending[:newline_at])
            del pending[: newline_at + 1]
            text = raw.decode("utf-8", errors="replace").strip()
            if text:
                try:
                    event = json.loads(text)
                except json.JSONDecodeError:
                    event = None
                if isinstance(event, dict):
                    yield event
            newline_at = pending.find(b"\n")

        if time.time() - start >= timeout:
            return
        if select.select([proc.stdout], [], [], 0.5)[0]:
            chunk = os.read(fd, 8192)
            if not chunk:
                return  # EOF: the agent closed its stdout
            pending += chunk
        elif proc.poll() is not None:
            return


def run_single_query(
    query: str,
    skill_name: str,
    skill_description: str,
    timeout: int,
    project_root: str,
) -> bool:
    """Run a single query via octo agent --json and detect skill trigger.

    Creates a uniquely named temporary skill under ``SKILLS_DIR``, starts
    ``octo agent`` in JSON mode, waits up to 10 seconds for a ``system`` event,
    sends the query, and watches the event stream for an ``invoke_skill``
    ``tool_call`` whose ``skill_name`` is the temporary skill.

    Args:
        query: User message sent to the agent.
        skill_name: Base name; a random suffix makes the temporary skill unique.
        skill_description: Description written into the temporary SKILL.md.
        timeout: Seconds to wait for the trigger or a ``complete`` event.
        project_root: Directory passed to the agent via ``--path``.

    Returns:
        True if the temporary skill was invoked; False if the agent completed,
        exited, or timed out without invoking it.

    Raises:
        OSError: If the agent cannot be started or written to (for example the
            executable is missing or the process has already closed stdin).
    """
    unique_id = uuid.uuid4().hex[:8]
    temp_skill_name = f"{skill_name}-eval-{unique_id}"
    temp_skill_dir = SKILLS_DIR / temp_skill_name

    try:
        # Write temporary skill
        temp_skill_dir.mkdir(parents=True, exist_ok=True)
        # Emit the description as a double-quoted scalar (a JSON string is
        # valid YAML) so colons, '#', quotes or newlines in it cannot break or
        # alter the frontmatter.
        yaml_description = json.dumps(skill_description, ensure_ascii=False)
        skill_md = (
            f"---\n"
            f"name: {temp_skill_name}\n"
            f"description: {yaml_description}\n"
            f"---\n\n"
            f"# {skill_name}\n\n"
            f"This skill handles: {skill_description}\n"
        )
        (temp_skill_dir / "SKILL.md").write_text(skill_md, encoding="utf-8")

        # Launch octo agent in persistent JSON mode
        proc = subprocess.Popen(
            [OCTO_BIN, "agent", "--json", "--mode", "auto_approve",
             "--path", project_root],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            bufsize=0,
        )

        try:
            # Shared read buffer: bytes that arrive after the ready event are
            # kept for the main loop rather than thrown away.
            pending = bytearray()

            # Wait for "system" ready event before sending query. As before,
            # the query is still sent if no such event shows up in time.
            for event in _iter_agent_events(proc, 10, pending):
                if event.get("type") == "system":
                    break

            # Send query
            msg = (json.dumps({"type": "message", "content": query}) + "\n").encode()
            proc.stdin.write(msg)
            proc.stdin.flush()

            # Read events until "complete" or timeout
            for event in _iter_agent_events(proc, timeout, pending):
                event_type = event.get("type")

                # Detect skill trigger
                if event_type == "tool_call" and event.get("name") == "invoke_skill":
                    args = event.get("args", {})
                    if isinstance(args, dict) and args.get("skill_name", "") == temp_skill_name:
                        return True  # triggered — exit early

                # Task complete
                if event_type == "complete":
                    return False

            return False

        finally:
            # Ask the agent to exit, give it a moment to do so, then force it.
            try:
                proc.stdin.write((json.dumps({"type": "exit"}) + "\n").encode())
                proc.stdin.flush()
            except (OSError, ValueError):
                pass  # stdin already closed or the process is gone
            try:
                proc.wait(timeout=1.0)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
            # Close our pipe ends so descriptors do not pile up across queries.
            for pipe in (proc.stdin, proc.stdout):
                try:
                    pipe.close()
                except OSError:
                    pass

    finally:
        # Always remove temp skill directory
        if temp_skill_dir.exists():
            shutil.rmtree(temp_skill_dir, ignore_errors=True)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def run_eval(
    eval_set: list[dict],
    skill_name: str,
    description: str,
    timeout: int,
    project_root: Path,
    runs_per_query: int = 1,
    trigger_threshold: float = 0.5,
) -> dict:
    """Run the full eval set serially and return results.

    Note: Octo is single-agent — queries are executed serially, not in parallel.
    Each run calls ``run_single_query`` once. A run that raises is reported on
    stderr and counted as "not triggered". Items that share the same query text
    are aggregated into one result, using the last such item's
    ``should_trigger``.

    Args:
        eval_set: Items with a ``"query"`` string and a ``"should_trigger"`` flag.
        skill_name: Name of the skill under evaluation.
        description: Skill description to evaluate.
        timeout: Per-query timeout in seconds.
        project_root: Directory handed to the agent (converted with ``str``).
        runs_per_query: How many times each item is run. With zero or fewer
            runs the trigger rate is reported as 0.0.
        trigger_threshold: A query counts as triggered when its trigger rate is
            at least this value.

    Returns:
        A dict with ``skill_name``, ``description``, per-query ``results`` and a
        ``summary`` of ``total`` / ``passed`` / ``failed`` counts.
    """
    query_triggers: dict[str, list[bool]] = {}
    query_items: dict[str, dict] = {}

    for item in eval_set:
        query = item["query"]
        query_items[query] = item
        triggers = query_triggers.setdefault(query, [])

        for run_idx in range(runs_per_query):
            try:
                triggered = run_single_query(
                    query=query,
                    skill_name=skill_name,
                    skill_description=description,
                    timeout=timeout,
                    project_root=str(project_root),
                )
            except Exception as e:
                # Per-run boundary: one failing run must not abort the eval.
                print(f"Warning: query failed (run {run_idx}): {e}", file=sys.stderr)
                triggered = False
            triggers.append(triggered)

    results = []
    for query, triggers in query_triggers.items():
        should_trigger = query_items[query]["should_trigger"]
        trigger_count = sum(triggers)
        # No runs means no observed triggers; avoid dividing by zero.
        trigger_rate = trigger_count / len(triggers) if triggers else 0.0
        if should_trigger:
            did_pass = trigger_rate >= trigger_threshold
        else:
            did_pass = trigger_rate < trigger_threshold
        results.append({
            "query": query,
            "should_trigger": should_trigger,
            "trigger_rate": trigger_rate,
            "triggers": trigger_count,
            "runs": len(triggers),
            "pass": did_pass,
        })

    passed = sum(1 for r in results if r["pass"])
    total = len(results)

    return {
        "skill_name": skill_name,
        "description": description,
        "results": results,
        "summary": {
            "total": total,
            "passed": passed,
            "failed": total - passed,
        },
    }
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def main():
    """Command-line entry point for the trigger evaluation.

    Parses the options, loads the eval set JSON and the skill's name and
    description, runs the evaluation, and prints the result as indented JSON
    on stdout. With ``--verbose``, progress and a per-query summary go to
    stderr. Exits with status 1 when the skill directory has no SKILL.md.
    """
    option_specs = [
        ("--eval-set", {"required": True, "help": "Path to eval set JSON file"}),
        ("--skill-path", {"required": True, "help": "Path to skill directory"}),
        ("--description", {"default": None, "help": "Override description to test"}),
        ("--timeout", {"type": int, "default": 45, "help": "Timeout per query in seconds"}),
        ("--runs-per-query", {"type": int, "default": 1, "help": "Number of runs per query (serially)"}),
        ("--trigger-threshold", {"type": float, "default": 0.5, "help": "Trigger rate threshold"}),
        ("--verbose", {"action": "store_true", "help": "Print progress to stderr"}),
        # --num-workers kept for CLI compat but ignored (Octo is serial)
        ("--num-workers", {"type": int, "default": 1, "help": "Ignored — Octo runs serially"}),
        ("--model", {"default": None, "help": "Ignored — model comes from ~/.octo/config.yml"}),
    ]
    arg_parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description (Octo)")
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)
    args = arg_parser.parse_args()

    def _log(text):
        # Progress output goes to stderr so stdout carries only the JSON report.
        print(text, file=sys.stderr)

    eval_set = json.loads(Path(args.eval_set).read_text())
    skill_path = Path(args.skill_path)

    skill_md = skill_path / "SKILL.md"
    if not skill_md.exists():
        _log(f"Error: No SKILL.md found at {skill_path}")
        sys.exit(1)

    # The third value returned by parse_skill_md is not needed here.
    name, original_description, _ = parse_skill_md(skill_path)
    description = args.description or original_description
    project_root = find_project_root()

    if args.verbose:
        _log(f"Evaluating skill: {name}")
        _log(f"Description: {description}")
        _log(f"Queries: {len(eval_set)}, runs-per-query: {args.runs_per_query}")

    output = run_eval(
        eval_set=eval_set,
        skill_name=name,
        description=description,
        timeout=args.timeout,
        project_root=project_root,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
    )

    if args.verbose:
        summary = output["summary"]
        _log(f"Results: {summary['passed']}/{summary['total']} passed")
        for r in output["results"]:
            if r["pass"]:
                status = "PASS"
            else:
                status = "FAIL"
            rate_str = f"{r['triggers']}/{r['runs']}"
            _log(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}")

    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()
|