@heyai-rules/pilo-masterkit 1.2.2 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/agents/architect.md +211 -211
- package/.agent/agents/build-error-resolver.md +114 -114
- package/.agent/agents/chief-of-staff.md +151 -151
- package/.agent/agents/code-reviewer.md +237 -237
- package/.agent/agents/cpp-build-resolver.md +90 -90
- package/.agent/agents/cpp-reviewer.md +72 -72
- package/.agent/agents/csharp-reviewer.md +101 -0
- package/.agent/agents/dart-build-resolver.md +201 -0
- package/.agent/agents/database-reviewer.md +91 -91
- package/.agent/agents/doc-updater.md +107 -107
- package/.agent/agents/docs-lookup.md +68 -68
- package/.agent/agents/e2e-runner.md +107 -107
- package/.agent/agents/flutter-reviewer.md +243 -243
- package/.agent/agents/gan-evaluator.md +209 -0
- package/.agent/agents/gan-generator.md +131 -0
- package/.agent/agents/gan-planner.md +99 -0
- package/.agent/agents/go-build-resolver.md +94 -94
- package/.agent/agents/go-reviewer.md +76 -76
- package/.agent/agents/harness-optimizer.md +35 -35
- package/.agent/agents/healthcare-reviewer.md +83 -0
- package/.agent/agents/java-build-resolver.md +153 -153
- package/.agent/agents/java-reviewer.md +92 -92
- package/.agent/agents/kotlin-build-resolver.md +118 -118
- package/.agent/agents/kotlin-reviewer.md +159 -159
- package/.agent/agents/loop-operator.md +36 -36
- package/.agent/agents/opensource-forker.md +198 -0
- package/.agent/agents/opensource-packager.md +249 -0
- package/.agent/agents/opensource-sanitizer.md +188 -0
- package/.agent/agents/performance-optimizer.md +392 -133
- package/.agent/agents/personas/athena-agent/agent.json +10 -0
- package/.agent/agents/personas/athena-agent/athena-backend-logic-architecture-profile.md +189 -0
- package/.agent/agents/personas/athena-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/athena-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/athena-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/athena-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/athena-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/athena-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/da-vinci-agent/agent.json +10 -0
- package/.agent/agents/personas/da-vinci-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/da-vinci-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/da-vinci-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/da-vinci-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/da-vinci-agent/da-vinci-frontend-ui-ux-design-profile.md +189 -0
- package/.agent/agents/personas/da-vinci-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/da-vinci-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/duong-tang-agent/agent.json +10 -0
- package/.agent/agents/personas/duong-tang-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/duong-tang-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/duong-tang-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/duong-tang-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/duong-tang-agent/tang-monk-quality-testing-documentation-profile.md +189 -0
- package/.agent/agents/personas/duong-tang-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/duong-tang-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/gia-cat-luong-agent/agent.json +10 -0
- package/.agent/agents/personas/gia-cat-luong-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/gia-cat-luong-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/gia-cat-luong-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/gia-cat-luong-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/gia-cat-luong-agent/kongming-research-strategy-analysis-profile.md +189 -0
- package/.agent/agents/personas/gia-cat-luong-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/gia-cat-luong-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/mihata-agent/agent.json +10 -0
- package/.agent/agents/personas/mihata-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/mihata-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/mihata-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/mihata-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/mihata-agent/mihata-multi-agent-orchestration-profile.md +189 -0
- package/.agent/agents/personas/mihata-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/mihata-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/tesla-agent/agent.json +10 -0
- package/.agent/agents/personas/tesla-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/tesla-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/tesla-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/tesla-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/tesla-agent/tesla-fullstack-system-optimization-profile.md +189 -0
- package/.agent/agents/personas/tesla-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/tesla-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/tu-ma-y-agent/agent.json +10 -0
- package/.agent/agents/personas/tu-ma-y-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/tu-ma-y-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/tu-ma-y-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/tu-ma-y-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/tu-ma-y-agent/simayi-feasibility-risk-control-profile.md +189 -0
- package/.agent/agents/personas/tu-ma-y-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/tu-ma-y-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/venti-agent/agent.json +10 -0
- package/.agent/agents/personas/venti-agent/context-files/agents.md +55 -0
- package/.agent/agents/personas/venti-agent/context-files/identity.md +23 -0
- package/.agent/agents/personas/venti-agent/context-files/soul.md +51 -0
- package/.agent/agents/personas/venti-agent/context-files/user-predefined.md +15 -0
- package/.agent/agents/personas/venti-agent/user-context-files/system/bootstrap.md +37 -0
- package/.agent/agents/personas/venti-agent/user-context-files/system/user.md +45 -0
- package/.agent/agents/personas/venti-agent/venti-learning-communication-mentoring-profile.md +189 -0
- package/.agent/agents/planner.md +212 -212
- package/.agent/agents/python-reviewer.md +98 -98
- package/.agent/agents/pytorch-build-resolver.md +120 -120
- package/.agent/agents/refactor-cleaner.md +85 -85
- package/.agent/agents/rust-build-resolver.md +148 -148
- package/.agent/agents/rust-reviewer.md +94 -94
- package/.agent/agents/security-reviewer.md +108 -108
- package/.agent/agents/tdd-guide.md +91 -91
- package/.agent/agents/typescript-reviewer.md +112 -112
- package/.agent/contexts/dev.md +20 -0
- package/.agent/contexts/research.md +26 -0
- package/.agent/contexts/review.md +22 -0
- package/.agent/hooks/hooks.json +395 -0
- package/.agent/hooks/readme.md +222 -0
- package/.agent/mcp-configs/mcp-servers.json +181 -0
- package/.agent/rules/common/agents.md +50 -0
- package/.agent/rules/common/code-review.md +124 -0
- package/.agent/rules/common/coding-style.md +48 -0
- package/.agent/rules/common/development-workflow.md +44 -0
- package/.agent/rules/common/git-workflow.md +24 -0
- package/.agent/rules/common/hooks.md +30 -0
- package/.agent/rules/common/patterns.md +31 -0
- package/.agent/rules/common/performance.md +55 -0
- package/.agent/rules/common/security.md +29 -0
- package/.agent/rules/common/testing.md +29 -0
- package/.agent/rules/cpp/coding-style.md +44 -0
- package/.agent/rules/cpp/hooks.md +39 -0
- package/.agent/rules/cpp/patterns.md +51 -0
- package/.agent/rules/cpp/security.md +51 -0
- package/.agent/rules/cpp/testing.md +44 -0
- package/.agent/rules/csharp/coding-style.md +72 -0
- package/.agent/rules/csharp/hooks.md +25 -0
- package/.agent/rules/csharp/patterns.md +50 -0
- package/.agent/rules/csharp/security.md +58 -0
- package/.agent/rules/csharp/testing.md +46 -0
- package/.agent/rules/dart/coding-style.md +159 -0
- package/.agent/rules/dart/hooks.md +66 -0
- package/.agent/rules/dart/patterns.md +261 -0
- package/.agent/rules/dart/security.md +135 -0
- package/.agent/rules/dart/testing.md +215 -0
- package/.agent/rules/golang/coding-style.md +32 -0
- package/.agent/rules/golang/hooks.md +17 -0
- package/.agent/rules/golang/patterns.md +45 -0
- package/.agent/rules/golang/security.md +34 -0
- package/.agent/rules/golang/testing.md +31 -0
- package/.agent/rules/java/coding-style.md +114 -0
- package/.agent/rules/java/hooks.md +18 -0
- package/.agent/rules/java/patterns.md +146 -0
- package/.agent/rules/java/security.md +100 -0
- package/.agent/rules/java/testing.md +131 -0
- package/.agent/rules/kotlin/coding-style.md +86 -0
- package/.agent/rules/kotlin/hooks.md +17 -0
- package/.agent/rules/kotlin/patterns.md +146 -0
- package/.agent/rules/kotlin/security.md +82 -0
- package/.agent/rules/kotlin/testing.md +128 -0
- package/.agent/rules/perl/coding-style.md +46 -0
- package/.agent/rules/perl/hooks.md +22 -0
- package/.agent/rules/perl/patterns.md +76 -0
- package/.agent/rules/perl/security.md +69 -0
- package/.agent/rules/perl/testing.md +54 -0
- package/.agent/rules/php/coding-style.md +40 -0
- package/.agent/rules/php/hooks.md +24 -0
- package/.agent/rules/php/patterns.md +33 -0
- package/.agent/rules/php/security.md +37 -0
- package/.agent/rules/php/testing.md +39 -0
- package/.agent/rules/python/coding-style.md +42 -0
- package/.agent/rules/python/hooks.md +19 -0
- package/.agent/rules/python/patterns.md +39 -0
- package/.agent/rules/python/security.md +30 -0
- package/.agent/rules/python/testing.md +38 -0
- package/.agent/rules/readme.md +111 -0
- package/.agent/rules/rust/coding-style.md +151 -0
- package/.agent/rules/rust/hooks.md +16 -0
- package/.agent/rules/rust/patterns.md +168 -0
- package/.agent/rules/rust/security.md +141 -0
- package/.agent/rules/rust/testing.md +154 -0
- package/.agent/rules/swift/coding-style.md +47 -0
- package/.agent/rules/swift/hooks.md +20 -0
- package/.agent/rules/swift/patterns.md +66 -0
- package/.agent/rules/swift/security.md +33 -0
- package/.agent/rules/swift/testing.md +45 -0
- package/.agent/rules/typescript/coding-style.md +199 -0
- package/.agent/rules/typescript/hooks.md +22 -0
- package/.agent/rules/typescript/patterns.md +52 -0
- package/.agent/rules/typescript/security.md +28 -0
- package/.agent/rules/typescript/testing.md +18 -0
- package/.agent/rules/web/coding-style.md +96 -0
- package/.agent/rules/web/design-quality.md +63 -0
- package/.agent/rules/web/hooks.md +120 -0
- package/.agent/rules/web/patterns.md +79 -0
- package/.agent/rules/web/performance.md +64 -0
- package/.agent/rules/web/security.md +57 -0
- package/.agent/rules/web/testing.md +55 -0
- package/.agent/rules/zh/agents.md +50 -0
- package/.agent/rules/zh/code-review.md +124 -0
- package/.agent/rules/zh/coding-style.md +48 -0
- package/.agent/rules/zh/development-workflow.md +44 -0
- package/.agent/rules/zh/git-workflow.md +24 -0
- package/.agent/rules/zh/hooks.md +30 -0
- package/.agent/rules/zh/patterns.md +31 -0
- package/.agent/rules/zh/performance.md +55 -0
- package/.agent/rules/zh/readme.md +108 -0
- package/.agent/rules/zh/security.md +29 -0
- package/.agent/rules/zh/testing.md +29 -0
- package/.agent/skills/agent-eval/SKILL.md +145 -0
- package/.agent/skills/agent-harness-construction/SKILL.md +73 -0
- package/.agent/skills/agent-payment-x402/SKILL.md +178 -0
- package/.agent/skills/agentic-engineering/SKILL.md +63 -0
- package/.agent/skills/ai-first-engineering/SKILL.md +51 -0
- package/.agent/skills/ai-regression-testing/SKILL.md +385 -0
- package/.agent/skills/android-clean-architecture/SKILL.md +339 -0
- package/.agent/skills/api-design/SKILL.md +523 -0
- package/.agent/skills/architecture-decision-records/SKILL.md +179 -0
- package/.agent/skills/article-writing/SKILL.md +79 -0
- package/.agent/skills/autonomous-agent-harness/SKILL.md +267 -0
- package/.agent/skills/autonomous-loops/SKILL.md +610 -0
- package/.agent/skills/backend-patterns/SKILL.md +598 -0
- package/.agent/skills/benchmark/SKILL.md +93 -0
- package/.agent/skills/blueprint/SKILL.md +105 -0
- package/.agent/skills/brand-voice/SKILL.md +97 -0
- package/.agent/skills/brand-voice/references/voice-profile-schema.md +55 -0
- package/.agent/skills/browser-qa/SKILL.md +87 -0
- package/.agent/skills/bun-runtime/SKILL.md +84 -0
- package/.agent/skills/canary-watch/SKILL.md +99 -0
- package/.agent/skills/carrier-relationship-management/SKILL.md +212 -0
- package/.agent/skills/ck/SKILL.md +147 -0
- package/.agent/skills/ck/commands/forget.mjs +44 -0
- package/.agent/skills/ck/commands/info.mjs +24 -0
- package/.agent/skills/ck/commands/init.mjs +143 -0
- package/.agent/skills/ck/commands/list.mjs +40 -0
- package/.agent/skills/ck/commands/migrate.mjs +202 -0
- package/.agent/skills/ck/commands/resume.mjs +36 -0
- package/.agent/skills/ck/commands/save.mjs +210 -0
- package/.agent/skills/ck/commands/shared.mjs +387 -0
- package/.agent/skills/ck/hooks/session-start.mjs +224 -0
- package/.agent/skills/claude-api/SKILL.md +337 -0
- package/.agent/skills/claude-devfleet/SKILL.md +103 -0
- package/.agent/skills/click-path-audit/SKILL.md +244 -0
- package/.agent/skills/clickhouse-io/SKILL.md +439 -0
- package/.agent/skills/codebase-onboarding/SKILL.md +233 -0
- package/.agent/skills/coding-standards/SKILL.md +530 -0
- package/.agent/skills/compose-multiplatform-patterns/SKILL.md +299 -0
- package/.agent/skills/configure-ecc/SKILL.md +367 -0
- package/.agent/skills/connections-optimizer/SKILL.md +189 -0
- package/.agent/skills/content-engine/SKILL.md +131 -0
- package/.agent/skills/content-hash-cache-pattern/SKILL.md +161 -0
- package/.agent/skills/context-budget/SKILL.md +135 -0
- package/.agent/skills/continuous-agent-loop/SKILL.md +45 -0
- package/.agent/skills/continuous-learning/SKILL.md +119 -0
- package/.agent/skills/continuous-learning/config.json +18 -0
- package/.agent/skills/continuous-learning/evaluate-session.sh +69 -0
- package/.agent/skills/continuous-learning-v2/SKILL.md +365 -0
- package/.agent/skills/continuous-learning-v2/agents/observer-loop.sh +271 -0
- package/.agent/skills/continuous-learning-v2/agents/observer.md +198 -0
- package/.agent/skills/continuous-learning-v2/agents/session-guardian.sh +150 -0
- package/.agent/skills/continuous-learning-v2/agents/start-observer.sh +244 -0
- package/.agent/skills/continuous-learning-v2/config.json +8 -0
- package/.agent/skills/continuous-learning-v2/hooks/observe.sh +428 -0
- package/.agent/skills/continuous-learning-v2/scripts/detect-project.sh +228 -0
- package/.agent/skills/continuous-learning-v2/scripts/instinct-cli.py +1426 -0
- package/.agent/skills/continuous-learning-v2/scripts/test-parse-instinct.py +984 -0
- package/.agent/skills/cost-aware-llm-pipeline/SKILL.md +183 -0
- package/.agent/skills/cpp-coding-standards/SKILL.md +723 -0
- package/.agent/skills/cpp-testing/SKILL.md +324 -0
- package/.agent/skills/crosspost/SKILL.md +111 -0
- package/.agent/skills/csharp-testing/SKILL.md +321 -0
- package/.agent/skills/customer-billing-ops/SKILL.md +140 -0
- package/.agent/skills/customs-trade-compliance/SKILL.md +263 -0
- package/.agent/skills/dart-flutter-patterns/SKILL.md +563 -0
- package/.agent/skills/data-scraper-agent/SKILL.md +764 -0
- package/.agent/skills/database-migrations/SKILL.md +429 -0
- package/.agent/skills/deep-research/SKILL.md +155 -0
- package/.agent/skills/deployment-patterns/SKILL.md +427 -0
- package/.agent/skills/design-system/SKILL.md +82 -0
- package/.agent/skills/django-patterns/SKILL.md +734 -0
- package/.agent/skills/django-security/SKILL.md +593 -0
- package/.agent/skills/django-tdd/SKILL.md +729 -0
- package/.agent/skills/django-verification/SKILL.md +469 -0
- package/.agent/skills/dmux-workflows/SKILL.md +191 -0
- package/.agent/skills/docker-patterns/SKILL.md +364 -0
- package/.agent/skills/documentation-lookup/SKILL.md +90 -0
- package/.agent/skills/dotnet-patterns/SKILL.md +321 -0
- package/.agent/skills/e2e-testing/SKILL.md +326 -0
- package/.agent/skills/energy-procurement/SKILL.md +228 -0
- package/.agent/skills/enterprise-agent-ops/SKILL.md +50 -0
- package/.agent/skills/eval-harness/SKILL.md +270 -0
- package/.agent/skills/exa-search/SKILL.md +103 -0
- package/.agent/skills/fal-ai-media/SKILL.md +284 -0
- package/.agent/skills/flutter-dart-code-review/SKILL.md +435 -0
- package/.agent/skills/foundation-models-on-device/SKILL.md +243 -0
- package/.agent/skills/frontend-patterns/SKILL.md +642 -0
- package/.agent/skills/frontend-slides/SKILL.md +184 -0
- package/.agent/skills/frontend-slides/style-presets.md +330 -0
- package/.agent/skills/gan-style-harness/SKILL.md +278 -0
- package/.agent/skills/git-workflow/SKILL.md +715 -0
- package/.agent/skills/golang-patterns/SKILL.md +674 -0
- package/.agent/skills/golang-testing/SKILL.md +720 -0
- package/.agent/skills/google-workspace-ops/SKILL.md +95 -0
- package/.agent/skills/healthcare-cdss-patterns/SKILL.md +245 -0
- package/.agent/skills/healthcare-emr-patterns/SKILL.md +159 -0
- package/.agent/skills/healthcare-eval-harness/SKILL.md +207 -0
- package/.agent/skills/healthcare-phi-compliance/SKILL.md +145 -0
- package/.agent/skills/hexagonal-architecture/SKILL.md +276 -0
- package/.agent/skills/inventory-demand-planning/SKILL.md +247 -0
- package/.agent/skills/investor-materials/SKILL.md +96 -0
- package/.agent/skills/investor-outreach/SKILL.md +91 -0
- package/.agent/skills/iterative-retrieval/SKILL.md +211 -0
- package/.agent/skills/java-coding-standards/SKILL.md +147 -0
- package/.agent/skills/jira-integration/SKILL.md +293 -0
- package/.agent/skills/jpa-patterns/SKILL.md +151 -0
- package/.agent/skills/kotlin-coroutines-flows/SKILL.md +284 -0
- package/.agent/skills/kotlin-exposed-patterns/SKILL.md +719 -0
- package/.agent/skills/kotlin-ktor-patterns/SKILL.md +689 -0
- package/.agent/skills/kotlin-patterns/SKILL.md +711 -0
- package/.agent/skills/kotlin-testing/SKILL.md +824 -0
- package/.agent/skills/laravel-patterns/SKILL.md +415 -0
- package/.agent/skills/laravel-plugin-discovery/SKILL.md +229 -0
- package/.agent/skills/laravel-security/SKILL.md +285 -0
- package/.agent/skills/laravel-tdd/SKILL.md +283 -0
- package/.agent/skills/laravel-verification/SKILL.md +179 -0
- package/.agent/skills/lead-intelligence/SKILL.md +321 -0
- package/.agent/skills/lead-intelligence/agents/enrichment-agent.md +85 -0
- package/.agent/skills/lead-intelligence/agents/mutual-mapper.md +75 -0
- package/.agent/skills/lead-intelligence/agents/outreach-drafter.md +98 -0
- package/.agent/skills/lead-intelligence/agents/signal-scorer.md +60 -0
- package/.agent/skills/liquid-glass-design/SKILL.md +279 -0
- package/.agent/skills/logistics-exception-management/SKILL.md +222 -0
- package/.agent/skills/manim-video/SKILL.md +89 -0
- package/.agent/skills/manim-video/assets/network-graph-scene.py +52 -0
- package/.agent/skills/market-research/SKILL.md +75 -0
- package/.agent/skills/mcp-builder/SKILL.md +173 -113
- package/.agent/skills/mcp-builder/license.txt +202 -0
- package/.agent/skills/mcp-builder/reference/evaluation.md +602 -0
- package/.agent/skills/mcp-builder/reference/mcp-best-practices.md +249 -0
- package/.agent/skills/mcp-builder/reference/node-mcp-server.md +970 -0
- package/.agent/skills/mcp-builder/reference/python-mcp-server.md +719 -0
- package/.agent/skills/mcp-builder/scripts/connections.py +151 -0
- package/.agent/skills/mcp-builder/scripts/evaluation.py +373 -0
- package/.agent/skills/mcp-builder/scripts/example-evaluation.xml +22 -0
- package/.agent/skills/mcp-builder/scripts/requirements.txt +2 -0
- package/.agent/skills/mcp-server-patterns/SKILL.md +67 -0
- package/.agent/skills/nanoclaw-repl/SKILL.md +33 -0
- package/.agent/skills/nestjs-patterns/SKILL.md +230 -0
- package/.agent/skills/nextjs-turbopack/SKILL.md +44 -0
- package/.agent/skills/nutrient-document-processing/SKILL.md +167 -0
- package/.agent/skills/nuxt4-patterns/SKILL.md +100 -0
- package/.agent/skills/openclaw-persona-forge/SKILL.md +296 -0
- package/.agent/skills/openclaw-persona-forge/gacha.py +224 -0
- package/.agent/skills/openclaw-persona-forge/gacha.sh +5 -0
- package/.agent/skills/openclaw-persona-forge/references/avatar-style.md +124 -0
- package/.agent/skills/openclaw-persona-forge/references/boundary-rules.md +53 -0
- package/.agent/skills/openclaw-persona-forge/references/error-handling.md +53 -0
- package/.agent/skills/openclaw-persona-forge/references/identity-tension.md +48 -0
- package/.agent/skills/openclaw-persona-forge/references/naming-system.md +39 -0
- package/.agent/skills/openclaw-persona-forge/references/output-template.md +166 -0
- package/.agent/skills/opensource-pipeline/SKILL.md +255 -0
- package/.agent/skills/perl-patterns/SKILL.md +504 -0
- package/.agent/skills/perl-security/SKILL.md +503 -0
- package/.agent/skills/perl-testing/SKILL.md +475 -0
- package/.agent/skills/plankton-code-quality/SKILL.md +236 -0
- package/.agent/skills/postgres-patterns/SKILL.md +147 -0
- package/.agent/skills/product-lens/SKILL.md +85 -0
- package/.agent/skills/production-scheduling/SKILL.md +238 -0
- package/.agent/skills/project-flow-ops/SKILL.md +111 -0
- package/.agent/skills/project-guidelines-example/SKILL.md +349 -0
- package/.agent/skills/prompt-optimizer/SKILL.md +397 -0
- package/.agent/skills/python-patterns/SKILL.md +622 -313
- package/.agent/skills/python-testing/SKILL.md +816 -0
- package/.agent/skills/pytorch-patterns/SKILL.md +396 -0
- package/.agent/skills/quality-nonconformance/SKILL.md +260 -0
- package/.agent/skills/ralphinho-rfc-pipeline/SKILL.md +67 -0
- package/.agent/skills/regex-vs-llm-structured-text/SKILL.md +220 -0
- package/.agent/skills/remotion-video-creation/SKILL.md +43 -0
- package/.agent/skills/remotion-video-creation/rules/3d.md +86 -0
- package/.agent/skills/remotion-video-creation/rules/animations.md +29 -0
- package/.agent/skills/remotion-video-creation/rules/assets/charts-bar-chart.tsx +173 -0
- package/.agent/skills/remotion-video-creation/rules/assets/text-animations-typewriter.tsx +100 -0
- package/.agent/skills/remotion-video-creation/rules/assets/text-animations-word-highlight.tsx +108 -0
- package/.agent/skills/remotion-video-creation/rules/assets.md +78 -0
- package/.agent/skills/remotion-video-creation/rules/audio.md +172 -0
- package/.agent/skills/remotion-video-creation/rules/calculate-metadata.md +104 -0
- package/.agent/skills/remotion-video-creation/rules/can-decode.md +75 -0
- package/.agent/skills/remotion-video-creation/rules/charts.md +58 -0
- package/.agent/skills/remotion-video-creation/rules/compositions.md +146 -0
- package/.agent/skills/remotion-video-creation/rules/display-captions.md +126 -0
- package/.agent/skills/remotion-video-creation/rules/extract-frames.md +229 -0
- package/.agent/skills/remotion-video-creation/rules/fonts.md +152 -0
- package/.agent/skills/remotion-video-creation/rules/get-audio-duration.md +58 -0
- package/.agent/skills/remotion-video-creation/rules/get-video-dimensions.md +68 -0
- package/.agent/skills/remotion-video-creation/rules/get-video-duration.md +58 -0
- package/.agent/skills/remotion-video-creation/rules/gifs.md +138 -0
- package/.agent/skills/remotion-video-creation/rules/images.md +130 -0
- package/.agent/skills/remotion-video-creation/rules/import-srt-captions.md +67 -0
- package/.agent/skills/remotion-video-creation/rules/lottie.md +67 -0
- package/.agent/skills/remotion-video-creation/rules/measuring-dom-nodes.md +34 -0
- package/.agent/skills/remotion-video-creation/rules/measuring-text.md +143 -0
- package/.agent/skills/remotion-video-creation/rules/sequencing.md +106 -0
- package/.agent/skills/remotion-video-creation/rules/tailwind.md +11 -0
- package/.agent/skills/remotion-video-creation/rules/text-animations.md +20 -0
- package/.agent/skills/remotion-video-creation/rules/timing.md +179 -0
- package/.agent/skills/remotion-video-creation/rules/transcribe-captions.md +19 -0
- package/.agent/skills/remotion-video-creation/rules/transitions.md +122 -0
- package/.agent/skills/remotion-video-creation/rules/trimming.md +52 -0
- package/.agent/skills/remotion-video-creation/rules/videos.md +171 -0
- package/.agent/skills/repo-scan/SKILL.md +78 -0
- package/.agent/skills/returns-reverse-logistics/SKILL.md +240 -0
- package/.agent/skills/rules-distill/SKILL.md +264 -0
- package/.agent/skills/rules-distill/scripts/scan-rules.sh +58 -0
- package/.agent/skills/rules-distill/scripts/scan-skills.sh +129 -0
- package/.agent/skills/rust-patterns/SKILL.md +499 -0
- package/.agent/skills/rust-testing/SKILL.md +500 -0
- package/.agent/skills/safety-guard/SKILL.md +75 -0
- package/.agent/skills/santa-method/SKILL.md +306 -0
- package/.agent/skills/search-first/SKILL.md +161 -0
- package/.agent/skills/security-review/SKILL.md +495 -0
- package/.agent/skills/security-review/cloud-infrastructure-security.md +361 -0
- package/.agent/skills/security-scan/SKILL.md +165 -0
- package/.agent/skills/skill-comply/SKILL.md +58 -0
- package/.agent/skills/skill-comply/fixtures/compliant-trace.jsonl +5 -0
- package/.agent/skills/skill-comply/fixtures/noncompliant-trace.jsonl +3 -0
- package/.agent/skills/skill-comply/fixtures/tdd-spec.yaml +44 -0
- package/.agent/skills/skill-comply/prompts/classifier.md +24 -0
- package/.agent/skills/skill-comply/prompts/scenario-generator.md +62 -0
- package/.agent/skills/skill-comply/prompts/spec-generator.md +42 -0
- package/.agent/skills/skill-comply/pyproject.toml +15 -0
- package/.agent/skills/skill-comply/scripts/classifier.py +85 -0
- package/.agent/skills/skill-comply/scripts/grader.py +122 -0
- package/.agent/skills/skill-comply/scripts/init.py +0 -0
- package/.agent/skills/skill-comply/scripts/parser.py +107 -0
- package/.agent/skills/skill-comply/scripts/report.py +170 -0
- package/.agent/skills/skill-comply/scripts/run.py +127 -0
- package/.agent/skills/skill-comply/scripts/runner.py +161 -0
- package/.agent/skills/skill-comply/scripts/scenario-generator.py +70 -0
- package/.agent/skills/skill-comply/scripts/spec-generator.py +72 -0
- package/.agent/skills/skill-comply/scripts/utils.py +13 -0
- package/.agent/skills/skill-comply/tests/test-grader.py +137 -0
- package/.agent/skills/skill-comply/tests/test-parser.py +90 -0
- package/.agent/skills/skill-creator/SKILL.md +485 -0
- package/.agent/skills/skill-creator/agents/analyzer.md +274 -0
- package/.agent/skills/skill-creator/agents/comparator.md +202 -0
- package/.agent/skills/skill-creator/agents/grader.md +223 -0
- package/.agent/skills/skill-creator/assets/eval-review.html +146 -0
- package/.agent/skills/skill-creator/eval-viewer/generate-review.py +471 -0
- package/.agent/skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/.agent/skills/skill-creator/license.txt +202 -0
- package/.agent/skills/skill-creator/references/schemas.md +430 -0
- package/.agent/skills/skill-creator/scripts/aggregate-benchmark.py +401 -0
- package/.agent/skills/skill-creator/scripts/generate-report.py +326 -0
- package/.agent/skills/skill-creator/scripts/improve-description.py +247 -0
- package/.agent/skills/skill-creator/scripts/init.py +0 -0
- package/.agent/skills/skill-creator/scripts/package-skill.py +136 -0
- package/.agent/skills/skill-creator/scripts/quick-validate.py +103 -0
- package/.agent/skills/skill-creator/scripts/run-eval.py +310 -0
- package/.agent/skills/skill-creator/scripts/run-loop.py +328 -0
- package/.agent/skills/skill-creator/scripts/utils.py +47 -0
- package/.agent/skills/skill-stocktake/SKILL.md +193 -0
- package/.agent/skills/skill-stocktake/scripts/quick-diff.sh +87 -0
- package/.agent/skills/skill-stocktake/scripts/save-results.sh +56 -0
- package/.agent/skills/skill-stocktake/scripts/scan.sh +170 -0
- package/.agent/skills/social-graph-ranker/SKILL.md +154 -0
- package/.agent/skills/springboot-patterns/SKILL.md +314 -0
- package/.agent/skills/springboot-security/SKILL.md +272 -0
- package/.agent/skills/springboot-tdd/SKILL.md +158 -0
- package/.agent/skills/springboot-verification/SKILL.md +231 -0
- package/.agent/skills/strategic-compact/SKILL.md +131 -0
- package/.agent/skills/strategic-compact/suggest-compact.sh +54 -0
- package/.agent/skills/swift-actor-persistence/SKILL.md +143 -0
- package/.agent/skills/swift-concurrency-6-2/SKILL.md +216 -0
- package/.agent/skills/swift-protocol-di-testing/SKILL.md +190 -0
- package/.agent/skills/swiftui-patterns/SKILL.md +259 -0
- package/.agent/skills/tdd-workflow/SKILL.md +412 -98
- package/.agent/skills/team-builder/SKILL.md +168 -0
- package/.agent/skills/token-budget-advisor/SKILL.md +133 -0
- package/.agent/skills/ui-demo/SKILL.md +465 -0
- package/.agent/skills/ui-ux-pro-max/data/charts.csv +26 -26
- package/.agent/skills/ui-ux-pro-max/data/colors.csv +97 -97
- package/.agent/skills/ui-ux-pro-max/data/landing.csv +28 -28
- package/.agent/skills/ui-ux-pro-max/data/products.csv +96 -96
- package/.agent/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -53
- package/.agent/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -56
- package/.agent/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -53
- package/.agent/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -52
- package/.agent/skills/ui-ux-pro-max/data/stacks/react.csv +54 -54
- package/.agent/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -54
- package/.agent/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -51
- package/.agent/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -50
- package/.agent/skills/ui-ux-pro-max/data/styles.csv +68 -68
- package/.agent/skills/ui-ux-pro-max/data/ux-guidelines.csv +99 -99
- package/.agent/skills/ui-ux-pro-max/scripts/search.py +114 -114
- package/.agent/skills/verification-loop/SKILL.md +126 -0
- package/.agent/skills/video-editing/SKILL.md +310 -0
- package/.agent/skills/videodb/SKILL.md +374 -0
- package/.agent/skills/videodb/reference/api-reference.md +550 -0
- package/.agent/skills/videodb/reference/capture-reference.md +407 -0
- package/.agent/skills/videodb/reference/capture.md +101 -0
- package/.agent/skills/videodb/reference/editor.md +443 -0
- package/.agent/skills/videodb/reference/generative.md +331 -0
- package/.agent/skills/videodb/reference/rtstream-reference.md +564 -0
- package/.agent/skills/videodb/reference/rtstream.md +65 -0
- package/.agent/skills/videodb/reference/search.md +230 -0
- package/.agent/skills/videodb/reference/streaming.md +406 -0
- package/.agent/skills/videodb/reference/use-cases.md +118 -0
- package/.agent/skills/videodb/scripts/ws-listener.py +282 -0
- package/.agent/skills/visa-doc-translate/SKILL.md +117 -0
- package/.agent/skills/visa-doc-translate/readme.md +86 -0
- package/.agent/skills/workspace-surface-audit/SKILL.md +125 -0
- package/.agent/skills/x-api/SKILL.md +230 -0
- package/.agent/tasks/two-track-merge-contract.md +29 -0
- package/.agent/workflows/aside.md +164 -164
- package/.agent/workflows/build-fix.md +62 -62
- package/.agent/workflows/checkpoint.md +74 -74
- package/.agent/workflows/claw.md +23 -51
- package/.agent/workflows/clean-memory.md +34 -0
- package/.agent/workflows/code-review.md +289 -40
- package/.agent/workflows/context-budget.md +23 -29
- package/.agent/workflows/cpp-build.md +173 -173
- package/.agent/workflows/cpp-review.md +132 -132
- package/.agent/workflows/cpp-test.md +251 -251
- package/.agent/workflows/devfleet.md +23 -92
- package/.agent/workflows/docs.md +23 -31
- package/.agent/workflows/e2e.md +268 -365
- package/.agent/workflows/eval.md +23 -120
- package/.agent/workflows/evolve.md +178 -178
- package/.agent/workflows/flutter-build.md +164 -0
- package/.agent/workflows/flutter-review.md +116 -0
- package/.agent/workflows/flutter-test.md +144 -0
- package/.agent/workflows/gan-build.md +99 -0
- package/.agent/workflows/gan-design.md +35 -0
- package/.agent/workflows/go-build.md +183 -183
- package/.agent/workflows/go-review.md +148 -148
- package/.agent/workflows/go-test.md +268 -268
- package/.agent/workflows/gradle-build.md +70 -70
- package/.agent/workflows/harness-audit.md +73 -71
- package/.agent/workflows/instinct-export.md +66 -66
- package/.agent/workflows/instinct-import.md +114 -114
- package/.agent/workflows/instinct-status.md +59 -59
- package/.agent/workflows/jira.md +106 -0
- package/.agent/workflows/kotlin-build.md +174 -174
- package/.agent/workflows/kotlin-review.md +140 -140
- package/.agent/workflows/kotlin-test.md +312 -312
- package/.agent/workflows/learn-eval.md +116 -116
- package/.agent/workflows/learn.md +70 -70
- package/.agent/workflows/loop-start.md +32 -32
- package/.agent/workflows/loop-status.md +24 -24
- package/.agent/workflows/model-route.md +26 -26
- package/.agent/workflows/multi-backend.md +158 -158
- package/.agent/workflows/multi-execute.md +315 -315
- package/.agent/workflows/multi-frontend.md +158 -158
- package/.agent/workflows/multi-plan.md +268 -268
- package/.agent/workflows/multi-workflow.md +191 -191
- package/.agent/workflows/orchestrate.md +135 -231
- package/.agent/workflows/plan.md +117 -115
- package/.agent/workflows/pm2.md +272 -272
- package/.agent/workflows/projects.md +39 -39
- package/.agent/workflows/promote.md +41 -41
- package/.agent/workflows/prompt-optimize.md +23 -38
- package/.agent/workflows/prp-commit.md +112 -0
- package/.agent/workflows/prp-implement.md +385 -0
- package/.agent/workflows/prp-plan.md +502 -0
- package/.agent/workflows/prp-pr.md +184 -0
- package/.agent/workflows/prp-prd.md +447 -0
- package/.agent/workflows/prune.md +31 -31
- package/.agent/workflows/python-review.md +297 -297
- package/.agent/workflows/quality-gate.md +29 -29
- package/.agent/workflows/refactor-clean.md +80 -80
- package/.agent/workflows/resume-session.md +156 -156
- package/.agent/workflows/rules-distill.md +20 -11
- package/.agent/workflows/rust-build.md +187 -187
- package/.agent/workflows/rust-review.md +142 -142
- package/.agent/workflows/rust-test.md +308 -308
- package/.agent/workflows/santa-loop.md +175 -0
- package/.agent/workflows/save-session.md +275 -275
- package/.agent/workflows/sessions.md +333 -333
- package/.agent/workflows/setup-pm.md +80 -80
- package/.agent/workflows/skill-create.md +174 -174
- package/.agent/workflows/skill-health.md +54 -54
- package/.agent/workflows/tdd.md +231 -328
- package/.agent/workflows/test-coverage.md +69 -69
- package/.agent/workflows/update-codemaps.md +72 -72
- package/.agent/workflows/update-docs.md +84 -84
- package/.agent/workflows/verify.md +23 -59
- package/LICENSE +176 -176
- package/README.md +28 -20
- package/RELEASE.md +32 -36
- package/package.json +87 -79
- package/scripts/release-check.js +55 -55
- package/src/bin/cli.js +399 -53
- package/src/lib/installer.js +360 -114
- package/src/lib/manifests/stacks.js +122 -0
- package/src/lib/slash-commands.js +28 -0
- package/src/templates/claude/CLAUDE.en.md +42 -0
- package/src/templates/claude/CLAUDE.md +42 -0
- package/src/templates/claude/CLAUDE.vi.md +42 -0
- package/src/templates/codex/AGENTS.en.md +40 -0
- package/src/templates/codex/AGENTS.md +40 -0
- package/src/templates/codex/AGENTS.vi.md +40 -0
- package/src/templates/cursor/pilo-masterkit.mdc +20 -0
- package/src/templates/gemini/GEMINI.en.md +56 -0
- package/src/templates/gemini/GEMINI.md +56 -0
- package/src/templates/gemini/GEMINI.vi.md +56 -0
- package/src/templates/github/copilot-instructions.md +16 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""CLI entry point for skill-comply."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
from scripts.grader import grade
|
|
14
|
+
from scripts.report import generate_report
|
|
15
|
+
from scripts.runner import run_scenario
|
|
16
|
+
from scripts.scenario_generator import generate_scenarios
|
|
17
|
+
from scripts.spec_generator import generate_spec
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main() -> None:
    """Run the skill-comply pipeline from the command line.

    Steps, in order:
      1. generate a compliance spec from the skill file,
      2. generate scenarios from the skill file and the spec,
      3. execute each scenario and grade its observations,
      4. write the report and log the overall compliance rate.

    With ``--dry-run`` the function logs the spec steps and returns after
    step 2. Exits with status 1 when the skill path is not an existing file.
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    parser = argparse.ArgumentParser(
        description="skill-comply: Measure skill compliance rates",
    )
    parser.add_argument(
        "skill",
        type=Path,
        help="Path to skill/rule file to test",
    )
    parser.add_argument(
        "--model",
        default="sonnet",
        help="Model for scenario execution (default: sonnet)",
    )
    parser.add_argument(
        "--gen-model",
        default="haiku",
        help="Model for spec/scenario generation (default: haiku)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Generate spec and scenarios without executing",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output report path (default: results/<skill-name>.md)",
    )

    args = parser.parse_args()

    if not args.skill.is_file():
        logger.error("Error: Skill file not found: %s", args.skill)
        sys.exit(1)

    # Default report directory. It is only created when a report is actually
    # written (see step 4), so --dry-run and an explicit --output never touch
    # the package directory.
    results_dir = Path(__file__).parent.parent / "results"

    # Step 1: Generate compliance spec
    logger.info("[1/4] Generating compliance spec from %s...", args.skill.name)
    spec = generate_spec(args.skill, model=args.gen_model)
    logger.info(" %d steps extracted", len(spec.steps))

    # Step 2: Generate scenarios
    spec_yaml = yaml.dump({
        "steps": [
            {"id": s.id, "description": s.description, "required": s.required}
            for s in spec.steps
        ]
    })
    logger.info("[2/4] Generating scenarios (3 prompt strictness levels)...")
    scenarios = generate_scenarios(args.skill, spec_yaml, model=args.gen_model)
    logger.info(" %d scenarios generated", len(scenarios))

    for s in scenarios:
        logger.info(" - %s: %s", s.level_name, s.description[:60])

    if args.dry_run:
        logger.info("\n[dry-run] Spec and scenarios generated. Skipping execution.")
        logger.info("\nSpec: %s (%d steps)", spec.id, len(spec.steps))
        for step in spec.steps:
            marker = "*" if step.required else " "
            logger.info(" [%s] %s: %s", marker, step.id, step.description)
        return

    # Step 3: Execute scenarios
    logger.info("[3/4] Executing scenarios (model=%s)...", args.model)
    # Each entry: (level name, grading result, observations of the run).
    graded_results: list[tuple[str, Any, list[Any]]] = []

    for scenario in scenarios:
        logger.info(" Running %s...", scenario.level_name)
        run = run_scenario(scenario, model=args.model)
        observations = list(run.observations)
        result = grade(spec, observations)
        graded_results.append((scenario.level_name, result, list(run.observations)))
        logger.info(" %s: %.0f%%", scenario.level_name, result.compliance_rate * 100)

    # Step 4: Generate report
    # A file named SKILL.* is identified by its directory name instead.
    skill_name = args.skill.parent.name if args.skill.stem == "SKILL" else args.skill.stem
    output_path = args.output or results_dir / f"{skill_name}.md"
    logger.info("[4/4] Generating report...")

    report = generate_report(args.skill, spec, graded_results, scenarios=scenarios)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so the report does not depend on the platform locale.
    output_path.write_text(report, encoding="utf-8")
    logger.info(" Report saved to %s", output_path)

    # Summary
    if not graded_results:
        logger.warning("No scenarios were executed.")
        return
    overall = sum(r.compliance_rate for _, r, _obs in graded_results) / len(graded_results)
    logger.info("\n%s", "=" * 50)
    logger.info("Overall Compliance: %.0f%%", overall * 100)
    if overall < spec.threshold_promote_to_hook:
        logger.info(
            "Recommendation: Some steps have low compliance. "
            "Consider promoting them to hooks. See the report for details."
        )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
if __name__ == "__main__":
|
|
127
|
+
main()
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Run scenarios via claude -p and parse tool calls from stream-json output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import shlex
|
|
8
|
+
import shutil
|
|
9
|
+
import subprocess
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from scripts.parser import ObservationEvent
|
|
14
|
+
from scripts.scenario_generator import Scenario
|
|
15
|
+
|
|
16
|
+
SANDBOX_BASE = Path("/tmp/skill-comply-sandbox")
|
|
17
|
+
ALLOWED_MODELS = frozenset({"haiku", "sonnet", "opus"})
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class ScenarioRun:
    """Immutable record of one executed scenario."""

    # The scenario that was executed.
    scenario: Scenario
    # Observation events extracted from the run's output.
    observations: tuple[ObservationEvent, ...]
    # Directory the scenario was executed in.
    sandbox_dir: Path
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run_scenario(
    scenario: Scenario,
    model: str = "sonnet",
    max_turns: int = 30,
    timeout: int = 300,
) -> ScenarioRun:
    """Run one scenario through ``claude -p`` inside a freshly prepared sandbox.

    The sandbox for the scenario is (re)created, the CLI is invoked from
    within it with stream-json output, and the captured stdout is parsed
    into observation events.

    Raises:
        ValueError: if ``model`` is not one of ALLOWED_MODELS.
        RuntimeError: if the ``claude`` process exits with a non-zero code.
        subprocess.TimeoutExpired: if the process runs longer than ``timeout``
            seconds.
    """
    if model not in ALLOWED_MODELS:
        raise ValueError(f"Unknown model: {model!r}. Allowed: {ALLOWED_MODELS}")

    workdir = _safe_sandbox_dir(scenario.id)
    _setup_sandbox(workdir, scenario)

    command = [
        "claude", "-p", scenario.prompt,
        "--model", model,
        "--max-turns", str(max_turns),
        "--add-dir", str(workdir),
        "--allowedTools", "Read,Write,Edit,Bash,Glob,Grep",
        "--output-format", "stream-json",
        "--verbose",
    ]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=workdir,
    )

    if completed.returncode != 0:
        # Only the head of stderr is included to keep the message short.
        stderr_head = completed.stderr[:500]
        raise RuntimeError(
            f"claude -p failed (rc={completed.returncode}): {stderr_head}"
        )

    parsed_events = _parse_stream_json(completed.stdout)
    return ScenarioRun(
        scenario=scenario,
        observations=tuple(parsed_events),
        sandbox_dir=workdir,
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _safe_sandbox_dir(scenario_id: str) -> Path:
    """Map a scenario ID to its sandbox directory under SANDBOX_BASE.

    Every character outside ``[a-zA-Z0-9_-]`` is replaced with ``_``. The ID
    is coerced with ``str()`` first, because IDs parsed from YAML may be
    non-strings (e.g. ``id: 1``). An empty ID falls back to a placeholder
    name so the function never returns SANDBOX_BASE itself, which
    ``_setup_sandbox`` would delete wholesale.

    Raises:
        ValueError: if the resolved path is not inside SANDBOX_BASE.
    """
    safe_id = re.sub(r"[^a-zA-Z0-9\-_]", "_", str(scenario_id)) or "unnamed"
    path = SANDBOX_BASE / safe_id
    # Validate path stays within sandbox base (raises ValueError on traversal)
    path.resolve().relative_to(SANDBOX_BASE.resolve())
    return path
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _setup_sandbox(sandbox_dir: Path, scenario: Scenario) -> None:
    """Recreate the sandbox directory and run the scenario's setup commands.

    An existing directory is removed first. The new directory is initialised
    as a git repository, then each entry of ``scenario.setup_commands`` is
    split with ``shlex`` and executed inside it without a shell. Exit codes
    of ``git init`` and of the setup commands are not checked.
    """
    if sandbox_dir.exists():
        shutil.rmtree(sandbox_dir)
    sandbox_dir.mkdir(parents=True)

    subprocess.run(["git", "init"], cwd=sandbox_dir, capture_output=True)

    for cmd in scenario.setup_commands:
        parts = shlex.split(cmd)
        if not parts:
            # A blank command splits to [], which subprocess.run cannot
            # execute; skip it instead of aborting the whole run.
            continue
        subprocess.run(parts, cwd=sandbox_dir, capture_output=True)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _parse_stream_json(stdout: str) -> list[ObservationEvent]:
|
|
93
|
+
"""Parse claude -p stream-json output into ObservationEvents.
|
|
94
|
+
|
|
95
|
+
Stream-json format:
|
|
96
|
+
- type=assistant with content[].type=tool_use → tool call (name, input)
|
|
97
|
+
- type=user with content[].type=tool_result → tool result (output)
|
|
98
|
+
"""
|
|
99
|
+
events: list[ObservationEvent] = []
|
|
100
|
+
pending: dict[str, dict] = {}
|
|
101
|
+
event_counter = 0
|
|
102
|
+
|
|
103
|
+
for line in stdout.strip().splitlines():
|
|
104
|
+
try:
|
|
105
|
+
msg = json.loads(line)
|
|
106
|
+
except json.JSONDecodeError:
|
|
107
|
+
continue
|
|
108
|
+
|
|
109
|
+
msg_type = msg.get("type")
|
|
110
|
+
|
|
111
|
+
if msg_type == "assistant":
|
|
112
|
+
content = msg.get("message", {}).get("content", [])
|
|
113
|
+
for block in content:
|
|
114
|
+
if block.get("type") == "tool_use":
|
|
115
|
+
tool_use_id = block.get("id", "")
|
|
116
|
+
tool_input = block.get("input", {})
|
|
117
|
+
input_str = (
|
|
118
|
+
json.dumps(tool_input)[:5000]
|
|
119
|
+
if isinstance(tool_input, dict)
|
|
120
|
+
else str(tool_input)[:5000]
|
|
121
|
+
)
|
|
122
|
+
pending[tool_use_id] = {
|
|
123
|
+
"tool": block.get("name", "unknown"),
|
|
124
|
+
"input": input_str,
|
|
125
|
+
"order": event_counter,
|
|
126
|
+
}
|
|
127
|
+
event_counter += 1
|
|
128
|
+
|
|
129
|
+
elif msg_type == "user":
|
|
130
|
+
content = msg.get("message", {}).get("content", [])
|
|
131
|
+
if isinstance(content, list):
|
|
132
|
+
for block in content:
|
|
133
|
+
tool_use_id = block.get("tool_use_id", "")
|
|
134
|
+
if tool_use_id in pending:
|
|
135
|
+
info = pending.pop(tool_use_id)
|
|
136
|
+
output_content = block.get("content", "")
|
|
137
|
+
if isinstance(output_content, list):
|
|
138
|
+
output_str = json.dumps(output_content)[:5000]
|
|
139
|
+
else:
|
|
140
|
+
output_str = str(output_content)[:5000]
|
|
141
|
+
|
|
142
|
+
events.append(ObservationEvent(
|
|
143
|
+
timestamp=f"T{info['order']:04d}",
|
|
144
|
+
event="tool_complete",
|
|
145
|
+
tool=info["tool"],
|
|
146
|
+
session=msg.get("session_id", "unknown"),
|
|
147
|
+
input=info["input"],
|
|
148
|
+
output=output_str,
|
|
149
|
+
))
|
|
150
|
+
|
|
151
|
+
for _tool_use_id, info in pending.items():
|
|
152
|
+
events.append(ObservationEvent(
|
|
153
|
+
timestamp=f"T{info['order']:04d}",
|
|
154
|
+
event="tool_complete",
|
|
155
|
+
tool=info["tool"],
|
|
156
|
+
session="unknown",
|
|
157
|
+
input=info["input"],
|
|
158
|
+
output="",
|
|
159
|
+
))
|
|
160
|
+
|
|
161
|
+
return sorted(events, key=lambda e: e.timestamp)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Generate pressure scenarios from skill + spec using LLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from scripts.utils import extract_yaml
|
|
12
|
+
|
|
13
|
+
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class Scenario:
    """Immutable description of one generated scenario."""

    # Identifier of the scenario.
    id: str
    # Numeric level; generated scenarios are sorted by it.
    level: int
    # Human-readable name of the level.
    level_name: str
    # Short description of the scenario.
    description: str
    # Prompt text handed to the model.
    prompt: str
    # Commands to run before the scenario is executed.
    setup_commands: tuple[str, ...]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_scenarios(
    skill_path: Path,
    spec_yaml: str,
    model: str = "haiku",
) -> list[Scenario]:
    """Generate scenarios for a skill by prompting ``claude -p``.

    The ``scenario_generator.md`` prompt template is filled with the skill
    text and the spec YAML, sent to the CLI, and the YAML in the reply is
    parsed into Scenario objects. The number of scenarios is whatever the
    reply contains; it is not enforced here.

    Args:
        skill_path: Skill/rule file whose content is embedded in the prompt.
        spec_yaml: YAML text of the compliance spec embedded in the prompt.
        model: Model name passed to ``claude --model``.

    Returns:
        The parsed scenarios, sorted by ascending ``level``.

    Raises:
        RuntimeError: if the CLI exits non-zero or returns empty output.
    """
    # Read as UTF-8 explicitly so non-ASCII skill or prompt text does not
    # depend on the platform's locale encoding.
    skill_content = skill_path.read_text(encoding="utf-8")
    prompt_template = (PROMPTS_DIR / "scenario_generator.md").read_text(encoding="utf-8")
    prompt = (
        prompt_template
        .replace("{skill_content}", skill_content)
        .replace("{spec_yaml}", spec_yaml)
    )

    result = subprocess.run(
        ["claude", "-p", prompt, "--model", model, "--output-format", "text"],
        capture_output=True,
        text=True,
        timeout=120,
    )

    if result.returncode != 0:
        raise RuntimeError(f"claude -p failed: {result.stderr}")

    if not result.stdout.strip():
        raise RuntimeError("claude -p returned empty output")

    raw_yaml = extract_yaml(result.stdout)
    parsed = yaml.safe_load(raw_yaml)

    scenarios: list[Scenario] = []
    for s in parsed["scenarios"]:
        scenarios.append(Scenario(
            id=s["id"],
            level=s["level"],
            level_name=s["level_name"],
            description=s["description"],
            prompt=s["prompt"].strip(),
            # `setup_commands:` with no value parses to None; treat it like
            # a missing key instead of failing in tuple(None).
            setup_commands=tuple(s.get("setup_commands") or ()),
        ))

    return sorted(scenarios, key=lambda s: s.level)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Generate compliance specs from skill files using LLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from scripts.parser import ComplianceSpec, parse_spec
|
|
12
|
+
from scripts.utils import extract_yaml
|
|
13
|
+
|
|
14
|
+
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def generate_spec(
    skill_path: Path,
    model: str = "haiku",
    max_retries: int = 2,
) -> ComplianceSpec:
    """Generate a compliance spec from a skill/rule file.

    Calls claude -p with the spec_generator prompt, writes the YAML from the
    reply to a temporary file and parses it with ``parse_spec``. When parsing
    fails, the call is repeated with the error appended to the prompt.

    Args:
        skill_path: Skill/rule file whose content is embedded in the prompt.
        model: Model name passed to ``claude --model``.
        max_retries: Extra attempts after the first one; negative values are
            treated as 0.

    Raises:
        RuntimeError: if the CLI exits with a non-zero code.
        yaml.YAMLError, KeyError, TypeError: the parse error of the final
            attempt, re-raised unchanged.
    """
    # Read as UTF-8 explicitly so non-ASCII skill or prompt text does not
    # depend on the platform's locale encoding.
    skill_content = skill_path.read_text(encoding="utf-8")
    prompt_template = (PROMPTS_DIR / "spec_generator.md").read_text(encoding="utf-8")
    base_prompt = prompt_template.replace("{skill_content}", skill_content)

    # Always make at least one attempt, even for a negative max_retries.
    total_attempts = max(max_retries, 0) + 1
    last_error: Exception | None = None

    for attempt in range(total_attempts):
        prompt = base_prompt
        if attempt > 0 and last_error is not None:
            prompt += (
                f"\n\nPREVIOUS ATTEMPT FAILED with YAML parse error:\n"
                f"{last_error}\n\n"
                f"Please fix the YAML. Remember to quote all string values "
                f"that contain colons, e.g.: description: \"Use type: description format\""
            )

        result = subprocess.run(
            ["claude", "-p", prompt, "--model", model, "--output-format", "text"],
            capture_output=True,
            text=True,
            timeout=120,
        )

        if result.returncode != 0:
            raise RuntimeError(f"claude -p failed: {result.stderr}")

        raw_yaml = extract_yaml(result.stdout)

        tmp_path = None
        try:
            # parse_spec takes a path, so the YAML goes through a temp file.
            # The write sits inside the try so the file is removed even when
            # writing fails.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".yaml", delete=False,
            ) as f:
                tmp_path = Path(f.name)
                f.write(raw_yaml)
            return parse_spec(tmp_path)
        except (yaml.YAMLError, KeyError, TypeError) as e:
            last_error = e
            if attempt == total_attempts - 1:
                raise
        finally:
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)

    raise RuntimeError("unreachable")
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Shared utilities for skill-comply scripts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def extract_yaml(text: str) -> str:
    """Extract YAML from LLM output, stripping markdown fences if present.

    Only fence lines that start at column 0 are considered, so fences nested
    inside indented YAML block scalars are left alone.

    - Two or more fences: return the lines between the first and the second,
      which drops any prose before or after the fenced block.
    - A single fence on the first or last line: drop that line.
    - Otherwise: return the stripped text unchanged.
    """
    lines = text.strip().splitlines()
    fences = [i for i, line in enumerate(lines) if line.startswith("```")]

    if len(fences) >= 2:
        return "\n".join(lines[fences[0] + 1:fences[1]])

    if len(fences) == 1:
        only = fences[0]
        if only == 0:
            lines = lines[1:]
        elif only == len(lines) - 1:
            lines = lines[:-1]
        # A lone fence in the middle is ambiguous (opening or closing), so
        # the text is left as it is.

    return "\n".join(lines)
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Tests for grader module — compliance scoring with LLM classification."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from unittest.mock import patch
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from scripts.grader import ComplianceResult, StepResult, grade
|
|
9
|
+
from scripts.parser import parse_spec, parse_trace
|
|
10
|
+
|
|
11
|
+
FIXTURES = Path(__file__).parent.parent / "fixtures"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
def tdd_spec():
    """Spec parsed from the ``tdd_spec.yaml`` fixture file."""
    spec_file = FIXTURES / "tdd_spec.yaml"
    return parse_spec(spec_file)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
def compliant_trace():
    """Trace parsed from the ``compliant_trace.jsonl`` fixture file."""
    trace_file = FIXTURES / "compliant_trace.jsonl"
    return parse_trace(trace_file)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
def noncompliant_trace():
    """Trace parsed from the ``noncompliant_trace.jsonl`` fixture file."""
    trace_file = FIXTURES / "noncompliant_trace.jsonl"
    return parse_trace(trace_file)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _mock_compliant_classification(spec, trace, model="haiku"): # noqa: ARG001
|
|
30
|
+
"""Simulate LLM correctly classifying a compliant trace."""
|
|
31
|
+
return {
|
|
32
|
+
"write_test": [0],
|
|
33
|
+
"run_test_red": [1],
|
|
34
|
+
"write_impl": [2],
|
|
35
|
+
"run_test_green": [3],
|
|
36
|
+
"refactor": [4],
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _mock_noncompliant_classification(spec, trace, model="haiku"):
|
|
41
|
+
"""Simulate LLM classifying a noncompliant trace (impl before test)."""
|
|
42
|
+
return {
|
|
43
|
+
"write_impl": [0], # src/fib.py written first
|
|
44
|
+
"write_test": [1], # test written second
|
|
45
|
+
"run_test_green": [2], # only a passing test run
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _mock_empty_classification(spec, trace, model="haiku"):
|
|
50
|
+
return {}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TestGradeCompliant:
    """grade() on the compliant trace, with the classifier mocked out."""

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_returns_compliance_result(self, mock_cls, tdd_spec, compliant_trace) -> None:
        graded = grade(tdd_spec, compliant_trace)
        assert isinstance(graded, ComplianceResult)

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_full_compliance(self, mock_cls, tdd_spec, compliant_trace) -> None:
        graded = grade(tdd_spec, compliant_trace)
        assert graded.compliance_rate == 1.0

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_all_required_steps_detected(self, mock_cls, tdd_spec, compliant_trace) -> None:
        required_ids = ("write_test", "run_test_red", "write_impl", "run_test_green")
        graded = grade(tdd_spec, compliant_trace)
        for step in graded.steps:
            if step.step_id in required_ids:
                assert step.detected

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_optional_step_detected(self, mock_cls, tdd_spec, compliant_trace) -> None:
        graded = grade(tdd_spec, compliant_trace)
        refactor_step = next(
            step for step in graded.steps if step.step_id == "refactor"
        )
        assert refactor_step.detected is True

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_no_hook_promotion_recommended(self, mock_cls, tdd_spec, compliant_trace) -> None:
        graded = grade(tdd_spec, compliant_trace)
        assert graded.recommend_hook_promotion is False

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_step_evidence_not_empty(self, mock_cls, tdd_spec, compliant_trace) -> None:
        graded = grade(tdd_spec, compliant_trace)
        detected_steps = [step for step in graded.steps if step.detected]
        for step in detected_steps:
            assert len(step.evidence) > 0
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class TestGradeNoncompliant:
    """grade() on the noncompliant trace, with the classifier mocked out."""

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_low_compliance(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        graded = grade(tdd_spec, noncompliant_trace)
        assert graded.compliance_rate < 1.0

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_write_test_fails_ordering(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        """write_test must precede write_impl, but here the test comes AFTER impl."""
        graded = grade(tdd_spec, noncompliant_trace)
        write_test_step = next(
            step for step in graded.steps if step.step_id == "write_test"
        )
        assert write_test_step.detected is False

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_run_test_red_not_detected(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        graded = grade(tdd_spec, noncompliant_trace)
        red_run_step = next(
            step for step in graded.steps if step.step_id == "run_test_red"
        )
        assert red_run_step.detected is False

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_hook_promotion_recommended(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        graded = grade(tdd_spec, noncompliant_trace)
        assert graded.recommend_hook_promotion is True

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_failure_reasons_present(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        graded = grade(tdd_spec, noncompliant_trace)
        # The "refactor" step is excluded from this check.
        for step in graded.steps:
            if step.detected or step.step_id == "refactor":
                continue
            assert step.failure_reason is not None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class TestGradeEdgeCases:
    """Boundary behaviour of grade(), with the classifier mocked out."""

    @patch("scripts.grader.classify_events", side_effect=_mock_empty_classification)
    def test_empty_trace(self, mock_cls, tdd_spec) -> None:
        no_events = []
        graded = grade(tdd_spec, no_events)
        assert graded.compliance_rate == 0.0
        assert graded.recommend_hook_promotion is True

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_compliance_rate_is_ratio_of_required_only(self, mock_cls, tdd_spec, compliant_trace) -> None:
        # NOTE(review): with every step detected this assertion cannot tell
        # "required only" from "all steps" - confirm the intended coverage.
        graded = grade(tdd_spec, compliant_trace)
        assert graded.compliance_rate == 1.0

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_spec_id_in_result(self, mock_cls, tdd_spec, compliant_trace) -> None:
        graded = grade(tdd_spec, compliant_trace)
        assert graded.spec_id == "tdd-workflow"
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Tests for parser module — JSONL trace and YAML spec parsing."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from scripts.parser import (
|
|
8
|
+
ComplianceSpec,
|
|
9
|
+
Detector,
|
|
10
|
+
ObservationEvent,
|
|
11
|
+
Step,
|
|
12
|
+
parse_spec,
|
|
13
|
+
parse_trace,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
FIXTURES = Path(__file__).parent.parent / "fixtures"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestParseTrace:
    """parse_trace() against the JSONL trace fixtures."""

    def test_parses_compliant_trace(self) -> None:
        parsed = parse_trace(FIXTURES / "compliant_trace.jsonl")
        assert len(parsed) == 5
        for event in parsed:
            assert isinstance(event, ObservationEvent)

    def test_events_sorted_by_timestamp(self) -> None:
        parsed = parse_trace(FIXTURES / "compliant_trace.jsonl")
        stamps = [event.timestamp for event in parsed]
        assert stamps == sorted(stamps)

    def test_event_fields(self) -> None:
        parsed = parse_trace(FIXTURES / "compliant_trace.jsonl")
        head = parsed[0]
        assert head.tool == "Write"
        assert head.session == "sess-001"
        assert "test_fib.py" in head.input
        assert head.output == "File created"

    def test_parses_noncompliant_trace(self) -> None:
        parsed = parse_trace(FIXTURES / "noncompliant_trace.jsonl")
        assert len(parsed) == 3
        assert "src/fib.py" in parsed[0].input

    def test_empty_file_returns_empty_list(self, tmp_path: Path) -> None:
        blank_file = tmp_path / "empty.jsonl"
        blank_file.write_text("")
        assert parse_trace(blank_file) == []

    def test_nonexistent_file_raises(self) -> None:
        missing = Path("/nonexistent/trace.jsonl")
        with pytest.raises(FileNotFoundError):
            parse_trace(missing)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestParseSpec:
    """parse_spec() against the ``tdd_spec.yaml`` fixture."""

    def test_parses_tdd_spec(self) -> None:
        parsed = parse_spec(FIXTURES / "tdd_spec.yaml")
        assert isinstance(parsed, ComplianceSpec)
        assert parsed.id == "tdd-workflow"
        assert len(parsed.steps) == 5

    def test_step_fields(self) -> None:
        parsed = parse_spec(FIXTURES / "tdd_spec.yaml")
        head = parsed.steps[0]
        assert isinstance(head, Step)
        assert head.id == "write_test"
        assert head.required is True
        detector = head.detector
        assert isinstance(detector, Detector)
        assert "test file" in detector.description
        assert detector.before_step == "write_impl"

    def test_optional_detector_fields(self) -> None:
        parsed = parse_spec(FIXTURES / "tdd_spec.yaml")
        write_test_step, red_run_step = parsed.steps[0], parsed.steps[1]

        assert write_test_step.detector.after_step is None

        assert red_run_step.detector.after_step == "write_test"
        assert red_run_step.detector.before_step == "write_impl"

    def test_scoring_threshold(self) -> None:
        parsed = parse_spec(FIXTURES / "tdd_spec.yaml")
        assert parsed.threshold_promote_to_hook == 0.6

    def test_required_vs_optional_steps(self) -> None:
        parsed = parse_spec(FIXTURES / "tdd_spec.yaml")
        mandatory = [step for step in parsed.steps if step.required]
        optional = [step for step in parsed.steps if not step.required]
        assert len(mandatory) == 4
        assert len(optional) == 1
        assert optional[0].id == "refactor"
|