@heytherevibin/skillforge 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/CODE_OF_CONDUCT.md +34 -0
- package/CONTRIBUTING.md +38 -0
- package/LICENSE +21 -0
- package/README.md +337 -0
- package/RELEASING.md +93 -0
- package/SECURITY.md +31 -0
- package/STRATEGY.md +26 -0
- package/bin/cli.js +547 -0
- package/lib/packs.js +184 -0
- package/package.json +38 -0
- package/python/app/__init__.py +0 -0
- package/python/app/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/app/__pycache__/auth.cpython-312.pyc +0 -0
- package/python/app/__pycache__/main.cpython-312.pyc +0 -0
- package/python/app/auth.py +63 -0
- package/python/app/cli.py +78 -0
- package/python/app/db_paths.py +26 -0
- package/python/app/events_cli.py +175 -0
- package/python/app/main.py +647 -0
- package/python/app/materialize.py +138 -0
- package/python/app/mcp_server.py +610 -0
- package/python/app/route_cli.py +117 -0
- package/python/requirements-dev.txt +1 -0
- package/python/requirements.txt +7 -0
- package/python/tests/test_db_paths.py +41 -0
- package/skills/accessibility/SKILL.md +145 -0
- package/skills/agent-architecture-audit/SKILL.md +256 -0
- package/skills/agent-eval/SKILL.md +144 -0
- package/skills/agent-harness-construction/SKILL.md +72 -0
- package/skills/agent-introspection-debugging/SKILL.md +152 -0
- package/skills/agent-payment-x402/SKILL.md +224 -0
- package/skills/agent-sort/SKILL.md +214 -0
- package/skills/agentic-engineering/SKILL.md +62 -0
- package/skills/agentic-os/SKILL.md +386 -0
- package/skills/ai-first-engineering/SKILL.md +50 -0
- package/skills/ai-regression-testing/SKILL.md +384 -0
- package/skills/android-clean-architecture/SKILL.md +338 -0
- package/skills/angular-developer/SKILL.md +153 -0
- package/skills/angular-developer/references/angular-animations.md +160 -0
- package/skills/angular-developer/references/angular-aria.md +410 -0
- package/skills/angular-developer/references/cli.md +86 -0
- package/skills/angular-developer/references/component-harnesses.md +59 -0
- package/skills/angular-developer/references/component-styling.md +91 -0
- package/skills/angular-developer/references/components.md +117 -0
- package/skills/angular-developer/references/creating-services.md +97 -0
- package/skills/angular-developer/references/data-resolvers.md +69 -0
- package/skills/angular-developer/references/define-routes.md +67 -0
- package/skills/angular-developer/references/defining-providers.md +72 -0
- package/skills/angular-developer/references/di-fundamentals.md +120 -0
- package/skills/angular-developer/references/e2e-testing.md +56 -0
- package/skills/angular-developer/references/effects.md +83 -0
- package/skills/angular-developer/references/hierarchical-injectors.md +43 -0
- package/skills/angular-developer/references/host-elements.md +80 -0
- package/skills/angular-developer/references/injection-context.md +63 -0
- package/skills/angular-developer/references/inputs.md +101 -0
- package/skills/angular-developer/references/linked-signal.md +59 -0
- package/skills/angular-developer/references/loading-strategies.md +61 -0
- package/skills/angular-developer/references/mcp.md +108 -0
- package/skills/angular-developer/references/navigate-to-routes.md +69 -0
- package/skills/angular-developer/references/outputs.md +86 -0
- package/skills/angular-developer/references/reactive-forms.md +122 -0
- package/skills/angular-developer/references/rendering-strategies.md +44 -0
- package/skills/angular-developer/references/resource.md +77 -0
- package/skills/angular-developer/references/route-animations.md +56 -0
- package/skills/angular-developer/references/route-guards.md +52 -0
- package/skills/angular-developer/references/router-lifecycle.md +45 -0
- package/skills/angular-developer/references/router-testing.md +87 -0
- package/skills/angular-developer/references/show-routes-with-outlets.md +68 -0
- package/skills/angular-developer/references/signal-forms.md +795 -0
- package/skills/angular-developer/references/signals-overview.md +94 -0
- package/skills/angular-developer/references/tailwind-css.md +69 -0
- package/skills/angular-developer/references/template-driven-forms.md +114 -0
- package/skills/angular-developer/references/testing-fundamentals.md +65 -0
- package/skills/api-connector-builder/SKILL.md +120 -0
- package/skills/api-design/SKILL.md +522 -0
- package/skills/architecture-decision-records/SKILL.md +178 -0
- package/skills/article-writing/SKILL.md +78 -0
- package/skills/automation-audit-ops/SKILL.md +141 -0
- package/skills/autonomous-agent-harness/SKILL.md +272 -0
- package/skills/autonomous-loops/SKILL.md +609 -0
- package/skills/backend-patterns/SKILL.md +560 -0
- package/skills/benchmark/SKILL.md +92 -0
- package/skills/blueprint/SKILL.md +104 -0
- package/skills/browser-qa/SKILL.md +86 -0
- package/skills/bun-runtime/SKILL.md +83 -0
- package/skills/canary-watch/SKILL.md +98 -0
- package/skills/carrier-relationship-management/SKILL.md +211 -0
- package/skills/cisco-ios-patterns/SKILL.md +163 -0
- package/skills/ck/SKILL.md +147 -0
- package/skills/ck/commands/forget.mjs +44 -0
- package/skills/ck/commands/info.mjs +24 -0
- package/skills/ck/commands/init.mjs +143 -0
- package/skills/ck/commands/list.mjs +40 -0
- package/skills/ck/commands/migrate.mjs +202 -0
- package/skills/ck/commands/resume.mjs +36 -0
- package/skills/ck/commands/save.mjs +210 -0
- package/skills/ck/commands/shared.mjs +387 -0
- package/skills/ck/hooks/session-start.mjs +224 -0
- package/skills/claude-devfleet/SKILL.md +103 -0
- package/skills/click-path-audit/SKILL.md +244 -0
- package/skills/clickhouse-io/SKILL.md +438 -0
- package/skills/code-tour/SKILL.md +235 -0
- package/skills/codebase-onboarding/SKILL.md +232 -0
- package/skills/coding-standards/SKILL.md +548 -0
- package/skills/compose-multiplatform-patterns/SKILL.md +298 -0
- package/skills/connections-optimizer/SKILL.md +188 -0
- package/skills/content-engine/SKILL.md +126 -0
- package/skills/content-hash-cache-pattern/SKILL.md +160 -0
- package/skills/context-budget/SKILL.md +134 -0
- package/skills/continuous-agent-loop/SKILL.md +44 -0
- package/skills/continuous-learning/SKILL.md +129 -0
- package/skills/continuous-learning/config.json +18 -0
- package/skills/continuous-learning/evaluate-session.sh +69 -0
- package/skills/continuous-learning-v2/SKILL.md +358 -0
- package/skills/continuous-learning-v2/agents/observer-loop.sh +322 -0
- package/skills/continuous-learning-v2/agents/observer.md +198 -0
- package/skills/continuous-learning-v2/agents/session-guardian.sh +150 -0
- package/skills/continuous-learning-v2/agents/start-observer.sh +248 -0
- package/skills/continuous-learning-v2/config.json +8 -0
- package/skills/continuous-learning-v2/hooks/observe.sh +476 -0
- package/skills/continuous-learning-v2/scripts/detect-project.sh +288 -0
- package/skills/continuous-learning-v2/scripts/instinct-cli.py +1519 -0
- package/skills/continuous-learning-v2/scripts/lib/homunculus-dir.sh +31 -0
- package/skills/continuous-learning-v2/scripts/migrate-homunculus.sh +62 -0
- package/skills/continuous-learning-v2/scripts/test_parse_instinct.py +1018 -0
- package/skills/cost-aware-llm-pipeline/SKILL.md +182 -0
- package/skills/cost-tracking/SKILL.md +147 -0
- package/skills/council/SKILL.md +202 -0
- package/skills/cpp-coding-standards/SKILL.md +722 -0
- package/skills/cpp-testing/SKILL.md +323 -0
- package/skills/crosspost/SKILL.md +110 -0
- package/skills/csharp-testing/SKILL.md +320 -0
- package/skills/customer-billing-ops/SKILL.md +139 -0
- package/skills/customs-trade-compliance/SKILL.md +262 -0
- package/skills/dart-flutter-patterns/SKILL.md +562 -0
- package/skills/dashboard-builder/SKILL.md +108 -0
- package/skills/data-scraper-agent/SKILL.md +764 -0
- package/skills/database-migrations/SKILL.md +428 -0
- package/skills/deep-research/SKILL.md +158 -0
- package/skills/defi-amm-security/SKILL.md +166 -0
- package/skills/deployment-patterns/SKILL.md +426 -0
- package/skills/design-system/SKILL.md +81 -0
- package/skills/django-celery/SKILL.md +456 -0
- package/skills/django-patterns/SKILL.md +733 -0
- package/skills/django-security/SKILL.md +592 -0
- package/skills/django-tdd/SKILL.md +728 -0
- package/skills/django-verification/SKILL.md +468 -0
- package/skills/dmux-workflows/SKILL.md +190 -0
- package/skills/docker-patterns/SKILL.md +363 -0
- package/skills/documentation-lookup/SKILL.md +89 -0
- package/skills/dotnet-patterns/SKILL.md +320 -0
- package/skills/e2e-testing/SKILL.md +325 -0
- package/skills/email-ops/SKILL.md +120 -0
- package/skills/energy-procurement/SKILL.md +227 -0
- package/skills/enterprise-agent-ops/SKILL.md +49 -0
- package/skills/error-handling/SKILL.md +375 -0
- package/skills/eval-harness/SKILL.md +269 -0
- package/skills/evm-token-decimals/SKILL.md +130 -0
- package/skills/exa-search/SKILL.md +106 -0
- package/skills/fal-ai-media/SKILL.md +287 -0
- package/skills/fastapi-patterns/SKILL.md +327 -0
- package/skills/finance-billing-ops/SKILL.md +126 -0
- package/skills/flox-environments/SKILL.md +496 -0
- package/skills/flutter-dart-code-review/SKILL.md +434 -0
- package/skills/foundation-models-on-device/SKILL.md +243 -0
- package/skills/frontend-design-direction/SKILL.md +92 -0
- package/skills/frontend-patterns/SKILL.md +641 -0
- package/skills/frontend-slides/SKILL.md +183 -0
- package/skills/frontend-slides/STYLE_PRESETS.md +330 -0
- package/skills/frontend-slides/animation-patterns.md +122 -0
- package/skills/frontend-slides/html-template.md +419 -0
- package/skills/frontend-slides/scripts/export-pdf.sh +418 -0
- package/skills/frontend-slides/scripts/extract-pptx.py +96 -0
- package/skills/frontend-slides/viewport-base.css +153 -0
- package/skills/fsharp-testing/SKILL.md +279 -0
- package/skills/gan-style-harness/SKILL.md +278 -0
- package/skills/gateguard/SKILL.md +125 -0
- package/skills/git-workflow/SKILL.md +714 -0
- package/skills/github-ops/SKILL.md +143 -0
- package/skills/golang-patterns/SKILL.md +673 -0
- package/skills/golang-testing/SKILL.md +719 -0
- package/skills/google-workspace-ops/SKILL.md +94 -0
- package/skills/healthcare-cdss-patterns/SKILL.md +245 -0
- package/skills/healthcare-emr-patterns/SKILL.md +159 -0
- package/skills/healthcare-eval-harness/SKILL.md +207 -0
- package/skills/healthcare-phi-compliance/SKILL.md +145 -0
- package/skills/hermes-imports/SKILL.md +87 -0
- package/skills/hexagonal-architecture/SKILL.md +275 -0
- package/skills/hipaa-compliance/SKILL.md +78 -0
- package/skills/homelab-network-readiness/SKILL.md +169 -0
- package/skills/homelab-network-setup/SKILL.md +129 -0
- package/skills/homelab-pihole-dns/SKILL.md +274 -0
- package/skills/homelab-vlan-segmentation/SKILL.md +311 -0
- package/skills/homelab-wireguard-vpn/SKILL.md +305 -0
- package/skills/hookify-rules/SKILL.md +128 -0
- package/skills/inventory-demand-planning/SKILL.md +246 -0
- package/skills/investor-materials/SKILL.md +95 -0
- package/skills/investor-outreach/SKILL.md +90 -0
- package/skills/ios-icon-gen/SKILL.md +157 -0
- package/skills/ios-icon-gen/scripts/generate_icons.swift +258 -0
- package/skills/ios-icon-gen/scripts/iconify_gen.sh +235 -0
- package/skills/iterative-retrieval/SKILL.md +209 -0
- package/skills/java-coding-standards/SKILL.md +382 -0
- package/skills/jira-integration/SKILL.md +292 -0
- package/skills/jpa-patterns/SKILL.md +150 -0
- package/skills/knowledge-ops/SKILL.md +153 -0
- package/skills/kotlin-coroutines-flows/SKILL.md +283 -0
- package/skills/kotlin-exposed-patterns/SKILL.md +718 -0
- package/skills/kotlin-ktor-patterns/SKILL.md +688 -0
- package/skills/kotlin-patterns/SKILL.md +710 -0
- package/skills/kotlin-testing/SKILL.md +823 -0
- package/skills/laravel-patterns/SKILL.md +414 -0
- package/skills/laravel-plugin-discovery/SKILL.md +228 -0
- package/skills/laravel-security/SKILL.md +284 -0
- package/skills/laravel-tdd/SKILL.md +282 -0
- package/skills/laravel-verification/SKILL.md +178 -0
- package/skills/lead-intelligence/SKILL.md +320 -0
- package/skills/lead-intelligence/agents/enrichment-agent.md +85 -0
- package/skills/lead-intelligence/agents/mutual-mapper.md +75 -0
- package/skills/lead-intelligence/agents/outreach-drafter.md +98 -0
- package/skills/lead-intelligence/agents/signal-scorer.md +60 -0
- package/skills/liquid-glass-design/SKILL.md +279 -0
- package/skills/llm-trading-agent-security/SKILL.md +146 -0
- package/skills/logistics-exception-management/SKILL.md +221 -0
- package/skills/make-interfaces-feel-better/SKILL.md +151 -0
- package/skills/manim-video/SKILL.md +88 -0
- package/skills/manim-video/assets/network_graph_scene.py +52 -0
- package/skills/market-research/SKILL.md +74 -0
- package/skills/mcp-server-patterns/SKILL.md +68 -0
- package/skills/messages-ops/SKILL.md +103 -0
- package/skills/mle-workflow/SKILL.md +345 -0
- package/skills/motion-advanced/SKILL.md +596 -0
- package/skills/motion-foundations/SKILL.md +299 -0
- package/skills/motion-patterns/SKILL.md +435 -0
- package/skills/motion-ui/SKILL.md +574 -0
- package/skills/mysql-patterns/SKILL.md +411 -0
- package/skills/nanoclaw-repl/SKILL.md +32 -0
- package/skills/nestjs-patterns/SKILL.md +229 -0
- package/skills/netmiko-ssh-automation/SKILL.md +173 -0
- package/skills/network-bgp-diagnostics/SKILL.md +167 -0
- package/skills/network-config-validation/SKILL.md +210 -0
- package/skills/network-interface-health/SKILL.md +152 -0
- package/skills/nextjs-turbopack/SKILL.md +43 -0
- package/skills/nodejs-keccak256/SKILL.md +102 -0
- package/skills/nutrient-document-processing/SKILL.md +166 -0
- package/skills/nuxt4-patterns/SKILL.md +99 -0
- package/skills/openclaw-persona-forge/SKILL.md +288 -0
- package/skills/openclaw-persona-forge/gacha.py +224 -0
- package/skills/openclaw-persona-forge/gacha.sh +5 -0
- package/skills/openclaw-persona-forge/references/avatar-style.md +124 -0
- package/skills/openclaw-persona-forge/references/boundary-rules.md +53 -0
- package/skills/openclaw-persona-forge/references/error-handling.md +53 -0
- package/skills/openclaw-persona-forge/references/identity-tension.md +48 -0
- package/skills/openclaw-persona-forge/references/naming-system.md +39 -0
- package/skills/openclaw-persona-forge/references/output-template.md +166 -0
- package/skills/opensource-pipeline/SKILL.md +254 -0
- package/skills/perl-patterns/SKILL.md +503 -0
- package/skills/perl-security/SKILL.md +502 -0
- package/skills/perl-testing/SKILL.md +474 -0
- package/skills/plan-orchestrate/SKILL.md +253 -0
- package/skills/plankton-code-quality/SKILL.md +236 -0
- package/skills/postgres-patterns/SKILL.md +146 -0
- package/skills/product-capability/SKILL.md +140 -0
- package/skills/product-lens/SKILL.md +91 -0
- package/skills/production-audit/SKILL.md +206 -0
- package/skills/production-scheduling/SKILL.md +237 -0
- package/skills/project-flow-ops/SKILL.md +110 -0
- package/skills/prompt-optimizer/SKILL.md +398 -0
- package/skills/python-patterns/SKILL.md +749 -0
- package/skills/python-testing/SKILL.md +815 -0
- package/skills/pytorch-patterns/SKILL.md +395 -0
- package/skills/quality-nonconformance/SKILL.md +259 -0
- package/skills/quarkus-patterns/SKILL.md +721 -0
- package/skills/quarkus-security/SKILL.md +466 -0
- package/skills/quarkus-tdd/SKILL.md +810 -0
- package/skills/quarkus-verification/SKILL.md +478 -0
- package/skills/ralphinho-rfc-pipeline/SKILL.md +66 -0
- package/skills/redis-patterns/SKILL.md +402 -0
- package/skills/regex-vs-llm-structured-text/SKILL.md +219 -0
- package/skills/remotion-video-creation/SKILL.md +43 -0
- package/skills/remotion-video-creation/rules/3d.md +86 -0
- package/skills/remotion-video-creation/rules/animations.md +29 -0
- package/skills/remotion-video-creation/rules/assets/charts-bar-chart.tsx +173 -0
- package/skills/remotion-video-creation/rules/assets/text-animations-typewriter.tsx +100 -0
- package/skills/remotion-video-creation/rules/assets/text-animations-word-highlight.tsx +108 -0
- package/skills/remotion-video-creation/rules/assets.md +78 -0
- package/skills/remotion-video-creation/rules/audio.md +172 -0
- package/skills/remotion-video-creation/rules/calculate-metadata.md +104 -0
- package/skills/remotion-video-creation/rules/can-decode.md +75 -0
- package/skills/remotion-video-creation/rules/charts.md +58 -0
- package/skills/remotion-video-creation/rules/compositions.md +146 -0
- package/skills/remotion-video-creation/rules/display-captions.md +126 -0
- package/skills/remotion-video-creation/rules/extract-frames.md +229 -0
- package/skills/remotion-video-creation/rules/fonts.md +152 -0
- package/skills/remotion-video-creation/rules/get-audio-duration.md +58 -0
- package/skills/remotion-video-creation/rules/get-video-dimensions.md +68 -0
- package/skills/remotion-video-creation/rules/get-video-duration.md +58 -0
- package/skills/remotion-video-creation/rules/gifs.md +138 -0
- package/skills/remotion-video-creation/rules/images.md +130 -0
- package/skills/remotion-video-creation/rules/import-srt-captions.md +67 -0
- package/skills/remotion-video-creation/rules/lottie.md +67 -0
- package/skills/remotion-video-creation/rules/measuring-dom-nodes.md +34 -0
- package/skills/remotion-video-creation/rules/measuring-text.md +143 -0
- package/skills/remotion-video-creation/rules/sequencing.md +106 -0
- package/skills/remotion-video-creation/rules/tailwind.md +11 -0
- package/skills/remotion-video-creation/rules/text-animations.md +20 -0
- package/skills/remotion-video-creation/rules/timing.md +179 -0
- package/skills/remotion-video-creation/rules/transcribe-captions.md +19 -0
- package/skills/remotion-video-creation/rules/transitions.md +122 -0
- package/skills/remotion-video-creation/rules/trimming.md +52 -0
- package/skills/remotion-video-creation/rules/videos.md +171 -0
- package/skills/repo-scan/SKILL.md +78 -0
- package/skills/research-ops/SKILL.md +111 -0
- package/skills/returns-reverse-logistics/SKILL.md +239 -0
- package/skills/rules-distill/SKILL.md +263 -0
- package/skills/rules-distill/scripts/scan-rules.sh +58 -0
- package/skills/rules-distill/scripts/scan-skills.sh +129 -0
- package/skills/rust-patterns/SKILL.md +498 -0
- package/skills/rust-testing/SKILL.md +499 -0
- package/skills/safety-guard/SKILL.md +74 -0
- package/skills/santa-method/SKILL.md +306 -0
- package/skills/scientific-db-pubmed-database/SKILL.md +175 -0
- package/skills/scientific-db-uspto-database/SKILL.md +177 -0
- package/skills/scientific-pkg-gget/SKILL.md +166 -0
- package/skills/scientific-thinking-literature-review/SKILL.md +192 -0
- package/skills/scientific-thinking-scholar-evaluation/SKILL.md +160 -0
- package/skills/search-first/SKILL.md +181 -0
- package/skills/security-bounty-hunter/SKILL.md +99 -0
- package/skills/security-review/SKILL.md +502 -0
- package/skills/security-review/cloud-infrastructure-security.md +361 -0
- package/skills/seo/SKILL.md +153 -0
- package/skills/skill-comply/SKILL.md +57 -0
- package/skills/skill-comply/fixtures/compliant_trace.jsonl +5 -0
- package/skills/skill-comply/fixtures/noncompliant_trace.jsonl +3 -0
- package/skills/skill-comply/fixtures/tdd_spec.yaml +44 -0
- package/skills/skill-comply/prompts/classifier.md +24 -0
- package/skills/skill-comply/prompts/scenario_generator.md +62 -0
- package/skills/skill-comply/prompts/spec_generator.md +42 -0
- package/skills/skill-comply/pyproject.toml +15 -0
- package/skills/skill-comply/scripts/__init__.py +0 -0
- package/skills/skill-comply/scripts/classifier.py +85 -0
- package/skills/skill-comply/scripts/grader.py +124 -0
- package/skills/skill-comply/scripts/parser.py +107 -0
- package/skills/skill-comply/scripts/report.py +170 -0
- package/skills/skill-comply/scripts/run.py +127 -0
- package/skills/skill-comply/scripts/runner.py +186 -0
- package/skills/skill-comply/scripts/scenario_generator.py +70 -0
- package/skills/skill-comply/scripts/spec_generator.py +72 -0
- package/skills/skill-comply/scripts/utils.py +13 -0
- package/skills/skill-comply/tests/test_grader.py +197 -0
- package/skills/skill-comply/tests/test_parser.py +90 -0
- package/skills/skill-comply/tests/test_runner.py +172 -0
- package/skills/skill-scout/SKILL.md +139 -0
- package/skills/skill-stocktake/SKILL.md +193 -0
- package/skills/skill-stocktake/scripts/quick-diff.sh +87 -0
- package/skills/skill-stocktake/scripts/save-results.sh +56 -0
- package/skills/skill-stocktake/scripts/scan.sh +170 -0
- package/skills/social-graph-ranker/SKILL.md +153 -0
- package/skills/springboot-patterns/SKILL.md +313 -0
- package/skills/springboot-security/SKILL.md +271 -0
- package/skills/springboot-tdd/SKILL.md +157 -0
- package/skills/springboot-verification/SKILL.md +230 -0
- package/skills/strategic-compact/SKILL.md +129 -0
- package/skills/strategic-compact/suggest-compact.sh +54 -0
- package/skills/swift-actor-persistence/SKILL.md +142 -0
- package/skills/swift-concurrency-6-2/SKILL.md +216 -0
- package/skills/swift-protocol-di-testing/SKILL.md +189 -0
- package/skills/swiftui-patterns/SKILL.md +259 -0
- package/skills/tdd-workflow/SKILL.md +462 -0
- package/skills/team-builder/SKILL.md +166 -0
- package/skills/terminal-ops/SKILL.md +108 -0
- package/skills/tinystruct-patterns/SKILL.md +130 -0
- package/skills/tinystruct-patterns/references/architecture.md +77 -0
- package/skills/tinystruct-patterns/references/data-handling.md +35 -0
- package/skills/tinystruct-patterns/references/routing.md +57 -0
- package/skills/tinystruct-patterns/references/system-usage.md +74 -0
- package/skills/tinystruct-patterns/references/testing.md +59 -0
- package/skills/token-budget-advisor/SKILL.md +133 -0
- package/skills/ui-demo/SKILL.md +464 -0
- package/skills/ui-to-vue/SKILL.md +134 -0
- package/skills/unified-notifications-ops/SKILL.md +186 -0
- package/skills/verification-loop/SKILL.md +125 -0
- package/skills/video-editing/SKILL.md +309 -0
- package/skills/videodb/SKILL.md +373 -0
- package/skills/videodb/reference/api-reference.md +550 -0
- package/skills/videodb/reference/capture-reference.md +407 -0
- package/skills/videodb/reference/capture.md +101 -0
- package/skills/videodb/reference/editor.md +443 -0
- package/skills/videodb/reference/generative.md +331 -0
- package/skills/videodb/reference/rtstream-reference.md +564 -0
- package/skills/videodb/reference/rtstream.md +65 -0
- package/skills/videodb/reference/search.md +230 -0
- package/skills/videodb/reference/streaming.md +406 -0
- package/skills/videodb/reference/use-cases.md +118 -0
- package/skills/videodb/scripts/ws_listener.py +282 -0
- package/skills/visa-doc-translate/README.md +86 -0
- package/skills/visa-doc-translate/SKILL.md +117 -0
- package/skills/vite-patterns/SKILL.md +448 -0
- package/skills/windows-desktop-e2e/SKILL.md +787 -0
- package/skills/workspace-surface-audit/SKILL.md +124 -0
- package/skills/x-api/SKILL.md +233 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""CLI entry point for skill-comply."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
from scripts.grader import grade
|
|
14
|
+
from scripts.report import generate_report
|
|
15
|
+
from scripts.runner import run_scenario
|
|
16
|
+
from scripts.scenario_generator import generate_scenarios
|
|
17
|
+
from scripts.spec_generator import generate_spec
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main() -> None:
    """Run the skill-comply pipeline from the command line.

    The pipeline has four stages:

    1. Generate a compliance spec from the skill file.
    2. Generate scenarios from the skill file and the spec.
    3. Execute every scenario and grade its observations against the spec.
    4. Write a report and log an overall compliance summary.

    With ``--dry-run`` only stages 1 and 2 run; the spec steps are logged and
    nothing is executed or written to disk.

    Exits with status 1 when the skill path is not an existing file.
    """
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    parser = argparse.ArgumentParser(
        description="skill-comply: Measure skill compliance rates",
    )
    parser.add_argument(
        "skill",
        type=Path,
        help="Path to skill/rule file to test",
    )
    parser.add_argument(
        "--model",
        default="sonnet",
        help="Model for scenario execution (default: sonnet)",
    )
    parser.add_argument(
        "--gen-model",
        default="haiku",
        help="Model for spec/scenario generation (default: haiku)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Generate spec and scenarios without executing",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output report path (default: results/<skill-name>.md)",
    )

    args = parser.parse_args()

    if not args.skill.is_file():
        logger.error("Error: Skill file not found: %s", args.skill)
        sys.exit(1)

    # Default report location. The directory is NOT created here: it is only
    # needed when a report is actually written, and the write step below
    # creates the parent of whichever output path ends up being used. This
    # keeps --dry-run and custom --output runs free of filesystem side effects.
    results_dir = Path(__file__).parent.parent / "results"

    # Step 1: Generate compliance spec
    logger.info("[1/4] Generating compliance spec from %s...", args.skill.name)
    spec = generate_spec(args.skill, model=args.gen_model)
    logger.info(" %d steps extracted", len(spec.steps))

    # Step 2: Generate scenarios
    spec_yaml = yaml.dump({
        "steps": [
            {"id": s.id, "description": s.description, "required": s.required}
            for s in spec.steps
        ]
    })
    logger.info("[2/4] Generating scenarios (3 prompt strictness levels)...")
    scenarios = generate_scenarios(args.skill, spec_yaml, model=args.gen_model)
    logger.info(" %d scenarios generated", len(scenarios))

    for scenario in scenarios:
        # Descriptions are truncated to keep the listing to one line each.
        logger.info(" - %s: %s", scenario.level_name, scenario.description[:60])

    if args.dry_run:
        logger.info("\n[dry-run] Spec and scenarios generated. Skipping execution.")
        logger.info("\nSpec: %s (%d steps)", spec.id, len(spec.steps))
        for step in spec.steps:
            marker = "*" if step.required else " "
            logger.info(" [%s] %s: %s", marker, step.id, step.description)
        return

    # Step 3: Execute scenarios
    logger.info("[3/4] Executing scenarios (model=%s)...", args.model)
    # Each entry is (level name, grading result, observations).
    graded_results: list[tuple[str, Any, list[Any]]] = []

    for scenario in scenarios:
        logger.info(" Running %s...", scenario.level_name)
        run = run_scenario(scenario, model=args.model)
        result = grade(spec, list(run.observations))
        graded_results.append((scenario.level_name, result, list(run.observations)))
        logger.info(" %s: %.0f%%", scenario.level_name, result.compliance_rate * 100)

    # Step 4: Generate report
    # A file named SKILL.* is identified by its directory; anything else by
    # its own stem.
    skill_name = args.skill.parent.name if args.skill.stem == "SKILL" else args.skill.stem
    output_path = args.output or results_dir / f"{skill_name}.md"
    logger.info("[4/4] Generating report...")

    report = generate_report(args.skill, spec, graded_results, scenarios=scenarios)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so the report does not depend on the platform's locale
    # encoding (which can reject non-ASCII characters).
    output_path.write_text(report, encoding="utf-8")
    logger.info(" Report saved to %s", output_path)

    # Summary
    if not graded_results:
        logger.warning("No scenarios were executed.")
        return
    overall = sum(r.compliance_rate for _, r, _obs in graded_results) / len(graded_results)
    logger.info("\n%s", "=" * 50)
    logger.info("Overall Compliance: %.0f%%", overall * 100)
    if overall < spec.threshold_promote_to_hook:
        logger.info(
            "Recommendation: Some steps have low compliance. "
            "Consider promoting them to hooks. See the report for details."
        )


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Run scenarios via claude -p and parse tool calls from stream-json output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import shlex
|
|
8
|
+
import shutil
|
|
9
|
+
import subprocess
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from scripts.parser import ObservationEvent
|
|
14
|
+
from scripts.scenario_generator import Scenario
|
|
15
|
+
|
|
16
|
+
# Root directory; each scenario gets its own subdirectory underneath it.
SANDBOX_BASE = Path("/tmp") / "skill-comply-sandbox"

# Model names accepted for scenario execution.
ALLOWED_MODELS = frozenset(("haiku", "sonnet", "opus"))

# Directory-changing commands are shell builtins, not executables, so they
# cannot be launched through subprocess.run. The working directory is set
# with the cwd= keyword instead, which means setup_commands written in the
# usual shell style (starting with one of these) have to be tolerated.
SHELL_BUILTINS = frozenset(("cd", "pushd", "popd"))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class ScenarioRun:
    """Immutable record of one executed scenario."""

    # The scenario that was executed.
    scenario: Scenario
    # Events parsed from the run's stream-json output, stored as a tuple.
    observations: tuple[ObservationEvent, ...]
    # Directory the scenario was executed in.
    sandbox_dir: Path
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def run_scenario(
    scenario: Scenario,
    model: str = "sonnet",
    max_turns: int = 30,
    timeout: int = 300,
) -> ScenarioRun:
    """Run one scenario through ``claude -p`` and collect its observations.

    A fresh sandbox is prepared for the scenario, ``claude -p`` is invoked
    inside it with stream-json output, and the captured stdout is parsed
    into observation events.

    Args:
        scenario: Scenario to run; its ``id`` names the sandbox and its
            ``prompt`` is passed to ``claude -p``.
        model: Model name; must be a member of ``ALLOWED_MODELS``.
        max_turns: Value forwarded as ``--max-turns``.
        timeout: Timeout passed to ``subprocess.run`` for the claude call.

    Returns:
        A ``ScenarioRun`` holding the scenario, the parsed observations and
        the sandbox directory.

    Raises:
        ValueError: If ``model`` is not in ``ALLOWED_MODELS``.
        RuntimeError: If ``claude -p`` exits non-zero for any reason other
            than reaching the turn cap.
    """
    if model not in ALLOWED_MODELS:
        raise ValueError(f"Unknown model: {model!r}. Allowed: {ALLOWED_MODELS}")

    sandbox_dir = _safe_sandbox_dir(scenario.id)
    _setup_sandbox(sandbox_dir, scenario)

    command = [
        "claude", "-p", scenario.prompt,
        "--model", model,
        "--max-turns", str(max_turns),
        "--add-dir", str(sandbox_dir),
        "--allowedTools", "Read,Write,Edit,Bash,Glob,Grep",
        "--output-format", "stream-json",
        "--verbose",
    ]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=sandbox_dir,
    )

    if completed.returncode != 0:
        # Reaching --max-turns makes claude -p exit with rc=1 even though the
        # stream-json output is still complete and parseable. That graceful
        # stop is accepted so capped scenarios still yield observations.
        hit_turn_cap = (
            completed.returncode == 1
            and '"terminal_reason":"max_turns"' in completed.stdout
        )
        if not hit_turn_cap:
            # Report stderr and the end of stdout together: the real failure
            # context (model error JSON, partial stream-json) frequently
            # lands on stdout while stderr only has generic transport / auth
            # messages.
            raise RuntimeError(
                f"claude -p failed (rc={completed.returncode}): "
                f"stderr={completed.stderr[:500]!r} stdout_tail={completed.stdout[-500:]!r}"
            )

    parsed_events = _parse_stream_json(completed.stdout)
    return ScenarioRun(
        scenario=scenario,
        observations=tuple(parsed_events),
        sandbox_dir=sandbox_dir,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _safe_sandbox_dir(scenario_id: str) -> Path:
|
|
88
|
+
"""Sanitize scenario ID and ensure path stays within sandbox base."""
|
|
89
|
+
safe_id = re.sub(r"[^a-zA-Z0-9\-_]", "_", scenario_id)
|
|
90
|
+
path = SANDBOX_BASE / safe_id
|
|
91
|
+
# Validate path stays within sandbox base (raises ValueError on traversal)
|
|
92
|
+
path.resolve().relative_to(SANDBOX_BASE.resolve())
|
|
93
|
+
return path
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _setup_sandbox(sandbox_dir: Path, scenario: Scenario) -> None:
    """Create a fresh sandbox directory and run the scenario's setup commands.

    Any existing ``sandbox_dir`` is removed, the directory is recreated, and
    ``git init`` is run inside it. Each entry of ``scenario.setup_commands``
    is then tokenized with ``shlex`` and executed (without a shell) with the
    sandbox as working directory.

    Setup commands are best-effort: exit codes are ignored, and a command
    that cannot be tokenized or launched is skipped rather than aborting the
    scenario.

    Args:
        sandbox_dir: Directory to (re)create.
        scenario: Scenario whose ``setup_commands`` are executed.
    """
    if sandbox_dir.exists():
        shutil.rmtree(sandbox_dir)
    sandbox_dir.mkdir(parents=True)

    subprocess.run(["git", "init"], cwd=sandbox_dir, capture_output=True)

    for cmd in scenario.setup_commands:
        try:
            parts = shlex.split(cmd)
        except ValueError:
            # Unbalanced quotes or a dangling escape in the command string;
            # skip it like any other unusable setup command.
            continue
        if not parts or parts[0] in SHELL_BUILTINS:
            # Shell builtins (cd/pushd/popd) cannot run as subprocess; skip.
            continue
        try:
            subprocess.run(parts, cwd=sandbox_dir, capture_output=True)
        except (FileNotFoundError, PermissionError):
            # Setup tool not installed (or not executable) in this
            # environment; skip rather than crash the whole scenario.
            continue
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _parse_stream_json(stdout: str) -> list[ObservationEvent]:
|
|
118
|
+
"""Parse claude -p stream-json output into ObservationEvents.
|
|
119
|
+
|
|
120
|
+
Stream-json format:
|
|
121
|
+
- type=assistant with content[].type=tool_use → tool call (name, input)
|
|
122
|
+
- type=user with content[].type=tool_result → tool result (output)
|
|
123
|
+
"""
|
|
124
|
+
events: list[ObservationEvent] = []
|
|
125
|
+
pending: dict[str, dict] = {}
|
|
126
|
+
event_counter = 0
|
|
127
|
+
|
|
128
|
+
for line in stdout.strip().splitlines():
|
|
129
|
+
try:
|
|
130
|
+
msg = json.loads(line)
|
|
131
|
+
except json.JSONDecodeError:
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
msg_type = msg.get("type")
|
|
135
|
+
|
|
136
|
+
if msg_type == "assistant":
|
|
137
|
+
content = msg.get("message", {}).get("content", [])
|
|
138
|
+
for block in content:
|
|
139
|
+
if block.get("type") == "tool_use":
|
|
140
|
+
tool_use_id = block.get("id", "")
|
|
141
|
+
tool_input = block.get("input", {})
|
|
142
|
+
input_str = (
|
|
143
|
+
json.dumps(tool_input)[:5000]
|
|
144
|
+
if isinstance(tool_input, dict)
|
|
145
|
+
else str(tool_input)[:5000]
|
|
146
|
+
)
|
|
147
|
+
pending[tool_use_id] = {
|
|
148
|
+
"tool": block.get("name", "unknown"),
|
|
149
|
+
"input": input_str,
|
|
150
|
+
"order": event_counter,
|
|
151
|
+
}
|
|
152
|
+
event_counter += 1
|
|
153
|
+
|
|
154
|
+
elif msg_type == "user":
|
|
155
|
+
content = msg.get("message", {}).get("content", [])
|
|
156
|
+
if isinstance(content, list):
|
|
157
|
+
for block in content:
|
|
158
|
+
tool_use_id = block.get("tool_use_id", "")
|
|
159
|
+
if tool_use_id in pending:
|
|
160
|
+
info = pending.pop(tool_use_id)
|
|
161
|
+
output_content = block.get("content", "")
|
|
162
|
+
if isinstance(output_content, list):
|
|
163
|
+
output_str = json.dumps(output_content)[:5000]
|
|
164
|
+
else:
|
|
165
|
+
output_str = str(output_content)[:5000]
|
|
166
|
+
|
|
167
|
+
events.append(ObservationEvent(
|
|
168
|
+
timestamp=f"T{info['order']:04d}",
|
|
169
|
+
event="tool_complete",
|
|
170
|
+
tool=info["tool"],
|
|
171
|
+
session=msg.get("session_id", "unknown"),
|
|
172
|
+
input=info["input"],
|
|
173
|
+
output=output_str,
|
|
174
|
+
))
|
|
175
|
+
|
|
176
|
+
for _tool_use_id, info in pending.items():
|
|
177
|
+
events.append(ObservationEvent(
|
|
178
|
+
timestamp=f"T{info['order']:04d}",
|
|
179
|
+
event="tool_complete",
|
|
180
|
+
tool=info["tool"],
|
|
181
|
+
session="unknown",
|
|
182
|
+
input=info["input"],
|
|
183
|
+
output="",
|
|
184
|
+
))
|
|
185
|
+
|
|
186
|
+
return sorted(events, key=lambda e: e.timestamp)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Generate pressure scenarios from skill + spec using LLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from scripts.utils import extract_yaml
|
|
12
|
+
|
|
13
|
+
# Directory of prompt templates, located at ../prompts relative to this
# module's directory; generate_scenarios() reads scenario_generator.md from it.
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class Scenario:
    """Immutable record describing one generated scenario.

    Instances are built by generate_scenarios() from the ``scenarios`` list
    of the YAML returned by the model.
    """

    # Scenario identifier, read from the YAML ``id`` key.
    id: str
    # Numeric level; generate_scenarios() sorts its result by this value.
    level: int
    # Read from the YAML ``level_name`` key; presumably a human-readable
    # label for ``level`` — confirm against the prompt template.
    level_name: str
    # Read from the YAML ``description`` key.
    description: str
    # Prompt text; generate_scenarios() strips surrounding whitespace.
    prompt: str
    # Command strings from the YAML ``setup_commands`` key (optional there).
    setup_commands: tuple[str, ...]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_scenarios(
    skill_path: Path,
    spec_yaml: str,
    model: str = "haiku",
) -> list[Scenario]:
    """Generate scenarios for a skill, ordered by ascending level.

    Fills the ``scenario_generator.md`` prompt template with the skill text
    and the spec YAML, calls ``claude -p`` with it, and parses the YAML the
    model returns.

    Args:
        skill_path: Path to the skill file whose text is inserted into the
            prompt.
        spec_yaml: Compliance spec YAML inserted into the prompt.
        model: Model name passed to ``claude --model``.

    Returns:
        The parsed scenarios sorted by ``level``.

    Raises:
        RuntimeError: If ``claude -p`` exits non-zero or prints nothing.
        subprocess.TimeoutExpired: If the call exceeds 120 seconds.
        yaml.YAMLError, KeyError, TypeError: If the model output is not the
            expected YAML structure.
    """
    skill_content = skill_path.read_text()
    prompt_template = (PROMPTS_DIR / "scenario_generator.md").read_text()
    # Fill both placeholders in a single pass over the template: chained
    # .replace() calls would re-scan the inserted skill text and substitute
    # any literal "{spec_yaml}" the skill file happens to contain.
    prompt = skill_content.join(
        chunk.replace("{spec_yaml}", spec_yaml)
        for chunk in prompt_template.split("{skill_content}")
    )

    result = subprocess.run(
        ["claude", "-p", prompt, "--model", model, "--output-format", "text"],
        capture_output=True,
        text=True,
        timeout=120,
    )

    if result.returncode != 0:
        raise RuntimeError(f"claude -p failed: {result.stderr}")

    if not result.stdout.strip():
        raise RuntimeError("claude -p returned empty output")

    raw_yaml = extract_yaml(result.stdout)
    parsed = yaml.safe_load(raw_yaml)

    scenarios = [
        Scenario(
            id=s["id"],
            level=s["level"],
            level_name=s["level_name"],
            description=s["description"],
            prompt=s["prompt"].strip(),
            setup_commands=_coerce_setup_commands(s.get("setup_commands")),
        )
        for s in parsed["scenarios"]
    ]

    return sorted(scenarios, key=lambda s: s.level)


def _coerce_setup_commands(raw: object) -> tuple[str, ...]:
    """Normalize a YAML ``setup_commands`` value into a tuple of commands."""
    if raw is None:
        # Key missing, or present with an explicit YAML null.
        return ()
    if isinstance(raw, str):
        # A single command written as a scalar; tuple(raw) would split it
        # into one "command" per character.
        return (raw,)
    return tuple(raw)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Generate compliance specs from skill files using LLM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from scripts.parser import ComplianceSpec, parse_spec
|
|
12
|
+
from scripts.utils import extract_yaml
|
|
13
|
+
|
|
14
|
+
# Directory of prompt templates, located at ../prompts relative to this
# module's directory; generate_spec() reads spec_generator.md from it.
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def generate_spec(
    skill_path: Path,
    model: str = "haiku",
    max_retries: int = 2,
) -> ComplianceSpec:
    """Generate a compliance spec from a skill/rule file.

    Calls claude -p with the spec_generator prompt, parses YAML output.
    Retries on YAML parse errors with error feedback: the previous error is
    appended to the prompt of the next attempt.

    Args:
        skill_path: Path to the skill/rule file inserted into the prompt.
        model: Model name passed to ``claude --model``.
        max_retries: Number of additional attempts after the first one.
            Values below zero are treated as zero.

    Returns:
        The spec parsed from the model output by ``parse_spec``.

    Raises:
        RuntimeError: If ``claude -p`` exits non-zero (not retried).
        yaml.YAMLError, KeyError, TypeError: The parse error of the final
            attempt, once all attempts are exhausted.
        subprocess.TimeoutExpired: If a call exceeds 120 seconds.
    """
    skill_content = skill_path.read_text()
    prompt_template = (PROMPTS_DIR / "spec_generator.md").read_text()
    base_prompt = prompt_template.replace("{skill_content}", skill_content)

    # Always make at least one attempt, even for a negative max_retries.
    final_attempt = max(max_retries, 0)
    last_error: Exception | None = None

    for attempt in range(final_attempt + 1):
        prompt = base_prompt
        if attempt > 0 and last_error is not None:
            prompt += (
                f"\n\nPREVIOUS ATTEMPT FAILED with YAML parse error:\n"
                f"{last_error}\n\n"
                f"Please fix the YAML. Remember to quote all string values "
                f"that contain colons, e.g.: description: \"Use type: description format\""
            )

        result = subprocess.run(
            ["claude", "-p", prompt, "--model", model, "--output-format", "text"],
            capture_output=True,
            text=True,
            timeout=120,
        )

        if result.returncode != 0:
            raise RuntimeError(f"claude -p failed: {result.stderr}")

        raw_yaml = extract_yaml(result.stdout)

        # parse_spec() takes a path, so the YAML is round-tripped through a
        # temporary file. The path is recorded before writing and the write
        # sits inside the try block so the file is removed even if the
        # write itself fails.
        tmp_path: Path | None = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".yaml", delete=False,
            ) as f:
                tmp_path = Path(f.name)
                f.write(raw_yaml)
            return parse_spec(tmp_path)
        except (yaml.YAMLError, KeyError, TypeError) as e:
            last_error = e
            if attempt == final_attempt:
                raise
        finally:
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)

    raise RuntimeError("unreachable")
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Shared utilities for skill-comply scripts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def extract_yaml(text: str) -> str:
    """Extract YAML from LLM output, stripping markdown fences if present.

    A fence is a line that starts with three backticks at column 0, so
    indented fences inside a YAML block scalar are left untouched.

    - With two or more fence lines, the lines between the first fence and
      the next one are returned; any prose before or after that fenced
      block is dropped.
    - With fewer than two fence lines, a fence on the first and/or last
      line is stripped and everything else is returned unchanged.

    Args:
        text: Raw model output.

    Returns:
        The extracted YAML text, without surrounding whitespace or fences.
    """
    lines = text.strip().splitlines()
    fence_rows = [i for i, line in enumerate(lines) if line.startswith("```")]

    if len(fence_rows) >= 2:
        opening, closing = fence_rows[0], fence_rows[1]
        return "\n".join(lines[opening + 1:closing])

    # Zero or one fence: only strip it when it sits on the first/last line.
    if lines and lines[0].startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Tests for grader module — compliance scoring with LLM classification."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from unittest.mock import patch
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from scripts.grader import ComplianceResult, StepResult, grade
|
|
9
|
+
from scripts.parser import ComplianceSpec, Detector, ObservationEvent, Step, parse_spec, parse_trace
|
|
10
|
+
|
|
11
|
+
# Directory of test fixture files (../fixtures relative to this module's
# directory); the spec and trace fixtures below are loaded from it.
FIXTURES = Path(__file__).parent.parent / "fixtures"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
def tdd_spec():
    """Compliance spec parsed from the ``tdd_spec.yaml`` fixture file."""
    spec_file = FIXTURES / "tdd_spec.yaml"
    return parse_spec(spec_file)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
def compliant_trace():
    """Trace parsed from the ``compliant_trace.jsonl`` fixture file."""
    trace_file = FIXTURES / "compliant_trace.jsonl"
    return parse_trace(trace_file)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
def noncompliant_trace():
    """Trace parsed from the ``noncompliant_trace.jsonl`` fixture file."""
    trace_file = FIXTURES / "noncompliant_trace.jsonl"
    return parse_trace(trace_file)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _mock_compliant_classification(spec, trace, model="haiku"): # noqa: ARG001
|
|
30
|
+
"""Simulate LLM correctly classifying a compliant trace."""
|
|
31
|
+
return {
|
|
32
|
+
"write_test": [0],
|
|
33
|
+
"run_test_red": [1],
|
|
34
|
+
"write_impl": [2],
|
|
35
|
+
"run_test_green": [3],
|
|
36
|
+
"refactor": [4],
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _mock_noncompliant_classification(spec, trace, model="haiku"):
|
|
41
|
+
"""Simulate LLM classifying a noncompliant trace (impl before test)."""
|
|
42
|
+
return {
|
|
43
|
+
"write_impl": [0], # src/fib.py written first
|
|
44
|
+
"write_test": [1], # test written second
|
|
45
|
+
"run_test_green": [2], # only a passing test run
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _mock_empty_classification(spec, trace, model="haiku"):
|
|
50
|
+
return {}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TestGradeCompliant:
    """grade() on the compliant trace, with classify_events mocked so that
    every step of the TDD spec is matched."""

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_returns_compliance_result(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        assert isinstance(outcome, ComplianceResult)

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_full_compliance(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        assert outcome.compliance_rate == 1.0

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_all_required_steps_detected(self, mock_cls, tdd_spec, compliant_trace) -> None:
        required_ids = ("write_test", "run_test_red", "write_impl", "run_test_green")
        outcome = grade(tdd_spec, compliant_trace)
        for step in outcome.steps:
            if step.step_id in required_ids:
                assert step.detected

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_optional_step_detected(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        refactor_matches = (s for s in outcome.steps if s.step_id == "refactor")
        refactor_step = next(refactor_matches)
        assert refactor_step.detected is True

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_no_hook_promotion_recommended(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        assert outcome.recommend_hook_promotion is False

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_step_evidence_not_empty(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        # Every detected step must come with at least one piece of evidence.
        for detected_step in (s for s in outcome.steps if s.detected):
            assert len(detected_step.evidence) > 0
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class TestGradeNoncompliant:
    """grade() on the noncompliant trace, with classify_events mocked to
    report the implementation being written before the test."""

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_low_compliance(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        outcome = grade(tdd_spec, noncompliant_trace)
        assert outcome.compliance_rate < 1.0

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_write_test_fails_ordering(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        """write_test has before_step=write_impl, but test is written AFTER impl."""
        outcome = grade(tdd_spec, noncompliant_trace)
        write_test_matches = (s for s in outcome.steps if s.step_id == "write_test")
        write_test_step = next(write_test_matches)
        assert write_test_step.detected is False

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_run_test_red_not_detected(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        outcome = grade(tdd_spec, noncompliant_trace)
        run_red_matches = (s for s in outcome.steps if s.step_id == "run_test_red")
        run_red_step = next(run_red_matches)
        assert run_red_step.detected is False

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_hook_promotion_recommended(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        outcome = grade(tdd_spec, noncompliant_trace)
        assert outcome.recommend_hook_promotion is True

    @patch("scripts.grader.classify_events", side_effect=_mock_noncompliant_classification)
    def test_failure_reasons_present(self, mock_cls, tdd_spec, noncompliant_trace) -> None:
        outcome = grade(tdd_spec, noncompliant_trace)
        # The "refactor" step is excluded from this check.
        for step in outcome.steps:
            if step.detected or step.step_id == "refactor":
                continue
            assert step.failure_reason is not None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class TestGradeEdgeCases:
    """Boundary behaviour of grade(): empty traces, result metadata, and
    ordering constraints that reference a step declared later in the spec."""

    @patch("scripts.grader.classify_events", side_effect=_mock_empty_classification)
    def test_empty_trace(self, mock_cls, tdd_spec) -> None:
        outcome = grade(tdd_spec, [])
        assert outcome.compliance_rate == 0.0
        assert outcome.recommend_hook_promotion is True

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_compliance_rate_is_ratio_of_required_only(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        assert outcome.compliance_rate == 1.0

    @patch("scripts.grader.classify_events", side_effect=_mock_compliant_classification)
    def test_spec_id_in_result(self, mock_cls, tdd_spec, compliant_trace) -> None:
        outcome = grade(tdd_spec, compliant_trace)
        assert outcome.spec_id == "tdd-workflow"

    @patch("scripts.grader.classify_events")
    def test_after_step_can_reference_later_declared_spec_step(self, mock_cls) -> None:
        # step_a is declared first but must occur after step_b.
        first_declared = Step(
            id="step_a",
            description="Occurs after step_b even though it is declared first",
            required=True,
            detector=Detector(
                description="Event A",
                after_step="step_b",
            ),
        )
        second_declared = Step(
            id="step_b",
            description="Reference step declared later",
            required=True,
            detector=Detector(
                description="Event B",
            ),
        )
        spec = ComplianceSpec(
            id="out-of-order-after-step",
            name="Out of order after_step",
            source_rule="rules/common/testing.md",
            version="1.0",
            steps=(first_declared, second_declared),
            threshold_promote_to_hook=0.5,
        )

        # In the trace, the step_b event (index 0) precedes step_a (index 1).
        event_b = ObservationEvent(
            timestamp="2026-03-20T10:00:01Z",
            event="tool_complete",
            tool="Write",
            session="sess-order",
            input='{"file_path":"src/b.py"}',
            output="step b",
        )
        event_a = ObservationEvent(
            timestamp="2026-03-20T10:00:02Z",
            event="tool_complete",
            tool="Write",
            session="sess-order",
            input='{"file_path":"src/a.py"}',
            output="step a",
        )
        trace = [event_b, event_a]
        mock_cls.return_value = {
            "step_a": [1],
            "step_b": [0],
        }

        outcome = grade(spec, trace)

        step_a = next(s for s in outcome.steps if s.step_id == "step_a")
        step_b = next(s for s in outcome.steps if s.step_id == "step_b")
        assert step_a.detected is True
        assert step_a.failure_reason is None
        assert step_b.detected is True
        assert outcome.compliance_rate == 1.0
|