@event4u/agent-config 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/README.md +64 -0
- package/.agent-src/commands/agent-handoff.md +64 -0
- package/.agent-src/commands/agent-status.md +83 -0
- package/.agent-src/commands/agents-audit.md +243 -0
- package/.agent-src/commands/agents-cleanup.md +169 -0
- package/.agent-src/commands/agents-prepare.md +137 -0
- package/.agent-src/commands/analyze-reference-repo.md +191 -0
- package/.agent-src/commands/bug-fix.md +181 -0
- package/.agent-src/commands/bug-investigate.md +175 -0
- package/.agent-src/commands/commit.md +121 -0
- package/.agent-src/commands/compress.md +177 -0
- package/.agent-src/commands/config-agent-settings.md +126 -0
- package/.agent-src/commands/context-create.md +167 -0
- package/.agent-src/commands/context-refactor.md +170 -0
- package/.agent-src/commands/copilot-agents-init.md +150 -0
- package/.agent-src/commands/copilot-agents-optimize.md +251 -0
- package/.agent-src/commands/create-pr-description.md +112 -0
- package/.agent-src/commands/create-pr.md +76 -0
- package/.agent-src/commands/do-and-judge.md +114 -0
- package/.agent-src/commands/do-in-steps.md +84 -0
- package/.agent-src/commands/e2e-heal.md +98 -0
- package/.agent-src/commands/e2e-plan.md +85 -0
- package/.agent-src/commands/estimate-ticket.md +80 -0
- package/.agent-src/commands/feature-dev.md +111 -0
- package/.agent-src/commands/feature-explore.md +180 -0
- package/.agent-src/commands/feature-plan.md +288 -0
- package/.agent-src/commands/feature-refactor.md +181 -0
- package/.agent-src/commands/feature-roadmap.md +184 -0
- package/.agent-src/commands/fix-ci.md +48 -0
- package/.agent-src/commands/fix-portability.md +97 -0
- package/.agent-src/commands/fix-pr-bot-comments.md +146 -0
- package/.agent-src/commands/fix-pr-comments.md +58 -0
- package/.agent-src/commands/fix-pr-developer-comments.md +152 -0
- package/.agent-src/commands/fix-references.md +94 -0
- package/.agent-src/commands/fix-seeder.md +146 -0
- package/.agent-src/commands/implement-ticket.md +133 -0
- package/.agent-src/commands/jira-ticket.md +71 -0
- package/.agent-src/commands/judge.md +86 -0
- package/.agent-src/commands/memory-add.md +130 -0
- package/.agent-src/commands/memory-full.md +97 -0
- package/.agent-src/commands/memory-promote.md +144 -0
- package/.agent-src/commands/mode.md +121 -0
- package/.agent-src/commands/module-create.md +132 -0
- package/.agent-src/commands/module-explore.md +157 -0
- package/.agent-src/commands/optimize-agents.md +139 -0
- package/.agent-src/commands/optimize-augmentignore.md +262 -0
- package/.agent-src/commands/optimize-rtk-filters.md +120 -0
- package/.agent-src/commands/optimize-skills.md +121 -0
- package/.agent-src/commands/override-create.md +97 -0
- package/.agent-src/commands/override-manage.md +96 -0
- package/.agent-src/commands/package-reset.md +154 -0
- package/.agent-src/commands/package-test.md +154 -0
- package/.agent-src/commands/prepare-for-review.md +91 -0
- package/.agent-src/commands/project-analyze.md +300 -0
- package/.agent-src/commands/project-health.md +95 -0
- package/.agent-src/commands/propose-memory.md +108 -0
- package/.agent-src/commands/quality-fix.md +106 -0
- package/.agent-src/commands/refine-ticket.md +81 -0
- package/.agent-src/commands/review-changes.md +130 -0
- package/.agent-src/commands/review-routing.md +111 -0
- package/.agent-src/commands/roadmap-create.md +110 -0
- package/.agent-src/commands/roadmap-execute.md +68 -0
- package/.agent-src/commands/rule-compliance-audit.md +139 -0
- package/.agent-src/commands/tests-create.md +73 -0
- package/.agent-src/commands/tests-execute.md +58 -0
- package/.agent-src/commands/threat-model.md +115 -0
- package/.agent-src/commands/update-form-request-messages.md +189 -0
- package/.agent-src/commands/upstream-contribute.md +171 -0
- package/.agent-src/contexts/augment-infrastructure.md +181 -0
- package/.agent-src/contexts/documentation-hierarchy.md +142 -0
- package/.agent-src/contexts/model-recommendations.md +142 -0
- package/.agent-src/contexts/override-system.md +187 -0
- package/.agent-src/contexts/skills-and-commands.md +154 -0
- package/.agent-src/contexts/subagent-configuration.md +62 -0
- package/.agent-src/guidelines/agent-infra/agent-interaction-and-decision-quality.md +110 -0
- package/.agent-src/guidelines/agent-infra/break-glass-usage.md +113 -0
- package/.agent-src/guidelines/agent-infra/developer-judgment.md +82 -0
- package/.agent-src/guidelines/agent-infra/engineering-memory-data-format.md +117 -0
- package/.agent-src/guidelines/agent-infra/layered-settings.md +158 -0
- package/.agent-src/guidelines/agent-infra/memory-access.md +121 -0
- package/.agent-src/guidelines/agent-infra/naming.md +69 -0
- package/.agent-src/guidelines/agent-infra/output-patterns.md +117 -0
- package/.agent-src/guidelines/agent-infra/review-routing-data-format.md +144 -0
- package/.agent-src/guidelines/agent-infra/role-contracts.md +211 -0
- package/.agent-src/guidelines/agent-infra/role-mode-router.md +89 -0
- package/.agent-src/guidelines/agent-infra/runtime-layer.md +89 -0
- package/.agent-src/guidelines/agent-infra/self-improvement-pipeline.md +135 -0
- package/.agent-src/guidelines/agent-infra/size-and-scope.md +189 -0
- package/.agent-src/guidelines/agent-infra/tool-integration.md +73 -0
- package/.agent-src/guidelines/docs/readme-size-and-splitting.md +153 -0
- package/.agent-src/guidelines/e2e/playwright.md +363 -0
- package/.agent-src/guidelines/php/api-design.md +115 -0
- package/.agent-src/guidelines/php/artisan-commands.md +81 -0
- package/.agent-src/guidelines/php/blade-ui.md +78 -0
- package/.agent-src/guidelines/php/controllers.md +90 -0
- package/.agent-src/guidelines/php/database.md +111 -0
- package/.agent-src/guidelines/php/eloquent.md +208 -0
- package/.agent-src/guidelines/php/flux.md +80 -0
- package/.agent-src/guidelines/php/general.md +191 -0
- package/.agent-src/guidelines/php/git.md +96 -0
- package/.agent-src/guidelines/php/jobs.md +111 -0
- package/.agent-src/guidelines/php/livewire.md +71 -0
- package/.agent-src/guidelines/php/logging.md +79 -0
- package/.agent-src/guidelines/php/naming.md +89 -0
- package/.agent-src/guidelines/php/patterns/dependency-injection.md +57 -0
- package/.agent-src/guidelines/php/patterns/dtos.md +199 -0
- package/.agent-src/guidelines/php/patterns/events.md +67 -0
- package/.agent-src/guidelines/php/patterns/factory.md +53 -0
- package/.agent-src/guidelines/php/patterns/pipelines.md +66 -0
- package/.agent-src/guidelines/php/patterns/policies.md +66 -0
- package/.agent-src/guidelines/php/patterns/repositories.md +122 -0
- package/.agent-src/guidelines/php/patterns/service-layer.md +64 -0
- package/.agent-src/guidelines/php/patterns/strategy.md +69 -0
- package/.agent-src/guidelines/php/patterns.md +28 -0
- package/.agent-src/guidelines/php/performance.md +92 -0
- package/.agent-src/guidelines/php/resources.md +100 -0
- package/.agent-src/guidelines/php/security.md +110 -0
- package/.agent-src/guidelines/php/sql.md +97 -0
- package/.agent-src/guidelines/php/validations.md +119 -0
- package/.agent-src/guidelines/php/websocket.md +100 -0
- package/.agent-src/personas/README.md +104 -0
- package/.agent-src/personas/ai-agent.md +77 -0
- package/.agent-src/personas/critical-challenger.md +73 -0
- package/.agent-src/personas/developer.md +73 -0
- package/.agent-src/personas/product-owner.md +78 -0
- package/.agent-src/personas/qa.md +67 -0
- package/.agent-src/personas/senior-engineer.md +77 -0
- package/.agent-src/personas/stakeholder.md +78 -0
- package/.agent-src/rules/agent-docs.md +61 -0
- package/.agent-src/rules/analysis-skill-routing.md +48 -0
- package/.agent-src/rules/architecture.md +62 -0
- package/.agent-src/rules/artifact-drafting-protocol.md +73 -0
- package/.agent-src/rules/ask-when-uncertain.md +52 -0
- package/.agent-src/rules/augment-portability.md +38 -0
- package/.agent-src/rules/augment-source-of-truth.md +128 -0
- package/.agent-src/rules/capture-learnings.md +89 -0
- package/.agent-src/rules/cli-output-handling.md +94 -0
- package/.agent-src/rules/commit-conventions.md +64 -0
- package/.agent-src/rules/context-hygiene.md +90 -0
- package/.agent-src/rules/docker-commands.md +55 -0
- package/.agent-src/rules/docs-sync.md +79 -0
- package/.agent-src/rules/downstream-changes.md +70 -0
- package/.agent-src/rules/e2e-testing.md +53 -0
- package/.agent-src/rules/guidelines.md +90 -0
- package/.agent-src/rules/improve-before-implement.md +94 -0
- package/.agent-src/rules/language-and-tone.md +104 -0
- package/.agent-src/rules/laravel-translations.md +48 -0
- package/.agent-src/rules/markdown-safe-codeblocks.md +18 -0
- package/.agent-src/rules/minimal-safe-diff.md +87 -0
- package/.agent-src/rules/missing-tool-handling.md +62 -0
- package/.agent-src/rules/model-recommendation.md +70 -0
- package/.agent-src/rules/package-ci-checks.md +80 -0
- package/.agent-src/rules/php-coding.md +63 -0
- package/.agent-src/rules/preservation-guard.md +29 -0
- package/.agent-src/rules/review-routing-awareness.md +125 -0
- package/.agent-src/rules/reviewer-awareness.md +92 -0
- package/.agent-src/rules/roadmap-progress-sync.md +56 -0
- package/.agent-src/rules/role-mode-adherence.md +54 -0
- package/.agent-src/rules/rule-type-governance.md +46 -0
- package/.agent-src/rules/runtime-safety.md +42 -0
- package/.agent-src/rules/scope-control.md +40 -0
- package/.agent-src/rules/security-sensitive-stop.md +77 -0
- package/.agent-src/rules/size-enforcement.md +29 -0
- package/.agent-src/rules/skill-improvement-trigger.md +58 -0
- package/.agent-src/rules/skill-quality.md +110 -0
- package/.agent-src/rules/slash-commands.md +30 -0
- package/.agent-src/rules/think-before-action.md +91 -0
- package/.agent-src/rules/token-efficiency.md +99 -0
- package/.agent-src/rules/tool-safety.md +36 -0
- package/.agent-src/rules/upstream-proposal.md +76 -0
- package/.agent-src/rules/user-interaction.md +79 -0
- package/.agent-src/rules/verify-before-complete.md +120 -0
- package/.agent-src/scripts/scan-seeder-violations.php +145 -0
- package/.agent-src/scripts/update_roadmap_progress.py +244 -0
- package/.agent-src/skills/adversarial-review/SKILL.md +149 -0
- package/.agent-src/skills/agent-docs-writing/SKILL.md +234 -0
- package/.agent-src/skills/analysis-autonomous-mode/SKILL.md +197 -0
- package/.agent-src/skills/analysis-skill-router/SKILL.md +134 -0
- package/.agent-src/skills/api-design/SKILL.md +104 -0
- package/.agent-src/skills/api-endpoint/SKILL.md +185 -0
- package/.agent-src/skills/api-testing/SKILL.md +206 -0
- package/.agent-src/skills/artisan-commands/SKILL.md +78 -0
- package/.agent-src/skills/authz-review/SKILL.md +171 -0
- package/.agent-src/skills/aws-infrastructure/SKILL.md +152 -0
- package/.agent-src/skills/blade-ui/SKILL.md +75 -0
- package/.agent-src/skills/blast-radius-analyzer/SKILL.md +185 -0
- package/.agent-src/skills/bug-analyzer/SKILL.md +256 -0
- package/.agent-src/skills/check-refs/SKILL.md +72 -0
- package/.agent-src/skills/code-refactoring/SKILL.md +200 -0
- package/.agent-src/skills/code-review/SKILL.md +214 -0
- package/.agent-src/skills/command-routing/SKILL.md +96 -0
- package/.agent-src/skills/command-writing/SKILL.md +143 -0
- package/.agent-src/skills/composer-packages/SKILL.md +172 -0
- package/.agent-src/skills/context-authoring/SKILL.md +157 -0
- package/.agent-src/skills/context-document/SKILL.md +153 -0
- package/.agent-src/skills/conventional-commits-writing/SKILL.md +70 -0
- package/.agent-src/skills/copilot-agents-optimization/SKILL.md +220 -0
- package/.agent-src/skills/copilot-config/SKILL.md +203 -0
- package/.agent-src/skills/dashboard-design/SKILL.md +116 -0
- package/.agent-src/skills/data-flow-mapper/SKILL.md +160 -0
- package/.agent-src/skills/database/SKILL.md +91 -0
- package/.agent-src/skills/dependency-upgrade/SKILL.md +204 -0
- package/.agent-src/skills/description-assist/SKILL.md +169 -0
- package/.agent-src/skills/design-review/SKILL.md +228 -0
- package/.agent-src/skills/devcontainer/SKILL.md +121 -0
- package/.agent-src/skills/developer-like-execution/SKILL.md +276 -0
- package/.agent-src/skills/docker/SKILL.md +245 -0
- package/.agent-src/skills/dto-creator/SKILL.md +117 -0
- package/.agent-src/skills/eloquent/SKILL.md +92 -0
- package/.agent-src/skills/eloquent/evals/last-run.json +99 -0
- package/.agent-src/skills/eloquent/evals/triggers.json +16 -0
- package/.agent-src/skills/estimate-ticket/SKILL.md +186 -0
- package/.agent-src/skills/estimate-ticket/evals/output-schema.yml +20 -0
- package/.agent-src/skills/estimate-ticket/evals/triggers.json +18 -0
- package/.agent-src/skills/fe-design/SKILL.md +223 -0
- package/.agent-src/skills/feature-planning/SKILL.md +226 -0
- package/.agent-src/skills/file-editor/SKILL.md +129 -0
- package/.agent-src/skills/finishing-a-development-branch/SKILL.md +200 -0
- package/.agent-src/skills/flux/SKILL.md +64 -0
- package/.agent-src/skills/git-workflow/SKILL.md +102 -0
- package/.agent-src/skills/github-ci/SKILL.md +122 -0
- package/.agent-src/skills/grafana/SKILL.md +168 -0
- package/.agent-src/skills/guideline-writing/SKILL.md +147 -0
- package/.agent-src/skills/jira-integration/SKILL.md +182 -0
- package/.agent-src/skills/jobs-events/SKILL.md +87 -0
- package/.agent-src/skills/judge-bug-hunter/SKILL.md +157 -0
- package/.agent-src/skills/judge-code-quality/SKILL.md +158 -0
- package/.agent-src/skills/judge-security-auditor/SKILL.md +167 -0
- package/.agent-src/skills/judge-test-coverage/SKILL.md +154 -0
- package/.agent-src/skills/laravel/SKILL.md +195 -0
- package/.agent-src/skills/laravel-horizon/SKILL.md +169 -0
- package/.agent-src/skills/laravel-mail/SKILL.md +193 -0
- package/.agent-src/skills/laravel-middleware/SKILL.md +185 -0
- package/.agent-src/skills/laravel-notifications/SKILL.md +168 -0
- package/.agent-src/skills/laravel-pennant/SKILL.md +188 -0
- package/.agent-src/skills/laravel-pulse/SKILL.md +160 -0
- package/.agent-src/skills/laravel-reverb/SKILL.md +205 -0
- package/.agent-src/skills/laravel-scheduling/SKILL.md +167 -0
- package/.agent-src/skills/laravel-validation/SKILL.md +71 -0
- package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +249 -0
- package/.agent-src/skills/lint-skills/SKILL.md +72 -0
- package/.agent-src/skills/livewire/SKILL.md +79 -0
- package/.agent-src/skills/logging-monitoring/SKILL.md +100 -0
- package/.agent-src/skills/mcp/SKILL.md +193 -0
- package/.agent-src/skills/merge-conflicts/SKILL.md +158 -0
- package/.agent-src/skills/migration-creator/SKILL.md +160 -0
- package/.agent-src/skills/module-management/SKILL.md +154 -0
- package/.agent-src/skills/multi-tenancy/SKILL.md +129 -0
- package/.agent-src/skills/openapi/SKILL.md +154 -0
- package/.agent-src/skills/override-management/SKILL.md +186 -0
- package/.agent-src/skills/performance/SKILL.md +69 -0
- package/.agent-src/skills/performance-analysis/SKILL.md +118 -0
- package/.agent-src/skills/pest-testing/SKILL.md +321 -0
- package/.agent-src/skills/php-coder/SKILL.md +78 -0
- package/.agent-src/skills/php-coder/evals/triggers.json +16 -0
- package/.agent-src/skills/php-debugging/SKILL.md +184 -0
- package/.agent-src/skills/php-service/SKILL.md +96 -0
- package/.agent-src/skills/playwright-testing/SKILL.md +244 -0
- package/.agent-src/skills/project-analysis-core/SKILL.md +138 -0
- package/.agent-src/skills/project-analysis-hypothesis-driven/SKILL.md +130 -0
- package/.agent-src/skills/project-analysis-laravel/SKILL.md +119 -0
- package/.agent-src/skills/project-analysis-nextjs/SKILL.md +123 -0
- package/.agent-src/skills/project-analysis-node-express/SKILL.md +111 -0
- package/.agent-src/skills/project-analysis-react/SKILL.md +119 -0
- package/.agent-src/skills/project-analysis-symfony/SKILL.md +111 -0
- package/.agent-src/skills/project-analysis-zend-laminas/SKILL.md +108 -0
- package/.agent-src/skills/project-analyzer/SKILL.md +341 -0
- package/.agent-src/skills/project-docs/SKILL.md +137 -0
- package/.agent-src/skills/quality-tools/SKILL.md +411 -0
- package/.agent-src/skills/readme-reviewer/SKILL.md +187 -0
- package/.agent-src/skills/readme-writing/SKILL.md +142 -0
- package/.agent-src/skills/readme-writing-package/SKILL.md +185 -0
- package/.agent-src/skills/receiving-code-review/SKILL.md +190 -0
- package/.agent-src/skills/refine-ticket/SKILL.md +310 -0
- package/.agent-src/skills/refine-ticket/detection-map.yml +124 -0
- package/.agent-src/skills/refine-ticket/evals/output-schema.yml +16 -0
- package/.agent-src/skills/refine-ticket/evals/triggers.json +16 -0
- package/.agent-src/skills/requesting-code-review/SKILL.md +199 -0
- package/.agent-src/skills/review-routing/SKILL.md +195 -0
- package/.agent-src/skills/roadmap-management/SKILL.md +303 -0
- package/.agent-src/skills/rtk-output-filtering/SKILL.md +184 -0
- package/.agent-src/skills/rule-writing/SKILL.md +148 -0
- package/.agent-src/skills/security/SKILL.md +79 -0
- package/.agent-src/skills/security-audit/SKILL.md +123 -0
- package/.agent-src/skills/sentry-integration/SKILL.md +170 -0
- package/.agent-src/skills/sequential-thinking/SKILL.md +158 -0
- package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +155 -0
- package/.agent-src/skills/skill-management/SKILL.md +121 -0
- package/.agent-src/skills/skill-reviewer/SKILL.md +218 -0
- package/.agent-src/skills/skill-writing/SKILL.md +291 -0
- package/.agent-src/skills/skill-writing/evals/triggers.json +16 -0
- package/.agent-src/skills/sql-writing/SKILL.md +74 -0
- package/.agent-src/skills/subagent-orchestration/SKILL.md +190 -0
- package/.agent-src/skills/systematic-debugging/SKILL.md +244 -0
- package/.agent-src/skills/technical-specification/SKILL.md +185 -0
- package/.agent-src/skills/terraform/SKILL.md +137 -0
- package/.agent-src/skills/terragrunt/SKILL.md +217 -0
- package/.agent-src/skills/test-driven-development/SKILL.md +252 -0
- package/.agent-src/skills/test-performance/SKILL.md +172 -0
- package/.agent-src/skills/threat-modeling/SKILL.md +189 -0
- package/.agent-src/skills/traefik/SKILL.md +319 -0
- package/.agent-src/skills/universal-project-analysis/SKILL.md +179 -0
- package/.agent-src/skills/upstream-contribute/SKILL.md +255 -0
- package/.agent-src/skills/using-git-worktrees/SKILL.md +148 -0
- package/.agent-src/skills/validate-feature-fit/SKILL.md +113 -0
- package/.agent-src/skills/verify-before-complete/SKILL.md +188 -0
- package/.agent-src/skills/websocket/SKILL.md +75 -0
- package/.agent-src/templates/AGENTS.md +146 -0
- package/.agent-src/templates/agent-settings.md +256 -0
- package/.agent-src/templates/agents/.gitattributes.fragment +16 -0
- package/.agent-src/templates/agents/agent-project-settings.example.yml +138 -0
- package/.agent-src/templates/agents/memory/architecture-decisions.example.yml +95 -0
- package/.agent-src/templates/agents/memory/domain-invariants.example.yml +80 -0
- package/.agent-src/templates/agents/memory/historical-patterns.example.yml +82 -0
- package/.agent-src/templates/agents/memory/incident-learnings.example.yml +113 -0
- package/.agent-src/templates/agents/memory/ownership.example.yml +75 -0
- package/.agent-src/templates/agents/memory/product-rules.example.yml +87 -0
- package/.agent-src/templates/agents/proposal.example.md +143 -0
- package/.agent-src/templates/command.md +84 -0
- package/.agent-src/templates/contexts/auth-model.md +59 -0
- package/.agent-src/templates/contexts/data-sensitivity.md +60 -0
- package/.agent-src/templates/contexts/deployment-order.md +72 -0
- package/.agent-src/templates/contexts/observability.md +64 -0
- package/.agent-src/templates/contexts/tenant-boundaries.md +68 -0
- package/.agent-src/templates/contexts.md +116 -0
- package/.agent-src/templates/copilot-instructions.md +115 -0
- package/.agent-src/templates/features.md +125 -0
- package/.agent-src/templates/github-workflows/memory-hygiene.yml +133 -0
- package/.agent-src/templates/github-workflows/pr-risk-review.yml +123 -0
- package/.agent-src/templates/github-workflows/proposal-drift.yml +118 -0
- package/.agent-src/templates/overrides/command.md +24 -0
- package/.agent-src/templates/overrides/guideline.md +21 -0
- package/.agent-src/templates/overrides/rule.md +19 -0
- package/.agent-src/templates/overrides/skill.md +24 -0
- package/.agent-src/templates/overrides/template.md +21 -0
- package/.agent-src/templates/persona.md +99 -0
- package/.agent-src/templates/roadmaps.md +109 -0
- package/.agent-src/templates/scripts/README.md +195 -0
- package/.agent-src/templates/scripts/check_memory.py +283 -0
- package/.agent-src/templates/scripts/check_memory_proposal.py +180 -0
- package/.agent-src/templates/scripts/historical-bug-patterns.example.yml +84 -0
- package/.agent-src/templates/scripts/implement_ticket/__init__.py +57 -0
- package/.agent-src/templates/scripts/implement_ticket/__main__.py +9 -0
- package/.agent-src/templates/scripts/implement_ticket/cli.py +171 -0
- package/.agent-src/templates/scripts/implement_ticket/delivery_state.py +130 -0
- package/.agent-src/templates/scripts/implement_ticket/dispatcher.py +134 -0
- package/.agent-src/templates/scripts/implement_ticket/persona_policy.py +85 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/__init__.py +49 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/analyze.py +98 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/implement.py +145 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/memory.py +136 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/plan.py +175 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/refine.py +140 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/report.py +195 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/test.py +180 -0
- package/.agent-src/templates/scripts/implement_ticket/steps/verify.py +170 -0
- package/.agent-src/templates/scripts/memory_hash.py +75 -0
- package/.agent-src/templates/scripts/memory_lookup.py +216 -0
- package/.agent-src/templates/scripts/memory_report.py +184 -0
- package/.agent-src/templates/scripts/memory_signal.py +167 -0
- package/.agent-src/templates/scripts/memory_status.py +156 -0
- package/.agent-src/templates/scripts/ownership-map.example.yml +87 -0
- package/.agent-src/templates/scripts/pr-risk-config.example.yml +76 -0
- package/.agent-src/templates/scripts/pr_review_routing.py +340 -0
- package/.agent-src/templates/scripts/pr_risk_review.py +211 -0
- package/.agent-src/templates/skill.md +136 -0
- package/.augment-plugin/marketplace.json +32 -0
- package/.augment-plugin/plugin.json +21 -0
- package/.claude-plugin/marketplace.json +119 -0
- package/AGENTS.md +121 -0
- package/CHANGELOG.md +279 -0
- package/CONTRIBUTING.md +176 -0
- package/LICENSE +21 -0
- package/README.md +357 -0
- package/bin/install.php +38 -0
- package/composer.json +29 -0
- package/config/agent-settings.template.yml +96 -0
- package/config/profiles/balanced.ini +10 -0
- package/config/profiles/full.ini +10 -0
- package/config/profiles/minimal.ini +10 -0
- package/docs/architecture.md +144 -0
- package/docs/customization.md +88 -0
- package/docs/development.md +171 -0
- package/docs/getting-started.md +130 -0
- package/docs/github-topics.md +84 -0
- package/docs/installation.md +376 -0
- package/docs/mcp.md +133 -0
- package/docs/quality.md +98 -0
- package/docs/skills-catalog.md +136 -0
- package/docs/troubleshooting.md +167 -0
- package/llms.txt +130 -0
- package/package.json +31 -0
- package/scripts/audit_skill_descriptions.py +168 -0
- package/scripts/check_compression.py +221 -0
- package/scripts/check_memory.py +341 -0
- package/scripts/check_memory_proposal.py +180 -0
- package/scripts/check_portability.py +320 -0
- package/scripts/check_proposal.py +269 -0
- package/scripts/check_references.py +400 -0
- package/scripts/ci_summary.py +131 -0
- package/scripts/compress.py +671 -0
- package/scripts/compress.sh +18 -0
- package/scripts/first-run.sh +109 -0
- package/scripts/generate_catalog.py +116 -0
- package/scripts/install +151 -0
- package/scripts/install-hooks.sh +29 -0
- package/scripts/install.py +487 -0
- package/scripts/install.sh +637 -0
- package/scripts/install_anthropic_key.sh +101 -0
- package/scripts/inventory_frontmatter.py +164 -0
- package/scripts/lint_marketplace.py +142 -0
- package/scripts/lint_regression.py +232 -0
- package/scripts/mcp_render.py +159 -0
- package/scripts/measure_patterns.py +376 -0
- package/scripts/memory_hash.py +75 -0
- package/scripts/memory_lookup.py +441 -0
- package/scripts/memory_report.py +336 -0
- package/scripts/memory_signal.py +210 -0
- package/scripts/memory_status.py +195 -0
- package/scripts/postinstall.sh +60 -0
- package/scripts/readme_linter.py +580 -0
- package/scripts/refine_ticket_detect.py +623 -0
- package/scripts/requirements-evals.txt +7 -0
- package/scripts/runtime_dispatcher.py +265 -0
- package/scripts/runtime_handler.py +148 -0
- package/scripts/runtime_registry.py +166 -0
- package/scripts/schemas/command.schema.json +32 -0
- package/scripts/schemas/persona.schema.json +42 -0
- package/scripts/schemas/rule.schema.json +28 -0
- package/scripts/schemas/skill.schema.json +73 -0
- package/scripts/setup.sh +230 -0
- package/scripts/setup_eval_venv.sh +58 -0
- package/scripts/skill_linter.py +2175 -0
- package/scripts/skill_trigger_eval.py +651 -0
- package/scripts/tool_registry.py +146 -0
- package/scripts/tools/__init__.py +1 -0
- package/scripts/tools/adapter_errors.py +63 -0
- package/scripts/tools/base_adapter.py +91 -0
- package/scripts/tools/github_adapter.py +128 -0
- package/scripts/tools/jira_adapter.py +115 -0
- package/scripts/update_counts.py +147 -0
- package/scripts/validate_frontmatter.py +424 -0
- package/templates/consumer-settings/README.md +46 -0
- package/templates/consumer-settings/augment-settings.json +12 -0
- package/templates/consumer-settings/claude-settings.json +9 -0
- package/templates/consumer-settings/copilot-settings.json +14 -0
|
@@ -0,0 +1,651 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Skill trigger evaluation runner.
|
|
3
|
+
|
|
4
|
+
Phase 1 of agents/roadmaps/road-to-trigger-evals.md — measures whether a
|
|
5
|
+
pilot skill's frontmatter description actually causes Claude to route to
|
|
6
|
+
the skill for queries that should trigger it, and to avoid routing for
|
|
7
|
+
queries that should not.
|
|
8
|
+
|
|
9
|
+
Input: one skill name + its evals/triggers.json (5 should-trigger +
|
|
10
|
+
5 should-not-trigger queries).
|
|
11
|
+
Output: evals/last-run.json with per-query observed vs expected,
|
|
12
|
+
aggregate precision/recall, model id, timestamp, cost estimate.
|
|
13
|
+
|
|
14
|
+
Design notes:
|
|
15
|
+
- The real Anthropic client is a **soft** dependency. If the `anthropic`
|
|
16
|
+
package is not installed, only --dry-run works (mock router).
|
|
17
|
+
- The router is injectable — tests use a `MockRouter` that returns a
|
|
18
|
+
canned list per query. CI never makes real API calls.
|
|
19
|
+
- The full set of skill frontmatter (name + description) is passed in
|
|
20
|
+
every routing call. That is the actual production routing condition;
|
|
21
|
+
anything less is cheating.
|
|
22
|
+
|
|
23
|
+
Budget per roadmap: ≤500 LoC single file, no framework.
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import stat
|
|
30
|
+
import sys
|
|
31
|
+
from dataclasses import asdict, dataclass, field
|
|
32
|
+
from datetime import datetime, timezone
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Callable, IO, Protocol
|
|
35
|
+
|
|
36
|
+
# Filesystem anchors, all derived from this script's own location:
# PROJECT_ROOT is the directory two levels above the resolved script path.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
SKILLS_SOURCE = PROJECT_ROOT.joinpath(".agent-src.uncompressed", "skills")
RESULTS_DIR = PROJECT_ROOT.joinpath("evals", "results")
DEFAULT_MODEL = "claude-sonnet-4-5"

# Approximate Anthropic API pricing in USD per one million tokens, keyed
# by model id. These only feed the cost estimate — exact billing comes
# from the API response headers once we run with a real key.
PRICE_PER_MTOK_IN = {
    "claude-sonnet-4-5": 3.0,
    "claude-opus-4": 15.0,
}
PRICE_PER_MTOK_OUT = {
    "claude-sonnet-4-5": 15.0,
    "claude-opus-4": 75.0,
}

# On-disk key file. Companion: scripts/install_anthropic_key.sh writes it
# with mode 0600; load_anthropic_key() refuses to read anything else.
ANTHROPIC_KEY_PATH = Path.home().joinpath(".config", "agent-config", "anthropic.key")

# Token heuristics used for the *pre-run* cost preview. Real billing
# comes from the API response once the user has confirmed.
TOKENS_PER_CHAR = 0.25  # ~4 chars per token, industry rule of thumb.
PROMPT_OVERHEAD_TOKENS = 200  # routing instructions above the catalogue.
OUTPUT_TOKENS_PER_QUERY = 60  # JSON `{"would_load": [...]}` is short.
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class KeyGateError(RuntimeError):
    """Signals that the on-disk key file did not pass a safety check.

    Subclasses ``RuntimeError`` and adds no behaviour of its own.
    """
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ConfirmationAborted(RuntimeError):
    """Signals that the confirmation step did not succeed.

    Raised either because the user declined at the prompt or because
    stdin is non-interactive. Subclasses ``RuntimeError`` and adds no
    behaviour of its own.
    """
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class SkillMeta:
    """One skill's identity: the name and description read from the
    frontmatter of its SKILL.md file."""

    # Value of the frontmatter `name` field.
    name: str
    # Value of the frontmatter `description` field.
    description: str
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class Query:
    """A single evaluation query paired with its expected outcome."""

    # The query text.
    q: str
    # Expected outcome flag — presumably True for a should-trigger query
    # and False for a should-not-trigger one; confirm against the loader.
    trigger: bool
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class QueryResult:
    """Outcome record for one evaluated query: expected vs observed."""

    # The query text that was evaluated.
    q: str
    # Whether the skill was expected to be triggered.
    expected: bool
    # Whether the skill was observed to be triggered.
    observed: bool
    # Names of the skills reported as loaded for this query.
    loaded_skills: list[str]
    # Pass/fail verdict for this query; it is set by the code that builds
    # the record, not derived here.
    passed: bool
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class Metrics:
    """Aggregate evaluation counters plus precision and recall.

    Every field defaults to zero; this class only stores the values and
    performs no computation itself.
    """

    # Confusion-matrix style counters.
    true_positive: int = 0
    false_positive: int = 0
    true_negative: int = 0
    false_negative: int = 0
    # Aggregate ratios; they are assigned by code outside this class.
    precision: float = 0.0
    recall: float = 0.0
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class EvalResult:
    """Complete record of one evaluation run for a single skill.

    The four leading fields are required. The remaining fields start as
    an empty query list, a zeroed ``Metrics`` and zero token/cost values.
    """

    # Name of the skill under evaluation.
    skill: str
    # Model identifier for the run.
    model: str
    # Timestamp of the run, stored as a string.
    timestamp: str
    # Identifier of the router used for the run.
    router: str
    # Per-query results; a fresh list is created for every instance.
    queries: list[QueryResult] = field(default_factory=list)
    # Aggregate metrics; a fresh Metrics is created for every instance.
    metrics: Metrics = field(default_factory=Metrics)
    # Token totals and the derived cost estimate in USD.
    input_tokens: int = 0
    output_tokens: int = 0
    cost_usd_estimate: float = 0.0
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class TriggerRouter(Protocol):
    """Structural interface for anything that can route a query.

    An implementation receives a user query together with the full skill
    catalogue and reports which skill names the model would load. How it
    gets that answer — a live API call or a canned response — is up to
    the implementation.
    """

    # Router identifier; implementations provide their own value.
    name: str

    def route(self, query: str, skills: list[SkillMeta]) -> tuple[list[str], int, int]:
        """Route `query` against `skills`.

        Returns a 3-tuple: (loaded_skill_names, input_tokens, output_tokens).
        """
        ...
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class MockRouter:
    """Router with canned decisions, meant for tests and dry-runs.

    The constructor takes a callable `decide(query, skills) -> list[str]`
    whose return value is passed through unchanged as the routing result.
    Token counts are synthetic: input is len(query)//4 + len(skills)*20
    and output is always 16. That keeps the cost-estimate math testable
    without inventing numbers that look real.
    """

    name = "mock"

    def __init__(self, decide: Callable[[str, list[SkillMeta]], list[str]]):
        # Keep the decision callable; it is invoked once per route() call.
        self._decide = decide

    def route(self, query: str, skills: list[SkillMeta]) -> tuple[list[str], int, int]:
        """Return (decided_skill_names, fake_input_tokens, fake_output_tokens)."""
        chosen = self._decide(query, skills)
        fake_input_tokens = len(query) // 4 + 20 * len(skills)
        fake_output_tokens = 16
        return chosen, fake_input_tokens, fake_output_tokens
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def load_skill_metas(root: Path = SKILLS_SOURCE) -> list[SkillMeta]:
    """Collect the frontmatter metadata of every skill directory under `root`.

    Sub-directories are visited in sorted order. A directory without a
    SKILL.md, or whose SKILL.md has no usable frontmatter, is skipped.
    """
    skill_dirs = sorted(entry for entry in root.iterdir() if entry.is_dir())
    # Generators keep the per-directory order of "exists, then parse".
    manifests = (directory / "SKILL.md" for directory in skill_dirs)
    parsed = (_parse_frontmatter(md) for md in manifests if md.exists())
    return [meta for meta in parsed if meta is not None]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _parse_frontmatter(path: Path) -> SkillMeta | None:
    """Read `path` and build a SkillMeta from its leading `---` block.

    Returns None when the file does not start with `---`, when no
    closing `---` line follows, or when `name` or `description` is
    missing from the block.
    """
    text = path.read_text(encoding="utf-8")
    # Search for the closing fence only if the file opens with one.
    closing = text.find("\n---", 3) if text.startswith("---") else -1
    if closing < 0:
        return None
    header = text[3:closing]
    found = {key: _extract_field(header, key) for key in ("name", "description")}
    if any(value is None for value in found.values()):
        return None
    return SkillMeta(name=found["name"], description=found["description"])
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _extract_field(block: str, field_name: str) -> str | None:
|
|
172
|
+
"""Minimal YAML-ish frontmatter field extractor — supports quoted
|
|
173
|
+
and unquoted single-line values. We do not pull PyYAML in here; the
|
|
174
|
+
audit script already proved stdlib suffices for our frontmatter."""
|
|
175
|
+
prefix = f"{field_name}:"
|
|
176
|
+
for line in block.splitlines():
|
|
177
|
+
stripped = line.lstrip()
|
|
178
|
+
if not stripped.startswith(prefix):
|
|
179
|
+
continue
|
|
180
|
+
value = stripped[len(prefix):].strip()
|
|
181
|
+
if value.startswith('"') and value.endswith('"'):
|
|
182
|
+
value = value[1:-1]
|
|
183
|
+
elif value.startswith("'") and value.endswith("'"):
|
|
184
|
+
value = value[1:-1]
|
|
185
|
+
return value
|
|
186
|
+
return None
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def load_triggers(path: Path) -> tuple[str, list[Query]]:
    """Load a triggers.json file and return (skill_name, queries).

    Each item of the `queries` array becomes a Query with its `trigger`
    value coerced to bool.

    Raises:
        ValueError: if the file lists no queries at all.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    skill_name = payload["skill"]
    parsed = []
    for entry in payload["queries"]:
        parsed.append(Query(q=entry["q"], trigger=bool(entry["trigger"])))
    if not parsed:
        raise ValueError(f"{path} has zero queries; roadmap minimum is 10")
    return skill_name, parsed
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def run_eval(
    skill_name: str,
    queries: list[Query],
    router: TriggerRouter,
    skills: list[SkillMeta],
    model: str = DEFAULT_MODEL,
) -> EvalResult:
    """Route every query through `router` and collect the outcome.

    A query passes when "`skill_name` was among the loaded skills"
    matches the query's expected trigger flag. Token counts returned by
    the router are summed; metrics and the cost estimate are derived
    once all queries have run.
    """
    started_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
    outcome = EvalResult(
        skill=skill_name,
        model=model,
        timestamp=started_at,
        router=router.name,
    )
    for query in queries:
        loaded, tokens_in, tokens_out = router.route(query.q, skills)
        triggered = skill_name in loaded
        record = QueryResult(
            q=query.q,
            expected=query.trigger,
            observed=triggered,
            loaded_skills=sorted(loaded),
            passed=(triggered == query.trigger),
        )
        outcome.queries.append(record)
        outcome.input_tokens += tokens_in
        outcome.output_tokens += tokens_out
    outcome.metrics = compute_metrics(outcome.queries)
    outcome.cost_usd_estimate = estimate_cost(
        model, outcome.input_tokens, outcome.output_tokens
    )
    return outcome
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def compute_metrics(results: list[QueryResult]) -> Metrics:
    """Tally the confusion matrix of `results` and derive precision/recall.

    Precision and recall are rounded to 3 decimals and fall back to 0.0
    when their denominator is zero.
    """
    tp = fp = tn = fn = 0
    for outcome in results:
        if outcome.expected:
            if outcome.observed:
                tp += 1
            else:
                fn += 1
        elif outcome.observed:
            fp += 1
        else:
            tn += 1

    predicted_positive = tp + fp
    actual_positive = tp + fn
    precision = tp / predicted_positive if predicted_positive else 0.0
    recall = tp / actual_positive if actual_positive else 0.0
    return Metrics(
        true_positive=tp,
        false_positive=fp,
        true_negative=tn,
        false_negative=fn,
        precision=round(precision, 3),
        recall=round(recall, 3),
    )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def estimate_cost(model: str, in_tokens: int, out_tokens: int) -> float:
    """Return a rough USD cost for the given token counts, rounded to 6 decimals.

    Prices are per million tokens, looked up by model name; unknown
    models fall back to 3.0 (input) and 15.0 (output). This is a
    pre-invoice sanity check against the per-run budget, not a billing
    figure.
    """
    rate_in = PRICE_PER_MTOK_IN.get(model, 3.0)
    rate_out = PRICE_PER_MTOK_OUT.get(model, 15.0)
    input_cost = (in_tokens / 1_000_000) * rate_in
    output_cost = (out_tokens / 1_000_000) * rate_out
    return round(input_cost + output_cost, 6)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def pre_estimate_cost(
    model: str,
    skills: list[SkillMeta],
    queries: list[Query],
) -> tuple[int, int, float]:
    """Estimate tokens and cost before a run, for the confirmation prompt.

    Returns (input_tokens, output_tokens, cost_usd). The numbers are
    approximate because real tokenisation happens server-side; the
    calibration intentionally leans high so the prompt never
    understates cost.
    """
    query_count = len(queries)

    # Characters of catalogue text repeated in every request.
    catalogue_chars = 0
    for skill in skills:
        catalogue_chars += len(skill.name) + len(skill.description) + 6
    fixed_chars = catalogue_chars + PROMPT_OVERHEAD_TOKENS * 4

    # max(..., 1) avoids dividing by zero for an empty query list.
    mean_query_chars = sum(len(query.q) for query in queries) // max(query_count, 1)

    tokens_per_query = (
        int(fixed_chars * TOKENS_PER_CHAR)
        + PROMPT_OVERHEAD_TOKENS
        + int(mean_query_chars * TOKENS_PER_CHAR)
    )
    in_tokens = tokens_per_query * query_count
    out_tokens = OUTPUT_TOKENS_PER_QUERY * query_count
    return in_tokens, out_tokens, estimate_cost(model, in_tokens, out_tokens)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# ── Key gate ─────────────────────────────────────────────────────────────
|
|
282
|
+
#
|
|
283
|
+
# No environment-variable fallback, no keychain fallback. The key only
|
|
284
|
+
# ever comes from a 0600 file written by scripts/install_anthropic_key.sh.
|
|
285
|
+
# Drift from that contract is a hard abort.
|
|
286
|
+
|
|
287
|
+
def load_anthropic_key(path: Path = ANTHROPIC_KEY_PATH) -> str:
    """Read the Anthropic API key stored at `path`, refusing anything unsafe.

    Checks, in order:
      - the file exists;
      - its permission bits are exactly 0o600;
      - its stripped content is non-empty;
      - the content begins with `sk-ant-`.

    Raises:
        KeyGateError: when any of the checks above fails.
    """
    if not path.exists():
        missing = (
            f"Anthropic key not found at {path}.\n"
            f" Install it with: bash scripts/install_anthropic_key.sh"
        )
        raise KeyGateError(missing)

    permissions = stat.S_IMODE(path.stat().st_mode)
    if permissions != 0o600:
        unsafe = (
            f"Unsafe permissions on {path}: got {oct(permissions)}, expected 0o600.\n"
            f" Fix: chmod 600 {path}"
        )
        raise KeyGateError(unsafe)

    key = path.read_text(encoding="utf-8").strip()
    if not key:
        raise KeyGateError(f"{path} is empty.")
    if not key.startswith("sk-ant-"):
        wrong_prefix = (
            f"{path} does not contain an Anthropic key "
            f"(expected 'sk-ant-' prefix)."
        )
        raise KeyGateError(wrong_prefix)
    return key
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
# ── Confirmation gate ────────────────────────────────────────────────────
|
|
320
|
+
#
|
|
321
|
+
# Every live invocation must pass through this. No --force, no --yes,
|
|
322
|
+
# no env-var bypass. Non-tty stdin is rejected outright so the runner
|
|
323
|
+
# cannot be scheduled, piped, or wrapped by an agent.
|
|
324
|
+
|
|
325
|
+
def build_confirmation_summary(
    *,
    model: str,
    skill: str,
    query_count: int,
    catalogue_size: int,
    input_tokens: int,
    output_tokens: int,
    cost_usd: float,
    key_path: Path,
) -> str:
    """Render the boxed pre-run summary shown before asking for confirmation.

    Token counts use thousands separators and the cost is shown with
    two decimals. The result has no trailing newline.
    """
    bar = "═" * 56
    pieces = [
        f"{bar}\n",
        f" Trigger Eval — Confirmation Required\n",
        f"{bar}\n",
        f" Model: {model}\n",
        f" Skill: {skill}\n",
        f" Queries: {query_count}\n",
        f" Catalogue: {catalogue_size} skills in routing prompt\n",
        f" Est. tokens: in≈{input_tokens:,} out≈{output_tokens:,}\n",
        f" Est. cost: ~${cost_usd:.2f} USD (actual via API headers)\n",
        f" Key source: {key_path}\n",
        f"{bar}",
    ]
    return "".join(pieces)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def require_confirmation(
|
|
353
|
+
summary: str,
|
|
354
|
+
*,
|
|
355
|
+
stdin: IO[str] | None = None,
|
|
356
|
+
stdout: IO[str] | None = None,
|
|
357
|
+
) -> None:
|
|
358
|
+
"""Print `summary`, require exactly `yes` from the controlling terminal.
|
|
359
|
+
|
|
360
|
+
Production path (stdin/stdout both None) reads from /dev/tty and
|
|
361
|
+
writes to /dev/tty, not from `sys.stdin` / `sys.stdout`. That makes
|
|
362
|
+
the gate immune to any wrapper that rebinds stdin (task runners,
|
|
363
|
+
nohup, sudo, agents) and guarantees every keystroke comes from the
|
|
364
|
+
user's real keyboard.
|
|
365
|
+
|
|
366
|
+
Tests inject explicit streams to bypass /dev/tty. When a test
|
|
367
|
+
passes an object, it must supply both `stdin` and `stdout` so the
|
|
368
|
+
isatty check covers the injected path too. `yes` is case-sensitive
|
|
369
|
+
to block accidents from auto-expanded `y`.
|
|
370
|
+
"""
|
|
371
|
+
if stdin is None and stdout is None:
|
|
372
|
+
# Production path: controlling-terminal-only. If there is no
|
|
373
|
+
# /dev/tty (CI, cron, non-interactive agent) this is a hard
|
|
374
|
+
# abort before any API call.
|
|
375
|
+
try:
|
|
376
|
+
tty_in = open("/dev/tty", "r", encoding="utf-8") # noqa: SIM115
|
|
377
|
+
tty_out = open("/dev/tty", "w", encoding="utf-8") # noqa: SIM115
|
|
378
|
+
except OSError as exc:
|
|
379
|
+
raise ConfirmationAborted(
|
|
380
|
+
"Confirmation requires a controlling terminal (/dev/tty). "
|
|
381
|
+
"Refusing to run under automation."
|
|
382
|
+
) from exc
|
|
383
|
+
try:
|
|
384
|
+
tty_out.write(summary + "\n")
|
|
385
|
+
tty_out.write(
|
|
386
|
+
"Proceed? [type 'yes' exactly to run, anything else aborts]: "
|
|
387
|
+
)
|
|
388
|
+
tty_out.flush()
|
|
389
|
+
answer = tty_in.readline().rstrip("\n")
|
|
390
|
+
finally:
|
|
391
|
+
tty_in.close()
|
|
392
|
+
tty_out.close()
|
|
393
|
+
else:
|
|
394
|
+
# Test path \u2014 both streams must be supplied.
|
|
395
|
+
assert stdin is not None and stdout is not None, (
|
|
396
|
+
"require_confirmation: stdin and stdout must both be supplied "
|
|
397
|
+
"when overriding defaults (test-only path)."
|
|
398
|
+
)
|
|
399
|
+
tty = getattr(stdin, "isatty", lambda: False)()
|
|
400
|
+
if not tty:
|
|
401
|
+
raise ConfirmationAborted(
|
|
402
|
+
"Confirmation requires an interactive tty on stdin. "
|
|
403
|
+
"Refusing non-interactive, piped, or redirected input."
|
|
404
|
+
)
|
|
405
|
+
stdout.write(summary + "\n")
|
|
406
|
+
stdout.write(
|
|
407
|
+
"Proceed? [type 'yes' exactly to run, anything else aborts]: "
|
|
408
|
+
)
|
|
409
|
+
stdout.flush()
|
|
410
|
+
answer = stdin.readline().rstrip("\n")
|
|
411
|
+
|
|
412
|
+
if answer != "yes":
|
|
413
|
+
raise ConfirmationAborted(f"Aborted at confirmation (got {answer!r}).")
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def write_result(result: EvalResult, output_path: Path) -> None:
    """Serialise `result` as indented JSON (plus a final newline) to `output_path`.

    Missing parent directories are created first.
    """
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(asdict(result), indent=2)
    output_path.write_text(serialized + "\n", encoding="utf-8")
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def format_summary(result: EvalResult) -> str:
    """Build the human-readable report printed after a run.

    The header lists skill, router/model, pass/fail counts, precision,
    recall and token/cost totals. When at least one query failed, a
    "Failures:" section follows with one line per failed query, tagged
    FN (expected to trigger) or FP (not expected to trigger).
    """
    metrics = result.metrics
    failures = [entry for entry in result.queries if not entry.passed]
    total = len(result.queries)
    passed = total - len(failures)

    report = [
        f"Skill: {result.skill}",
        f"Router: {result.router} Model: {result.model}",
        f"Queries: {total} ({passed} pass, {len(failures)} fail)",
        f"Precision: {metrics.precision} (TP={metrics.true_positive} FP={metrics.false_positive})",
        f"Recall: {metrics.recall} (TP={metrics.true_positive} FN={metrics.false_negative})",
        f"Tokens: in={result.input_tokens} out={result.output_tokens} "
        f"cost~${result.cost_usd_estimate}",
    ]
    if failures:
        report += ["", "Failures:"]
        report.extend(
            f" [{'FN' if entry.expected else 'FP'}] expected={entry.expected} "
            f"observed={entry.observed} :: {entry.q}"
            for entry in failures
        )
    return "\n".join(report)
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
ROUTING_PROMPT_HEADER = """You are a skill-routing oracle. Given the catalogue below
|
|
450
|
+
and a single user query, return ONLY the JSON object {"would_load": [...]}
|
|
451
|
+
listing the skill names whose bodies you would load to answer the query.
|
|
452
|
+
|
|
453
|
+
Rules:
|
|
454
|
+
- Use the skill frontmatter description verbatim as the only routing signal.
|
|
455
|
+
- Return at most 4 skill names.
|
|
456
|
+
- If no skill applies, return {"would_load": []}.
|
|
457
|
+
- Output ONLY the JSON. No prose, no code fences.
|
|
458
|
+
|
|
459
|
+
Skill catalogue (name :: description):
|
|
460
|
+
"""
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
class AnthropicRouter:
    """Router backed by the live Anthropic Messages API.

    Each call sends the routing header plus the full skill catalogue as
    the system prompt and the user query as the single message, then
    parses the `would_load` list from the reply. Token counts are taken
    from the response's `usage` field.
    """

    name = "anthropic"

    def __init__(
        self,
        model: str = DEFAULT_MODEL,
        client=None,
        max_tokens: int = 256,
        api_key: str | None = None,
    ):
        self._model = model
        self._max_tokens = max_tokens
        # An injected client wins; otherwise one is built from api_key.
        if client is None:
            client = self._make_client(api_key)
        self._client = client

    @staticmethod
    def _make_client(api_key: str | None):
        """Create an SDK client from an explicit key; never reads the environment."""
        if api_key is None:
            raise RuntimeError(
                "AnthropicRouter requires an explicit api_key or an injected client. "
                "Load the key with load_anthropic_key() — no env-var fallback."
            )
        try:
            import anthropic  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover - exercised only with real key
            raise RuntimeError(
                "anthropic package not installed. "
                "`pip install anthropic` or run with --dry-run."
            ) from exc
        return anthropic.Anthropic(api_key=api_key)

    def route(self, query: str, skills: list[SkillMeta]) -> tuple[list[str], int, int]:
        """Ask the model which skills it would load for `query`.

        Returns (loaded_skill_names, input_tokens, output_tokens); the
        token counts are 0 when the response carries no usage data.
        """
        entries = [f"- {s.name} :: {s.description}" for s in skills]
        system_prompt = ROUTING_PROMPT_HEADER + "\n".join(entries) + "\n"
        response = self._client.messages.create(
            model=self._model,
            max_tokens=self._max_tokens,
            system=system_prompt,
            messages=[{"role": "user", "content": query}],
        )
        loaded = _parse_would_load(_first_text_block(response))
        usage = getattr(response, "usage", None)
        if usage:
            tokens_in = getattr(usage, "input_tokens", 0)
            tokens_out = getattr(usage, "output_tokens", 0)
        else:
            tokens_in = tokens_out = 0
        return loaded, tokens_in, tokens_out
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _first_text_block(response) -> str:
    """Return the `text` of the response's first content block.

    Yields an empty string when the response has no (or empty)
    `content`, or when the first block has no truthy `text` attribute.
    """
    blocks = getattr(response, "content", None)
    if blocks:
        return getattr(blocks[0], "text", "") or ""
    return ""
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _parse_would_load(text: str) -> list[str]:
    """Parse `{"would_load": [...]}` out of a model response.

    Tolerates leading/trailing whitespace and code fences even though
    the prompt forbids them — models occasionally ignore that
    instruction.

    Returns:
        The listed names coerced to str, or an empty list when the text
        is not valid JSON, is not a JSON object, or `would_load` is not
        a list.
    """
    stripped = text.strip()
    if stripped.startswith("```"):
        stripped = stripped.strip("`").strip()
        # Drop an optional language tag as a prefix. str.lstrip("json")
        # would strip a character set and miss " json" or "JSON".
        if stripped[:4].lower() == "json":
            stripped = stripped[4:].lstrip()
    try:
        data = json.loads(stripped)
    except json.JSONDecodeError:
        return []
    # Valid JSON that is not an object (list, string, number) has no .get().
    if not isinstance(data, dict):
        return []
    loaded = data.get("would_load", [])
    if not isinstance(loaded, list):
        return []
    return [str(name) for name in loaded]
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the trigger-eval runner.

    The parser description is the first line of the module docstring.
    """
    triggers_help = (
        "Path to evals/triggers.json. Default: "
        ".agent-src.uncompressed/skills/<skill>/evals/triggers.json"
    )
    output_help = (
        "Path to write the result. Default: evals/results/"
        "<timestamp>-<skill>-<model>.json (live) or "
        "<triggers-dir>/last-run.json (dry-run)."
    )
    dry_run_help = (
        "Use MockRouter (no API call). "
        "Returns the pilot skill only for should-trigger queries."
    )
    key_path_help = (
        "Override the key file location. Default: "
        "~/.config/agent-config/anthropic.key. Mode 0600 required."
    )

    cli = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    cli.add_argument("--skill", required=True, help="Skill name (e.g. eloquent)")
    cli.add_argument("--triggers", type=Path, default=None, help=triggers_help)
    cli.add_argument("--output", type=Path, default=None, help=output_help)
    cli.add_argument("--model", default=DEFAULT_MODEL)
    cli.add_argument("--dry-run", action="store_true", help=dry_run_help)
    cli.add_argument(
        "--key-path", type=Path, default=ANTHROPIC_KEY_PATH, help=key_path_help
    )
    return cli
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _default_triggers_path(skill: str) -> Path:
    """Return `<SKILLS_SOURCE>/<skill>/evals/triggers.json`."""
    evals_dir = SKILLS_SOURCE / skill / "evals"
    return evals_dir / "triggers.json"
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _default_live_output(skill: str, model: str) -> Path:
    """Return a results path named after the current UTC time, skill and model."""
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    filename = f"{stamp}-{skill}-{model}.json"
    return RESULTS_DIR / filename
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Returns:
        0 when every query passed, 1 when at least one failed, and 2
        when the run was refused before evaluation (missing triggers
        file, skill mismatch, key gate failure, or declined
        confirmation).
    """
    opts = build_arg_parser().parse_args(argv)

    triggers_path = opts.triggers or _default_triggers_path(opts.skill)
    if not triggers_path.exists():
        print(f"❌ triggers.json not found: {triggers_path}", file=sys.stderr)
        return 2

    declared_skill, queries = load_triggers(triggers_path)
    if declared_skill != opts.skill:
        print(
            f"❌ skill mismatch: --skill={opts.skill} but triggers.json says {declared_skill}",
            file=sys.stderr,
        )
        return 2

    skills = load_skill_metas()
    if opts.dry_run:
        # The mock simply echoes the expectation recorded in triggers.json.
        expected = {item.q: item.trigger for item in queries}

        def decide(query: str, _skills: list[SkillMeta]) -> list[str]:
            if expected.get(query, False):
                return [opts.skill]
            return []

        router: TriggerRouter = MockRouter(decide)
        default_output = triggers_path.parent / "last-run.json"
    else:
        # Live run: the key gate, cost preview and confirmation must all
        # pass before the router is built and any API call is made.
        try:
            api_key = load_anthropic_key(opts.key_path)
        except KeyGateError as exc:
            print(f"❌ {exc}", file=sys.stderr)
            return 2

        est_in, est_out, est_cost = pre_estimate_cost(opts.model, skills, queries)
        preview = build_confirmation_summary(
            model=opts.model,
            skill=opts.skill,
            query_count=len(queries),
            catalogue_size=len(skills),
            input_tokens=est_in,
            output_tokens=est_out,
            cost_usd=est_cost,
            key_path=opts.key_path,
        )
        try:
            require_confirmation(preview)
        except ConfirmationAborted as exc:
            print(f"⏹ {exc}", file=sys.stderr)
            return 2

        router = AnthropicRouter(model=opts.model, api_key=api_key)
        default_output = _default_live_output(opts.skill, opts.model)

    result = run_eval(opts.skill, queries, router, skills, model=opts.model)
    output_path = opts.output or default_output
    write_result(result, output_path)
    print(format_summary(result))
    print(f"\nWrote: {output_path}")
    any_failed = any(not entry.passed for entry in result.queries)
    return 1 if any_failed else 0
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
651
|
+
|