@event4u/agent-config 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (446) hide show
  1. package/.agent-src/README.md +64 -0
  2. package/.agent-src/commands/agent-handoff.md +64 -0
  3. package/.agent-src/commands/agent-status.md +83 -0
  4. package/.agent-src/commands/agents-audit.md +243 -0
  5. package/.agent-src/commands/agents-cleanup.md +169 -0
  6. package/.agent-src/commands/agents-prepare.md +137 -0
  7. package/.agent-src/commands/analyze-reference-repo.md +191 -0
  8. package/.agent-src/commands/bug-fix.md +181 -0
  9. package/.agent-src/commands/bug-investigate.md +175 -0
  10. package/.agent-src/commands/commit.md +121 -0
  11. package/.agent-src/commands/compress.md +177 -0
  12. package/.agent-src/commands/config-agent-settings.md +126 -0
  13. package/.agent-src/commands/context-create.md +167 -0
  14. package/.agent-src/commands/context-refactor.md +170 -0
  15. package/.agent-src/commands/copilot-agents-init.md +150 -0
  16. package/.agent-src/commands/copilot-agents-optimize.md +251 -0
  17. package/.agent-src/commands/create-pr-description.md +112 -0
  18. package/.agent-src/commands/create-pr.md +76 -0
  19. package/.agent-src/commands/do-and-judge.md +114 -0
  20. package/.agent-src/commands/do-in-steps.md +84 -0
  21. package/.agent-src/commands/e2e-heal.md +98 -0
  22. package/.agent-src/commands/e2e-plan.md +85 -0
  23. package/.agent-src/commands/estimate-ticket.md +80 -0
  24. package/.agent-src/commands/feature-dev.md +111 -0
  25. package/.agent-src/commands/feature-explore.md +180 -0
  26. package/.agent-src/commands/feature-plan.md +288 -0
  27. package/.agent-src/commands/feature-refactor.md +181 -0
  28. package/.agent-src/commands/feature-roadmap.md +184 -0
  29. package/.agent-src/commands/fix-ci.md +48 -0
  30. package/.agent-src/commands/fix-portability.md +97 -0
  31. package/.agent-src/commands/fix-pr-bot-comments.md +146 -0
  32. package/.agent-src/commands/fix-pr-comments.md +58 -0
  33. package/.agent-src/commands/fix-pr-developer-comments.md +152 -0
  34. package/.agent-src/commands/fix-references.md +94 -0
  35. package/.agent-src/commands/fix-seeder.md +146 -0
  36. package/.agent-src/commands/implement-ticket.md +133 -0
  37. package/.agent-src/commands/jira-ticket.md +71 -0
  38. package/.agent-src/commands/judge.md +86 -0
  39. package/.agent-src/commands/memory-add.md +130 -0
  40. package/.agent-src/commands/memory-full.md +97 -0
  41. package/.agent-src/commands/memory-promote.md +144 -0
  42. package/.agent-src/commands/mode.md +121 -0
  43. package/.agent-src/commands/module-create.md +132 -0
  44. package/.agent-src/commands/module-explore.md +157 -0
  45. package/.agent-src/commands/optimize-agents.md +139 -0
  46. package/.agent-src/commands/optimize-augmentignore.md +262 -0
  47. package/.agent-src/commands/optimize-rtk-filters.md +120 -0
  48. package/.agent-src/commands/optimize-skills.md +121 -0
  49. package/.agent-src/commands/override-create.md +97 -0
  50. package/.agent-src/commands/override-manage.md +96 -0
  51. package/.agent-src/commands/package-reset.md +154 -0
  52. package/.agent-src/commands/package-test.md +154 -0
  53. package/.agent-src/commands/prepare-for-review.md +91 -0
  54. package/.agent-src/commands/project-analyze.md +300 -0
  55. package/.agent-src/commands/project-health.md +95 -0
  56. package/.agent-src/commands/propose-memory.md +108 -0
  57. package/.agent-src/commands/quality-fix.md +106 -0
  58. package/.agent-src/commands/refine-ticket.md +81 -0
  59. package/.agent-src/commands/review-changes.md +130 -0
  60. package/.agent-src/commands/review-routing.md +111 -0
  61. package/.agent-src/commands/roadmap-create.md +110 -0
  62. package/.agent-src/commands/roadmap-execute.md +68 -0
  63. package/.agent-src/commands/rule-compliance-audit.md +139 -0
  64. package/.agent-src/commands/tests-create.md +73 -0
  65. package/.agent-src/commands/tests-execute.md +58 -0
  66. package/.agent-src/commands/threat-model.md +115 -0
  67. package/.agent-src/commands/update-form-request-messages.md +189 -0
  68. package/.agent-src/commands/upstream-contribute.md +171 -0
  69. package/.agent-src/contexts/augment-infrastructure.md +181 -0
  70. package/.agent-src/contexts/documentation-hierarchy.md +142 -0
  71. package/.agent-src/contexts/model-recommendations.md +142 -0
  72. package/.agent-src/contexts/override-system.md +187 -0
  73. package/.agent-src/contexts/skills-and-commands.md +154 -0
  74. package/.agent-src/contexts/subagent-configuration.md +62 -0
  75. package/.agent-src/guidelines/agent-infra/agent-interaction-and-decision-quality.md +110 -0
  76. package/.agent-src/guidelines/agent-infra/break-glass-usage.md +113 -0
  77. package/.agent-src/guidelines/agent-infra/developer-judgment.md +82 -0
  78. package/.agent-src/guidelines/agent-infra/engineering-memory-data-format.md +117 -0
  79. package/.agent-src/guidelines/agent-infra/layered-settings.md +158 -0
  80. package/.agent-src/guidelines/agent-infra/memory-access.md +121 -0
  81. package/.agent-src/guidelines/agent-infra/naming.md +69 -0
  82. package/.agent-src/guidelines/agent-infra/output-patterns.md +117 -0
  83. package/.agent-src/guidelines/agent-infra/review-routing-data-format.md +144 -0
  84. package/.agent-src/guidelines/agent-infra/role-contracts.md +211 -0
  85. package/.agent-src/guidelines/agent-infra/role-mode-router.md +89 -0
  86. package/.agent-src/guidelines/agent-infra/runtime-layer.md +89 -0
  87. package/.agent-src/guidelines/agent-infra/self-improvement-pipeline.md +135 -0
  88. package/.agent-src/guidelines/agent-infra/size-and-scope.md +189 -0
  89. package/.agent-src/guidelines/agent-infra/tool-integration.md +73 -0
  90. package/.agent-src/guidelines/docs/readme-size-and-splitting.md +153 -0
  91. package/.agent-src/guidelines/e2e/playwright.md +363 -0
  92. package/.agent-src/guidelines/php/api-design.md +115 -0
  93. package/.agent-src/guidelines/php/artisan-commands.md +81 -0
  94. package/.agent-src/guidelines/php/blade-ui.md +78 -0
  95. package/.agent-src/guidelines/php/controllers.md +90 -0
  96. package/.agent-src/guidelines/php/database.md +111 -0
  97. package/.agent-src/guidelines/php/eloquent.md +208 -0
  98. package/.agent-src/guidelines/php/flux.md +80 -0
  99. package/.agent-src/guidelines/php/general.md +191 -0
  100. package/.agent-src/guidelines/php/git.md +96 -0
  101. package/.agent-src/guidelines/php/jobs.md +111 -0
  102. package/.agent-src/guidelines/php/livewire.md +71 -0
  103. package/.agent-src/guidelines/php/logging.md +79 -0
  104. package/.agent-src/guidelines/php/naming.md +89 -0
  105. package/.agent-src/guidelines/php/patterns/dependency-injection.md +57 -0
  106. package/.agent-src/guidelines/php/patterns/dtos.md +199 -0
  107. package/.agent-src/guidelines/php/patterns/events.md +67 -0
  108. package/.agent-src/guidelines/php/patterns/factory.md +53 -0
  109. package/.agent-src/guidelines/php/patterns/pipelines.md +66 -0
  110. package/.agent-src/guidelines/php/patterns/policies.md +66 -0
  111. package/.agent-src/guidelines/php/patterns/repositories.md +122 -0
  112. package/.agent-src/guidelines/php/patterns/service-layer.md +64 -0
  113. package/.agent-src/guidelines/php/patterns/strategy.md +69 -0
  114. package/.agent-src/guidelines/php/patterns.md +28 -0
  115. package/.agent-src/guidelines/php/performance.md +92 -0
  116. package/.agent-src/guidelines/php/resources.md +100 -0
  117. package/.agent-src/guidelines/php/security.md +110 -0
  118. package/.agent-src/guidelines/php/sql.md +97 -0
  119. package/.agent-src/guidelines/php/validations.md +119 -0
  120. package/.agent-src/guidelines/php/websocket.md +100 -0
  121. package/.agent-src/personas/README.md +104 -0
  122. package/.agent-src/personas/ai-agent.md +77 -0
  123. package/.agent-src/personas/critical-challenger.md +73 -0
  124. package/.agent-src/personas/developer.md +73 -0
  125. package/.agent-src/personas/product-owner.md +78 -0
  126. package/.agent-src/personas/qa.md +67 -0
  127. package/.agent-src/personas/senior-engineer.md +77 -0
  128. package/.agent-src/personas/stakeholder.md +78 -0
  129. package/.agent-src/rules/agent-docs.md +61 -0
  130. package/.agent-src/rules/analysis-skill-routing.md +48 -0
  131. package/.agent-src/rules/architecture.md +62 -0
  132. package/.agent-src/rules/artifact-drafting-protocol.md +73 -0
  133. package/.agent-src/rules/ask-when-uncertain.md +52 -0
  134. package/.agent-src/rules/augment-portability.md +38 -0
  135. package/.agent-src/rules/augment-source-of-truth.md +128 -0
  136. package/.agent-src/rules/capture-learnings.md +89 -0
  137. package/.agent-src/rules/cli-output-handling.md +94 -0
  138. package/.agent-src/rules/commit-conventions.md +64 -0
  139. package/.agent-src/rules/context-hygiene.md +90 -0
  140. package/.agent-src/rules/docker-commands.md +55 -0
  141. package/.agent-src/rules/docs-sync.md +79 -0
  142. package/.agent-src/rules/downstream-changes.md +70 -0
  143. package/.agent-src/rules/e2e-testing.md +53 -0
  144. package/.agent-src/rules/guidelines.md +90 -0
  145. package/.agent-src/rules/improve-before-implement.md +94 -0
  146. package/.agent-src/rules/language-and-tone.md +104 -0
  147. package/.agent-src/rules/laravel-translations.md +48 -0
  148. package/.agent-src/rules/markdown-safe-codeblocks.md +18 -0
  149. package/.agent-src/rules/minimal-safe-diff.md +87 -0
  150. package/.agent-src/rules/missing-tool-handling.md +62 -0
  151. package/.agent-src/rules/model-recommendation.md +70 -0
  152. package/.agent-src/rules/package-ci-checks.md +80 -0
  153. package/.agent-src/rules/php-coding.md +63 -0
  154. package/.agent-src/rules/preservation-guard.md +29 -0
  155. package/.agent-src/rules/review-routing-awareness.md +125 -0
  156. package/.agent-src/rules/reviewer-awareness.md +92 -0
  157. package/.agent-src/rules/roadmap-progress-sync.md +56 -0
  158. package/.agent-src/rules/role-mode-adherence.md +54 -0
  159. package/.agent-src/rules/rule-type-governance.md +46 -0
  160. package/.agent-src/rules/runtime-safety.md +42 -0
  161. package/.agent-src/rules/scope-control.md +40 -0
  162. package/.agent-src/rules/security-sensitive-stop.md +77 -0
  163. package/.agent-src/rules/size-enforcement.md +29 -0
  164. package/.agent-src/rules/skill-improvement-trigger.md +58 -0
  165. package/.agent-src/rules/skill-quality.md +110 -0
  166. package/.agent-src/rules/slash-commands.md +30 -0
  167. package/.agent-src/rules/think-before-action.md +91 -0
  168. package/.agent-src/rules/token-efficiency.md +99 -0
  169. package/.agent-src/rules/tool-safety.md +36 -0
  170. package/.agent-src/rules/upstream-proposal.md +76 -0
  171. package/.agent-src/rules/user-interaction.md +79 -0
  172. package/.agent-src/rules/verify-before-complete.md +120 -0
  173. package/.agent-src/scripts/scan-seeder-violations.php +145 -0
  174. package/.agent-src/scripts/update_roadmap_progress.py +244 -0
  175. package/.agent-src/skills/adversarial-review/SKILL.md +149 -0
  176. package/.agent-src/skills/agent-docs-writing/SKILL.md +234 -0
  177. package/.agent-src/skills/analysis-autonomous-mode/SKILL.md +197 -0
  178. package/.agent-src/skills/analysis-skill-router/SKILL.md +134 -0
  179. package/.agent-src/skills/api-design/SKILL.md +104 -0
  180. package/.agent-src/skills/api-endpoint/SKILL.md +185 -0
  181. package/.agent-src/skills/api-testing/SKILL.md +206 -0
  182. package/.agent-src/skills/artisan-commands/SKILL.md +78 -0
  183. package/.agent-src/skills/authz-review/SKILL.md +171 -0
  184. package/.agent-src/skills/aws-infrastructure/SKILL.md +152 -0
  185. package/.agent-src/skills/blade-ui/SKILL.md +75 -0
  186. package/.agent-src/skills/blast-radius-analyzer/SKILL.md +185 -0
  187. package/.agent-src/skills/bug-analyzer/SKILL.md +256 -0
  188. package/.agent-src/skills/check-refs/SKILL.md +72 -0
  189. package/.agent-src/skills/code-refactoring/SKILL.md +200 -0
  190. package/.agent-src/skills/code-review/SKILL.md +214 -0
  191. package/.agent-src/skills/command-routing/SKILL.md +96 -0
  192. package/.agent-src/skills/command-writing/SKILL.md +143 -0
  193. package/.agent-src/skills/composer-packages/SKILL.md +172 -0
  194. package/.agent-src/skills/context-authoring/SKILL.md +157 -0
  195. package/.agent-src/skills/context-document/SKILL.md +153 -0
  196. package/.agent-src/skills/conventional-commits-writing/SKILL.md +70 -0
  197. package/.agent-src/skills/copilot-agents-optimization/SKILL.md +220 -0
  198. package/.agent-src/skills/copilot-config/SKILL.md +203 -0
  199. package/.agent-src/skills/dashboard-design/SKILL.md +116 -0
  200. package/.agent-src/skills/data-flow-mapper/SKILL.md +160 -0
  201. package/.agent-src/skills/database/SKILL.md +91 -0
  202. package/.agent-src/skills/dependency-upgrade/SKILL.md +204 -0
  203. package/.agent-src/skills/description-assist/SKILL.md +169 -0
  204. package/.agent-src/skills/design-review/SKILL.md +228 -0
  205. package/.agent-src/skills/devcontainer/SKILL.md +121 -0
  206. package/.agent-src/skills/developer-like-execution/SKILL.md +276 -0
  207. package/.agent-src/skills/docker/SKILL.md +245 -0
  208. package/.agent-src/skills/dto-creator/SKILL.md +117 -0
  209. package/.agent-src/skills/eloquent/SKILL.md +92 -0
  210. package/.agent-src/skills/eloquent/evals/last-run.json +99 -0
  211. package/.agent-src/skills/eloquent/evals/triggers.json +16 -0
  212. package/.agent-src/skills/estimate-ticket/SKILL.md +186 -0
  213. package/.agent-src/skills/estimate-ticket/evals/output-schema.yml +20 -0
  214. package/.agent-src/skills/estimate-ticket/evals/triggers.json +18 -0
  215. package/.agent-src/skills/fe-design/SKILL.md +223 -0
  216. package/.agent-src/skills/feature-planning/SKILL.md +226 -0
  217. package/.agent-src/skills/file-editor/SKILL.md +129 -0
  218. package/.agent-src/skills/finishing-a-development-branch/SKILL.md +200 -0
  219. package/.agent-src/skills/flux/SKILL.md +64 -0
  220. package/.agent-src/skills/git-workflow/SKILL.md +102 -0
  221. package/.agent-src/skills/github-ci/SKILL.md +122 -0
  222. package/.agent-src/skills/grafana/SKILL.md +168 -0
  223. package/.agent-src/skills/guideline-writing/SKILL.md +147 -0
  224. package/.agent-src/skills/jira-integration/SKILL.md +182 -0
  225. package/.agent-src/skills/jobs-events/SKILL.md +87 -0
  226. package/.agent-src/skills/judge-bug-hunter/SKILL.md +157 -0
  227. package/.agent-src/skills/judge-code-quality/SKILL.md +158 -0
  228. package/.agent-src/skills/judge-security-auditor/SKILL.md +167 -0
  229. package/.agent-src/skills/judge-test-coverage/SKILL.md +154 -0
  230. package/.agent-src/skills/laravel/SKILL.md +195 -0
  231. package/.agent-src/skills/laravel-horizon/SKILL.md +169 -0
  232. package/.agent-src/skills/laravel-mail/SKILL.md +193 -0
  233. package/.agent-src/skills/laravel-middleware/SKILL.md +185 -0
  234. package/.agent-src/skills/laravel-notifications/SKILL.md +168 -0
  235. package/.agent-src/skills/laravel-pennant/SKILL.md +188 -0
  236. package/.agent-src/skills/laravel-pulse/SKILL.md +160 -0
  237. package/.agent-src/skills/laravel-reverb/SKILL.md +205 -0
  238. package/.agent-src/skills/laravel-scheduling/SKILL.md +167 -0
  239. package/.agent-src/skills/laravel-validation/SKILL.md +71 -0
  240. package/.agent-src/skills/learning-to-rule-or-skill/SKILL.md +249 -0
  241. package/.agent-src/skills/lint-skills/SKILL.md +72 -0
  242. package/.agent-src/skills/livewire/SKILL.md +79 -0
  243. package/.agent-src/skills/logging-monitoring/SKILL.md +100 -0
  244. package/.agent-src/skills/mcp/SKILL.md +193 -0
  245. package/.agent-src/skills/merge-conflicts/SKILL.md +158 -0
  246. package/.agent-src/skills/migration-creator/SKILL.md +160 -0
  247. package/.agent-src/skills/module-management/SKILL.md +154 -0
  248. package/.agent-src/skills/multi-tenancy/SKILL.md +129 -0
  249. package/.agent-src/skills/openapi/SKILL.md +154 -0
  250. package/.agent-src/skills/override-management/SKILL.md +186 -0
  251. package/.agent-src/skills/performance/SKILL.md +69 -0
  252. package/.agent-src/skills/performance-analysis/SKILL.md +118 -0
  253. package/.agent-src/skills/pest-testing/SKILL.md +321 -0
  254. package/.agent-src/skills/php-coder/SKILL.md +78 -0
  255. package/.agent-src/skills/php-coder/evals/triggers.json +16 -0
  256. package/.agent-src/skills/php-debugging/SKILL.md +184 -0
  257. package/.agent-src/skills/php-service/SKILL.md +96 -0
  258. package/.agent-src/skills/playwright-testing/SKILL.md +244 -0
  259. package/.agent-src/skills/project-analysis-core/SKILL.md +138 -0
  260. package/.agent-src/skills/project-analysis-hypothesis-driven/SKILL.md +130 -0
  261. package/.agent-src/skills/project-analysis-laravel/SKILL.md +119 -0
  262. package/.agent-src/skills/project-analysis-nextjs/SKILL.md +123 -0
  263. package/.agent-src/skills/project-analysis-node-express/SKILL.md +111 -0
  264. package/.agent-src/skills/project-analysis-react/SKILL.md +119 -0
  265. package/.agent-src/skills/project-analysis-symfony/SKILL.md +111 -0
  266. package/.agent-src/skills/project-analysis-zend-laminas/SKILL.md +108 -0
  267. package/.agent-src/skills/project-analyzer/SKILL.md +341 -0
  268. package/.agent-src/skills/project-docs/SKILL.md +137 -0
  269. package/.agent-src/skills/quality-tools/SKILL.md +411 -0
  270. package/.agent-src/skills/readme-reviewer/SKILL.md +187 -0
  271. package/.agent-src/skills/readme-writing/SKILL.md +142 -0
  272. package/.agent-src/skills/readme-writing-package/SKILL.md +185 -0
  273. package/.agent-src/skills/receiving-code-review/SKILL.md +190 -0
  274. package/.agent-src/skills/refine-ticket/SKILL.md +310 -0
  275. package/.agent-src/skills/refine-ticket/detection-map.yml +124 -0
  276. package/.agent-src/skills/refine-ticket/evals/output-schema.yml +16 -0
  277. package/.agent-src/skills/refine-ticket/evals/triggers.json +16 -0
  278. package/.agent-src/skills/requesting-code-review/SKILL.md +199 -0
  279. package/.agent-src/skills/review-routing/SKILL.md +195 -0
  280. package/.agent-src/skills/roadmap-management/SKILL.md +303 -0
  281. package/.agent-src/skills/rtk-output-filtering/SKILL.md +184 -0
  282. package/.agent-src/skills/rule-writing/SKILL.md +148 -0
  283. package/.agent-src/skills/security/SKILL.md +79 -0
  284. package/.agent-src/skills/security-audit/SKILL.md +123 -0
  285. package/.agent-src/skills/sentry-integration/SKILL.md +170 -0
  286. package/.agent-src/skills/sequential-thinking/SKILL.md +158 -0
  287. package/.agent-src/skills/skill-improvement-pipeline/SKILL.md +155 -0
  288. package/.agent-src/skills/skill-management/SKILL.md +121 -0
  289. package/.agent-src/skills/skill-reviewer/SKILL.md +218 -0
  290. package/.agent-src/skills/skill-writing/SKILL.md +291 -0
  291. package/.agent-src/skills/skill-writing/evals/triggers.json +16 -0
  292. package/.agent-src/skills/sql-writing/SKILL.md +74 -0
  293. package/.agent-src/skills/subagent-orchestration/SKILL.md +190 -0
  294. package/.agent-src/skills/systematic-debugging/SKILL.md +244 -0
  295. package/.agent-src/skills/technical-specification/SKILL.md +185 -0
  296. package/.agent-src/skills/terraform/SKILL.md +137 -0
  297. package/.agent-src/skills/terragrunt/SKILL.md +217 -0
  298. package/.agent-src/skills/test-driven-development/SKILL.md +252 -0
  299. package/.agent-src/skills/test-performance/SKILL.md +172 -0
  300. package/.agent-src/skills/threat-modeling/SKILL.md +189 -0
  301. package/.agent-src/skills/traefik/SKILL.md +319 -0
  302. package/.agent-src/skills/universal-project-analysis/SKILL.md +179 -0
  303. package/.agent-src/skills/upstream-contribute/SKILL.md +255 -0
  304. package/.agent-src/skills/using-git-worktrees/SKILL.md +148 -0
  305. package/.agent-src/skills/validate-feature-fit/SKILL.md +113 -0
  306. package/.agent-src/skills/verify-before-complete/SKILL.md +188 -0
  307. package/.agent-src/skills/websocket/SKILL.md +75 -0
  308. package/.agent-src/templates/AGENTS.md +146 -0
  309. package/.agent-src/templates/agent-settings.md +256 -0
  310. package/.agent-src/templates/agents/.gitattributes.fragment +16 -0
  311. package/.agent-src/templates/agents/agent-project-settings.example.yml +138 -0
  312. package/.agent-src/templates/agents/memory/architecture-decisions.example.yml +95 -0
  313. package/.agent-src/templates/agents/memory/domain-invariants.example.yml +80 -0
  314. package/.agent-src/templates/agents/memory/historical-patterns.example.yml +82 -0
  315. package/.agent-src/templates/agents/memory/incident-learnings.example.yml +113 -0
  316. package/.agent-src/templates/agents/memory/ownership.example.yml +75 -0
  317. package/.agent-src/templates/agents/memory/product-rules.example.yml +87 -0
  318. package/.agent-src/templates/agents/proposal.example.md +143 -0
  319. package/.agent-src/templates/command.md +84 -0
  320. package/.agent-src/templates/contexts/auth-model.md +59 -0
  321. package/.agent-src/templates/contexts/data-sensitivity.md +60 -0
  322. package/.agent-src/templates/contexts/deployment-order.md +72 -0
  323. package/.agent-src/templates/contexts/observability.md +64 -0
  324. package/.agent-src/templates/contexts/tenant-boundaries.md +68 -0
  325. package/.agent-src/templates/contexts.md +116 -0
  326. package/.agent-src/templates/copilot-instructions.md +115 -0
  327. package/.agent-src/templates/features.md +125 -0
  328. package/.agent-src/templates/github-workflows/memory-hygiene.yml +133 -0
  329. package/.agent-src/templates/github-workflows/pr-risk-review.yml +123 -0
  330. package/.agent-src/templates/github-workflows/proposal-drift.yml +118 -0
  331. package/.agent-src/templates/overrides/command.md +24 -0
  332. package/.agent-src/templates/overrides/guideline.md +21 -0
  333. package/.agent-src/templates/overrides/rule.md +19 -0
  334. package/.agent-src/templates/overrides/skill.md +24 -0
  335. package/.agent-src/templates/overrides/template.md +21 -0
  336. package/.agent-src/templates/persona.md +99 -0
  337. package/.agent-src/templates/roadmaps.md +109 -0
  338. package/.agent-src/templates/scripts/README.md +195 -0
  339. package/.agent-src/templates/scripts/check_memory.py +283 -0
  340. package/.agent-src/templates/scripts/check_memory_proposal.py +180 -0
  341. package/.agent-src/templates/scripts/historical-bug-patterns.example.yml +84 -0
  342. package/.agent-src/templates/scripts/implement_ticket/__init__.py +57 -0
  343. package/.agent-src/templates/scripts/implement_ticket/__main__.py +9 -0
  344. package/.agent-src/templates/scripts/implement_ticket/cli.py +171 -0
  345. package/.agent-src/templates/scripts/implement_ticket/delivery_state.py +130 -0
  346. package/.agent-src/templates/scripts/implement_ticket/dispatcher.py +134 -0
  347. package/.agent-src/templates/scripts/implement_ticket/persona_policy.py +85 -0
  348. package/.agent-src/templates/scripts/implement_ticket/steps/__init__.py +49 -0
  349. package/.agent-src/templates/scripts/implement_ticket/steps/analyze.py +98 -0
  350. package/.agent-src/templates/scripts/implement_ticket/steps/implement.py +145 -0
  351. package/.agent-src/templates/scripts/implement_ticket/steps/memory.py +136 -0
  352. package/.agent-src/templates/scripts/implement_ticket/steps/plan.py +175 -0
  353. package/.agent-src/templates/scripts/implement_ticket/steps/refine.py +140 -0
  354. package/.agent-src/templates/scripts/implement_ticket/steps/report.py +195 -0
  355. package/.agent-src/templates/scripts/implement_ticket/steps/test.py +180 -0
  356. package/.agent-src/templates/scripts/implement_ticket/steps/verify.py +170 -0
  357. package/.agent-src/templates/scripts/memory_hash.py +75 -0
  358. package/.agent-src/templates/scripts/memory_lookup.py +216 -0
  359. package/.agent-src/templates/scripts/memory_report.py +184 -0
  360. package/.agent-src/templates/scripts/memory_signal.py +167 -0
  361. package/.agent-src/templates/scripts/memory_status.py +156 -0
  362. package/.agent-src/templates/scripts/ownership-map.example.yml +87 -0
  363. package/.agent-src/templates/scripts/pr-risk-config.example.yml +76 -0
  364. package/.agent-src/templates/scripts/pr_review_routing.py +340 -0
  365. package/.agent-src/templates/scripts/pr_risk_review.py +211 -0
  366. package/.agent-src/templates/skill.md +136 -0
  367. package/.augment-plugin/marketplace.json +32 -0
  368. package/.augment-plugin/plugin.json +21 -0
  369. package/.claude-plugin/marketplace.json +119 -0
  370. package/AGENTS.md +121 -0
  371. package/CHANGELOG.md +279 -0
  372. package/CONTRIBUTING.md +176 -0
  373. package/LICENSE +21 -0
  374. package/README.md +357 -0
  375. package/bin/install.php +38 -0
  376. package/composer.json +29 -0
  377. package/config/agent-settings.template.yml +96 -0
  378. package/config/profiles/balanced.ini +10 -0
  379. package/config/profiles/full.ini +10 -0
  380. package/config/profiles/minimal.ini +10 -0
  381. package/docs/architecture.md +144 -0
  382. package/docs/customization.md +88 -0
  383. package/docs/development.md +171 -0
  384. package/docs/getting-started.md +130 -0
  385. package/docs/github-topics.md +84 -0
  386. package/docs/installation.md +376 -0
  387. package/docs/mcp.md +133 -0
  388. package/docs/quality.md +98 -0
  389. package/docs/skills-catalog.md +136 -0
  390. package/docs/troubleshooting.md +167 -0
  391. package/llms.txt +130 -0
  392. package/package.json +31 -0
  393. package/scripts/audit_skill_descriptions.py +168 -0
  394. package/scripts/check_compression.py +221 -0
  395. package/scripts/check_memory.py +341 -0
  396. package/scripts/check_memory_proposal.py +180 -0
  397. package/scripts/check_portability.py +320 -0
  398. package/scripts/check_proposal.py +269 -0
  399. package/scripts/check_references.py +400 -0
  400. package/scripts/ci_summary.py +131 -0
  401. package/scripts/compress.py +671 -0
  402. package/scripts/compress.sh +18 -0
  403. package/scripts/first-run.sh +109 -0
  404. package/scripts/generate_catalog.py +116 -0
  405. package/scripts/install +151 -0
  406. package/scripts/install-hooks.sh +29 -0
  407. package/scripts/install.py +487 -0
  408. package/scripts/install.sh +637 -0
  409. package/scripts/install_anthropic_key.sh +101 -0
  410. package/scripts/inventory_frontmatter.py +164 -0
  411. package/scripts/lint_marketplace.py +142 -0
  412. package/scripts/lint_regression.py +232 -0
  413. package/scripts/mcp_render.py +159 -0
  414. package/scripts/measure_patterns.py +376 -0
  415. package/scripts/memory_hash.py +75 -0
  416. package/scripts/memory_lookup.py +441 -0
  417. package/scripts/memory_report.py +336 -0
  418. package/scripts/memory_signal.py +210 -0
  419. package/scripts/memory_status.py +195 -0
  420. package/scripts/postinstall.sh +60 -0
  421. package/scripts/readme_linter.py +580 -0
  422. package/scripts/refine_ticket_detect.py +623 -0
  423. package/scripts/requirements-evals.txt +7 -0
  424. package/scripts/runtime_dispatcher.py +265 -0
  425. package/scripts/runtime_handler.py +148 -0
  426. package/scripts/runtime_registry.py +166 -0
  427. package/scripts/schemas/command.schema.json +32 -0
  428. package/scripts/schemas/persona.schema.json +42 -0
  429. package/scripts/schemas/rule.schema.json +28 -0
  430. package/scripts/schemas/skill.schema.json +73 -0
  431. package/scripts/setup.sh +230 -0
  432. package/scripts/setup_eval_venv.sh +58 -0
  433. package/scripts/skill_linter.py +2175 -0
  434. package/scripts/skill_trigger_eval.py +651 -0
  435. package/scripts/tool_registry.py +146 -0
  436. package/scripts/tools/__init__.py +1 -0
  437. package/scripts/tools/adapter_errors.py +63 -0
  438. package/scripts/tools/base_adapter.py +91 -0
  439. package/scripts/tools/github_adapter.py +128 -0
  440. package/scripts/tools/jira_adapter.py +115 -0
  441. package/scripts/update_counts.py +147 -0
  442. package/scripts/validate_frontmatter.py +424 -0
  443. package/templates/consumer-settings/README.md +46 -0
  444. package/templates/consumer-settings/augment-settings.json +12 -0
  445. package/templates/consumer-settings/claude-settings.json +9 -0
  446. package/templates/consumer-settings/copilot-settings.json +14 -0
@@ -0,0 +1,651 @@
1
+ #!/usr/bin/env python3
2
+ """Skill trigger evaluation runner.
3
+
4
+ Phase 1 of agents/roadmaps/road-to-trigger-evals.md — measures whether a
5
+ pilot skill's frontmatter description actually causes Claude to route to
6
+ the skill for queries that should trigger it, and to avoid routing for
7
+ queries that should not.
8
+
9
+ Input: one skill name + its evals/triggers.json (5 should-trigger +
10
+ 5 should-not-trigger queries).
11
+ Output: evals/last-run.json with per-query observed vs expected,
12
+ aggregate precision/recall, model id, timestamp, cost estimate.
13
+
14
+ Design notes:
15
+ - The real Anthropic client is a **soft** dependency. If the `anthropic`
16
+ package is not installed, only --dry-run works (mock router).
17
+ - The router is injectable — tests use a `MockRouter` that returns a
18
+ canned list per query. CI never makes real API calls.
19
+ - The full set of skill frontmatter (name + description) is passed in
20
+ every routing call. That is the actual production routing condition;
21
+ anything less is cheating.
22
+
23
+ Budget per roadmap: ≤500 LoC single file, no framework.
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import json
29
+ import stat
30
+ import sys
31
+ from dataclasses import asdict, dataclass, field
32
+ from datetime import datetime, timezone
33
+ from pathlib import Path
34
+ from typing import Callable, IO, Protocol
35
+
36
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
37
+ SKILLS_SOURCE = PROJECT_ROOT / ".agent-src.uncompressed" / "skills"
38
+ RESULTS_DIR = PROJECT_ROOT / "evals" / "results"
39
+ DEFAULT_MODEL = "claude-sonnet-4-5"
40
+
41
+ # Approximate Anthropic API pricing (USD per 1M tokens). Used for the
42
+ # cost estimate only — exact billing comes from the API response headers
43
+ # once we run with a real key.
44
+ PRICE_PER_MTOK_IN = {"claude-sonnet-4-5": 3.0, "claude-opus-4": 15.0}
45
+ PRICE_PER_MTOK_OUT = {"claude-sonnet-4-5": 15.0, "claude-opus-4": 75.0}
46
+
47
+ # On-disk key file. Companion: scripts/install_anthropic_key.sh writes it
48
+ # with mode 0600; load_anthropic_key() refuses to read anything else.
49
+ ANTHROPIC_KEY_PATH = Path.home() / ".config" / "agent-config" / "anthropic.key"
50
+ # Token heuristics used for the *pre-run* cost preview. Real billing
51
+ # comes from the API response once the user has confirmed.
52
+ TOKENS_PER_CHAR = 0.25 # ~4 chars per token, industry rule of thumb.
53
+ PROMPT_OVERHEAD_TOKENS = 200 # routing instructions above the catalogue.
54
+ OUTPUT_TOKENS_PER_QUERY = 60 # JSON `{"would_load": [...]}` is short.
55
+
56
+
57
class KeyGateError(RuntimeError):
    """Signal that the on-disk key file did not pass one of its safety checks."""
59
+
60
+
61
class ConfirmationAborted(RuntimeError):
    """Signal that a live run was not confirmed.

    Raised either because the user declined at the confirmation prompt or
    because stdin is non-interactive.
    """
64
+
65
+
66
@dataclass
class SkillMeta:
    """Name + description of one skill, loaded from SKILL.md frontmatter."""

    # Value of the frontmatter `name:` field.
    name: str
    # Value of the frontmatter `description:` field.
    description: str
72
+
73
+
74
@dataclass
class Query:
    """One evaluation query as read from a triggers.json file."""

    # The user query text that is sent to the router.
    q: str
    # True when the query is expected to trigger the skill under test.
    trigger: bool
78
+
79
+
80
@dataclass
class QueryResult:
    """Outcome of routing a single query."""

    # The query text that was routed.
    q: str
    # Whether the query was expected to trigger the skill.
    expected: bool
    # Whether the skill under test appeared in the router's answer.
    observed: bool
    # Skill names the router returned for this query.
    loaded_skills: list[str]
    # True when `observed` equals `expected`.
    passed: bool
87
+
88
+
89
@dataclass
class Metrics:
    """Confusion-matrix counts plus derived precision and recall."""

    # expected=True, observed=True
    true_positive: int = 0
    # expected=False, observed=True
    false_positive: int = 0
    # expected=False, observed=False
    true_negative: int = 0
    # expected=True, observed=False
    false_negative: int = 0
    # TP / (TP + FP)
    precision: float = 0.0
    # TP / (TP + FN)
    recall: float = 0.0
97
+
98
+
99
@dataclass
class EvalResult:
    """Full record of one evaluation run; serialised to JSON via `asdict`."""

    # Name of the skill under test.
    skill: str
    # Model id the run was attributed to (also used for the cost lookup).
    model: str
    # Timestamp string recorded for the run.
    timestamp: str
    # `name` attribute of the router that produced the answers.
    router: str
    # One entry per routed query.
    queries: list[QueryResult] = field(default_factory=list)
    # Aggregate metrics over `queries`.
    metrics: Metrics = field(default_factory=Metrics)
    # Input token count summed over all routing calls.
    input_tokens: int = 0
    # Output token count summed over all routing calls.
    output_tokens: int = 0
    # Estimated cost in USD derived from the token totals.
    cost_usd_estimate: float = 0.0
110
+
111
+
112
class TriggerRouter(Protocol):
    """Structural interface for anything able to route a query.

    An implementation receives one user query together with the full skill
    catalogue and answers with the skill names the model would load.
    Whether that means a live API call or a canned response is left to the
    implementation.
    """

    name: str

    def route(self, query: str, skills: list[SkillMeta]) -> tuple[list[str], int, int]:
        """Return ``(loaded_skill_names, input_tokens, output_tokens)``."""
122
+
123
+
124
class MockRouter:
    """Deterministic stand-in router for tests and dry-runs.

    The routing decision is delegated to the injected callable
    ``decide(query, skills) -> list[str]``. Token counts are synthetic:
    ``len(query) // 4 + len(skills) * 20`` for input and a constant 16 for
    output, so the cost-estimate arithmetic stays testable without numbers
    that look like real usage.
    """

    name = "mock"

    def __init__(self, decide: Callable[[str, list[SkillMeta]], list[str]]):
        self._decide = decide

    def route(self, query: str, skills: list[SkillMeta]) -> tuple[list[str], int, int]:
        """Return the injected decision plus the synthetic token counts."""
        chosen = self._decide(query, skills)
        fake_input_tokens = len(query) // 4 + 20 * len(skills)
        fake_output_tokens = 16
        return chosen, fake_input_tokens, fake_output_tokens
141
+
142
+
143
def load_skill_metas(root: Path = SKILLS_SOURCE) -> list[SkillMeta]:
    """Collect the frontmatter name + description of every skill under `root`.

    Each direct sub-directory of `root` is visited in sorted order.
    Directories without a SKILL.md, and SKILL.md files whose frontmatter
    cannot be parsed, are skipped.
    """
    skill_dirs = sorted(entry for entry in root.iterdir() if entry.is_dir())
    skill_files = (directory / "SKILL.md" for directory in skill_dirs)
    parsed = (_parse_frontmatter(md) for md in skill_files if md.exists())
    return [meta for meta in parsed if meta is not None]
154
+
155
+
156
+ def _parse_frontmatter(path: Path) -> SkillMeta | None:
157
+ text = path.read_text(encoding="utf-8")
158
+ if not text.startswith("---"):
159
+ return None
160
+ end = text.find("\n---", 3)
161
+ if end < 0:
162
+ return None
163
+ block = text[3:end]
164
+ name = _extract_field(block, "name")
165
+ desc = _extract_field(block, "description")
166
+ if name is None or desc is None:
167
+ return None
168
+ return SkillMeta(name=name, description=desc)
169
+
170
+
171
+ def _extract_field(block: str, field_name: str) -> str | None:
172
+ """Minimal YAML-ish frontmatter field extractor — supports quoted
173
+ and unquoted single-line values. We do not pull PyYAML in here; the
174
+ audit script already proved stdlib suffices for our frontmatter."""
175
+ prefix = f"{field_name}:"
176
+ for line in block.splitlines():
177
+ stripped = line.lstrip()
178
+ if not stripped.startswith(prefix):
179
+ continue
180
+ value = stripped[len(prefix):].strip()
181
+ if value.startswith('"') and value.endswith('"'):
182
+ value = value[1:-1]
183
+ elif value.startswith("'") and value.endswith("'"):
184
+ value = value[1:-1]
185
+ return value
186
+ return None
187
+
188
+
189
def load_triggers(path: Path) -> tuple[str, list[Query]]:
    """Parse a triggers.json file into ``(skill_name, queries)``.

    Raises ValueError when the file lists no queries at all.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    skill_name = payload["skill"]
    parsed: list[Query] = []
    for entry in payload["queries"]:
        parsed.append(Query(q=entry["q"], trigger=bool(entry["trigger"])))
    if not parsed:
        raise ValueError(f"{path} has zero queries; roadmap minimum is 10")
    return skill_name, parsed
197
+
198
+
199
def run_eval(
    skill_name: str,
    queries: list[Query],
    router: TriggerRouter,
    skills: list[SkillMeta],
    model: str = DEFAULT_MODEL,
) -> EvalResult:
    """Route every query through `router` and aggregate the outcome.

    A query counts as "observed" when `skill_name` is among the names the
    router returned, and as passed when that matches the query's expected
    trigger flag. Token counts are summed across calls; metrics and the
    cost estimate are computed once all queries have run.
    """
    started_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
    outcome = EvalResult(
        skill=skill_name,
        model=model,
        timestamp=started_at,
        router=router.name,
    )
    for item in queries:
        loaded_names, tokens_in, tokens_out = router.route(item.q, skills)
        was_loaded = skill_name in loaded_names
        record = QueryResult(
            q=item.q,
            expected=item.trigger,
            observed=was_loaded,
            loaded_skills=sorted(loaded_names),
            passed=(was_loaded == item.trigger),
        )
        outcome.queries.append(record)
        outcome.input_tokens += tokens_in
        outcome.output_tokens += tokens_out
    outcome.metrics = compute_metrics(outcome.queries)
    outcome.cost_usd_estimate = estimate_cost(
        model, outcome.input_tokens, outcome.output_tokens
    )
    return outcome
231
+
232
+
233
def compute_metrics(results: list[QueryResult]) -> Metrics:
    """Tally the confusion matrix and derive precision and recall.

    Precision and recall fall back to 0.0 when their denominator is zero
    and are rounded to three decimals.
    """
    # Keyed by (expected, observed).
    tally = {
        (True, True): 0,
        (False, True): 0,
        (False, False): 0,
        (True, False): 0,
    }
    for row in results:
        tally[(bool(row.expected), bool(row.observed))] += 1
    tp = tally[(True, True)]
    fp = tally[(False, True)]
    tn = tally[(False, False)]
    fn = tally[(True, False)]
    predicted_positive = tp + fp
    actual_positive = tp + fn
    precision = tp / predicted_positive if predicted_positive else 0.0
    recall = tp / actual_positive if actual_positive else 0.0
    return Metrics(
        true_positive=tp,
        false_positive=fp,
        true_negative=tn,
        false_negative=fn,
        precision=round(precision, 3),
        recall=round(recall, 3),
    )
248
+
249
+
250
def estimate_cost(model: str, in_tokens: int, out_tokens: int) -> float:
    """Return a rough USD cost for the given token counts.

    Prices are looked up per million tokens by model id; unknown models
    fall back to 3.0 (input) and 15.0 (output). This is an estimate used
    to sanity-check the per-run budget, not a billing figure. The result
    is rounded to six decimals.
    """
    rate_in = PRICE_PER_MTOK_IN.get(model, 3.0)
    rate_out = PRICE_PER_MTOK_OUT.get(model, 15.0)
    input_cost = (in_tokens / 1_000_000) * rate_in
    output_cost = (out_tokens / 1_000_000) * rate_out
    return round(input_cost + output_cost, 6)
258
+
259
+
260
def pre_estimate_cost(
    model: str,
    skills: list[SkillMeta],
    queries: list[Query],
) -> tuple[int, int, float]:
    """Estimate tokens and cost before the run, for the confirmation prompt.

    Returns ``(input_tokens, output_tokens, cost_usd)``. The figures are
    approximate because the real tokeniser runs server-side; the
    calibration is deliberately slightly high so the prompt never
    understates cost.
    """
    n_queries = len(queries)
    catalogue_chars = 0
    for skill in skills:
        # +6 per entry for the separators around name and description.
        catalogue_chars += len(skill.name) + len(skill.description) + 6
    prompt_chars = catalogue_chars + PROMPT_OVERHEAD_TOKENS * 4
    mean_query_chars = sum(len(item.q) for item in queries) // max(n_queries, 1)
    tokens_in_each = (
        int(prompt_chars * TOKENS_PER_CHAR)
        + PROMPT_OVERHEAD_TOKENS
        + int(mean_query_chars * TOKENS_PER_CHAR)
    )
    total_in = tokens_in_each * n_queries
    total_out = OUTPUT_TOKENS_PER_QUERY * n_queries
    return total_in, total_out, estimate_cost(model, total_in, total_out)
279
+
280
+
281
+ # ── Key gate ─────────────────────────────────────────────────────────────
282
+ #
283
+ # No environment-variable fallback, no keychain fallback. The key only
284
+ # ever comes from a 0600 file written by scripts/install_anthropic_key.sh.
285
+ # Drift from that contract is a hard abort.
286
+
287
def load_anthropic_key(path: Path = ANTHROPIC_KEY_PATH) -> str:
    """Load an Anthropic key from `path` with strict safety checks.

    Enforced invariants:
    - File exists.
    - Mode is exactly 0o600 (owner-only read/write).
    - Content is valid UTF-8 and non-empty after strip.
    - Content starts with `sk-ant-`.

    Raises:
        KeyGateError: when any invariant above is violated.
    """
    # A single stat() call covers both the existence check and the mode
    # check, so the file cannot vanish between the two.
    try:
        st = path.stat()
    except (FileNotFoundError, NotADirectoryError) as exc:
        raise KeyGateError(
            f"Anthropic key not found at {path}.\n"
            f" Install it with: bash scripts/install_anthropic_key.sh"
        ) from exc
    mode = stat.S_IMODE(st.st_mode)
    if mode != 0o600:
        raise KeyGateError(
            f"Unsafe permissions on {path}: got {oct(mode)}, expected 0o600.\n"
            f" Fix: chmod 600 {path}"
        )
    # Undecodable content is reported through the same gate error instead
    # of escaping as a raw UnicodeDecodeError.
    try:
        key = path.read_text(encoding="utf-8").strip()
    except UnicodeDecodeError as exc:
        raise KeyGateError(f"{path} is not valid UTF-8 text.") from exc
    if not key:
        raise KeyGateError(f"{path} is empty.")
    if not key.startswith("sk-ant-"):
        raise KeyGateError(
            f"{path} does not contain an Anthropic key "
            f"(expected 'sk-ant-' prefix)."
        )
    return key
317
+
318
+
319
+ # ── Confirmation gate ────────────────────────────────────────────────────
320
+ #
321
+ # Every live invocation must pass through this. No --force, no --yes,
322
+ # no env-var bypass. Non-tty stdin is rejected outright so the runner
323
+ # cannot be scheduled, piped, or wrapped by an agent.
324
+
325
def build_confirmation_summary(
    *,
    model: str,
    skill: str,
    query_count: int,
    catalogue_size: int,
    input_tokens: int,
    output_tokens: int,
    cost_usd: float,
    key_path: Path,
) -> str:
    """Render the boxed pre-run summary shown at the confirmation prompt.

    The text lists model, skill, query count, catalogue size, estimated
    token counts (with thousands separators), estimated cost (two
    decimals) and the key file location. It has no trailing newline.
    """
    rule = "═" * 56
    rows = [
        rule,
        " Trigger Eval — Confirmation Required",
        rule,
        f" Model: {model}",
        f" Skill: {skill}",
        f" Queries: {query_count}",
        f" Catalogue: {catalogue_size} skills in routing prompt",
        f" Est. tokens: in≈{input_tokens:,} out≈{output_tokens:,}",
        f" Est. cost: ~${cost_usd:.2f} USD (actual via API headers)",
        f" Key source: {key_path}",
        rule,
    ]
    return "\n".join(rows)
350
+
351
+
352
def require_confirmation(
    summary: str,
    *,
    stdin: IO[str] | None = None,
    stdout: IO[str] | None = None,
) -> None:
    """Print `summary`, require exactly `yes` from the controlling terminal.

    Production path (stdin/stdout both None) reads from /dev/tty and
    writes to /dev/tty, not from `sys.stdin` / `sys.stdout`. That makes
    the gate immune to any wrapper that rebinds stdin (task runners,
    nohup, sudo, agents) and guarantees every keystroke comes from the
    user's real keyboard.

    Tests inject explicit streams to bypass /dev/tty. When a test
    passes an object, it must supply both `stdin` and `stdout` so the
    isatty check covers the injected path too. `yes` is case-sensitive
    to block accidents from auto-expanded `y`.

    Raises:
        ConfirmationAborted: no controlling terminal is available, the
            injected stdin is not a tty, or the answer is not `yes`.
        AssertionError: only one of `stdin` / `stdout` was supplied.
    """
    prompt = "Proceed? [type 'yes' exactly to run, anything else aborts]: "
    if stdin is None and stdout is None:
        # Production path: controlling-terminal-only. If there is no
        # /dev/tty (CI, cron, non-interactive agent) this is a hard
        # abort before any API call.
        try:
            tty_in = open("/dev/tty", "r", encoding="utf-8")  # noqa: SIM115
            try:
                tty_out = open("/dev/tty", "w", encoding="utf-8")  # noqa: SIM115
            except OSError:
                # Do not leak the read handle when the write handle fails.
                tty_in.close()
                raise
        except OSError as exc:
            raise ConfirmationAborted(
                "Confirmation requires a controlling terminal (/dev/tty). "
                "Refusing to run under automation."
            ) from exc
        with tty_in, tty_out:
            tty_out.write(summary + "\n")
            tty_out.write(prompt)
            tty_out.flush()
            answer = tty_in.readline().rstrip("\n")
    else:
        # Test path — both streams must be supplied. Raised explicitly
        # (not via `assert`) so the guard survives `python -O`.
        if stdin is None or stdout is None:
            raise AssertionError(
                "require_confirmation: stdin and stdout must both be supplied "
                "when overriding defaults (test-only path)."
            )
        tty = getattr(stdin, "isatty", lambda: False)()
        if not tty:
            raise ConfirmationAborted(
                "Confirmation requires an interactive tty on stdin. "
                "Refusing non-interactive, piped, or redirected input."
            )
        stdout.write(summary + "\n")
        stdout.write(prompt)
        stdout.flush()
        answer = stdin.readline().rstrip("\n")

    if answer != "yes":
        raise ConfirmationAborted(f"Aborted at confirmation (got {answer!r}).")
414
+
415
+
416
def write_result(result: EvalResult, output_path: Path) -> None:
    """Serialise `result` as indented JSON to `output_path`.

    Missing parent directories are created. The file ends with a newline.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(asdict(result), indent=2)
    output_path.write_text(f"{serialized}\n", encoding="utf-8")
420
+
421
+
422
def format_summary(result: EvalResult) -> str:
    """Render a human-readable report for one evaluation run.

    The report always contains the skill, router/model, pass/fail counts,
    precision, recall and token/cost lines. When at least one query
    failed, a "Failures:" section follows with one line per failing
    query, tagged FN (expected but not observed) or FP.
    """
    stats = result.metrics
    failed = [row for row in result.queries if not row.passed]
    total = len(result.queries)
    report = [
        f"Skill: {result.skill}",
        f"Router: {result.router} Model: {result.model}",
        f"Queries: {total} ({total - len(failed)} pass, {len(failed)} fail)",
        f"Precision: {stats.precision} (TP={stats.true_positive} FP={stats.false_positive})",
        f"Recall: {stats.recall} (TP={stats.true_positive} FN={stats.false_negative})",
        f"Tokens: in={result.input_tokens} out={result.output_tokens} "
        f"cost~${result.cost_usd_estimate}",
    ]
    if failed:
        report += ["", "Failures:"]
        for row in failed:
            tag = "FN" if row.expected else "FP"
            report.append(
                f" [{tag}] expected={row.expected} "
                f"observed={row.observed} :: {row.q}"
            )
    return "\n".join(report)
447
+
448
+
449
# Fixed preamble of the routing prompt. The skill catalogue lines are
# appended directly after the final line of this text.
ROUTING_PROMPT_HEADER = """You are a skill-routing oracle. Given the catalogue below
and a single user query, return ONLY the JSON object {"would_load": [...]}
listing the skill names whose bodies you would load to answer the query.

Rules:
- Use the skill frontmatter description verbatim as the only routing signal.
- Return at most 4 skill names.
- If no skill applies, return {"would_load": []}.
- Output ONLY the JSON. No prose, no code fences.

Skill catalogue (name :: description):
"""
461
+
462
+
463
class AnthropicRouter:
    """Router backed by the live Anthropic Messages API.

    Every call sends the routing header plus the full skill catalogue as
    the system prompt and the query as the single user message, then
    parses the `would_load` list out of the reply. Token counts are read
    from the `usage` attribute of the SDK response.
    """

    name = "anthropic"

    def __init__(
        self,
        model: str = DEFAULT_MODEL,
        client=None,
        max_tokens: int = 256,
        api_key: str | None = None,
    ):
        """Use the injected `client` if given, otherwise build one from `api_key`.

        Raises RuntimeError when neither a client nor an api_key is
        supplied, or when the `anthropic` package cannot be imported.
        """
        self._model = model
        self._max_tokens = max_tokens
        self._client = client if client is not None else self._make_client(api_key)

    @staticmethod
    def _make_client(api_key: str | None):
        """Create the real SDK client; the SDK is imported lazily here."""
        if api_key is None:
            raise RuntimeError(
                "AnthropicRouter requires an explicit api_key or an injected client. "
                "Load the key with load_anthropic_key() — no env-var fallback."
            )
        try:
            import anthropic  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover - exercised only with real key
            raise RuntimeError(
                "anthropic package not installed. "
                "`pip install anthropic` or run with --dry-run."
            ) from exc
        return anthropic.Anthropic(api_key=api_key)

    def route(self, query: str, skills: list[SkillMeta]) -> tuple[list[str], int, int]:
        """Ask the model which skills it would load for `query`.

        Returns ``(loaded_skill_names, input_tokens, output_tokens)``;
        token counts are 0 when the response carries no usage data.
        """
        catalogue_lines = [f"- {s.name} :: {s.description}" for s in skills]
        system_prompt = ROUTING_PROMPT_HEADER + "\n".join(catalogue_lines) + "\n"
        response = self._client.messages.create(
            model=self._model,
            max_tokens=self._max_tokens,
            system=system_prompt,
            messages=[{"role": "user", "content": query}],
        )
        loaded = _parse_would_load(_first_text_block(response))
        usage = getattr(response, "usage", None)
        if usage:
            tokens_in = getattr(usage, "input_tokens", 0)
            tokens_out = getattr(usage, "output_tokens", 0)
        else:
            tokens_in, tokens_out = 0, 0
        return loaded, tokens_in, tokens_out
512
+
513
+
514
+ def _first_text_block(response) -> str:
515
+ """Extract the text from the first content block of an Anthropic
516
+ Messages API response."""
517
+ content = getattr(response, "content", None)
518
+ if not content:
519
+ return ""
520
+ first = content[0]
521
+ return getattr(first, "text", "") or ""
522
+
523
+
524
+ def _parse_would_load(text: str) -> list[str]:
525
+ """Parse `{"would_load": [...]}` out of a model response. Tolerates
526
+ leading/trailing whitespace and code fences even though the prompt
527
+ forbids them — models occasionally ignore that instruction."""
528
+ stripped = text.strip()
529
+ if stripped.startswith("```"):
530
+ stripped = stripped.strip("`").lstrip("json").strip()
531
+ try:
532
+ data = json.loads(stripped)
533
+ except json.JSONDecodeError:
534
+ return []
535
+ loaded = data.get("would_load", [])
536
+ if not isinstance(loaded, list):
537
+ return []
538
+ return [str(name) for name in loaded]
539
+
540
+
541
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    The parser description is the first line of the module docstring.
    Options: --skill (required), --triggers, --output, --model,
    --dry-run and --key-path.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    add = parser.add_argument

    add("--skill", required=True, help="Skill name (e.g. eloquent)")

    triggers_help = "Path to evals/triggers.json. Default: .agent-src.uncompressed/skills/<skill>/evals/triggers.json"
    add("--triggers", type=Path, default=None, help=triggers_help)

    output_help = (
        "Path to write the result. Default: evals/results/"
        "<timestamp>-<skill>-<model>.json (live) or "
        "<triggers-dir>/last-run.json (dry-run)."
    )
    add("--output", type=Path, default=None, help=output_help)

    add("--model", default=DEFAULT_MODEL)

    dry_run_help = "Use MockRouter (no API call). Returns the pilot skill only for should-trigger queries."
    add("--dry-run", action="store_true", help=dry_run_help)

    key_path_help = (
        "Override the key file location. Default: "
        "~/.config/agent-config/anthropic.key. Mode 0600 required."
    )
    add("--key-path", type=Path, default=ANTHROPIC_KEY_PATH, help=key_path_help)

    return parser
576
+
577
+
578
def _default_triggers_path(skill: str) -> Path:
    """Return the conventional triggers.json location for `skill`."""
    return SKILLS_SOURCE.joinpath(skill, "evals", "triggers.json")
580
+
581
+
582
def _default_live_output(skill: str, model: str) -> Path:
    """Return a result path under RESULTS_DIR stamped with the current UTC time."""
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    filename = f"{stamp}-{skill}-{model}.json"
    return RESULTS_DIR / filename
585
+
586
+
587
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Loads the trigger queries, picks a router (mock for --dry-run,
    Anthropic otherwise), runs the evaluation, writes the result file and
    prints a summary.

    Returns:
        0 when every query passed, 1 when at least one query failed, and
        2 when the run was aborted before evaluation (missing or invalid
        triggers.json, skill mismatch, key gate failure, declined
        confirmation, or the live router could not be created).
    """
    args = build_arg_parser().parse_args(argv)
    triggers_path = args.triggers or _default_triggers_path(args.skill)
    if not triggers_path.exists():
        print(f"❌ triggers.json not found: {triggers_path}", file=sys.stderr)
        return 2

    # A malformed file (bad JSON, missing keys, wrong shape, zero queries)
    # is an input error like the ones above: report it and exit with 2
    # instead of surfacing a traceback.
    try:
        skill_from_file, queries = load_triggers(triggers_path)
    except (ValueError, KeyError, TypeError) as exc:
        print(f"❌ invalid triggers.json ({triggers_path}): {exc!r}", file=sys.stderr)
        return 2
    if skill_from_file != args.skill:
        print(
            f"❌ skill mismatch: --skill={args.skill} but triggers.json says {skill_from_file}",
            file=sys.stderr,
        )
        return 2

    skills = load_skill_metas()
    if args.dry_run:
        expected = {q.q: q.trigger for q in queries}

        def decide(query: str, _skills: list[SkillMeta]) -> list[str]:
            # The mock answers exactly what the triggers file expects.
            return [args.skill] if expected.get(query, False) else []

        router: TriggerRouter = MockRouter(decide)
        default_output = triggers_path.parent / "last-run.json"
    else:
        # Live path: key gate → cost preview → confirmation → router.
        # Any failure here aborts before a single API call is made.
        try:
            api_key = load_anthropic_key(args.key_path)
        except KeyGateError as exc:
            print(f"❌ {exc}", file=sys.stderr)
            return 2

        in_tok, out_tok, cost = pre_estimate_cost(args.model, skills, queries)
        summary = build_confirmation_summary(
            model=args.model,
            skill=args.skill,
            query_count=len(queries),
            catalogue_size=len(skills),
            input_tokens=in_tok,
            output_tokens=out_tok,
            cost_usd=cost,
            key_path=args.key_path,
        )
        try:
            require_confirmation(summary)
        except ConfirmationAborted as exc:
            print(f"⏹ {exc}", file=sys.stderr)
            return 2

        # The router raises RuntimeError when the `anthropic` package is
        # missing; report that the same way as the other gate failures.
        try:
            router = AnthropicRouter(model=args.model, api_key=api_key)
        except RuntimeError as exc:
            print(f"❌ {exc}", file=sys.stderr)
            return 2
        default_output = _default_live_output(args.skill, args.model)

    result = run_eval(args.skill, queries, router, skills, model=args.model)
    output_path = args.output or default_output
    write_result(result, output_path)
    print(format_summary(result))
    print(f"\nWrote: {output_path}")
    fail_count = sum(1 for r in result.queries if not r.passed)
    return 1 if fail_count else 0
647
+
648
+
649
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
651
+