ai-playbook 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332) hide show
  1. ai_playbook-1.0.0/.ai-playbook.toml +10 -0
  2. ai_playbook-1.0.0/.deprecations.toml +37 -0
  3. ai_playbook-1.0.0/.github/CODEOWNERS +3 -0
  4. ai_playbook-1.0.0/.github/ISSUE_TEMPLATE/bug_report.md +32 -0
  5. ai_playbook-1.0.0/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
  6. ai_playbook-1.0.0/.github/PULL_REQUEST_TEMPLATE.md +26 -0
  7. ai_playbook-1.0.0/.github/dependabot.yml +22 -0
  8. ai_playbook-1.0.0/.github/workflows/ci.yml +217 -0
  9. ai_playbook-1.0.0/.github/workflows/codeql.yml +48 -0
  10. ai_playbook-1.0.0/.github/workflows/eval-drift.yml +137 -0
  11. ai_playbook-1.0.0/.github/workflows/link-check.yml +33 -0
  12. ai_playbook-1.0.0/.github/workflows/mutation.yml +63 -0
  13. ai_playbook-1.0.0/.github/workflows/pre-commit-autoupdate.yml +67 -0
  14. ai_playbook-1.0.0/.github/workflows/release.yml +224 -0
  15. ai_playbook-1.0.0/.github/workflows/scorecard.yml +49 -0
  16. ai_playbook-1.0.0/.gitignore +72 -0
  17. ai_playbook-1.0.0/.lychee.toml +29 -0
  18. ai_playbook-1.0.0/.markdownlint.jsonc +40 -0
  19. ai_playbook-1.0.0/.pre-commit-config.yaml +80 -0
  20. ai_playbook-1.0.0/.vale/styles/Diataxis/ExplanationFrontmatterTitle.yml +7 -0
  21. ai_playbook-1.0.0/.vale/styles/Diataxis/ExplanationTitle.yml +9 -0
  22. ai_playbook-1.0.0/.vale/styles/Diataxis/ExplanationVoice.yml +20 -0
  23. ai_playbook-1.0.0/.vale/styles/Diataxis/HowToFrontmatterTitle.yml +7 -0
  24. ai_playbook-1.0.0/.vale/styles/Diataxis/HowToTitle.yml +7 -0
  25. ai_playbook-1.0.0/.vale/styles/Diataxis/HowToVoice.yml +17 -0
  26. ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceFrontmatterTitle.yml +7 -0
  27. ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceInstruction.yml +23 -0
  28. ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceTitle.yml +9 -0
  29. ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceVoice.yml +17 -0
  30. ai_playbook-1.0.0/.vale/styles/Diataxis/Steps.yml +12 -0
  31. ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialExpectation.yml +7 -0
  32. ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialFrontmatterTitle.yml +7 -0
  33. ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialOptions.yml +17 -0
  34. ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialTitle.yml +9 -0
  35. ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialVoice.yml +12 -0
  36. ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialWorkMode.yml +22 -0
  37. ai_playbook-1.0.0/.vale/styles/Google/AMPM.yml +9 -0
  38. ai_playbook-1.0.0/.vale/styles/Google/Acronyms.yml +64 -0
  39. ai_playbook-1.0.0/.vale/styles/Google/Colons.yml +8 -0
  40. ai_playbook-1.0.0/.vale/styles/Google/Contractions.yml +30 -0
  41. ai_playbook-1.0.0/.vale/styles/Google/DateFormat.yml +9 -0
  42. ai_playbook-1.0.0/.vale/styles/Google/Ellipses.yml +9 -0
  43. ai_playbook-1.0.0/.vale/styles/Google/EmDash.yml +12 -0
  44. ai_playbook-1.0.0/.vale/styles/Google/Exclamation.yml +12 -0
  45. ai_playbook-1.0.0/.vale/styles/Google/FirstPerson.yml +13 -0
  46. ai_playbook-1.0.0/.vale/styles/Google/Gender.yml +9 -0
  47. ai_playbook-1.0.0/.vale/styles/Google/GenderBias.yml +43 -0
  48. ai_playbook-1.0.0/.vale/styles/Google/HeadingPunctuation.yml +13 -0
  49. ai_playbook-1.0.0/.vale/styles/Google/Headings.yml +29 -0
  50. ai_playbook-1.0.0/.vale/styles/Google/Latin.yml +11 -0
  51. ai_playbook-1.0.0/.vale/styles/Google/LyHyphens.yml +14 -0
  52. ai_playbook-1.0.0/.vale/styles/Google/OptionalPlurals.yml +12 -0
  53. ai_playbook-1.0.0/.vale/styles/Google/Ordinal.yml +7 -0
  54. ai_playbook-1.0.0/.vale/styles/Google/OxfordComma.yml +7 -0
  55. ai_playbook-1.0.0/.vale/styles/Google/Parens.yml +7 -0
  56. ai_playbook-1.0.0/.vale/styles/Google/Passive.yml +184 -0
  57. ai_playbook-1.0.0/.vale/styles/Google/Periods.yml +7 -0
  58. ai_playbook-1.0.0/.vale/styles/Google/Quotes.yml +7 -0
  59. ai_playbook-1.0.0/.vale/styles/Google/Ranges.yml +7 -0
  60. ai_playbook-1.0.0/.vale/styles/Google/Semicolons.yml +8 -0
  61. ai_playbook-1.0.0/.vale/styles/Google/Slang.yml +11 -0
  62. ai_playbook-1.0.0/.vale/styles/Google/Spacing.yml +10 -0
  63. ai_playbook-1.0.0/.vale/styles/Google/Spelling.yml +10 -0
  64. ai_playbook-1.0.0/.vale/styles/Google/Units.yml +8 -0
  65. ai_playbook-1.0.0/.vale/styles/Google/We.yml +11 -0
  66. ai_playbook-1.0.0/.vale/styles/Google/Will.yml +7 -0
  67. ai_playbook-1.0.0/.vale/styles/Google/WordList.yml +80 -0
  68. ai_playbook-1.0.0/.vale/styles/Google/meta.json +4 -0
  69. ai_playbook-1.0.0/.vale/styles/Google/vocab.txt +0 -0
  70. ai_playbook-1.0.0/.vale/styles/Playbook/Acronyms.yml +5 -0
  71. ai_playbook-1.0.0/.vale/styles/Playbook/DisplayNames.yml +6 -0
  72. ai_playbook-1.0.0/.vale/styles/Playbook/FleschKincaid.yml +7 -0
  73. ai_playbook-1.0.0/.vale/styles/Playbook/Headings.yml +7 -0
  74. ai_playbook-1.0.0/.vale/styles/Playbook/NoEmDash.yml +6 -0
  75. ai_playbook-1.0.0/.vale/styles/Playbook/PassiveVoice.yml +21 -0
  76. ai_playbook-1.0.0/.vale/styles/Playbook/ProfessionalTone.yml +10 -0
  77. ai_playbook-1.0.0/.vale/styles/Playbook/Weasels.yml +12 -0
  78. ai_playbook-1.0.0/.vale/styles/config/vocabularies/Playbook/accept.txt +37 -0
  79. ai_playbook-1.0.0/.vale/styles/config/vocabularies/Playbook/reject.txt +14 -0
  80. ai_playbook-1.0.0/.vale.ini +178 -0
  81. ai_playbook-1.0.0/AGENTS.md +57 -0
  82. ai_playbook-1.0.0/CHANGELOG.md +68 -0
  83. ai_playbook-1.0.0/CLAUDE.md +269 -0
  84. ai_playbook-1.0.0/CODE_OF_CONDUCT.md +28 -0
  85. ai_playbook-1.0.0/CONTRIBUTING.md +389 -0
  86. ai_playbook-1.0.0/GOVERNANCE.md +156 -0
  87. ai_playbook-1.0.0/LICENSE +23 -0
  88. ai_playbook-1.0.0/Makefile +91 -0
  89. ai_playbook-1.0.0/PKG-INFO +345 -0
  90. ai_playbook-1.0.0/README.md +315 -0
  91. ai_playbook-1.0.0/RELEASING.md +173 -0
  92. ai_playbook-1.0.0/SECURITY.md +81 -0
  93. ai_playbook-1.0.0/agents/code-inspector.agent.md +98 -0
  94. ai_playbook-1.0.0/agents/diff-reviewer.agent.md +143 -0
  95. ai_playbook-1.0.0/agents/docs-maintainer.agent.md +174 -0
  96. ai_playbook-1.0.0/agents/incident-responder.agent.md +148 -0
  97. ai_playbook-1.0.0/agents/release-captain.agent.md +144 -0
  98. ai_playbook-1.0.0/agents/slice-planner.agent.md +191 -0
  99. ai_playbook-1.0.0/agents/story-refiner.agent.md +261 -0
  100. ai_playbook-1.0.0/agents/xp-pair-programmer.agent.md +256 -0
  101. ai_playbook-1.0.0/commands/code-inspector.md +7 -0
  102. ai_playbook-1.0.0/commands/diff-reviewer.md +7 -0
  103. ai_playbook-1.0.0/commands/docs-maintainer.md +7 -0
  104. ai_playbook-1.0.0/commands/incident-responder.md +7 -0
  105. ai_playbook-1.0.0/commands/release-captain.md +7 -0
  106. ai_playbook-1.0.0/commands/slice-planner.md +7 -0
  107. ai_playbook-1.0.0/commands/status.md +33 -0
  108. ai_playbook-1.0.0/commands/story-refiner.md +7 -0
  109. ai_playbook-1.0.0/commands/xp-pair-programmer.md +7 -0
  110. ai_playbook-1.0.0/docs/README.md +64 -0
  111. ai_playbook-1.0.0/docs/adr/0001-bitbucket-server-not-supported.md +23 -0
  112. ai_playbook-1.0.0/docs/adr/README.md +77 -0
  113. ai_playbook-1.0.0/docs/architecture.md +325 -0
  114. ai_playbook-1.0.0/docs/cli-reference.md +680 -0
  115. ai_playbook-1.0.0/docs/deprecation-policy.md +182 -0
  116. ai_playbook-1.0.0/docs/docs-guide.md +203 -0
  117. ai_playbook-1.0.0/docs/getting-started.md +230 -0
  118. ai_playbook-1.0.0/docs/how-to/choose-workflow-path.md +85 -0
  119. ai_playbook-1.0.0/docs/how-to/enforce-quality.md +116 -0
  120. ai_playbook-1.0.0/docs/how-to/invoke-agents.md +66 -0
  121. ai_playbook-1.0.0/docs/how-to/reduce-token-usage.md +83 -0
  122. ai_playbook-1.0.0/docs/how-to/resume-session.md +69 -0
  123. ai_playbook-1.0.0/docs/how-to/run-with-local-models.md +143 -0
  124. ai_playbook-1.0.0/docs/how-to/setup-issue-tracker.md +129 -0
  125. ai_playbook-1.0.0/docs/how-to/setup-multi-repo.md +80 -0
  126. ai_playbook-1.0.0/docs/how-to/write-a-pack.md +127 -0
  127. ai_playbook-1.0.0/docs/limitations.md +54 -0
  128. ai_playbook-1.0.0/docs/references.md +98 -0
  129. ai_playbook-1.0.0/docs/rfcs/README.md +124 -0
  130. ai_playbook-1.0.0/docs/user-guide.md +247 -0
  131. ai_playbook-1.0.0/evals/code-inspector-adversarial-expected.md +40 -0
  132. ai_playbook-1.0.0/evals/code-inspector-adversarial-input.md +14 -0
  133. ai_playbook-1.0.0/evals/code-inspector-expected.md +47 -0
  134. ai_playbook-1.0.0/evals/code-inspector-input.md +35 -0
  135. ai_playbook-1.0.0/evals/diff-reviewer-adversarial-expected.md +39 -0
  136. ai_playbook-1.0.0/evals/diff-reviewer-adversarial-input.md +45 -0
  137. ai_playbook-1.0.0/evals/diff-reviewer-expected.md +47 -0
  138. ai_playbook-1.0.0/evals/diff-reviewer-input.md +81 -0
  139. ai_playbook-1.0.0/evals/docs-maintainer-adversarial-expected.md +29 -0
  140. ai_playbook-1.0.0/evals/docs-maintainer-adversarial-input.md +9 -0
  141. ai_playbook-1.0.0/evals/docs-maintainer-expected.md +32 -0
  142. ai_playbook-1.0.0/evals/docs-maintainer-input.md +5 -0
  143. ai_playbook-1.0.0/evals/incident-responder-adversarial-expected.md +49 -0
  144. ai_playbook-1.0.0/evals/incident-responder-adversarial-input.md +24 -0
  145. ai_playbook-1.0.0/evals/incident-responder-expected.md +52 -0
  146. ai_playbook-1.0.0/evals/incident-responder-input.md +44 -0
  147. ai_playbook-1.0.0/evals/release-captain-adversarial-expected.md +42 -0
  148. ai_playbook-1.0.0/evals/release-captain-adversarial-input.md +26 -0
  149. ai_playbook-1.0.0/evals/release-captain-expected.md +56 -0
  150. ai_playbook-1.0.0/evals/release-captain-input.md +41 -0
  151. ai_playbook-1.0.0/evals/rubrics/README.md +49 -0
  152. ai_playbook-1.0.0/evals/rubrics/_schema.json +66 -0
  153. ai_playbook-1.0.0/evals/rubrics/code-inspector.json +182 -0
  154. ai_playbook-1.0.0/evals/rubrics/diff-reviewer.json +174 -0
  155. ai_playbook-1.0.0/evals/rubrics/docs-maintainer.json +165 -0
  156. ai_playbook-1.0.0/evals/rubrics/incident-responder.json +225 -0
  157. ai_playbook-1.0.0/evals/rubrics/release-captain.json +256 -0
  158. ai_playbook-1.0.0/evals/rubrics/slice-planner.json +182 -0
  159. ai_playbook-1.0.0/evals/rubrics/story-refiner.json +186 -0
  160. ai_playbook-1.0.0/evals/rubrics/xp-pair-programmer.json +244 -0
  161. ai_playbook-1.0.0/evals/run_eval.py +1245 -0
  162. ai_playbook-1.0.0/evals/samples/README.md +61 -0
  163. ai_playbook-1.0.0/evals/samples/adversarial/release-captain-adversarial.md +18 -0
  164. ai_playbook-1.0.0/evals/samples/adversarial/story-refiner-adversarial.md +43 -0
  165. ai_playbook-1.0.0/evals/samples/code-inspector.md +41 -0
  166. ai_playbook-1.0.0/evals/samples/diff-reviewer.md +47 -0
  167. ai_playbook-1.0.0/evals/samples/docs-maintainer.md +61 -0
  168. ai_playbook-1.0.0/evals/samples/incident-responder.md +55 -0
  169. ai_playbook-1.0.0/evals/samples/negative/diff-reviewer.md +23 -0
  170. ai_playbook-1.0.0/evals/samples/negative/release-captain.md +16 -0
  171. ai_playbook-1.0.0/evals/samples/release-captain.md +76 -0
  172. ai_playbook-1.0.0/evals/samples/slice-planner.md +71 -0
  173. ai_playbook-1.0.0/evals/samples/story-refiner.md +66 -0
  174. ai_playbook-1.0.0/evals/samples/xp-pair-programmer.md +70 -0
  175. ai_playbook-1.0.0/evals/slice-planner-adversarial-expected.md +30 -0
  176. ai_playbook-1.0.0/evals/slice-planner-adversarial-input.md +24 -0
  177. ai_playbook-1.0.0/evals/slice-planner-expected.md +35 -0
  178. ai_playbook-1.0.0/evals/slice-planner-input.md +39 -0
  179. ai_playbook-1.0.0/evals/story-refiner-adversarial-expected.md +28 -0
  180. ai_playbook-1.0.0/evals/story-refiner-adversarial-input.md +9 -0
  181. ai_playbook-1.0.0/evals/story-refiner-expected.md +36 -0
  182. ai_playbook-1.0.0/evals/story-refiner-input.md +9 -0
  183. ai_playbook-1.0.0/evals/xp-pair-programmer-adversarial-expected.md +25 -0
  184. ai_playbook-1.0.0/evals/xp-pair-programmer-adversarial-input.md +34 -0
  185. ai_playbook-1.0.0/evals/xp-pair-programmer-expected.md +51 -0
  186. ai_playbook-1.0.0/evals/xp-pair-programmer-input.md +40 -0
  187. ai_playbook-1.0.0/harness/Makefile +111 -0
  188. ai_playbook-1.0.0/harness/check-teachback.sh +129 -0
  189. ai_playbook-1.0.0/harness/ci.yml +68 -0
  190. ai_playbook-1.0.0/harness/pre-commit-config.yaml +57 -0
  191. ai_playbook-1.0.0/harness/read-budget.sh +103 -0
  192. ai_playbook-1.0.0/harness/security.yml +165 -0
  193. ai_playbook-1.0.0/harness/settings.example.json +29 -0
  194. ai_playbook-1.0.0/harness/telemetry.sh +171 -0
  195. ai_playbook-1.0.0/incidents/.gitkeep +0 -0
  196. ai_playbook-1.0.0/knowledge-base/CHEATSHEET.md +261 -0
  197. ai_playbook-1.0.0/knowledge-base/INDEX.md +214 -0
  198. ai_playbook-1.0.0/knowledge-base/debugging.md +175 -0
  199. ai_playbook-1.0.0/knowledge-base/design-fundamentals.md +291 -0
  200. ai_playbook-1.0.0/knowledge-base/design-patterns.md +254 -0
  201. ai_playbook-1.0.0/knowledge-base/doc-linting.md +179 -0
  202. ai_playbook-1.0.0/knowledge-base/feature-flags.md +129 -0
  203. ai_playbook-1.0.0/knowledge-base/incident-response.md +150 -0
  204. ai_playbook-1.0.0/knowledge-base/languages/python.md +192 -0
  205. ai_playbook-1.0.0/knowledge-base/languages/testing-python.md +208 -0
  206. ai_playbook-1.0.0/knowledge-base/model-tier.md +128 -0
  207. ai_playbook-1.0.0/knowledge-base/observability.md +243 -0
  208. ai_playbook-1.0.0/knowledge-base/performance.md +155 -0
  209. ai_playbook-1.0.0/knowledge-base/philosophy.md +161 -0
  210. ai_playbook-1.0.0/knowledge-base/quality-gates.md +141 -0
  211. ai_playbook-1.0.0/knowledge-base/refactoring.md +105 -0
  212. ai_playbook-1.0.0/knowledge-base/regression-and-contracts.md +187 -0
  213. ai_playbook-1.0.0/knowledge-base/release.md +177 -0
  214. ai_playbook-1.0.0/knowledge-base/security.md +237 -0
  215. ai_playbook-1.0.0/knowledge-base/style-guide.md +149 -0
  216. ai_playbook-1.0.0/knowledge-base/testing-techniques.md +209 -0
  217. ai_playbook-1.0.0/knowledge-base/testing.md +208 -0
  218. ai_playbook-1.0.0/knowledge-base/tool-policy.md +47 -0
  219. ai_playbook-1.0.0/knowledge-base/working-agreement.md +112 -0
  220. ai_playbook-1.0.0/knowledge-base/workspaces/README.md +91 -0
  221. ai_playbook-1.0.0/mutation-baseline.json +14 -0
  222. ai_playbook-1.0.0/pyproject.toml +224 -0
  223. ai_playbook-1.0.0/skills/git/SKILL.md +235 -0
  224. ai_playbook-1.0.0/skills/host-adapter/SKILL.md +153 -0
  225. ai_playbook-1.0.0/skills/intent-interview/SKILL.md +124 -0
  226. ai_playbook-1.0.0/skills/issue-fetch/SKILL.md +186 -0
  227. ai_playbook-1.0.0/skills/notifier/SKILL.md +214 -0
  228. ai_playbook-1.0.0/skills/retrospective/SKILL.md +104 -0
  229. ai_playbook-1.0.0/skills/story-writing/SKILL.md +136 -0
  230. ai_playbook-1.0.0/src/deploy_ai_playbook/__init__.py +12 -0
  231. ai_playbook-1.0.0/src/deploy_ai_playbook/backup.py +377 -0
  232. ai_playbook-1.0.0/src/deploy_ai_playbook/cli.py +1390 -0
  233. ai_playbook-1.0.0/src/deploy_ai_playbook/config.py +307 -0
  234. ai_playbook-1.0.0/src/deploy_ai_playbook/console.py +10 -0
  235. ai_playbook-1.0.0/src/deploy_ai_playbook/deploy_render.py +426 -0
  236. ai_playbook-1.0.0/src/deploy_ai_playbook/discovery.py +215 -0
  237. ai_playbook-1.0.0/src/deploy_ai_playbook/doctor.py +573 -0
  238. ai_playbook-1.0.0/src/deploy_ai_playbook/errors.py +23 -0
  239. ai_playbook-1.0.0/src/deploy_ai_playbook/fs.py +452 -0
  240. ai_playbook-1.0.0/src/deploy_ai_playbook/mcp.py +80 -0
  241. ai_playbook-1.0.0/src/deploy_ai_playbook/paths.py +122 -0
  242. ai_playbook-1.0.0/src/deploy_ai_playbook/py.typed +0 -0
  243. ai_playbook-1.0.0/src/deploy_ai_playbook/safety.py +136 -0
  244. ai_playbook-1.0.0/src/deploy_ai_playbook/services/__init__.py +6 -0
  245. ai_playbook-1.0.0/src/deploy_ai_playbook/services/artifacts.py +131 -0
  246. ai_playbook-1.0.0/src/deploy_ai_playbook/services/deploy.py +182 -0
  247. ai_playbook-1.0.0/src/deploy_ai_playbook/services/diff.py +162 -0
  248. ai_playbook-1.0.0/src/deploy_ai_playbook/services/pack_validation.py +88 -0
  249. ai_playbook-1.0.0/src/deploy_ai_playbook/targets.py +110 -0
  250. ai_playbook-1.0.0/src/deploy_ai_playbook/telemetry.py +209 -0
  251. ai_playbook-1.0.0/src/deploy_ai_playbook/upgrade.py +172 -0
  252. ai_playbook-1.0.0/templates/.ai-playbook.toml.example +78 -0
  253. ai_playbook-1.0.0/templates/adr-template.md +27 -0
  254. ai_playbook-1.0.0/templates/agent-template.md +121 -0
  255. ai_playbook-1.0.0/templates/changelog-template.md +42 -0
  256. ai_playbook-1.0.0/templates/domain-language-template.md +69 -0
  257. ai_playbook-1.0.0/templates/feature-flag-registry-template.md +47 -0
  258. ai_playbook-1.0.0/templates/how-to-template.md +37 -0
  259. ai_playbook-1.0.0/templates/importlinter-template.toml +119 -0
  260. ai_playbook-1.0.0/templates/language-conventions-template.md +161 -0
  261. ai_playbook-1.0.0/templates/limitations-template.md +28 -0
  262. ai_playbook-1.0.0/templates/module-readme-template.md +37 -0
  263. ai_playbook-1.0.0/templates/plan-template.md +97 -0
  264. ai_playbook-1.0.0/templates/postmortem-template.md +85 -0
  265. ai_playbook-1.0.0/templates/quality-gates-template.md +105 -0
  266. ai_playbook-1.0.0/templates/research-template.md +71 -0
  267. ai_playbook-1.0.0/templates/review-template.md +75 -0
  268. ai_playbook-1.0.0/templates/rfc-template.md +65 -0
  269. ai_playbook-1.0.0/templates/runbook-template.md +68 -0
  270. ai_playbook-1.0.0/templates/story-bug-template.md +100 -0
  271. ai_playbook-1.0.0/templates/story-spike-template.md +75 -0
  272. ai_playbook-1.0.0/templates/story-template.md +75 -0
  273. ai_playbook-1.0.0/templates/testing-language-template.md +112 -0
  274. ai_playbook-1.0.0/tests/__init__.py +3 -0
  275. ai_playbook-1.0.0/tests/acceptance/__init__.py +37 -0
  276. ai_playbook-1.0.0/tests/acceptance/_helpers.py +58 -0
  277. ai_playbook-1.0.0/tests/acceptance/contract_data.py +432 -0
  278. ai_playbook-1.0.0/tests/acceptance/test_agent_contracts.py +419 -0
  279. ai_playbook-1.0.0/tests/acceptance/test_approval_gate_contracts.py +188 -0
  280. ai_playbook-1.0.0/tests/acceptance/test_backup.py +285 -0
  281. ai_playbook-1.0.0/tests/acceptance/test_config_validate.py +96 -0
  282. ai_playbook-1.0.0/tests/acceptance/test_deploy.py +1390 -0
  283. ai_playbook-1.0.0/tests/acceptance/test_deploy_model_tiers.py +149 -0
  284. ai_playbook-1.0.0/tests/acceptance/test_deprecations.py +162 -0
  285. ai_playbook-1.0.0/tests/acceptance/test_diff.py +209 -0
  286. ai_playbook-1.0.0/tests/acceptance/test_docs_contracts.py +210 -0
  287. ai_playbook-1.0.0/tests/acceptance/test_doctor.py +602 -0
  288. ai_playbook-1.0.0/tests/acceptance/test_enable_disable.py +95 -0
  289. ai_playbook-1.0.0/tests/acceptance/test_eval_contracts.py +252 -0
  290. ai_playbook-1.0.0/tests/acceptance/test_evals.py +200 -0
  291. ai_playbook-1.0.0/tests/acceptance/test_harness_release_contracts.py +1144 -0
  292. ai_playbook-1.0.0/tests/acceptance/test_init.py +67 -0
  293. ai_playbook-1.0.0/tests/acceptance/test_kb_skill_contracts.py +208 -0
  294. ai_playbook-1.0.0/tests/acceptance/test_pack_validation.py +127 -0
  295. ai_playbook-1.0.0/tests/acceptance/test_packs.py +673 -0
  296. ai_playbook-1.0.0/tests/acceptance/test_phrase_pin_convention.py +148 -0
  297. ai_playbook-1.0.0/tests/acceptance/test_pointer_contracts.py +352 -0
  298. ai_playbook-1.0.0/tests/acceptance/test_prune.py +124 -0
  299. ai_playbook-1.0.0/tests/acceptance/test_read_budget_hook.py +208 -0
  300. ai_playbook-1.0.0/tests/acceptance/test_skill_operation_contracts.py +235 -0
  301. ai_playbook-1.0.0/tests/acceptance/test_status.py +391 -0
  302. ai_playbook-1.0.0/tests/acceptance/test_story_workflow_contracts.py +241 -0
  303. ai_playbook-1.0.0/tests/acceptance/test_upgrade_check.py +63 -0
  304. ai_playbook-1.0.0/tests/acceptance/test_workflow_chain.py +90 -0
  305. ai_playbook-1.0.0/tests/acceptance/test_workflow_ordering_contracts.py +275 -0
  306. ai_playbook-1.0.0/tests/conftest.py +32 -0
  307. ai_playbook-1.0.0/tests/unit/__init__.py +0 -0
  308. ai_playbook-1.0.0/tests/unit/test_agent_size.py +154 -0
  309. ai_playbook-1.0.0/tests/unit/test_architecture.py +452 -0
  310. ai_playbook-1.0.0/tests/unit/test_backup_branches.py +371 -0
  311. ai_playbook-1.0.0/tests/unit/test_claude_md_size.py +107 -0
  312. ai_playbook-1.0.0/tests/unit/test_cli.py +608 -0
  313. ai_playbook-1.0.0/tests/unit/test_config.py +450 -0
  314. ai_playbook-1.0.0/tests/unit/test_discovery.py +227 -0
  315. ai_playbook-1.0.0/tests/unit/test_doctor_service.py +70 -0
  316. ai_playbook-1.0.0/tests/unit/test_evals.py +205 -0
  317. ai_playbook-1.0.0/tests/unit/test_fs_branches.py +259 -0
  318. ai_playbook-1.0.0/tests/unit/test_fuzz_properties.py +118 -0
  319. ai_playbook-1.0.0/tests/unit/test_kb_frontmatter.py +210 -0
  320. ai_playbook-1.0.0/tests/unit/test_kb_integrity.py +600 -0
  321. ai_playbook-1.0.0/tests/unit/test_large_deployments.py +161 -0
  322. ai_playbook-1.0.0/tests/unit/test_model_tier_materialize.py +116 -0
  323. ai_playbook-1.0.0/tests/unit/test_mutation_baseline.py +85 -0
  324. ai_playbook-1.0.0/tests/unit/test_safety.py +43 -0
  325. ai_playbook-1.0.0/tests/unit/test_targets.py +66 -0
  326. ai_playbook-1.0.0/tests/unit/test_telemetry.py +240 -0
  327. ai_playbook-1.0.0/tests/unit/test_upgrade.py +103 -0
  328. ai_playbook-1.0.0/tools/check-agent-size.py +118 -0
  329. ai_playbook-1.0.0/tools/check-claude-md-size.py +74 -0
  330. ai_playbook-1.0.0/tools/check-kb-frontmatter.py +283 -0
  331. ai_playbook-1.0.0/tools/check-mutation-baseline.py +123 -0
  332. ai_playbook-1.0.0/uv.lock +1000 -0
@@ -0,0 +1,10 @@
1
+ # AI Playbook adopter configuration.
2
+ # Full reference: docs/cli-reference.md (deployed with the playbook).
3
+ # Every key is optional — an empty file means a core-only deploy.
4
+
5
+ # Adopter-local packs, deployed in declared order (last pack wins on collisions):
6
+ # packs = [".ai-playbook/packs/<name>"]
7
+
8
+ # Per-agent quality tier overrides ("production" or "prototype"):
9
+ # [quality_tiers.agents]
10
+ # docs-maintainer = "prototype"
@@ -0,0 +1,37 @@
1
+ # Active deprecation registry — single source of truth for the deprecation
2
+ # cycle defined in `docs/deprecation-policy.md`. Every entry in
3
+ # `CHANGELOG.md § Deprecated` for a released version must have a row here
4
+ # with the same id; pre-release activity may live only in `## [Unreleased]`
5
+ # until the version ships.
6
+ #
7
+ # Schema:
8
+ # id — kebab-case identifier (the deprecated surface).
9
+ # surface — one of: cli-flag | cli-command | agent-id |
10
+ # config-key | kb-path | deployment-layout | skill-op.
11
+ # added_version — when the deprecation marker was added (X.Y.Z).
12
+ # removal_version — the MAJOR release that removes it (X.0.0).
13
+ # reason — one short sentence; the *why*, not the *what*.
14
+ # replacement — id or path of the replacement surface, or "(removed)"
15
+ # when the deprecated surface has no successor.
16
+ # status — active | grace | removed (informational; the
17
+ # contract test is driven by version comparisons).
18
+ #
19
+ # When a removal version ships, delete the row (do NOT keep historical
20
+ # entries here — git history is the audit trail). When a deprecation is
21
+ # rescinded, delete the row and note the rationale in CHANGELOG.md §
22
+ # Changed for that release.
23
+ #
24
+ # Validated by `tests/acceptance/test_deprecations.py` — keep in sync with
25
+ # `CHANGELOG.md § Deprecated` for the latest released section.
26
+
27
+ # No active deprecations.
28
+ # Example (do not uncomment — kept here as schema documentation):
29
+ #
30
+ # [[deprecations]]
31
+ # id = "release-captain"
32
+ # surface = "agent-id"
33
+ # added_version = "1.5.0"
34
+ # removal_version = "2.0.0"
35
+ # reason = "Renamed to release-engineer to match the broader scope it actually owns."
36
+ # replacement = "release-engineer"
37
+ # status = "active"
@@ -0,0 +1,3 @@
1
+ # Default owner for everything in this repo.
2
+ # Required reviewer when branch protection enforces Code Owner review.
3
+ * @meenumathew
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Report a problem with the CLI, agents, or knowledge base
4
+ title: ""
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Describe the bug
10
+
11
+ A clear description of what the problem is.
12
+
13
+ ## To reproduce
14
+
15
+ 1. Run `ai-playbook ...`
16
+ 2. Use agent `...`
17
+ 3. See error
18
+
19
+ ## Expected behavior
20
+
21
+ What you expected to happen.
22
+
23
+ ## Environment
24
+
25
+ - OS: [e.g. macOS 15, Ubuntu 24.04]
26
+ - Python version: [e.g. 3.12.4]
27
+ - AI tool: [e.g. Claude Code, Copilot, Kiro]
28
+ - ai-playbook version: [e.g. 1.0.0]
29
+
30
+ ## Additional context
31
+
32
+ Paste relevant error output, agent responses, or screenshots.
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature or improvement
4
+ title: ""
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Problem
10
+
11
+ What problem does this solve? What's the current limitation?
12
+
13
+ ## Proposed solution
14
+
15
+ Describe what you'd like to happen.
16
+
17
+ ## Alternatives considered
18
+
19
+ What other approaches did you consider?
20
+
21
+ ## Additional context
22
+
23
+ Any examples, screenshots, or references that help explain the request.
@@ -0,0 +1,26 @@
1
+ ## Summary
2
+
3
+ <!-- What changed and why? Keep it brief: 1-3 bullet points. -->
4
+
5
+ ## Type of change
6
+
7
+ - [ ] Bug fix
8
+ - [ ] New feature
9
+ - [ ] Documentation update
10
+ - [ ] Agent behavior change
11
+ - [ ] Knowledge base update
12
+ - [ ] CLI change
13
+
14
+ ## Checklist
15
+
16
+ - [ ] Tests pass (`uv run pytest tests/ -v`)
17
+ - [ ] Lint clean (`uv run ruff check src/ tests/ evals/`)
18
+ - [ ] Format clean (`uv run ruff format --check src/ tests/ evals/`)
19
+ - [ ] Type check passes (`uv run pyright`)
20
+ - [ ] Eval structure valid (`uv run python evals/run_eval.py check-structure`)
21
+ - [ ] Updated evals if agent behavior changed
22
+ - [ ] Updated docs if user-facing behavior changed
23
+
24
+ ## Test plan
25
+
26
+ <!-- How can a reviewer verify this works? -->
@@ -0,0 +1,22 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: github-actions
4
+ directory: /
5
+ schedule:
6
+ interval: weekly
7
+ groups:
8
+ actions:
9
+ patterns:
10
+ - "*"
11
+ commit-message:
12
+ prefix: chore
13
+ include: scope
14
+
15
+ - package-ecosystem: pip
16
+ directory: /
17
+ schedule:
18
+ interval: weekly
19
+ open-pull-requests-limit: 5
20
+ commit-message:
21
+ prefix: chore
22
+ include: scope
@@ -0,0 +1,217 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ concurrency:
12
+ group: ci-${{ github.ref }}
13
+ cancel-in-progress: true
14
+
15
+ jobs:
16
+ docs-quality:
17
+ runs-on: ubuntu-latest
18
+ timeout-minutes: 15
19
+
20
+ steps:
21
+ - name: Checkout
22
+ uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
23
+ with:
24
+ persist-credentials: false
25
+
26
+ - name: Set up uv
27
+ uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
28
+ with:
29
+ enable-cache: true
30
+
31
+ - name: Install Vale
32
+ run: |
33
+ set -euo pipefail
34
+ mkdir -p "$RUNNER_TEMP/bin"
35
+ GOBIN="$RUNNER_TEMP/bin" go install github.com/errata-ai/vale/v3/cmd/vale@v3.9.6
36
+ echo "$RUNNER_TEMP/bin" >> "$GITHUB_PATH"
37
+
38
+ - name: Documentation lint
39
+ run: make docs-lint
40
+
41
+ validate:
42
+ # Matrix scope is event-aware to keep push CI fast without losing coverage:
43
+ # - pull_request, or any run on main → full matrix (2 OS × 3 Python = 6 jobs)
44
+ # - any other run (e.g. push to a feature branch) → ubuntu × py3.12 only (1 job)
45
+ runs-on: ${{ matrix.os }}
46
+ timeout-minutes: 35
47
+ strategy:
48
+ fail-fast: false
49
+ matrix:
50
+ os: ${{ fromJSON((github.event_name == 'pull_request' || github.ref == 'refs/heads/main') && '["ubuntu-latest", "macos-latest"]' || '["ubuntu-latest"]') }}
51
+ python-version: ${{ fromJSON((github.event_name == 'pull_request' || github.ref == 'refs/heads/main') && '["3.12", "3.13", "3.14"]' || '["3.12"]') }}
52
+
53
+ steps:
54
+ - name: Checkout
55
+ uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
56
+ with:
57
+ persist-credentials: false
58
+
59
+ - name: Set up Python
60
+ uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
61
+ with:
62
+ python-version: ${{ matrix.python-version }}
63
+
64
+ - name: Set up uv
65
+ uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
66
+ with:
67
+ enable-cache: true
68
+
69
+ - name: Install dependencies
70
+ run: uv sync --dev
71
+
72
+ - name: Format check
73
+ run: make format-check
74
+
75
+ - name: Lint
76
+ run: make lint
77
+
78
+ - name: ShellCheck harness scripts
79
+ run: uvx pre-commit==4.3.0 run shellcheck --all-files
80
+
81
+ - name: Lint GitHub workflows (actionlint)
82
+ # OS/Python-independent — run once, not across the matrix.
83
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
84
+ run: uvx pre-commit==4.3.0 run actionlint --all-files
85
+
86
+ - name: Type check
87
+ run: make typecheck
88
+
89
+ - name: Run tests with coverage
90
+ # Direct invocation (not `make test`): Codecov needs --cov-report=xml,
91
+ # which the local target intentionally omits.
92
+ run: uv run pytest -q --cov=src --cov-report=xml --cov-fail-under=95
93
+
94
+ - name: Upload coverage to Codecov
95
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
96
+ uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 # v6.0.1
97
+ with:
98
+ files: ./coverage.xml
99
+ fail_ci_if_error: false
100
+
101
+ - name: Architecture enforcement
102
+ run: uv run pytest tests/unit/test_architecture.py -v
103
+
104
+ - name: Size budget — CLAUDE.md
105
+ run: make claude-md-size
106
+
107
+ - name: Size budget — agent files
108
+ run: make agent-size
109
+
110
+ - name: KB frontmatter contract
111
+ # OS/Python-independent content check — run once, not across the matrix.
112
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
113
+ run: make kb-frontmatter
114
+
115
+ - name: Check eval structure
116
+ run: make eval-structure
117
+
118
+ - name: Calibrate structural evals
119
+ run: make eval-calibrate
120
+
121
+ - name: Validate eval samples
122
+ run: make eval-validate
123
+
124
+ - name: Secret scan
125
+ run: |
126
+ # Mirrors the shipped local hook coverage so contributors cannot
127
+ # bypass secret scanning by skipping local pre-commit installation.
128
+ uvx pre-commit==4.3.0 run detect-private-key --all-files
129
+ uvx pre-commit==4.3.0 run gitleaks --all-files
130
+
131
+ - name: Dependency vulnerability scan
132
+ run: |
133
+ # Backs the security.md § Dependencies & Supply Chain claim that
134
+ # dependency scanning runs in CI. pip-audit fails on any known vulnerability;
135
+ # `--strict` also fails on unauditable deps. `pip-audit` is pinned (same rule as
136
+ # Actions): a floating `uvx pip-audit` would let upstream changes
137
+ # silently alter the build's security posture.
138
+ uv export --no-hashes --no-dev --no-emit-project > /tmp/runtime-requirements.txt
139
+ uvx pip-audit==2.10.0 --strict --requirement /tmp/runtime-requirements.txt
140
+
141
+ - name: Static security scan (Bandit)
142
+ run: |
143
+ # Bandit catches Python-specific security smells: hardcoded
144
+ # passwords, weak crypto, unsafe `subprocess`/`eval`, `assert` in
145
+ # production paths, etc. Severity floor is medium; confidence
146
+ # floor is medium to avoid noise from style-level findings.
147
+ #
148
+ # `src/` is scanned strictly. `tests/` and `evals/` are scanned
149
+ # with B101 (assert_used) skipped — pytest depends on `assert`,
150
+ # so flagging it would be noise; everything else still applies.
151
+ uvx bandit==1.9.4 -r src/ -ll -ii
152
+ uvx bandit==1.9.4 -r tests/ evals/ -ll -ii --skip B101
153
+
154
+ - name: Smoke-test console script (source)
155
+ run: |
156
+ set -euo pipefail
157
+ uv run ai-playbook list >/dev/null
158
+ for tool in claude copilot cursor kiro; do
159
+ tmpdir="$(mktemp -d)"
160
+ uv run ai-playbook deploy --agent all --tool "$tool" -t "$tmpdir" --no-mcp >/dev/null
161
+ done
162
+
163
+ - name: Build wheel and smoke-test installed artifact
164
+ run: |
165
+ set -euo pipefail
166
+ uv build
167
+ uvx twine==6.2.0 check dist/*
168
+ # Install the built wheel into a clean venv so we exercise what users get.
169
+ venv="$(mktemp -d)/venv"
170
+ uv venv "$venv"
171
+ # shellcheck disable=SC1091
172
+ source "$venv/bin/activate"
173
+ uv pip install dist/*.whl
174
+ ai-playbook list >/dev/null
175
+ for tool in claude copilot cursor kiro; do
176
+ tmpdir="$(mktemp -d)"
177
+ ai-playbook deploy --agent all --tool "$tool" -t "$tmpdir" --no-mcp >/dev/null
178
+ ai-playbook doctor --tool "$tool" -t "$tmpdir" >/dev/null
179
+ done
180
+ deactivate
181
+
182
+ commit-hygiene:
183
+ # Teach-back / Conventional Commit trailer is enforced locally by the
184
+ # `commit-msg` hook (harness/check-teachback.sh). GitHub does not run
185
+ # commit-msg hooks, so this job backstops it: a contributor who skips hook
186
+ # install or commits with `--no-verify` cannot land non-trivial commits
187
+ # without the trailer. PR-only — main's pre-re-init history is squashed and
188
+ # exempt (see CONTRIBUTING.md § Git History).
189
+ if: github.event_name == 'pull_request'
190
+ runs-on: ubuntu-latest
191
+ timeout-minutes: 10
192
+ steps:
193
+ - name: Checkout (full history for the PR range)
194
+ uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
195
+ with:
196
+ fetch-depth: 0
197
+ persist-credentials: false
198
+
199
+ - name: Teach-back trailer on PR commits
200
+ env:
201
+ BASE_SHA: ${{ github.event.pull_request.base.sha }}
202
+ HEAD_SHA: ${{ github.event.pull_request.head.sha }}
203
+ run: |
204
+ set -euo pipefail
205
+ # Reuse the exact script the local commit-msg hook runs, so CI and
206
+ # local stay in lockstep. Validate every non-merge commit the PR adds;
207
+ # merge commits carry no Conventional Commit subject and are skipped.
208
+ fail=0
209
+ for sha in $(git rev-list --no-merges "${BASE_SHA}..${HEAD_SHA}"); do
210
+ msg="$(mktemp)"
211
+ git log -1 --format=%B "$sha" > "$msg"
212
+ if ! harness/check-teachback.sh "$msg"; then
213
+ echo "::error::commit ${sha} failed the Teach-back / Conventional Commit check"
214
+ fail=1
215
+ fi
216
+ done
217
+ exit "$fail"
@@ -0,0 +1,48 @@
1
name: CodeQL

# Scan pushes and pull requests that target main, plus a weekly scheduled
# run on Mondays at 06:00 UTC.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  schedule:
    - cron: '0 6 * * 1'

# Workflow-wide default; the analyze job declares its own permission set.
permissions:
  contents: read

# A newer run on the same ref cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: codeql-${{ github.ref }}

jobs:
  analyze:
    timeout-minutes: 30
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      packages: read
      security-events: write

    strategy:
      # One matrix leg failing does not cancel the other.
      fail-fast: false
      matrix:
        language:
          - python
          - actions

    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Initialize CodeQL
        uses: github/codeql-action/init@458d36d7d4f47d0dd16ca424c1d3cda0060f1360  # v3
        with:
          queries: security-and-quality
          languages: ${{ matrix.language }}

      - name: Perform CodeQL analysis
        uses: github/codeql-action/analyze@458d36d7d4f47d0dd16ca424c1d3cda0060f1360  # v3
        with:
          # Keeps the results of each matrix language under its own category.
          category: /language:${{ matrix.language }}
@@ -0,0 +1,137 @@
1
name: Eval drift

on:
  # Manual trigger only. The job calls the Anthropic API, which is billable
  # and needs the ANTHROPIC_API_KEY secret, so no schedule is enabled by
  # default — start a drift check from the Actions tab when you want one.
  # Once a key is configured, uncomment the schedule below to restore the
  # automatic weekly runs.
  workflow_dispatch:
  # schedule:
  #   - cron: "0 6 * * 1"  # Mondays 06:00 UTC — semantic drift detection

# A newer run on the same ref cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: eval-drift-${{ github.ref }}

permissions:
  contents: read
19
+
20
jobs:
  # Audits runtime dependencies, validates the committed baselines, then runs
  # the LLM judge over standard, adversarial, and negative-control samples and
  # uploads the per-sample verdicts as an artifact.
  judge:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: "3.12"

      - name: Set up uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --dev

      - name: Dependency vulnerability scan
        run: |
          # The push/PR validation suite does not run pip-audit; this opt-in
          # job does, so a manual drift run also surfaces any vulnerable dep
          # that has landed on main since the last release.
          uv export --no-hashes --no-dev --no-emit-project > /tmp/runtime-requirements.txt
          uvx pip-audit==2.10.0 --strict --requirement /tmp/runtime-requirements.txt

      - name: Check committed baselines
        run: uv run python evals/run_eval.py validate-samples

      - name: Check for ANTHROPIC_API_KEY
        id: secret
        env:
          # Only a presence flag reaches this step, never the secret value.
          HAS_KEY: ${{ secrets.ANTHROPIC_API_KEY != '' && '1' || '' }}
        run: |
          # Detect-only — do NOT inject the secret into env here. The judge
          # step below is the single point that touches ANTHROPIC_API_KEY,
          # narrowing the surface that matters for secret-exfil review.
          if [ -z "${HAS_KEY}" ]; then
            echo "available=false" >> "$GITHUB_OUTPUT"
            echo "::error::ANTHROPIC_API_KEY secret not set — eval-drift cannot run the semantic judge."
            exit 1
          else
            echo "available=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Run LLM judge on committed baselines
        if: steps.secret.outputs.available == 'true'
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          set -euo pipefail
          shopt -s nullglob
          mkdir -p judge-output
          fail=0

          # Judge one sample and mirror its combined stdout/stderr into a
          # verdict file, so the artifact upload below makes drift trendable
          # across runs instead of buried in job logs.
          #   $1 = name handed to the judge, $2 = sample path, $3 = verdict file
          # pipefail (set above) makes the pipeline report the judge's exit
          # status rather than tee's.
          run_judge() {
            uv run python evals/run_eval.py judge "$1" "$2" 2>&1 | tee "$3"
          }

          # Standard baselines. README.md is skipped — the README documents
          # the convention; only files named `<agent>.md` are baselines, and
          # each must match an `evals/<agent>-expected.md` rubric.
          for sample in evals/samples/*.md; do
            agent="$(basename "$sample" .md)"
            [ "$agent" = "README" ] && continue
            echo "::group::judge $agent"
            if ! run_judge "$agent" "$sample" "judge-output/${agent}.txt"; then
              fail=1
            fi
            echo "::endgroup::"
          done

          # Adversarial baselines (curated hostile-input handling) must
          # PASS the judge, exactly like the standard baselines above.
          # Verdicts get an `adversarial-` prefix, as negative controls get
          # `negative-`, so an adversarial sample that shares a name with a
          # standard baseline cannot overwrite that baseline's verdict file.
          for sample in evals/samples/adversarial/*.md; do
            name="$(basename "$sample" .md)"
            [ "$name" = "README" ] && continue
            echo "::group::judge $name (adversarial)"
            if ! run_judge "$name" "$sample" "judge-output/adversarial-${name}.txt"; then
              fail=1
            fi
            echo "::endgroup::"
          done

          # Negative controls are deliberately-flawed outputs: the judge
          # must FAIL them. A pass here means the judge has gone lenient
          # (or the rubric eroded) — the one drift direction the positive
          # baselines above cannot detect. Exit code is inverted.
          # NOTE(review): any non-zero exit counts as the expected failure,
          # so a judge crash would look the same as a "fail" verdict here —
          # confirm run_eval.py distinguishes the two if that matters.
          for sample in evals/samples/negative/*.md; do
            name="$(basename "$sample" .md)"
            [ "$name" = "README" ] && continue
            echo "::group::judge $name (negative control — must fail)"
            if run_judge "$name" "$sample" "judge-output/negative-${name}.txt"; then
              echo "::error::negative control '${name}' PASSED the judge — leniency drift or rubric erosion; investigate before trusting this week's green verdicts"
              fail=1
            fi
            echo "::endgroup::"
          done

          # Every sample is judged before the job reports failure.
          exit "$fail"

      - name: Upload judge verdicts
        # Upload even when the judge step failed, as long as the key check
        # passed and the judge was eligible to run.
        if: always() && steps.secret.outputs.available == 'true'
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: judge-verdicts-${{ github.run_id }}
          path: judge-output/
          retention-days: 90
          if-no-files-found: ignore
@@ -0,0 +1,33 @@
1
+ # Weekly external link check.
2
+ #
3
+ # The pre-commit lychee hook runs `--offline` (internal cross-references only)
4
+ # so commits stay fast and network-independent. Nothing else validates the
5
+ # external URLs in docs, so dead links rot silently. This workflow runs the
6
+ # same config online once a week; a failure shows up as a red scheduled run
7
+ # for maintainers to triage. Non-blocking by design — it gates no PR.
8
+
9
name: Link check (external)

on:
  workflow_dispatch:
  schedule:
    # Mondays 06:00 UTC, alongside the other weekly scans.
    - cron: '0 6 * * 1'

permissions:
  contents: read

jobs:
  linkcheck:
    timeout-minutes: 15
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Check external links
        uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411  # v2.8.0
        with:
          # Folded scalar: the lines below join into one space-separated
          # argument string.
          args: >-
            --config .lychee.toml
            --no-progress
            './**/*.md'
@@ -0,0 +1,63 @@
1
name: Mutation testing

on:
  workflow_dispatch:
  schedule:
    # Mondays 07:00 UTC — an hour after the 06:00 weekly scans (and after
    # eval drift, whenever its optional schedule is enabled), to keep the
    # expensive checks separate.
    - cron: '0 7 * * 1'
  pull_request:
    # Only changes that can affect the mutation run or its baseline trigger
    # this workflow on pull requests.
    paths:
      - 'src/deploy_ai_playbook/**'
      - 'tests/**'
      - 'pyproject.toml'
      - 'uv.lock'
      - 'mutation-baseline.json'
      - 'tools/check-mutation-baseline.py'
      - '.github/workflows/mutation.yml'

permissions:
  contents: read

# A newer run on the same ref cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: mutation-${{ github.ref }}

jobs:
  mutation:
    timeout-minutes: 60
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: "3.12"

      - name: Set up uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --dev

      - name: Run mutation tests
        run: uv run mutmut run --max-children 4

      - name: Export mutation stats
        run: uv run mutmut export-cicd-stats

      - name: Check mutation baseline
        run: >-
          uv run python tools/check-mutation-baseline.py
          mutants/mutmut-cicd-stats.json
          mutation-baseline.json

      - name: Print mutation stats
        # Runs even after a failed step so the stats still reach the log.
        if: always()
        run: |
          # Nothing to print when the stats file was never produced.
          stats=mutants/mutmut-cicd-stats.json
          [ ! -f "$stats" ] || cat "$stats"