ai-playbook 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_playbook-1.0.0/.ai-playbook.toml +10 -0
- ai_playbook-1.0.0/.deprecations.toml +37 -0
- ai_playbook-1.0.0/.github/CODEOWNERS +3 -0
- ai_playbook-1.0.0/.github/ISSUE_TEMPLATE/bug_report.md +32 -0
- ai_playbook-1.0.0/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
- ai_playbook-1.0.0/.github/PULL_REQUEST_TEMPLATE.md +26 -0
- ai_playbook-1.0.0/.github/dependabot.yml +22 -0
- ai_playbook-1.0.0/.github/workflows/ci.yml +217 -0
- ai_playbook-1.0.0/.github/workflows/codeql.yml +48 -0
- ai_playbook-1.0.0/.github/workflows/eval-drift.yml +137 -0
- ai_playbook-1.0.0/.github/workflows/link-check.yml +33 -0
- ai_playbook-1.0.0/.github/workflows/mutation.yml +63 -0
- ai_playbook-1.0.0/.github/workflows/pre-commit-autoupdate.yml +67 -0
- ai_playbook-1.0.0/.github/workflows/release.yml +224 -0
- ai_playbook-1.0.0/.github/workflows/scorecard.yml +49 -0
- ai_playbook-1.0.0/.gitignore +72 -0
- ai_playbook-1.0.0/.lychee.toml +29 -0
- ai_playbook-1.0.0/.markdownlint.jsonc +40 -0
- ai_playbook-1.0.0/.pre-commit-config.yaml +80 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ExplanationFrontmatterTitle.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ExplanationTitle.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ExplanationVoice.yml +20 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/HowToFrontmatterTitle.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/HowToTitle.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/HowToVoice.yml +17 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceFrontmatterTitle.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceInstruction.yml +23 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceTitle.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/ReferenceVoice.yml +17 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/Steps.yml +12 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialExpectation.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialFrontmatterTitle.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialOptions.yml +17 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialTitle.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialVoice.yml +12 -0
- ai_playbook-1.0.0/.vale/styles/Diataxis/TutorialWorkMode.yml +22 -0
- ai_playbook-1.0.0/.vale/styles/Google/AMPM.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Google/Acronyms.yml +64 -0
- ai_playbook-1.0.0/.vale/styles/Google/Colons.yml +8 -0
- ai_playbook-1.0.0/.vale/styles/Google/Contractions.yml +30 -0
- ai_playbook-1.0.0/.vale/styles/Google/DateFormat.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Google/Ellipses.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Google/EmDash.yml +12 -0
- ai_playbook-1.0.0/.vale/styles/Google/Exclamation.yml +12 -0
- ai_playbook-1.0.0/.vale/styles/Google/FirstPerson.yml +13 -0
- ai_playbook-1.0.0/.vale/styles/Google/Gender.yml +9 -0
- ai_playbook-1.0.0/.vale/styles/Google/GenderBias.yml +43 -0
- ai_playbook-1.0.0/.vale/styles/Google/HeadingPunctuation.yml +13 -0
- ai_playbook-1.0.0/.vale/styles/Google/Headings.yml +29 -0
- ai_playbook-1.0.0/.vale/styles/Google/Latin.yml +11 -0
- ai_playbook-1.0.0/.vale/styles/Google/LyHyphens.yml +14 -0
- ai_playbook-1.0.0/.vale/styles/Google/OptionalPlurals.yml +12 -0
- ai_playbook-1.0.0/.vale/styles/Google/Ordinal.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/OxfordComma.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/Parens.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/Passive.yml +184 -0
- ai_playbook-1.0.0/.vale/styles/Google/Periods.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/Quotes.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/Ranges.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/Semicolons.yml +8 -0
- ai_playbook-1.0.0/.vale/styles/Google/Slang.yml +11 -0
- ai_playbook-1.0.0/.vale/styles/Google/Spacing.yml +10 -0
- ai_playbook-1.0.0/.vale/styles/Google/Spelling.yml +10 -0
- ai_playbook-1.0.0/.vale/styles/Google/Units.yml +8 -0
- ai_playbook-1.0.0/.vale/styles/Google/We.yml +11 -0
- ai_playbook-1.0.0/.vale/styles/Google/Will.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Google/WordList.yml +80 -0
- ai_playbook-1.0.0/.vale/styles/Google/meta.json +4 -0
- ai_playbook-1.0.0/.vale/styles/Google/vocab.txt +0 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/Acronyms.yml +5 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/DisplayNames.yml +6 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/FleschKincaid.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/Headings.yml +7 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/NoEmDash.yml +6 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/PassiveVoice.yml +21 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/ProfessionalTone.yml +10 -0
- ai_playbook-1.0.0/.vale/styles/Playbook/Weasels.yml +12 -0
- ai_playbook-1.0.0/.vale/styles/config/vocabularies/Playbook/accept.txt +37 -0
- ai_playbook-1.0.0/.vale/styles/config/vocabularies/Playbook/reject.txt +14 -0
- ai_playbook-1.0.0/.vale.ini +178 -0
- ai_playbook-1.0.0/AGENTS.md +57 -0
- ai_playbook-1.0.0/CHANGELOG.md +68 -0
- ai_playbook-1.0.0/CLAUDE.md +269 -0
- ai_playbook-1.0.0/CODE_OF_CONDUCT.md +28 -0
- ai_playbook-1.0.0/CONTRIBUTING.md +389 -0
- ai_playbook-1.0.0/GOVERNANCE.md +156 -0
- ai_playbook-1.0.0/LICENSE +23 -0
- ai_playbook-1.0.0/Makefile +91 -0
- ai_playbook-1.0.0/PKG-INFO +345 -0
- ai_playbook-1.0.0/README.md +315 -0
- ai_playbook-1.0.0/RELEASING.md +173 -0
- ai_playbook-1.0.0/SECURITY.md +81 -0
- ai_playbook-1.0.0/agents/code-inspector.agent.md +98 -0
- ai_playbook-1.0.0/agents/diff-reviewer.agent.md +143 -0
- ai_playbook-1.0.0/agents/docs-maintainer.agent.md +174 -0
- ai_playbook-1.0.0/agents/incident-responder.agent.md +148 -0
- ai_playbook-1.0.0/agents/release-captain.agent.md +144 -0
- ai_playbook-1.0.0/agents/slice-planner.agent.md +191 -0
- ai_playbook-1.0.0/agents/story-refiner.agent.md +261 -0
- ai_playbook-1.0.0/agents/xp-pair-programmer.agent.md +256 -0
- ai_playbook-1.0.0/commands/code-inspector.md +7 -0
- ai_playbook-1.0.0/commands/diff-reviewer.md +7 -0
- ai_playbook-1.0.0/commands/docs-maintainer.md +7 -0
- ai_playbook-1.0.0/commands/incident-responder.md +7 -0
- ai_playbook-1.0.0/commands/release-captain.md +7 -0
- ai_playbook-1.0.0/commands/slice-planner.md +7 -0
- ai_playbook-1.0.0/commands/status.md +33 -0
- ai_playbook-1.0.0/commands/story-refiner.md +7 -0
- ai_playbook-1.0.0/commands/xp-pair-programmer.md +7 -0
- ai_playbook-1.0.0/docs/README.md +64 -0
- ai_playbook-1.0.0/docs/adr/0001-bitbucket-server-not-supported.md +23 -0
- ai_playbook-1.0.0/docs/adr/README.md +77 -0
- ai_playbook-1.0.0/docs/architecture.md +325 -0
- ai_playbook-1.0.0/docs/cli-reference.md +680 -0
- ai_playbook-1.0.0/docs/deprecation-policy.md +182 -0
- ai_playbook-1.0.0/docs/docs-guide.md +203 -0
- ai_playbook-1.0.0/docs/getting-started.md +230 -0
- ai_playbook-1.0.0/docs/how-to/choose-workflow-path.md +85 -0
- ai_playbook-1.0.0/docs/how-to/enforce-quality.md +116 -0
- ai_playbook-1.0.0/docs/how-to/invoke-agents.md +66 -0
- ai_playbook-1.0.0/docs/how-to/reduce-token-usage.md +83 -0
- ai_playbook-1.0.0/docs/how-to/resume-session.md +69 -0
- ai_playbook-1.0.0/docs/how-to/run-with-local-models.md +143 -0
- ai_playbook-1.0.0/docs/how-to/setup-issue-tracker.md +129 -0
- ai_playbook-1.0.0/docs/how-to/setup-multi-repo.md +80 -0
- ai_playbook-1.0.0/docs/how-to/write-a-pack.md +127 -0
- ai_playbook-1.0.0/docs/limitations.md +54 -0
- ai_playbook-1.0.0/docs/references.md +98 -0
- ai_playbook-1.0.0/docs/rfcs/README.md +124 -0
- ai_playbook-1.0.0/docs/user-guide.md +247 -0
- ai_playbook-1.0.0/evals/code-inspector-adversarial-expected.md +40 -0
- ai_playbook-1.0.0/evals/code-inspector-adversarial-input.md +14 -0
- ai_playbook-1.0.0/evals/code-inspector-expected.md +47 -0
- ai_playbook-1.0.0/evals/code-inspector-input.md +35 -0
- ai_playbook-1.0.0/evals/diff-reviewer-adversarial-expected.md +39 -0
- ai_playbook-1.0.0/evals/diff-reviewer-adversarial-input.md +45 -0
- ai_playbook-1.0.0/evals/diff-reviewer-expected.md +47 -0
- ai_playbook-1.0.0/evals/diff-reviewer-input.md +81 -0
- ai_playbook-1.0.0/evals/docs-maintainer-adversarial-expected.md +29 -0
- ai_playbook-1.0.0/evals/docs-maintainer-adversarial-input.md +9 -0
- ai_playbook-1.0.0/evals/docs-maintainer-expected.md +32 -0
- ai_playbook-1.0.0/evals/docs-maintainer-input.md +5 -0
- ai_playbook-1.0.0/evals/incident-responder-adversarial-expected.md +49 -0
- ai_playbook-1.0.0/evals/incident-responder-adversarial-input.md +24 -0
- ai_playbook-1.0.0/evals/incident-responder-expected.md +52 -0
- ai_playbook-1.0.0/evals/incident-responder-input.md +44 -0
- ai_playbook-1.0.0/evals/release-captain-adversarial-expected.md +42 -0
- ai_playbook-1.0.0/evals/release-captain-adversarial-input.md +26 -0
- ai_playbook-1.0.0/evals/release-captain-expected.md +56 -0
- ai_playbook-1.0.0/evals/release-captain-input.md +41 -0
- ai_playbook-1.0.0/evals/rubrics/README.md +49 -0
- ai_playbook-1.0.0/evals/rubrics/_schema.json +66 -0
- ai_playbook-1.0.0/evals/rubrics/code-inspector.json +182 -0
- ai_playbook-1.0.0/evals/rubrics/diff-reviewer.json +174 -0
- ai_playbook-1.0.0/evals/rubrics/docs-maintainer.json +165 -0
- ai_playbook-1.0.0/evals/rubrics/incident-responder.json +225 -0
- ai_playbook-1.0.0/evals/rubrics/release-captain.json +256 -0
- ai_playbook-1.0.0/evals/rubrics/slice-planner.json +182 -0
- ai_playbook-1.0.0/evals/rubrics/story-refiner.json +186 -0
- ai_playbook-1.0.0/evals/rubrics/xp-pair-programmer.json +244 -0
- ai_playbook-1.0.0/evals/run_eval.py +1245 -0
- ai_playbook-1.0.0/evals/samples/README.md +61 -0
- ai_playbook-1.0.0/evals/samples/adversarial/release-captain-adversarial.md +18 -0
- ai_playbook-1.0.0/evals/samples/adversarial/story-refiner-adversarial.md +43 -0
- ai_playbook-1.0.0/evals/samples/code-inspector.md +41 -0
- ai_playbook-1.0.0/evals/samples/diff-reviewer.md +47 -0
- ai_playbook-1.0.0/evals/samples/docs-maintainer.md +61 -0
- ai_playbook-1.0.0/evals/samples/incident-responder.md +55 -0
- ai_playbook-1.0.0/evals/samples/negative/diff-reviewer.md +23 -0
- ai_playbook-1.0.0/evals/samples/negative/release-captain.md +16 -0
- ai_playbook-1.0.0/evals/samples/release-captain.md +76 -0
- ai_playbook-1.0.0/evals/samples/slice-planner.md +71 -0
- ai_playbook-1.0.0/evals/samples/story-refiner.md +66 -0
- ai_playbook-1.0.0/evals/samples/xp-pair-programmer.md +70 -0
- ai_playbook-1.0.0/evals/slice-planner-adversarial-expected.md +30 -0
- ai_playbook-1.0.0/evals/slice-planner-adversarial-input.md +24 -0
- ai_playbook-1.0.0/evals/slice-planner-expected.md +35 -0
- ai_playbook-1.0.0/evals/slice-planner-input.md +39 -0
- ai_playbook-1.0.0/evals/story-refiner-adversarial-expected.md +28 -0
- ai_playbook-1.0.0/evals/story-refiner-adversarial-input.md +9 -0
- ai_playbook-1.0.0/evals/story-refiner-expected.md +36 -0
- ai_playbook-1.0.0/evals/story-refiner-input.md +9 -0
- ai_playbook-1.0.0/evals/xp-pair-programmer-adversarial-expected.md +25 -0
- ai_playbook-1.0.0/evals/xp-pair-programmer-adversarial-input.md +34 -0
- ai_playbook-1.0.0/evals/xp-pair-programmer-expected.md +51 -0
- ai_playbook-1.0.0/evals/xp-pair-programmer-input.md +40 -0
- ai_playbook-1.0.0/harness/Makefile +111 -0
- ai_playbook-1.0.0/harness/check-teachback.sh +129 -0
- ai_playbook-1.0.0/harness/ci.yml +68 -0
- ai_playbook-1.0.0/harness/pre-commit-config.yaml +57 -0
- ai_playbook-1.0.0/harness/read-budget.sh +103 -0
- ai_playbook-1.0.0/harness/security.yml +165 -0
- ai_playbook-1.0.0/harness/settings.example.json +29 -0
- ai_playbook-1.0.0/harness/telemetry.sh +171 -0
- ai_playbook-1.0.0/incidents/.gitkeep +0 -0
- ai_playbook-1.0.0/knowledge-base/CHEATSHEET.md +261 -0
- ai_playbook-1.0.0/knowledge-base/INDEX.md +214 -0
- ai_playbook-1.0.0/knowledge-base/debugging.md +175 -0
- ai_playbook-1.0.0/knowledge-base/design-fundamentals.md +291 -0
- ai_playbook-1.0.0/knowledge-base/design-patterns.md +254 -0
- ai_playbook-1.0.0/knowledge-base/doc-linting.md +179 -0
- ai_playbook-1.0.0/knowledge-base/feature-flags.md +129 -0
- ai_playbook-1.0.0/knowledge-base/incident-response.md +150 -0
- ai_playbook-1.0.0/knowledge-base/languages/python.md +192 -0
- ai_playbook-1.0.0/knowledge-base/languages/testing-python.md +208 -0
- ai_playbook-1.0.0/knowledge-base/model-tier.md +128 -0
- ai_playbook-1.0.0/knowledge-base/observability.md +243 -0
- ai_playbook-1.0.0/knowledge-base/performance.md +155 -0
- ai_playbook-1.0.0/knowledge-base/philosophy.md +161 -0
- ai_playbook-1.0.0/knowledge-base/quality-gates.md +141 -0
- ai_playbook-1.0.0/knowledge-base/refactoring.md +105 -0
- ai_playbook-1.0.0/knowledge-base/regression-and-contracts.md +187 -0
- ai_playbook-1.0.0/knowledge-base/release.md +177 -0
- ai_playbook-1.0.0/knowledge-base/security.md +237 -0
- ai_playbook-1.0.0/knowledge-base/style-guide.md +149 -0
- ai_playbook-1.0.0/knowledge-base/testing-techniques.md +209 -0
- ai_playbook-1.0.0/knowledge-base/testing.md +208 -0
- ai_playbook-1.0.0/knowledge-base/tool-policy.md +47 -0
- ai_playbook-1.0.0/knowledge-base/working-agreement.md +112 -0
- ai_playbook-1.0.0/knowledge-base/workspaces/README.md +91 -0
- ai_playbook-1.0.0/mutation-baseline.json +14 -0
- ai_playbook-1.0.0/pyproject.toml +224 -0
- ai_playbook-1.0.0/skills/git/SKILL.md +235 -0
- ai_playbook-1.0.0/skills/host-adapter/SKILL.md +153 -0
- ai_playbook-1.0.0/skills/intent-interview/SKILL.md +124 -0
- ai_playbook-1.0.0/skills/issue-fetch/SKILL.md +186 -0
- ai_playbook-1.0.0/skills/notifier/SKILL.md +214 -0
- ai_playbook-1.0.0/skills/retrospective/SKILL.md +104 -0
- ai_playbook-1.0.0/skills/story-writing/SKILL.md +136 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/__init__.py +12 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/backup.py +377 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/cli.py +1390 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/config.py +307 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/console.py +10 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/deploy_render.py +426 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/discovery.py +215 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/doctor.py +573 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/errors.py +23 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/fs.py +452 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/mcp.py +80 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/paths.py +122 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/py.typed +0 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/safety.py +136 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/services/__init__.py +6 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/services/artifacts.py +131 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/services/deploy.py +182 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/services/diff.py +162 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/services/pack_validation.py +88 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/targets.py +110 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/telemetry.py +209 -0
- ai_playbook-1.0.0/src/deploy_ai_playbook/upgrade.py +172 -0
- ai_playbook-1.0.0/templates/.ai-playbook.toml.example +78 -0
- ai_playbook-1.0.0/templates/adr-template.md +27 -0
- ai_playbook-1.0.0/templates/agent-template.md +121 -0
- ai_playbook-1.0.0/templates/changelog-template.md +42 -0
- ai_playbook-1.0.0/templates/domain-language-template.md +69 -0
- ai_playbook-1.0.0/templates/feature-flag-registry-template.md +47 -0
- ai_playbook-1.0.0/templates/how-to-template.md +37 -0
- ai_playbook-1.0.0/templates/importlinter-template.toml +119 -0
- ai_playbook-1.0.0/templates/language-conventions-template.md +161 -0
- ai_playbook-1.0.0/templates/limitations-template.md +28 -0
- ai_playbook-1.0.0/templates/module-readme-template.md +37 -0
- ai_playbook-1.0.0/templates/plan-template.md +97 -0
- ai_playbook-1.0.0/templates/postmortem-template.md +85 -0
- ai_playbook-1.0.0/templates/quality-gates-template.md +105 -0
- ai_playbook-1.0.0/templates/research-template.md +71 -0
- ai_playbook-1.0.0/templates/review-template.md +75 -0
- ai_playbook-1.0.0/templates/rfc-template.md +65 -0
- ai_playbook-1.0.0/templates/runbook-template.md +68 -0
- ai_playbook-1.0.0/templates/story-bug-template.md +100 -0
- ai_playbook-1.0.0/templates/story-spike-template.md +75 -0
- ai_playbook-1.0.0/templates/story-template.md +75 -0
- ai_playbook-1.0.0/templates/testing-language-template.md +112 -0
- ai_playbook-1.0.0/tests/__init__.py +3 -0
- ai_playbook-1.0.0/tests/acceptance/__init__.py +37 -0
- ai_playbook-1.0.0/tests/acceptance/_helpers.py +58 -0
- ai_playbook-1.0.0/tests/acceptance/contract_data.py +432 -0
- ai_playbook-1.0.0/tests/acceptance/test_agent_contracts.py +419 -0
- ai_playbook-1.0.0/tests/acceptance/test_approval_gate_contracts.py +188 -0
- ai_playbook-1.0.0/tests/acceptance/test_backup.py +285 -0
- ai_playbook-1.0.0/tests/acceptance/test_config_validate.py +96 -0
- ai_playbook-1.0.0/tests/acceptance/test_deploy.py +1390 -0
- ai_playbook-1.0.0/tests/acceptance/test_deploy_model_tiers.py +149 -0
- ai_playbook-1.0.0/tests/acceptance/test_deprecations.py +162 -0
- ai_playbook-1.0.0/tests/acceptance/test_diff.py +209 -0
- ai_playbook-1.0.0/tests/acceptance/test_docs_contracts.py +210 -0
- ai_playbook-1.0.0/tests/acceptance/test_doctor.py +602 -0
- ai_playbook-1.0.0/tests/acceptance/test_enable_disable.py +95 -0
- ai_playbook-1.0.0/tests/acceptance/test_eval_contracts.py +252 -0
- ai_playbook-1.0.0/tests/acceptance/test_evals.py +200 -0
- ai_playbook-1.0.0/tests/acceptance/test_harness_release_contracts.py +1144 -0
- ai_playbook-1.0.0/tests/acceptance/test_init.py +67 -0
- ai_playbook-1.0.0/tests/acceptance/test_kb_skill_contracts.py +208 -0
- ai_playbook-1.0.0/tests/acceptance/test_pack_validation.py +127 -0
- ai_playbook-1.0.0/tests/acceptance/test_packs.py +673 -0
- ai_playbook-1.0.0/tests/acceptance/test_phrase_pin_convention.py +148 -0
- ai_playbook-1.0.0/tests/acceptance/test_pointer_contracts.py +352 -0
- ai_playbook-1.0.0/tests/acceptance/test_prune.py +124 -0
- ai_playbook-1.0.0/tests/acceptance/test_read_budget_hook.py +208 -0
- ai_playbook-1.0.0/tests/acceptance/test_skill_operation_contracts.py +235 -0
- ai_playbook-1.0.0/tests/acceptance/test_status.py +391 -0
- ai_playbook-1.0.0/tests/acceptance/test_story_workflow_contracts.py +241 -0
- ai_playbook-1.0.0/tests/acceptance/test_upgrade_check.py +63 -0
- ai_playbook-1.0.0/tests/acceptance/test_workflow_chain.py +90 -0
- ai_playbook-1.0.0/tests/acceptance/test_workflow_ordering_contracts.py +275 -0
- ai_playbook-1.0.0/tests/conftest.py +32 -0
- ai_playbook-1.0.0/tests/unit/__init__.py +0 -0
- ai_playbook-1.0.0/tests/unit/test_agent_size.py +154 -0
- ai_playbook-1.0.0/tests/unit/test_architecture.py +452 -0
- ai_playbook-1.0.0/tests/unit/test_backup_branches.py +371 -0
- ai_playbook-1.0.0/tests/unit/test_claude_md_size.py +107 -0
- ai_playbook-1.0.0/tests/unit/test_cli.py +608 -0
- ai_playbook-1.0.0/tests/unit/test_config.py +450 -0
- ai_playbook-1.0.0/tests/unit/test_discovery.py +227 -0
- ai_playbook-1.0.0/tests/unit/test_doctor_service.py +70 -0
- ai_playbook-1.0.0/tests/unit/test_evals.py +205 -0
- ai_playbook-1.0.0/tests/unit/test_fs_branches.py +259 -0
- ai_playbook-1.0.0/tests/unit/test_fuzz_properties.py +118 -0
- ai_playbook-1.0.0/tests/unit/test_kb_frontmatter.py +210 -0
- ai_playbook-1.0.0/tests/unit/test_kb_integrity.py +600 -0
- ai_playbook-1.0.0/tests/unit/test_large_deployments.py +161 -0
- ai_playbook-1.0.0/tests/unit/test_model_tier_materialize.py +116 -0
- ai_playbook-1.0.0/tests/unit/test_mutation_baseline.py +85 -0
- ai_playbook-1.0.0/tests/unit/test_safety.py +43 -0
- ai_playbook-1.0.0/tests/unit/test_targets.py +66 -0
- ai_playbook-1.0.0/tests/unit/test_telemetry.py +240 -0
- ai_playbook-1.0.0/tests/unit/test_upgrade.py +103 -0
- ai_playbook-1.0.0/tools/check-agent-size.py +118 -0
- ai_playbook-1.0.0/tools/check-claude-md-size.py +74 -0
- ai_playbook-1.0.0/tools/check-kb-frontmatter.py +283 -0
- ai_playbook-1.0.0/tools/check-mutation-baseline.py +123 -0
- ai_playbook-1.0.0/uv.lock +1000 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# AI Playbook adopter configuration.
|
|
2
|
+
# Full reference: docs/cli-reference.md (deployed with the playbook).
|
|
3
|
+
# Every key is optional — an empty file means a core-only deploy.
|
|
4
|
+
|
|
5
|
+
# Adopter-local packs, deployed in declared order (last pack wins on collisions):
|
|
6
|
+
# packs = [".ai-playbook/packs/<name>"]
|
|
7
|
+
|
|
8
|
+
# Per-agent quality tier overrides ("production" or "prototype"):
|
|
9
|
+
# [quality_tiers.agents]
|
|
10
|
+
# docs-maintainer = "prototype"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Active deprecation registry — single source of truth for the deprecation
|
|
2
|
+
# cycle defined in `docs/deprecation-policy.md`. Every entry in
|
|
3
|
+
# `CHANGELOG.md § Deprecated` for a released version must have a row here
|
|
4
|
+
# with the same id; pre-release activity may live only in `## [Unreleased]`
|
|
5
|
+
# until the version ships.
|
|
6
|
+
#
|
|
7
|
+
# Schema:
|
|
8
|
+
# id — kebab-case identifier (the deprecated surface).
|
|
9
|
+
# surface — one of: cli-flag | cli-command | agent-id |
|
|
10
|
+
# config-key | kb-path | deployment-layout | skill-op.
|
|
11
|
+
# added_version — when the deprecation marker was added (X.Y.Z).
|
|
12
|
+
# removal_version — the MAJOR release that removes it (X.0.0).
|
|
13
|
+
# reason — one short sentence; the *why*, not the *what*.
|
|
14
|
+
# replacement — id or path of the replacement surface, or "(removed)"
|
|
15
|
+
# when the deprecated surface has no successor.
|
|
16
|
+
# status — active | grace | removed (informational; the
|
|
17
|
+
# contract test is driven by version comparisons).
|
|
18
|
+
#
|
|
19
|
+
# When a removal version ships, delete the row (do NOT keep historical
|
|
20
|
+
# entries here — git history is the audit trail). When a deprecation is
|
|
21
|
+
# rescinded, delete the row and note the rationale in CHANGELOG.md §
|
|
22
|
+
# Changed for that release.
|
|
23
|
+
#
|
|
24
|
+
# Validated by `tests/acceptance/test_deprecations.py` — keep in sync with
|
|
25
|
+
# `CHANGELOG.md § Deprecated` for the latest released section.
|
|
26
|
+
|
|
27
|
+
# No active deprecations.
|
|
28
|
+
# Example (do not uncomment — kept here as schema documentation):
|
|
29
|
+
#
|
|
30
|
+
# [[deprecations]]
|
|
31
|
+
# id = "release-captain"
|
|
32
|
+
# surface = "agent-id"
|
|
33
|
+
# added_version = "1.5.0"
|
|
34
|
+
# removal_version = "2.0.0"
|
|
35
|
+
# reason = "Renamed to release-engineer to match the broader scope it actually owns."
|
|
36
|
+
# replacement = "release-engineer"
|
|
37
|
+
# status = "active"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug Report
|
|
3
|
+
about: Report a problem with the CLI, agents, or knowledge base
|
|
4
|
+
title: ""
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ""
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Describe the bug
|
|
10
|
+
|
|
11
|
+
A clear description of what the problem is.
|
|
12
|
+
|
|
13
|
+
## To reproduce
|
|
14
|
+
|
|
15
|
+
1. Run `ai-playbook ...`
|
|
16
|
+
2. Use agent `...`
|
|
17
|
+
3. See error
|
|
18
|
+
|
|
19
|
+
## Expected behavior
|
|
20
|
+
|
|
21
|
+
What you expected to happen.
|
|
22
|
+
|
|
23
|
+
## Environment
|
|
24
|
+
|
|
25
|
+
- OS: [e.g. macOS 15, Ubuntu 24.04]
|
|
26
|
+
- Python version: [e.g. 3.12.4]
|
|
27
|
+
- AI tool: [e.g. Claude Code, Copilot, Kiro]
|
|
28
|
+
- ai-playbook version: [e.g. 0.1.0]
|
|
29
|
+
|
|
30
|
+
## Additional context
|
|
31
|
+
|
|
32
|
+
Paste relevant error output, agent responses, or screenshots.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature Request
|
|
3
|
+
about: Suggest a new feature or improvement
|
|
4
|
+
title: ""
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ""
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Problem
|
|
10
|
+
|
|
11
|
+
What problem does this solve? What's the current limitation?
|
|
12
|
+
|
|
13
|
+
## Proposed solution
|
|
14
|
+
|
|
15
|
+
Describe what you'd like to happen.
|
|
16
|
+
|
|
17
|
+
## Alternatives considered
|
|
18
|
+
|
|
19
|
+
What other approaches did you consider?
|
|
20
|
+
|
|
21
|
+
## Additional context
|
|
22
|
+
|
|
23
|
+
Any examples, screenshots, or references that help explain the request.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
## Summary
|
|
2
|
+
|
|
3
|
+
<!-- What changed and why? Keep it brief: 1-3 bullet points. -->
|
|
4
|
+
|
|
5
|
+
## Type of change
|
|
6
|
+
|
|
7
|
+
- [ ] Bug fix
|
|
8
|
+
- [ ] New feature
|
|
9
|
+
- [ ] Documentation update
|
|
10
|
+
- [ ] Agent behavior change
|
|
11
|
+
- [ ] Knowledge base update
|
|
12
|
+
- [ ] CLI change
|
|
13
|
+
|
|
14
|
+
## Checklist
|
|
15
|
+
|
|
16
|
+
- [ ] Tests pass (`uv run pytest tests/ -v`)
|
|
17
|
+
- [ ] Lint clean (`uv run ruff check src/ tests/ evals/`)
|
|
18
|
+
- [ ] Format clean (`uv run ruff format --check src/ tests/ evals/`)
|
|
19
|
+
- [ ] Type check passes (`uv run pyright`)
|
|
20
|
+
- [ ] Eval structure valid (`uv run python evals/run_eval.py check-structure`)
|
|
21
|
+
- [ ] Updated evals if agent behavior changed
|
|
22
|
+
- [ ] Updated docs if user-facing behavior changed
|
|
23
|
+
|
|
24
|
+
## Test plan
|
|
25
|
+
|
|
26
|
+
<!-- How can a reviewer verify this works? -->
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: github-actions
|
|
4
|
+
directory: /
|
|
5
|
+
schedule:
|
|
6
|
+
interval: weekly
|
|
7
|
+
groups:
|
|
8
|
+
actions:
|
|
9
|
+
patterns:
|
|
10
|
+
- "*"
|
|
11
|
+
commit-message:
|
|
12
|
+
prefix: chore
|
|
13
|
+
include: scope
|
|
14
|
+
|
|
15
|
+
- package-ecosystem: pip
|
|
16
|
+
directory: /
|
|
17
|
+
schedule:
|
|
18
|
+
interval: weekly
|
|
19
|
+
open-pull-requests-limit: 5
|
|
20
|
+
commit-message:
|
|
21
|
+
prefix: chore
|
|
22
|
+
include: scope
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
concurrency:
|
|
12
|
+
group: ci-${{ github.ref }}
|
|
13
|
+
cancel-in-progress: true
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
docs-quality:
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
timeout-minutes: 15
|
|
19
|
+
|
|
20
|
+
steps:
|
|
21
|
+
- name: Checkout
|
|
22
|
+
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
|
23
|
+
with:
|
|
24
|
+
persist-credentials: false
|
|
25
|
+
|
|
26
|
+
- name: Set up uv
|
|
27
|
+
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
|
28
|
+
with:
|
|
29
|
+
enable-cache: true
|
|
30
|
+
|
|
31
|
+
- name: Install Vale
|
|
32
|
+
run: |
|
|
33
|
+
set -euo pipefail
|
|
34
|
+
mkdir -p "$RUNNER_TEMP/bin"
|
|
35
|
+
GOBIN="$RUNNER_TEMP/bin" go install github.com/errata-ai/vale/v3/cmd/vale@v3.9.6
|
|
36
|
+
echo "$RUNNER_TEMP/bin" >> "$GITHUB_PATH"
|
|
37
|
+
|
|
38
|
+
- name: Documentation lint
|
|
39
|
+
run: make docs-lint
|
|
40
|
+
|
|
41
|
+
validate:
|
|
42
|
+
# Matrix scope is event-aware to keep push CI fast without losing coverage:
|
|
43
|
+
# - pull_request OR push to main → full matrix (2 OS × 3 Python = 6 jobs)
|
|
44
|
+
# - push to a feature branch → ubuntu × py3.12 only (1 job)
|
|
45
|
+
runs-on: ${{ matrix.os }}
|
|
46
|
+
timeout-minutes: 35
|
|
47
|
+
strategy:
|
|
48
|
+
fail-fast: false
|
|
49
|
+
matrix:
|
|
50
|
+
os: ${{ fromJSON((github.event_name == 'pull_request' || github.ref == 'refs/heads/main') && '["ubuntu-latest", "macos-latest"]' || '["ubuntu-latest"]') }}
|
|
51
|
+
python-version: ${{ fromJSON((github.event_name == 'pull_request' || github.ref == 'refs/heads/main') && '["3.12", "3.13", "3.14"]' || '["3.12"]') }}
|
|
52
|
+
|
|
53
|
+
steps:
|
|
54
|
+
- name: Checkout
|
|
55
|
+
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
|
56
|
+
with:
|
|
57
|
+
persist-credentials: false
|
|
58
|
+
|
|
59
|
+
- name: Set up Python
|
|
60
|
+
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
|
61
|
+
with:
|
|
62
|
+
python-version: ${{ matrix.python-version }}
|
|
63
|
+
|
|
64
|
+
- name: Set up uv
|
|
65
|
+
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
|
66
|
+
with:
|
|
67
|
+
enable-cache: true
|
|
68
|
+
|
|
69
|
+
- name: Install dependencies
|
|
70
|
+
run: uv sync --dev
|
|
71
|
+
|
|
72
|
+
- name: Format check
|
|
73
|
+
run: make format-check
|
|
74
|
+
|
|
75
|
+
- name: Lint
|
|
76
|
+
run: make lint
|
|
77
|
+
|
|
78
|
+
- name: ShellCheck harness scripts
|
|
79
|
+
run: uvx pre-commit==4.3.0 run shellcheck --all-files
|
|
80
|
+
|
|
81
|
+
- name: Lint GitHub workflows (actionlint)
|
|
82
|
+
# OS/Python-independent — run once, not across the matrix.
|
|
83
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
|
|
84
|
+
run: uvx pre-commit==4.3.0 run actionlint --all-files
|
|
85
|
+
|
|
86
|
+
- name: Type check
|
|
87
|
+
run: make typecheck
|
|
88
|
+
|
|
89
|
+
- name: Run tests with coverage
|
|
90
|
+
# Direct invocation (not `make test`): Codecov needs --cov-report=xml,
|
|
91
|
+
# which the local target intentionally omits.
|
|
92
|
+
run: uv run pytest -q --cov=src --cov-report=xml --cov-fail-under=95
|
|
93
|
+
|
|
94
|
+
- name: Upload coverage to Codecov
|
|
95
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
|
|
96
|
+
uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 # v6.0.1
|
|
97
|
+
with:
|
|
98
|
+
files: ./coverage.xml
|
|
99
|
+
fail_ci_if_error: false
|
|
100
|
+
|
|
101
|
+
- name: Architecture enforcement
|
|
102
|
+
run: uv run pytest tests/unit/test_architecture.py -v
|
|
103
|
+
|
|
104
|
+
- name: Size budget — CLAUDE.md
|
|
105
|
+
run: make claude-md-size
|
|
106
|
+
|
|
107
|
+
- name: Size budget — agent files
|
|
108
|
+
run: make agent-size
|
|
109
|
+
|
|
110
|
+
- name: KB frontmatter contract
|
|
111
|
+
# OS/Python-independent content check — run once, not across the matrix.
|
|
112
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
|
|
113
|
+
run: make kb-frontmatter
|
|
114
|
+
|
|
115
|
+
- name: Check eval structure
|
|
116
|
+
run: make eval-structure
|
|
117
|
+
|
|
118
|
+
- name: Calibrate structural evals
|
|
119
|
+
run: make eval-calibrate
|
|
120
|
+
|
|
121
|
+
- name: Validate eval samples
|
|
122
|
+
run: make eval-validate
|
|
123
|
+
|
|
124
|
+
- name: Secret scan
|
|
125
|
+
run: |
|
|
126
|
+
# Mirrors the shipped local hook coverage so contributors cannot
|
|
127
|
+
# bypass secret scanning by skipping local pre-commit installation.
|
|
128
|
+
uvx pre-commit==4.3.0 run detect-private-key --all-files
|
|
129
|
+
uvx pre-commit==4.3.0 run gitleaks --all-files
|
|
130
|
+
|
|
131
|
+
- name: Dependency vulnerability scan
|
|
132
|
+
run: |
|
|
133
|
+
# Backs the security.md § Dependencies & Supply Chain claim that
|
|
134
|
+
# dependency scanning runs in CI. pip-audit fails the run on any
|
|
135
|
+
# known vulnerability; `--strict` also fails it if a dependency cannot be audited. `pip-audit` is pinned (same rule as
|
|
136
|
+
# Actions): a floating `uvx pip-audit` would let upstream changes
|
|
137
|
+
# silently alter the build's security posture.
|
|
138
|
+
uv export --no-hashes --no-dev --no-emit-project > /tmp/runtime-requirements.txt
|
|
139
|
+
uvx pip-audit==2.10.0 --strict --requirement /tmp/runtime-requirements.txt
|
|
140
|
+
|
|
141
|
+
- name: Static security scan (Bandit)
|
|
142
|
+
run: |
|
|
143
|
+
# Bandit catches Python-specific security smells: hardcoded
|
|
144
|
+
# passwords, weak crypto, unsafe `subprocess`/`eval`, `assert` in
|
|
145
|
+
# production paths, etc. Severity floor is medium; confidence
|
|
146
|
+
# floor is medium to avoid noise from style-level findings.
|
|
147
|
+
#
|
|
148
|
+
# `src/` is scanned strictly. `tests/` and `evals/` are scanned
|
|
149
|
+
# with B101 (assert_used) skipped — pytest depends on `assert`,
|
|
150
|
+
# so flagging it would be noise; everything else still applies.
|
|
151
|
+
uvx bandit==1.9.4 -r src/ -ll -ii
|
|
152
|
+
uvx bandit==1.9.4 -r tests/ evals/ -ll -ii --skip B101
|
|
153
|
+
|
|
154
|
+
- name: Smoke-test console script (source)
|
|
155
|
+
run: |
|
|
156
|
+
set -euo pipefail
|
|
157
|
+
uv run ai-playbook list >/dev/null
|
|
158
|
+
for tool in claude copilot cursor kiro; do
|
|
159
|
+
tmpdir="$(mktemp -d)"
|
|
160
|
+
uv run ai-playbook deploy --agent all --tool "$tool" -t "$tmpdir" --no-mcp >/dev/null
|
|
161
|
+
done
|
|
162
|
+
|
|
163
|
+
- name: Build wheel and smoke-test installed artifact
|
|
164
|
+
run: |
|
|
165
|
+
set -euo pipefail
|
|
166
|
+
uv build
|
|
167
|
+
uvx twine==6.2.0 check dist/*
|
|
168
|
+
# Install the built wheel into a clean venv so we exercise what users get.
|
|
169
|
+
venv="$(mktemp -d)/venv"
|
|
170
|
+
uv venv "$venv"
|
|
171
|
+
# shellcheck disable=SC1091
|
|
172
|
+
source "$venv/bin/activate"
|
|
173
|
+
uv pip install dist/*.whl
|
|
174
|
+
ai-playbook list >/dev/null
|
|
175
|
+
for tool in claude copilot cursor kiro; do
|
|
176
|
+
tmpdir="$(mktemp -d)"
|
|
177
|
+
ai-playbook deploy --agent all --tool "$tool" -t "$tmpdir" --no-mcp >/dev/null
|
|
178
|
+
ai-playbook doctor --tool "$tool" -t "$tmpdir" >/dev/null
|
|
179
|
+
done
|
|
180
|
+
deactivate
|
|
181
|
+
|
|
182
|
+
commit-hygiene:
|
|
183
|
+
# Teach-back / Conventional Commit trailer is enforced locally by the
|
|
184
|
+
# `commit-msg` hook (harness/check-teachback.sh). GitHub does not run
|
|
185
|
+
# commit-msg hooks, so this job backstops it: a contributor who skips hook
|
|
186
|
+
# install or commits with `--no-verify` cannot land non-trivial commits
|
|
187
|
+
# without the trailer. PR-only — main's pre-re-init history is squashed and
|
|
188
|
+
# exempt (see CONTRIBUTING.md § Git History).
|
|
189
|
+
if: github.event_name == 'pull_request'
|
|
190
|
+
runs-on: ubuntu-latest
|
|
191
|
+
timeout-minutes: 10
|
|
192
|
+
steps:
|
|
193
|
+
- name: Checkout (full history for the PR range)
|
|
194
|
+
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
|
195
|
+
with:
|
|
196
|
+
fetch-depth: 0
|
|
197
|
+
persist-credentials: false
|
|
198
|
+
|
|
199
|
+
- name: Teach-back trailer on PR commits
|
|
200
|
+
env:
|
|
201
|
+
BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
|
202
|
+
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
|
203
|
+
run: |
|
|
204
|
+
set -euo pipefail
|
|
205
|
+
# Reuse the exact script the local commit-msg hook runs, so CI and
|
|
206
|
+
# local stay in lockstep. Validate every non-merge commit the PR adds;
|
|
207
|
+
# merge commits carry no Conventional Commit subject and are skipped.
|
|
208
|
+
fail=0
|
|
209
|
+
for sha in $(git rev-list --no-merges "${BASE_SHA}..${HEAD_SHA}"); do
|
|
210
|
+
msg="$(mktemp)"
|
|
211
|
+
git log -1 --format=%B "$sha" > "$msg"
|
|
212
|
+
if ! harness/check-teachback.sh "$msg"; then
|
|
213
|
+
echo "::error::commit ${sha} failed the Teach-back / Conventional Commit check"
|
|
214
|
+
fail=1
|
|
215
|
+
fi
|
|
216
|
+
done
|
|
217
|
+
exit "$fail"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# CodeQL analysis of the Python sources and the workflow files themselves.
# Runs on pushes and pull requests targeting main, plus a weekly schedule.
name: CodeQL

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  schedule:
    # Mondays 06:00 UTC.
    - cron: '0 6 * * 1'

# Workflow-level token scope; the analyze job declares its own set below.
permissions:
  contents: read

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: codeql-${{ github.ref }}
  cancel-in-progress: true

jobs:
  analyze:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    permissions:
      actions: read
      contents: read
      packages: read
      security-events: write

    strategy:
      # Let each language finish even if the other one fails.
      fail-fast: false
      matrix:
        language:
          - python
          - actions

    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Initialize CodeQL
        uses: github/codeql-action/init@458d36d7d4f47d0dd16ca424c1d3cda0060f1360  # v3
        with:
          languages: ${{ matrix.language }}
          queries: security-and-quality

      - name: Perform CodeQL analysis
        uses: github/codeql-action/analyze@458d36d7d4f47d0dd16ca424c1d3cda0060f1360  # v3
        with:
          # Separate category per matrix language.
          category: /language:${{ matrix.language }}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
name: Eval drift

on:
  # Opt-in only. This job calls the Anthropic API (billable, needs the
  # ANTHROPIC_API_KEY secret), so it does not run on a schedule by default —
  # trigger it manually from the Actions tab when you want a drift check.
  # To restore automatic weekly runs, uncomment the schedule below once a key
  # is configured.
  workflow_dispatch:
  # schedule:
  #   - cron: "0 6 * * 1"  # Mondays 06:00 UTC — semantic drift detection

permissions:
  contents: read

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: eval-drift-${{ github.ref }}
  cancel-in-progress: true

jobs:
  judge:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: "3.12"

      - name: Set up uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --dev

      - name: Dependency vulnerability scan
        run: |
          # The push/PR validation suite does not run pip-audit; this opt-in
          # job does, so a manual drift run also surfaces any vulnerable dep
          # that has landed on main since the last release.
          uv export --no-hashes --no-dev --no-emit-project > /tmp/runtime-requirements.txt
          uvx pip-audit==2.10.0 --strict --requirement /tmp/runtime-requirements.txt

      - name: Check committed baselines
        run: uv run python evals/run_eval.py validate-samples

      - name: Check for ANTHROPIC_API_KEY
        id: secret
        env:
          # Only a presence flag ('1' or empty) reaches this step, never the
          # secret value itself.
          HAS_KEY: ${{ secrets.ANTHROPIC_API_KEY != '' && '1' || '' }}
        run: |
          # Detect-only — do NOT inject the secret into env here. The judge
          # step below is the single point that touches ANTHROPIC_API_KEY,
          # narrowing the surface that matters for secret-exfil review.
          if [ -z "${HAS_KEY}" ]; then
            echo "available=false" >> "$GITHUB_OUTPUT"
            echo "::error::ANTHROPIC_API_KEY secret not set — eval-drift cannot run the semantic judge."
            exit 1
          else
            echo "available=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Run LLM judge on committed baselines
        if: steps.secret.outputs.available == 'true'
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          set -euo pipefail
          # nullglob: a glob with no matches expands to nothing, so the
          # loops below simply do not run for an empty directory.
          shopt -s nullglob
          # Iterate samples but skip README.md — the README documents the
          # convention; only files named `<agent>.md` are baselines, and each
          # must match an `evals/<agent>-expected.md` rubric.
          samples=()
          for path in evals/samples/*.md; do
            [ "$(basename "$path")" = "README.md" ] && continue
            samples+=("$path")
          done
          fail=0
          mkdir -p judge-output
          for sample in "${samples[@]}"; do
            agent="$(basename "$sample" .md)"
            echo "::group::judge $agent"
            # Tee per-agent verdicts to files so the artifact upload below
            # makes drift trendable across runs instead of buried in
            # job logs. pipefail (set above) preserves the judge exit code.
            if ! uv run python evals/run_eval.py judge "$agent" "$sample" 2>&1 \
                | tee "judge-output/${agent}.txt"; then
              fail=1
            fi
            echo "::endgroup::"
          done
          # Adversarial baselines (curated hostile-input handling) must
          # PASS the judge, exactly like the standard baselines above.
          for sample in evals/samples/adversarial/*.md; do
            name="$(basename "$sample" .md)"
            [ "$name" = "README" ] && continue
            echo "::group::judge $name (adversarial)"
            # Verdicts get an `adversarial-` prefix, mirroring the
            # `negative-` prefix below, so an adversarial sample that shares
            # a basename with a standard baseline cannot overwrite that
            # baseline's verdict file in the uploaded artifact.
            if ! uv run python evals/run_eval.py judge "$name" "$sample" 2>&1 \
                | tee "judge-output/adversarial-${name}.txt"; then
              fail=1
            fi
            echo "::endgroup::"
          done
          # Negative controls are deliberately-flawed outputs: the judge
          # must FAIL them. A pass here means the judge has gone lenient
          # (or the rubric eroded) — the one drift direction the positive
          # baselines above cannot detect. Exit code is inverted.
          # NOTE(review): any non-zero exit from the judge counts as the
          # expected failure here — presumably including crashes or API
          # errors; confirm run_eval.py distinguishes those from a verdict.
          for sample in evals/samples/negative/*.md; do
            name="$(basename "$sample" .md)"
            [ "$name" = "README" ] && continue
            echo "::group::judge $name (negative control — must fail)"
            if uv run python evals/run_eval.py judge "$name" "$sample" 2>&1 \
                | tee "judge-output/negative-${name}.txt"; then
              echo "::error::negative control '${name}' PASSED the judge — leniency drift or rubric erosion; investigate before trusting this week's green verdicts"
              fail=1
            fi
            echo "::endgroup::"
          done
          exit "$fail"

      - name: Upload judge verdicts
        # always(): upload whatever verdicts exist even when the judge step
        # failed; skipped only when the secret check did not report a key.
        if: always() && steps.secret.outputs.available == 'true'
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: judge-verdicts-${{ github.run_id }}
          path: judge-output/
          retention-days: 90
          if-no-files-found: ignore
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# External link check, run once a week.
#
# The pre-commit lychee hook runs `--offline`, so it only validates internal
# cross-references; that keeps commits fast and independent of the network.
# Nothing else checks the external URLs in the docs, so dead links would
# otherwise rot unnoticed. This workflow runs the same lychee config online
# on a weekly schedule. A failure surfaces as a red scheduled run for
# maintainers to triage. It is non-blocking by design — it gates no PR.

name: Link check (external)

on:
  workflow_dispatch:
  schedule:
    - cron: '0 6 * * 1'  # Mondays 06:00 UTC, alongside the other weekly scans

permissions:
  contents: read

jobs:
  linkcheck:
    timeout-minutes: 15
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Check external links
        uses: lycheeverse/lychee-action@8646ba30535128ac92d33dfc9133794bfdd9b411  # v2.8.0
        with:
          # Folded scalar: the lines below join into one space-separated
          # argument string.
          args: >-
            --config .lychee.toml
            --no-progress
            './**/*.md'
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Mutation testing with mutmut, compared against the committed baseline.
name: Mutation testing

on:
  workflow_dispatch:
  schedule:
    # Mondays 07:00 UTC — an hour after the 06:00 weekly slot (which eval
    # drift also uses when its schedule is enabled), to keep expensive
    # checks separate.
    - cron: '0 7 * * 1'
  pull_request:
    # Only PRs that touch one of these paths trigger a run.
    paths:
      - 'src/deploy_ai_playbook/**'
      - 'tests/**'
      - 'pyproject.toml'
      - 'uv.lock'
      - 'mutation-baseline.json'
      - 'tools/check-mutation-baseline.py'
      - '.github/workflows/mutation.yml'

permissions:
  contents: read

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: mutation-${{ github.ref }}
  cancel-in-progress: true

jobs:
  mutation:
    timeout-minutes: 60
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.12'

      - name: Set up uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          enable-cache: true

      - name: Install dependencies
        run: uv sync --dev

      - name: Run mutation tests
        run: uv run mutmut run --max-children 4

      - name: Export mutation stats
        run: uv run mutmut export-cicd-stats

      - name: Check mutation baseline
        run: >-
          uv run python tools/check-mutation-baseline.py
          mutants/mutmut-cicd-stats.json
          mutation-baseline.json

      - name: Print mutation stats
        # always(): print the stats even when an earlier step failed.
        if: always()
        run: |
          # Nothing to print when the stats file was never produced.
          [ ! -f mutants/mutmut-cicd-stats.json ] || cat mutants/mutmut-cicd-stats.json
|